author		Stephen Rothwell <sfr@canb.auug.org.au>	2014-05-16 16:27:24 +1000
committer	Stephen Rothwell <sfr@canb.auug.org.au>	2014-05-16 16:27:27 +1000
commit		64acc0549fea1b404b88c1354813886364902488 (patch)
tree		a90961be933d562e42a0e592ead836a225653e65
parent		0856ad1ef175c45dc6a0bc629ca355fdfbb1001e (diff)
parent		6456b00c4f4ac3774506ff54242a8849172cb650 (diff)
Merge branch 'akpm-current/current'
Conflicts:
	arch/x86/kernel/apic/hw_nmi.c
	mm/memblock.c
	mm/memcontrol.c
-rw-r--r--  CREDITS | 7
-rw-r--r--  Documentation/CodingStyle | 22
-rw-r--r--  Documentation/SubmittingPatches | 22
-rw-r--r--  Documentation/cgroups/memory.txt | 62
-rw-r--r--  Documentation/devicetree/bindings/rtc/xgene-rtc.txt | 28
-rw-r--r--  Documentation/filesystems/vfat.txt | 15
-rw-r--r--  Documentation/kernel-parameters.txt | 19
-rw-r--r--  Documentation/kmemleak.txt | 1
-rw-r--r--  Documentation/leds/leds-class.txt | 3
-rw-r--r--  Documentation/memory-hotplug.txt | 125
-rw-r--r--  Documentation/printk-formats.txt | 6
-rw-r--r--  Documentation/sysctl/kernel.txt | 21
-rw-r--r--  Documentation/sysctl/vm.txt | 17
-rw-r--r--  Documentation/vm/remap_file_pages.txt | 27
-rw-r--r--  MAINTAINERS | 12
-rw-r--r--  arch/alpha/include/asm/Kbuild | 1
-rw-r--r--  arch/alpha/include/asm/scatterlist.h | 6
-rw-r--r--  arch/arc/kernel/troubleshoot.c | 10
-rw-r--r--  arch/arm/Kconfig | 1
-rw-r--r--  arch/arm/include/asm/Kbuild | 1
-rw-r--r--  arch/arm/include/asm/scatterlist.h | 12
-rw-r--r--  arch/arm64/Kconfig | 1
-rw-r--r--  arch/arm64/boot/dts/apm-storm.dtsi | 21
-rw-r--r--  arch/blackfin/include/asm/unistd.h | 1
-rw-r--r--  arch/cris/include/asm/Kbuild | 1
-rw-r--r--  arch/cris/include/asm/scatterlist.h | 6
-rw-r--r--  arch/cris/include/asm/unistd.h | 1
-rw-r--r--  arch/frv/include/asm/Kbuild | 1
-rw-r--r--  arch/frv/include/asm/scatterlist.h | 6
-rw-r--r--  arch/frv/include/asm/unistd.h | 1
-rw-r--r--  arch/ia64/Kconfig | 1
-rw-r--r--  arch/ia64/include/asm/Kbuild | 1
-rw-r--r--  arch/ia64/include/asm/scatterlist.h | 7
-rw-r--r--  arch/ia64/include/asm/topology.h | 3
-rw-r--r--  arch/m32r/include/asm/Kbuild | 1
-rw-r--r--  arch/m32r/include/asm/scatterlist.h | 6
-rw-r--r--  arch/m68k/include/asm/signal.h | 9
-rw-r--r--  arch/m68k/include/asm/unistd.h | 1
-rw-r--r--  arch/m68k/kernel/sys_m68k.c | 18
-rw-r--r--  arch/microblaze/include/asm/Kbuild | 1
-rw-r--r--  arch/microblaze/include/asm/scatterlist.h | 1
-rw-r--r--  arch/microblaze/include/asm/unistd.h | 1
-rw-r--r--  arch/mips/dec/Makefile | 2
-rw-r--r--  arch/mips/dec/platform.c | 44
-rw-r--r--  arch/mips/include/asm/unistd.h | 1
-rw-r--r--  arch/mips/kernel/traps.c | 2
-rw-r--r--  arch/mips/mm/c-octeon.c | 2
-rw-r--r--  arch/mn10300/include/asm/Kbuild | 1
-rw-r--r--  arch/mn10300/include/asm/scatterlist.h | 16
-rw-r--r--  arch/mn10300/include/asm/unistd.h | 1
-rw-r--r--  arch/parisc/include/asm/unistd.h | 1
-rw-r--r--  arch/powerpc/Kconfig | 1
-rw-r--r--  arch/powerpc/include/asm/Kbuild | 1
-rw-r--r--  arch/powerpc/include/asm/pgtable.h | 6
-rw-r--r--  arch/powerpc/include/asm/scatterlist.h | 17
-rw-r--r--  arch/powerpc/include/asm/topology.h | 8
-rw-r--r--  arch/powerpc/include/asm/unistd.h | 1
-rw-r--r--  arch/powerpc/mm/dma-noncoherent.c | 1
-rw-r--r--  arch/powerpc/mm/subpage-prot.c | 6
-rw-r--r--  arch/powerpc/platforms/44x/warp.c | 1
-rw-r--r--  arch/powerpc/platforms/52xx/efika.c | 1
-rw-r--r--  arch/powerpc/platforms/amigaone/setup.c | 1
-rw-r--r--  arch/s390/Kconfig | 1
-rw-r--r--  arch/s390/include/asm/Kbuild | 1
-rw-r--r--  arch/s390/include/asm/scatterlist.h | 3
-rw-r--r--  arch/score/include/asm/Kbuild | 1
-rw-r--r--  arch/score/include/asm/scatterlist.h | 6
-rw-r--r--  arch/sh/include/asm/unistd.h | 1
-rw-r--r--  arch/sh/kernel/hw_breakpoint.c | 4
-rw-r--r--  arch/sh/kernel/kprobes.c | 30
-rw-r--r--  arch/sh/kernel/localtimer.c | 2
-rw-r--r--  arch/sh/kernel/perf_event.c | 8
-rw-r--r--  arch/sh/kernel/smp.c | 2
-rw-r--r--  arch/sparc/Kconfig | 1
-rw-r--r--  arch/sparc/include/asm/Kbuild | 1
-rw-r--r--  arch/sparc/include/asm/irq_64.h | 2
-rw-r--r--  arch/sparc/include/asm/scatterlist.h | 8
-rw-r--r--  arch/sparc/include/asm/unistd.h | 1
-rw-r--r--  arch/sparc/kernel/process_64.c | 18
-rw-r--r--  arch/tile/mm/homecache.c | 2
-rw-r--r--  arch/um/include/asm/Kbuild | 1
-rw-r--r--  arch/unicore32/mm/ioremap.c | 4
-rw-r--r--  arch/x86/Kconfig | 7
-rw-r--r--  arch/x86/include/asm/Kbuild | 3
-rw-r--r--  arch/x86/include/asm/irq.h | 2
-rw-r--r--  arch/x86/include/asm/pgtable-2level.h | 59
-rw-r--r--  arch/x86/include/asm/pgtable.h | 20
-rw-r--r--  arch/x86/include/asm/pgtable_64.h | 8
-rw-r--r--  arch/x86/include/asm/pgtable_types.h | 66
-rw-r--r--  arch/x86/include/asm/scatterlist.h | 8
-rw-r--r--  arch/x86/include/asm/signal.h | 6
-rw-r--r--  arch/x86/include/asm/swiotlb.h | 7
-rw-r--r--  arch/x86/include/asm/unistd.h | 1
-rw-r--r--  arch/x86/kernel/amd_gart_64.c | 2
-rw-r--r--  arch/x86/kernel/apic/hw_nmi.c | 17
-rw-r--r--  arch/x86/kernel/pci-dma.c | 11
-rw-r--r--  arch/x86/kernel/pci-swiotlb.c | 9
-rw-r--r--  arch/x86/kernel/setup.c | 2
-rw-r--r--  arch/x86/mm/init_64.c | 34
-rw-r--r--  arch/x86/mm/numa.c | 6
-rw-r--r--  arch/x86/mm/pageattr-test.c | 2
-rw-r--r--  arch/x86/pci/sta2x11-fixup.c | 6
-rw-r--r--  block/genhd.c | 2
-rw-r--r--  drivers/base/Kconfig | 2
-rw-r--r--  drivers/base/dma-contiguous.c | 42
-rw-r--r--  drivers/base/memory.c | 12
-rw-r--r--  drivers/block/brd.c | 16
-rw-r--r--  drivers/block/zram/zram_drv.c | 4
-rw-r--r--  drivers/gpu/drm/exynos/exynos_drm_g2d.c | 6
-rw-r--r--  drivers/input/Kconfig | 9
-rw-r--r--  drivers/input/Makefile | 3
-rw-r--r--  drivers/input/input.c | 6
-rw-r--r--  drivers/input/leds.c | 249
-rw-r--r--  drivers/iommu/intel-iommu.c | 33
-rw-r--r--  drivers/leds/Kconfig | 3
-rw-r--r--  drivers/misc/ti-st/st_core.c | 2
-rw-r--r--  drivers/net/irda/donauboe.c | 15
-rw-r--r--  drivers/rapidio/devices/tsi721.c | 11
-rw-r--r--  drivers/rtc/Kconfig | 41
-rw-r--r--  drivers/rtc/Makefile | 4
-rw-r--r--  drivers/rtc/interface.c | 14
-rw-r--r--  drivers/rtc/rtc-88pm860x.c | 3
-rw-r--r--  drivers/rtc/rtc-at91rm9200.c | 14
-rw-r--r--  drivers/rtc/rtc-bfin.c | 16
-rw-r--r--  drivers/rtc/rtc-cmos.c | 85
-rw-r--r--  drivers/rtc/rtc-da9052.c | 122
-rw-r--r--  drivers/rtc/rtc-da9063.c | 333
-rw-r--r--  drivers/rtc/rtc-ds1343.c | 679
-rw-r--r--  drivers/rtc/rtc-ds1742.c | 2
-rw-r--r--  drivers/rtc/rtc-efi.c | 2
-rw-r--r--  drivers/rtc/rtc-hym8563.c | 2
-rw-r--r--  drivers/rtc/rtc-isl12057.c | 2
-rw-r--r--  drivers/rtc/rtc-m41t80.c | 104
-rw-r--r--  drivers/rtc/rtc-mcp795.c | 199
-rw-r--r--  drivers/rtc/rtc-mv.c | 2
-rw-r--r--  drivers/rtc/rtc-omap.c | 71
-rw-r--r--  drivers/rtc/rtc-palmas.c | 2
-rw-r--r--  drivers/rtc/rtc-puv3.c | 4
-rw-r--r--  drivers/rtc/rtc-xgene.c | 278
-rw-r--r--  drivers/tty/Kconfig | 4
-rw-r--r--  drivers/tty/sysrq.c | 21
-rw-r--r--  drivers/tty/vt/keyboard.c | 110
-rw-r--r--  drivers/video/backlight/backlight.c | 2
-rw-r--r--  fs/9p/v9fs.c | 2
-rw-r--r--  fs/9p/vfs_dir.c | 1
-rw-r--r--  fs/9p/vfs_file.c | 4
-rw-r--r--  fs/9p/vfs_inode.c | 6
-rw-r--r--  fs/9p/vfs_inode_dotl.c | 7
-rw-r--r--  fs/affs/affs.h | 16
-rw-r--r--  fs/affs/amigaffs.c | 15
-rw-r--r--  fs/affs/bitmap.c | 21
-rw-r--r--  fs/affs/dir.c | 8
-rw-r--r--  fs/affs/file.c | 46
-rw-r--r--  fs/affs/inode.c | 14
-rw-r--r--  fs/affs/namei.c | 39
-rw-r--r--  fs/affs/super.c | 43
-rw-r--r--  fs/affs/symlink.c | 2
-rw-r--r--  fs/autofs4/dev-ioctl.c | 2
-rw-r--r--  fs/befs/btree.c | 11
-rw-r--r--  fs/befs/datastream.c | 2
-rw-r--r--  fs/befs/linuxvfs.c | 29
-rw-r--r--  fs/binfmt_elf.c | 25
-rw-r--r--  fs/binfmt_flat.c | 2
-rw-r--r--  fs/bio.c | 48
-rw-r--r--  fs/block_dev.c | 63
-rw-r--r--  fs/btrfs/extent_io.c | 11
-rw-r--r--  fs/btrfs/file.c | 5
-rw-r--r--  fs/buffer.c | 49
-rw-r--r--  fs/cachefiles/bind.c | 17
-rw-r--r--  fs/cachefiles/daemon.c | 31
-rw-r--r--  fs/cachefiles/interface.c | 3
-rw-r--r--  fs/cachefiles/internal.h | 30
-rw-r--r--  fs/cachefiles/main.c | 9
-rw-r--r--  fs/cachefiles/namei.c | 53
-rw-r--r--  fs/cachefiles/security.c | 10
-rw-r--r--  fs/cachefiles/xattr.c | 10
-rw-r--r--  fs/ceph/addr.c | 2
-rw-r--r--  fs/ceph/debugfs.c | 6
-rw-r--r--  fs/ceph/inode.c | 2
-rw-r--r--  fs/ceph/mds_client.c | 6
-rw-r--r--  fs/ceph/mdsmap.c | 2
-rw-r--r--  fs/cifs/cifsacl.c | 2
-rw-r--r--  fs/cifs/cifssmb.c | 20
-rw-r--r--  fs/cifs/sess.c | 2
-rw-r--r--  fs/cifs/smb2misc.c | 38
-rw-r--r--  fs/cifs/smb2ops.c | 2
-rw-r--r--  fs/cifs/smb2pdu.c | 2
-rw-r--r--  fs/cifs/smb2pdu.h | 28
-rw-r--r--  fs/coda/cnode.c | 4
-rw-r--r--  fs/coda/coda_linux.h | 8
-rw-r--r--  fs/coda/dir.c | 18
-rw-r--r--  fs/coda/inode.c | 29
-rw-r--r--  fs/coda/psdev.c | 39
-rw-r--r--  fs/coda/upcall.c | 14
-rw-r--r--  fs/configfs/configfs_internal.h | 6
-rw-r--r--  fs/configfs/dir.c | 8
-rw-r--r--  fs/configfs/inode.c | 5
-rw-r--r--  fs/configfs/item.c | 58
-rw-r--r--  fs/configfs/mount.c | 4
-rw-r--r--  fs/devpts/inode.c | 26
-rw-r--r--  fs/ext4/mballoc.c | 14
-rw-r--r--  fs/ext4/page-io.c | 2
-rw-r--r--  fs/f2fs/checkpoint.c | 3
-rw-r--r--  fs/f2fs/node.c | 2
-rw-r--r--  fs/fat/cache.c | 70
-rw-r--r--  fs/fat/fat.h | 9
-rw-r--r--  fs/fat/file.c | 78
-rw-r--r--  fs/fat/inode.c | 429
-rw-r--r--  fs/fscache/cache.c | 13
-rw-r--r--  fs/fscache/cookie.c | 2
-rw-r--r--  fs/fscache/histogram.c | 6
-rw-r--r--  fs/fscache/internal.h | 26
-rw-r--r--  fs/fscache/main.c | 7
-rw-r--r--  fs/fscache/netfs.c | 7
-rw-r--r--  fs/fscache/object-list.c | 8
-rw-r--r--  fs/fscache/operation.c | 3
-rw-r--r--  fs/fscache/page.c | 6
-rw-r--r--  fs/fuse/dev.c | 2
-rw-r--r--  fs/fuse/file.c | 2
-rw-r--r--  fs/gfs2/aops.c | 1
-rw-r--r--  fs/gfs2/meta_io.c | 4
-rw-r--r--  fs/hfsplus/attributes.c | 36
-rw-r--r--  fs/hfsplus/catalog.c | 89
-rw-r--r--  fs/hfsplus/dir.c | 42
-rw-r--r--  fs/hfsplus/hfsplus_fs.h | 7
-rw-r--r--  fs/hfsplus/super.c | 4
-rw-r--r--  fs/hfsplus/xattr.c | 51
-rw-r--r--  fs/hfsplus/xattr_security.c | 49
-rw-r--r--  fs/hfsplus/xattr_trusted.c | 32
-rw-r--r--  fs/hfsplus/xattr_user.c | 32
-rw-r--r--  fs/hugetlbfs/inode.c | 18
-rw-r--r--  fs/isofs/Makefile | 2
-rw-r--r--  fs/isofs/compress.c | 17
-rw-r--r--  fs/isofs/export.c | 11
-rw-r--r--  fs/isofs/inode.c | 88
-rw-r--r--  fs/isofs/namei.c | 5
-rw-r--r--  fs/isofs/rock.c | 44
-rw-r--r--  fs/jffs2/background.c | 12
-rw-r--r--  fs/jfs/jfs_logmgr.c | 2
-rw-r--r--  fs/jfs/super.c | 45
-rw-r--r--  fs/mpage.c | 107
-rw-r--r--  fs/notify/fanotify/fanotify_user.c | 20
-rw-r--r--  fs/notify/mark.c | 2
-rw-r--r--  fs/ntfs/attrib.c | 1
-rw-r--r--  fs/ntfs/compress.c | 2
-rw-r--r--  fs/ntfs/file.c | 1
-rw-r--r--  fs/ntfs/super.c | 4
-rw-r--r--  fs/ntfs/sysctl.c | 2
-rw-r--r--  fs/ocfs2/cluster/tcp.c | 33
-rw-r--r--  fs/ocfs2/dlm/dlmdebug.c | 2
-rw-r--r--  fs/ocfs2/dlm/dlmlock.c | 2
-rw-r--r--  fs/ocfs2/dlm/dlmmaster.c | 20
-rw-r--r--  fs/ocfs2/file.c | 2
-rw-r--r--  fs/ocfs2/journal.c | 17
-rw-r--r--  fs/ocfs2/namei.c | 145
-rw-r--r--  fs/ocfs2/refcounttree.c | 7
-rw-r--r--  fs/ocfs2/stackglue.c | 2
-rw-r--r--  fs/ocfs2/super.c | 8
-rw-r--r--  fs/ocfs2/uptodate.c | 2
-rw-r--r--  fs/proc/task_mmu.c | 287
-rw-r--r--  fs/pstore/platform.c | 19
-rw-r--r--  fs/pstore/ram_core.c | 36
-rw-r--r--  fs/readdir.c | 2
-rw-r--r--  fs/reiserfs/bitmap.c | 13
-rw-r--r--  fs/reiserfs/stree.c | 8
-rw-r--r--  fs/squashfs/squashfs.h | 2
-rw-r--r--  fs/ufs/balloc.c | 6
-rw-r--r--  include/asm-generic/ioctl.h | 5
-rw-r--r--  include/asm-generic/pgtable.h | 8
-rw-r--r--  include/linux/blkdev.h | 4
-rw-r--r--  include/linux/bootmem.h | 6
-rw-r--r--  include/linux/buffer_head.h | 2
-rw-r--r--  include/linux/compaction.h | 4
-rw-r--r--  include/linux/cpuset.h | 29
-rw-r--r--  include/linux/crc64_ecma.h | 56
-rw-r--r--  include/linux/dma-contiguous.h | 9
-rw-r--r--  include/linux/fs.h | 8
-rw-r--r--  include/linux/gfp.h | 14
-rw-r--r--  include/linux/hugetlb.h | 12
-rw-r--r--  include/linux/hugetlb_inline.h | 7
-rw-r--r--  include/linux/idr.h | 13
-rw-r--r--  include/linux/input.h | 21
-rw-r--r--  include/linux/jump_label.h | 20
-rw-r--r--  include/linux/kmemleak.h | 4
-rw-r--r--  include/linux/mc146818rtc.h | 4
-rw-r--r--  include/linux/memblock.h | 2
-rw-r--r--  include/linux/memcontrol.h | 41
-rw-r--r--  include/linux/memory_hotplug.h | 14
-rw-r--r--  include/linux/migrate.h | 11
-rw-r--r--  include/linux/mm.h | 53
-rw-r--r--  include/linux/mm_types.h | 2
-rw-r--r--  include/linux/mmdebug.h | 15
-rw-r--r--  include/linux/mmzone.h | 35
-rw-r--r--  include/linux/nmi.h | 11
-rw-r--r--  include/linux/page-flags.h | 5
-rw-r--r--  include/linux/pageblock-flags.h | 30
-rw-r--r--  include/linux/pagemap.h | 139
-rw-r--r--  include/linux/plist.h | 45
-rw-r--r--  include/linux/printk.h | 23
-rw-r--r--  include/linux/proc_fs.h | 4
-rw-r--r--  include/linux/ptrace.h | 32
-rw-r--r--  include/linux/res_counter.h | 40
-rw-r--r--  include/linux/rmap.h | 4
-rw-r--r--  include/linux/scatterlist.h | 2
-rw-r--r--  include/linux/sched.h | 14
-rw-r--r--  include/linux/sched/sysctl.h | 4
-rw-r--r--  include/linux/shm.h | 3
-rw-r--r--  include/linux/signal.h | 21
-rw-r--r--  include/linux/slab.h | 20
-rw-r--r--  include/linux/string.h | 1
-rw-r--r--  include/linux/swap.h | 30
-rw-r--r--  include/linux/swapfile.h | 2
-rw-r--r--  include/linux/swapops.h | 2
-rw-r--r--  include/linux/swiotlb.h | 2
-rw-r--r--  include/linux/syscalls.h | 4
-rw-r--r--  include/linux/thread_info.h | 2
-rw-r--r--  include/linux/topology.h | 3
-rw-r--r--  include/linux/vm_event_item.h | 4
-rw-r--r--  include/linux/vmstat.h | 6
-rw-r--r--  include/linux/zbud.h | 2
-rw-r--r--  include/scsi/scsi.h | 2
-rw-r--r--  include/trace/events/compaction.h | 25
-rw-r--r--  include/trace/events/gfpflags.h | 1
-rw-r--r--  include/trace/events/vmscan.h | 19
-rw-r--r--  include/uapi/linux/shm.h | 17
-rw-r--r--  init/Kconfig | 23
-rw-r--r--  init/main.c | 77
-rw-r--r--  ipc/compat.c | 2
-rw-r--r--  ipc/compat_mq.c | 2
-rw-r--r--  ipc/msg.c | 259
-rw-r--r--  ipc/sem.c | 22
-rw-r--r--  ipc/shm.c | 23
-rw-r--r--  ipc/util.c | 12
-rw-r--r--  ipc/util.h | 10
-rw-r--r--  kernel/acct.c | 6
-rw-r--r--  kernel/audit.c | 2
-rw-r--r--  kernel/backtracetest.c | 18
-rw-r--r--  kernel/capability.c | 6
-rw-r--r--  kernel/compat.c | 8
-rw-r--r--  kernel/cpu.c | 31
-rw-r--r--  kernel/cpuset.c | 14
-rw-r--r--  kernel/exec_domain.c | 14
-rw-r--r--  kernel/exit.c | 61
-rw-r--r--  kernel/fork.c | 20
-rw-r--r--  kernel/gcov/base.c | 6
-rw-r--r--  kernel/gcov/gcc_4_7.c | 5
-rw-r--r--  kernel/hung_task.c | 4
-rw-r--r--  kernel/kexec.c | 70
-rw-r--r--  kernel/kmod.c | 5
-rw-r--r--  kernel/latencytop.c | 5
-rw-r--r--  kernel/panic.c | 23
-rw-r--r--  kernel/posix-timers.c | 57
-rw-r--r--  kernel/printk/printk.c | 330
-rw-r--r--  kernel/profile.c | 20
-rw-r--r--  kernel/reboot.c | 21
-rw-r--r--  kernel/res_counter.c | 9
-rw-r--r--  kernel/sched/core.c | 2
-rw-r--r--  kernel/sched/deadline.c | 7
-rw-r--r--  kernel/sched/rt.c | 8
-rw-r--r--  kernel/seccomp.c | 4
-rw-r--r--  kernel/signal.c | 94
-rw-r--r--  kernel/smp.c | 18
-rw-r--r--  kernel/stop_machine.c | 26
-rw-r--r--  kernel/sys_ni.c | 2
-rw-r--r--  kernel/sysctl.c | 92
-rw-r--r--  kernel/time/ntp.c | 20
-rw-r--r--  kernel/time/sched_clock.c | 4
-rw-r--r--  kernel/time/timekeeping.c | 7
-rw-r--r--  kernel/tracepoint.c | 2
-rw-r--r--  kernel/user.c | 1
-rw-r--r--  kernel/user_namespace.c | 33
-rw-r--r--  kernel/utsname_sysctl.c | 4
-rw-r--r--  lib/Kconfig | 14
-rw-r--r--  lib/Kconfig.debug | 25
-rw-r--r--  lib/Makefile | 1
-rw-r--r--  lib/btree.c | 1
-rw-r--r--  lib/crc64_ecma.c | 341
-rw-r--r--  lib/idr.c | 40
-rw-r--r--  lib/libcrc32c.c | 5
-rw-r--r--  lib/plist.c | 56
-rw-r--r--  lib/radix-tree.c | 12
-rw-r--r--  lib/scatterlist.c | 4
-rw-r--r--  lib/string.c | 8
-rw-r--r--  lib/swiotlb.c | 2
-rw-r--r--  lib/vsprintf.c | 24
-rw-r--r--  lib/xz/Kconfig | 24
-rw-r--r--  lib/xz/xz_dec_lzma2.c | 4
-rw-r--r--  mm/Makefile | 2
-rw-r--r--  mm/bounce.c | 7
-rw-r--r--  mm/compaction.c | 232
-rw-r--r--  mm/dmapool.c | 27
-rw-r--r--  mm/filemap.c | 233
-rw-r--r--  mm/fremap.c | 282
-rw-r--r--  mm/frontswap.c | 13
-rw-r--r--  mm/gup.c | 662
-rw-r--r--  mm/huge_memory.c | 34
-rw-r--r--  mm/hugetlb.c | 336
-rw-r--r--  mm/internal.h | 31
-rw-r--r--  mm/kmemleak.c | 40
-rw-r--r--  mm/madvise.c | 2
-rw-r--r--  mm/memblock.c | 36
-rw-r--r--  mm/memcontrol.c | 631
-rw-r--r--  mm/memory-failure.c | 35
-rw-r--r--  mm/memory.c | 660
-rw-r--r--  mm/memory_hotplug.c | 148
-rw-r--r--  mm/mempolicy.c | 294
-rw-r--r--  mm/mempool.c | 8
-rw-r--r--  mm/migrate.c | 61
-rw-r--r--  mm/mmap.c | 96
-rw-r--r--  mm/nobootmem.c | 2
-rw-r--r--  mm/nommu.c | 13
-rw-r--r--  mm/page-writeback.c | 4
-rw-r--r--  mm/page_alloc.c | 344
-rw-r--r--  mm/page_io.c | 21
-rw-r--r--  mm/pagewalk.c | 375
-rw-r--r--  mm/rmap.c | 46
-rw-r--r--  mm/shmem.c | 8
-rw-r--r--  mm/slab.c | 45
-rw-r--r--  mm/slab.h | 48
-rw-r--r--  mm/slab_common.c | 91
-rw-r--r--  mm/slob.c | 3
-rw-r--r--  mm/slub.c | 225
-rw-r--r--  mm/swap.c | 238
-rw-r--r--  mm/swap_state.c | 2
-rw-r--r--  mm/swapfile.c | 224
-rw-r--r--  mm/util.c | 30
-rw-r--r--  mm/vmacache.c | 22
-rw-r--r--  mm/vmalloc.c | 12
-rw-r--r--  mm/vmscan.c | 202
-rw-r--r--  mm/vmstat.c | 12
-rw-r--r--  mm/zbud.c | 4
-rw-r--r--  mm/zsmalloc.c | 2
-rwxr-xr-x  scripts/checkpatch.pl | 74
-rw-r--r--  tools/testing/selftests/Makefile | 1
-rw-r--r--  tools/testing/selftests/sysctl/Makefile | 19
-rw-r--r--  tools/testing/selftests/sysctl/common_tests | 109
-rw-r--r--  tools/testing/selftests/sysctl/run_numerictests | 10
-rw-r--r--  tools/testing/selftests/sysctl/run_stringtests | 77
-rw-r--r--  usr/Kconfig | 77
439 files changed, 10095 insertions, 5605 deletions
diff --git a/CREDITS b/CREDITS
index c322dcfb926d..ce275b6f4873 100644
--- a/CREDITS
+++ b/CREDITS
@@ -1377,6 +1377,9 @@ S: 17 rue Danton
S: F - 94270 Le Kremlin-Bicêtre
S: France
+N: Jack Hammer
+D: IBM ServeRAID RAID (ips) driver maintenance
+
N: Greg Hankins
E: gregh@cc.gatech.edu
D: fixed keyboard driver to separate LED and locking status
@@ -1687,6 +1690,10 @@ S: Reading
S: RG6 2NU
S: United Kingdom
+N: Dave Jeffery
+E: dhjeffery@gmail.com
+D: SCSI hacks and IBM ServeRAID RAID driver maintenance
+
N: Jakub Jelinek
E: jakub@redhat.com
W: http://sunsite.mff.cuni.cz/~jj
diff --git a/Documentation/CodingStyle b/Documentation/CodingStyle
index 7fe0546c504a..6b6bef31e956 100644
--- a/Documentation/CodingStyle
+++ b/Documentation/CodingStyle
@@ -660,15 +660,23 @@ There are a number of driver model diagnostic macros in <linux/device.h>
which you should use to make sure messages are matched to the right device
and driver, and are tagged with the right level: dev_err(), dev_warn(),
dev_info(), and so forth. For messages that aren't associated with a
-particular device, <linux/printk.h> defines pr_debug() and pr_info().
+particular device, <linux/printk.h> defines pr_notice(), pr_info(),
+pr_warn(), pr_err(), etc.
Coming up with good debugging messages can be quite a challenge; and once
-you have them, they can be a huge help for remote troubleshooting. Such
-messages should be compiled out when the DEBUG symbol is not defined (that
-is, by default they are not included). When you use dev_dbg() or pr_debug(),
-that's automatic. Many subsystems have Kconfig options to turn on -DDEBUG.
-A related convention uses VERBOSE_DEBUG to add dev_vdbg() messages to the
-ones already enabled by DEBUG.
+you have them, they can be a huge help for remote troubleshooting. However,
+debug message printing is handled differently from printing other non-debug
+messages. While the other pr_XXX() functions print unconditionally,
+pr_debug() does not; it is compiled out by default, unless either DEBUG is
+defined or CONFIG_DYNAMIC_DEBUG is set. That is true for dev_dbg() also,
+and a related convention uses VERBOSE_DEBUG to add dev_vdbg() messages to
+the ones already enabled by DEBUG.
+
+Many subsystems have Kconfig debug options to turn on -DDEBUG in the
+corresponding Makefile; in other cases specific files #define DEBUG. And
+when a debug message should be unconditionally printed, such as if it is
+already inside a debug-related #ifdef section, printk(KERN_DEBUG ...) can be
+used.
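
As a minimal sketch of the distinction described above (the function and
message are hypothetical):

	#include <linux/printk.h>

	static void frobnicate_widget(void)
	{
		/* compiled out unless DEBUG or CONFIG_DYNAMIC_DEBUG is set */
		pr_debug("frobnicating widget\n");

		/* always compiled in; printed unconditionally at KERN_DEBUG level */
		printk(KERN_DEBUG "widget register dump follows\n");
	}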
Chapter 14: Allocating memory
diff --git a/Documentation/SubmittingPatches b/Documentation/SubmittingPatches
index 2a8e89e13e45..7e9abb8a276b 100644
--- a/Documentation/SubmittingPatches
+++ b/Documentation/SubmittingPatches
@@ -132,6 +132,20 @@ Example:
platform_set_drvdata(), but left the variable "dev" unused,
delete it.
+If your patch fixes a bug in a specific commit, e.g. you found an issue using
+git-bisect, please use the 'Fixes:' tag with the first 12 characters of the
+SHA-1 ID, and the one line summary.
+Example:
+
+ Fixes: e21d2170f366 ("video: remove unnecessary platform_set_drvdata()")
+
+The following git-config settings can be used to add a pretty format for
+outputting the above style in the git log or git show commands:
+
+ [core]
+ abbrev = 12
+ [pretty]
+ fixes = Fixes: %h (\"%s\")
3) Separate your changes.
@@ -443,7 +457,7 @@ person it names. This tag documents that potentially interested parties
have been included in the discussion
-14) Using Reported-by:, Tested-by:, Reviewed-by: and Suggested-by:
+14) Using Reported-by:, Tested-by:, Reviewed-by:, Suggested-by: and Fixes:
If this patch fixes a problem reported by somebody else, consider adding a
Reported-by: tag to credit the reporter for their contribution. Please
@@ -498,6 +512,12 @@ idea was not posted in a public forum. That said, if we diligently credit our
idea reporters, they will, hopefully, be inspired to help us again in the
future.
+A Fixes: tag indicates that the patch fixes an issue in a previous commit. It
+is used to make it easy to determine where a bug originated, which can help
+review a bug fix. This tag also assists the stable kernel team in determining
+which stable kernel versions should receive your fix. This is the preferred
+method for indicating a bug fixed by the patch. See #2 above for more details.
+
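For reference, with the [core] and [pretty] settings shown above in place, a
correctly formatted Fixes: line can be generated straight from a commit, e.g.
using the commit from the earlier example:

	$ git log -1 --pretty=fixes e21d2170f366
	Fixes: e21d2170f366 ("video: remove unnecessary platform_set_drvdata()")
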
15) The canonical patch format
diff --git a/Documentation/cgroups/memory.txt b/Documentation/cgroups/memory.txt
index 2622115276aa..8026bb70a7b8 100644
--- a/Documentation/cgroups/memory.txt
+++ b/Documentation/cgroups/memory.txt
@@ -57,6 +57,7 @@ Brief summary of control files.
memory.memsw.usage_in_bytes # show current res_counter usage for memory+Swap
(See 5.5 for details)
memory.limit_in_bytes # set/show limit of memory usage
+ memory.low_limit_in_bytes # set/show low limit for memory reclaim
memory.memsw.limit_in_bytes # set/show limit of memory+Swap usage
memory.failcnt # show the number of memory usage hits limits
memory.memsw.failcnt # show the number of memory+Swap hits limits
@@ -236,23 +237,26 @@ it by cgroup.
2.5 Reclaim
Each cgroup maintains a per cgroup LRU which has the same structure as
-global VM. When a cgroup goes over its limit, we first try
-to reclaim memory from the cgroup so as to make space for the new
-pages that the cgroup has touched. If the reclaim is unsuccessful,
-an OOM routine is invoked to select and kill the bulkiest task in the
-cgroup. (See 10. OOM Control below.)
-
-The reclaim algorithm has not been modified for cgroups, except that
-pages that are selected for reclaiming come from the per-cgroup LRU
-list.
-
-NOTE: Reclaim does not work for the root cgroup, since we cannot set any
-limits on the root cgroup.
-
-Note2: When panic_on_oom is set to "2", the whole system will panic.
-
-When oom event notifier is registered, event will be delivered.
-(See oom_control section)
+global VM. Cgroups can be reclaimed under two conditions:
+ - under global memory pressure, when all cgroups are reclaimed
+   proportionally to their LRU size in a round robin fashion
+ - when a cgroup or its hierarchical parent (see 6. Hierarchical support)
+   hits its hard limit. If the reclaim is unsuccessful, an OOM routine is
+   invoked to select and kill the bulkiest task in the hierarchy. (See
+   10. OOM Control below.)
+
+Groups might also be protected from both global and limit reclaim by the
+low_limit_in_bytes knob. If the limit is non-zero, the reclaim logic
+doesn't include groups (and their subgroups - see 6. Hierarchy support)
+which are below the low limit if there is another eligible cgroup in the
+reclaimed hierarchy. If all groups which participate in reclaim are under
+their low limits, then all of them are reclaimed and the low limit is
+ignored.
+
+Note: When panic_on_oom is set to "2", the whole system will panic.
+
+When an oom event notifier is registered, an event will be delivered to the
+root of the memory pressure which cannot be handled (See oom_control section)
2.6 Locking
@@ -270,6 +274,11 @@ When oom event notifier is registered, event will be delivered.
2.7 Kernel Memory Extension (CONFIG_MEMCG_KMEM)
+WARNING: The current implementation lacks reclaim support. That means
+ allocation attempts will fail when close to the limit even if there is
+ plenty of kmem available for reclaim. That makes this option unusable in
+ real life, so DO NOT SELECT IT except for development purposes.
+
With the Kernel memory extension, the Memory Controller is able to limit
the amount of kernel memory used by the system. Kernel memory is fundamentally
different than user memory, since it can't be swapped out, which makes it
@@ -472,6 +481,9 @@ About use_hierarchy, see Section 6.
write will still return success. In this case, it is expected that
memory.kmem.usage_in_bytes == memory.usage_in_bytes.
+ Please note that this knob is considered deprecated and will be removed
+ in the future.
+
About use_hierarchy, see Section 6.
5.2 stat file
@@ -535,16 +547,13 @@ Note:
5.3 swappiness
-Similar to /proc/sys/vm/swappiness, but affecting a hierarchy of groups only.
-Please note that unlike the global swappiness, memcg knob set to 0
-really prevents from any swapping even if there is a swap storage
-available. This might lead to memcg OOM killer if there are no file
-pages to reclaim.
+Overrides /proc/sys/vm/swappiness for the particular group. The tunable
+in the root cgroup corresponds to the global swappiness setting.
-Following cgroups' swappiness can't be changed.
-- root cgroup (uses /proc/sys/vm/swappiness).
-- a cgroup which uses hierarchy and it has other cgroup(s) below it.
-- a cgroup which uses hierarchy and not the root of hierarchy.
+Please note that unlike during global reclaim, limit reclaim enforces
+that a swappiness of 0 really prevents any swapping even if swap
+storage is available. This might lead to the memcg OOM killer if there
+are no file pages to reclaim.
5.4 failcnt
@@ -754,7 +763,6 @@ You can disable the OOM-killer by writing "1" to memory.oom_control file, as:
#echo 1 > memory.oom_control
-This operation is only allowed to the top cgroup of a sub-hierarchy.
If OOM-killer is disabled, tasks under cgroup will hang/sleep
in memory cgroup's OOM-waitqueue when they request accountable memory.
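As a usage sketch for the memory.low_limit_in_bytes knob proposed above
(assuming the memory controller is mounted at /sys/fs/cgroup/memory; the group
name "A" is hypothetical):

	# mkdir /sys/fs/cgroup/memory/A
	# echo 512M > /sys/fs/cgroup/memory/A/memory.low_limit_in_bytes
	# cat /sys/fs/cgroup/memory/A/memory.low_limit_in_bytes
	536870912
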
diff --git a/Documentation/devicetree/bindings/rtc/xgene-rtc.txt b/Documentation/devicetree/bindings/rtc/xgene-rtc.txt
new file mode 100644
index 000000000000..fd195c358446
--- /dev/null
+++ b/Documentation/devicetree/bindings/rtc/xgene-rtc.txt
@@ -0,0 +1,28 @@
+* APM X-Gene Real Time Clock
+
+RTC controller for the APM X-Gene Real Time Clock
+
+Required properties:
+- compatible : Should be "apm,xgene-rtc"
+- reg: physical base address of the controller and length of memory mapped
+ region.
+- interrupts: IRQ line for the RTC.
+- #clock-cells: Should be 1.
+- clocks: Reference to the clock entry.
+
+Example:
+
+rtcclk: rtcclk {
+ compatible = "fixed-clock";
+ #clock-cells = <1>;
+ clock-frequency = <100000000>;
+ clock-output-names = "rtcclk";
+};
+
+rtc: rtc@10510000 {
+ compatible = "apm,xgene-rtc";
+ reg = <0x0 0x10510000 0x0 0x400>;
+ interrupts = <0x0 0x46 0x4>;
+ #clock-cells = <1>;
+ clocks = <&rtcclk 0>;
+};
diff --git a/Documentation/filesystems/vfat.txt b/Documentation/filesystems/vfat.txt
index 4a93e98b290a..223c32171dcc 100644
--- a/Documentation/filesystems/vfat.txt
+++ b/Documentation/filesystems/vfat.txt
@@ -172,9 +172,24 @@ nfs=stale_rw|nostale_ro
To maintain backward compatibility, '-o nfs' is also accepted,
defaulting to stale_rw
+dos1xfloppy -- If set, use a fallback default BIOS Parameter Block
+ configuration, determined by backing device size. These static
+ parameters match defaults assumed by DOS 1.x for 160 kiB,
+ 180 kiB, 320 kiB, and 360 kiB floppies and floppy images.
+
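As a usage sketch for the dos1xfloppy option described above (the device and
mount point are hypothetical):

	# mount -t vfat -o dos1xfloppy /dev/fd0 /mnt/floppy
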
<bool>: 0,1,yes,no,true,false
+LIMITATION
+---------------------------------------------------------------------
+* The fallocated region of a file is discarded at umount/evict time
+  when using fallocate with FALLOC_FL_KEEP_SIZE.
+  So, the user should assume that the fallocated region can be discarded
+  at the last close if there is memory pressure resulting in eviction of
+  the inode from memory. As a result, for any dependency on
+  the fallocated region, the user should make sure to recheck fallocate
+  after reopening the file.
+
TODO
----------------------------------------------------------------------
* Need to get rid of the raw scanning stuff. Instead, always use
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 25a8bde18016..15b8469a7e14 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -627,8 +627,11 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
Also note the kernel might malfunction if you disable
some critical bits.
- cma=nn[MG] [ARM,KNL]
- Sets the size of kernel global memory area for contiguous
+ cma=nn[MG]@[start[MG][-end[MG]]]
+ [ARM,X86,KNL]
+ Sets the size of kernel global memory area for
+ contiguous memory allocations and optionally the
+ placement constraint by the physical address range of
memory allocations. For more information, see
include/linux/dma-contiguous.h
@@ -1306,6 +1309,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
for working out where the kernel is dying during
startup.
+ initcall_blacklist= [KNL] Do not execute a comma-separated list of
+ initcall functions. Useful for debugging built-in
+ modules and initcalls.
+
initrd= [BOOT] Specify the location of the initial ramdisk
inport.irq= [HW] Inport (ATI XL and Microsoft) busmouse driver
@@ -2351,6 +2358,14 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
timeout < 0: reboot immediately
Format: <timeout>
+ crash_kexec_post_notifiers
+ Run kdump after running panic-notifiers and dumping
+ kmsg. This is only for users who doubt that kdump always
+ succeeds in any situation.
+ Note that this also increases the risk of kdump failure,
+ because some panic notifiers can make the crashed
+ kernel more unstable.
+
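A boot command line combining the parameters added above might look like the
following sketch (the blacklisted initcall name is hypothetical):

	cma=64M@128M-1G initcall_blacklist=foo_driver_init crash_kexec_post_notifiers
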
parkbd.port= [HW] Parallel port number the keyboard adapter is
connected to, default is 0.
Format: <parport#>
diff --git a/Documentation/kmemleak.txt b/Documentation/kmemleak.txt
index a7563ec4ea7b..b772418bf064 100644
--- a/Documentation/kmemleak.txt
+++ b/Documentation/kmemleak.txt
@@ -142,6 +142,7 @@ kmemleak_alloc_percpu - notify of a percpu memory block allocation
kmemleak_free - notify of a memory block freeing
kmemleak_free_part - notify of a partial memory block freeing
kmemleak_free_percpu - notify of a percpu memory block freeing
+kmemleak_update_trace - update object allocation stack trace
kmemleak_not_leak - mark an object as not a leak
kmemleak_ignore - do not scan or report an object as leak
kmemleak_scan_area - add scan areas inside a memory block
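A minimal sketch of the new kmemleak_update_trace hook (the surrounding
function is hypothetical): it re-records the allocation stack trace for an
object, e.g. after ownership of a pre-allocated object is transferred:

	#include <linux/kmemleak.h>

	static void adopt_buffer(void *buf)
	{
		/* make kmemleak report this call site instead of the
		 * original allocation site for the object */
		kmemleak_update_trace(buf);
	}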
diff --git a/Documentation/leds/leds-class.txt b/Documentation/leds/leds-class.txt
index 79699c200766..62261c04060a 100644
--- a/Documentation/leds/leds-class.txt
+++ b/Documentation/leds/leds-class.txt
@@ -2,9 +2,6 @@
LED handling under Linux
========================
-If you're reading this and thinking about keyboard leds, these are
-handled by the input subsystem and the led class is *not* needed.
-
In its simplest form, the LED class just allows control of LEDs from
userspace. LEDs appear in /sys/class/leds/. The maximum brightness of the
LED is defined in max_brightness file. The brightness file will set the brightness
diff --git a/Documentation/memory-hotplug.txt b/Documentation/memory-hotplug.txt
index 58340d50f8a6..f304edb8fbe7 100644
--- a/Documentation/memory-hotplug.txt
+++ b/Documentation/memory-hotplug.txt
@@ -88,16 +88,21 @@ phase by hand.
1.3. Unit of Memory online/offline operation
------------
-Memory hotplug uses SPARSEMEM memory model. SPARSEMEM divides the whole memory
-into chunks of the same size. The chunk is called a "section". The size of
-a section is architecture dependent. For example, power uses 16MiB, ia64 uses
-1GiB. The unit of online/offline operation is "one section". (see Section 3.)
+Memory hotplug uses SPARSEMEM memory model which allows memory to be divided
+into chunks of the same size. These chunks are called "sections". The size of
+a memory section is architecture dependent. For example, power uses 16MiB, ia64
+uses 1GiB.
-To determine the size of sections, please read this file:
+Memory sections are combined into chunks referred to as "memory blocks". The
+size of a memory block is architecture dependent and represents the logical
+unit upon which memory online/offline operations are to be performed. The
+default size of a memory block is the same as memory section size unless an
+architecture specifies otherwise. (see Section 3.)
+
+To determine the size (in bytes) of a memory block, please read this file:
/sys/devices/system/memory/block_size_bytes
-This file shows the size of sections in byte.
-----------------------
2. Kernel Configuration
@@ -123,42 +128,35 @@ config options.
(CONFIG_ACPI_CONTAINER).
This option can be kernel module too.
+
--------------------------------
-4 sysfs files for memory hotplug
+3 sysfs files for memory hotplug
--------------------------------
-All sections have their device information in sysfs. Each section is part of
-a memory block under /sys/devices/system/memory as
+All memory blocks have their device information in sysfs. Each memory block
+is described under /sys/devices/system/memory as
/sys/devices/system/memory/memoryXXX
-(XXX is the section id.)
+(XXX is the memory block id.)
-Now, XXX is defined as (start_address_of_section / section_size) of the first
-section contained in the memory block. The files 'phys_index' and
-'end_phys_index' under each directory report the beginning and end section id's
-for the memory block covered by the sysfs directory. It is expected that all
+For the memory block covered by the sysfs directory, it is expected that all
memory sections in this range are present and no memory holes exist in the
range. Currently there is no way to determine if there is a memory hole, but
the existence of one should not affect the hotplug capabilities of the memory
block.
-For example, assume 1GiB section size. A device for a memory starting at
+For example, assume 1GiB memory block size. A device for a memory starting at
0x100000000 is /sys/device/system/memory/memory4
(0x100000000 / 1Gib = 4)
This device covers address range [0x100000000 ... 0x140000000)
-Under each section, you can see 4 or 5 files, the end_phys_index file being
-a recent addition and not present on older kernels.
+Under each memory block, you can see 4 files:
-/sys/devices/system/memory/memoryXXX/start_phys_index
-/sys/devices/system/memory/memoryXXX/end_phys_index
+/sys/devices/system/memory/memoryXXX/phys_index
/sys/devices/system/memory/memoryXXX/phys_device
/sys/devices/system/memory/memoryXXX/state
/sys/devices/system/memory/memoryXXX/removable
-'phys_index' : read-only and contains section id of the first section
- in the memory block, same as XXX.
-'end_phys_index' : read-only and contains section id of the last section
- in the memory block.
+'phys_index' : read-only and contains memory block id, same as XXX.
'state' : read-write
at read: contains online/offline state of memory.
at write: user can specify "online_kernel",
@@ -185,6 +183,7 @@ For example:
A backlink will also be created:
/sys/devices/system/memory/memory9/node0 -> ../../node/node0
+
--------------------------------
4. Physical memory hot-add phase
--------------------------------
@@ -227,11 +226,10 @@ You can tell the physical address of new memory to the kernel by
% echo start_address_of_new_memory > /sys/devices/system/memory/probe
-Then, [start_address_of_new_memory, start_address_of_new_memory + section_size)
-memory range is hot-added. In this case, hotplug script is not called (in
-current implementation). You'll have to online memory by yourself.
-Please see "How to online memory" in this text.
-
+Then, [start_address_of_new_memory, start_address_of_new_memory +
+memory_block_size) memory range is hot-added. In this case, hotplug script is
+not called (in current implementation). You'll have to online memory by
+yourself. Please see "How to online memory" in this text.
------------------------------
@@ -240,36 +238,36 @@ Please see "How to online memory" in this text.
5.1. State of memory
------------
-To see (online/offline) state of memory section, read 'state' file.
+To see (online/offline) state of a memory block, read 'state' file.
% cat /sys/device/system/memory/memoryXXX/state
-If the memory section is online, you'll read "online".
-If the memory section is offline, you'll read "offline".
+If the memory block is online, you'll read "online".
+If the memory block is offline, you'll read "offline".
5.2. How to online memory
------------
Even if the memory is hot-added, it is not at ready-to-use state.
-For using newly added memory, you have to "online" the memory section.
+For using newly added memory, you have to "online" the memory block.
-For onlining, you have to write "online" to the section's state file as:
+For onlining, you have to write "online" to the memory block's state file as:
% echo online > /sys/devices/system/memory/memoryXXX/state
-This onlining will not change the ZONE type of the target memory section,
-If the memory section is in ZONE_NORMAL, you can change it to ZONE_MOVABLE:
+This onlining will not change the ZONE type of the target memory block.
+If the memory block is in ZONE_NORMAL, you can change it to ZONE_MOVABLE:
% echo online_movable > /sys/devices/system/memory/memoryXXX/state
-(NOTE: current limit: this memory section must be adjacent to ZONE_MOVABLE)
+(NOTE: current limit: this memory block must be adjacent to ZONE_MOVABLE)
-And if the memory section is in ZONE_MOVABLE, you can change it to ZONE_NORMAL:
+And if the memory block is in ZONE_MOVABLE, you can change it to ZONE_NORMAL:
% echo online_kernel > /sys/devices/system/memory/memoryXXX/state
-(NOTE: current limit: this memory section must be adjacent to ZONE_NORMAL)
+(NOTE: current limit: this memory block must be adjacent to ZONE_NORMAL)
-After this, section memoryXXX's state will be 'online' and the amount of
+After this, memory block XXX's state will be 'online' and the amount of
available memory will be increased.
Currently, newly added memory is added as ZONE_NORMAL (for powerpc, ZONE_DMA).
@@ -284,22 +282,22 @@ This may be changed in future.
6.1 Memory offline and ZONE_MOVABLE
------------
Memory offlining is more complicated than memory online. Because memory offline
-has to make the whole memory section be unused, memory offline can fail if
-the section includes memory which cannot be freed.
+has to make the whole memory block be unused, memory offline can fail if
+the memory block includes memory which cannot be freed.
In general, memory offline can use 2 techniques.
-(1) reclaim and free all memory in the section.
-(2) migrate all pages in the section.
+(1) reclaim and free all memory in the memory block.
+(2) migrate all pages in the memory block.
In the current implementation, Linux's memory offline uses method (2), freeing
-all pages in the section by page migration. But not all pages are
+all pages in the memory block by page migration. But not all pages are
migratable. Under current Linux, migratable pages are anonymous pages and
-page caches. For offlining a section by migration, the kernel has to guarantee
-that the section contains only migratable pages.
+page caches. For offlining a memory block by migration, the kernel has to
+guarantee that the memory block contains only migratable pages.
-Now, a boot option for making a section which consists of migratable pages is
-supported. By specifying "kernelcore=" or "movablecore=" boot option, you can
+Now, a boot option for making a memory block which consists of migratable pages
+is supported. By specifying "kernelcore=" or "movablecore=" boot option, you can
create ZONE_MOVABLE...a zone which is just used for movable pages.
(See also Documentation/kernel-parameters.txt)
@@ -315,28 +313,27 @@ creates ZONE_MOVABLE as following.
Size of memory for movable pages (for offline) is ZZZZ.
-Note) Unfortunately, there is no information to show which section belongs
+Note: Unfortunately, there is no information to show which memory block belongs
to ZONE_MOVABLE. This is TBD.
6.2. How to offline memory
------------
-You can offline a section by using the same sysfs interface that was used in
-memory onlining.
+You can offline a memory block by using the same sysfs interface that was used
+in memory onlining.
% echo offline > /sys/devices/system/memory/memoryXXX/state
-If offline succeeds, the state of the memory section is changed to be "offline".
+If offline succeeds, the state of the memory block is changed to be "offline".
If it fails, some error code (like -EBUSY) will be returned by the kernel.
-Even if a section does not belong to ZONE_MOVABLE, you can try to offline it.
-If it doesn't contain 'unmovable' memory, you'll get success.
+Even if a memory block does not belong to ZONE_MOVABLE, you can try to offline
+it. If it doesn't contain 'unmovable' memory, you'll get success.
-A section under ZONE_MOVABLE is considered to be able to be offlined easily.
-But under some busy state, it may return -EBUSY. Even if a memory section
-cannot be offlined due to -EBUSY, you can retry offlining it and may be able to
-offline it (or not).
-(For example, a page is referred to by some kernel internal call and released
- soon.)
+A memory block under ZONE_MOVABLE is considered to be easy to offline.
+But under some busy state, it may return -EBUSY. Even if a memory
+block cannot be offlined due to -EBUSY, you can retry offlining it and may be
+able to offline it (or not). (For example, a page may be referred to by some
+kernel-internal call and be released soon.)
Consideration:
Memory hotplug's design direction is to make the possibility of memory offlining
@@ -373,11 +370,11 @@ MEMORY_GOING_OFFLINE
Generated to begin the process of offlining memory. Allocations are no
longer possible from the memory but some of the memory to be offlined
is still in use. The callback can be used to free memory known to a
- subsystem from the indicated memory section.
+ subsystem from the indicated memory block.
MEMORY_CANCEL_OFFLINE
Generated if MEMORY_GOING_OFFLINE fails. Memory is available again from
- the section that we attempted to offline.
+ the memory block that we attempted to offline.
MEMORY_OFFLINE
Generated after offlining memory is complete.
@@ -413,8 +410,8 @@ node if necessary.
--------------
- allowing memory hot-add to ZONE_MOVABLE. maybe we need some switch like
sysctl or new control file.
- - showing memory section and physical device relationship.
- - showing memory section is under ZONE_MOVABLE or not
+ - showing memory block and physical device relationship.
+ - showing memory block is under ZONE_MOVABLE or not
- test and make it better memory offlining.
- support HugeTLB page migration and offlining.
- memmap removing at memory offline.
diff --git a/Documentation/printk-formats.txt b/Documentation/printk-formats.txt
index b4498218c474..3b56a991f52e 100644
--- a/Documentation/printk-formats.txt
+++ b/Documentation/printk-formats.txt
@@ -184,6 +184,12 @@ dentry names:
equivalent of %s dentry->d_name.name we used to use, %pd<n> prints
n last components. %pD does the same thing for struct file.
+task_struct comm name:
+
+ %pT
+
+ For printing task_struct->comm.
+
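A call site for the %pT extension added above might look like this sketch:

	struct task_struct *p = current;

	pr_info("request initiated by %pT\n", p);
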
struct va_format:
%pV
diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt
index 9886c3d57fc2..708bb7f1b7e0 100644
--- a/Documentation/sysctl/kernel.txt
+++ b/Documentation/sysctl/kernel.txt
@@ -77,6 +77,7 @@ show up in /proc/sys/kernel:
- shmmni
- stop-a [ SPARC only ]
- sysrq ==> Documentation/sysrq.txt
+- sysctl_writes_strict
- tainted
- threads-max
- unknown_nmi_panic
@@ -762,6 +763,26 @@ without users and with a dead originative process will be destroyed.
==============================================================
+sysctl_writes_strict:
+
+Control how file position affects the behavior of updating sysctl values
+via the /proc/sys interface:
+
+ -1 - Legacy per-write sysctl value handling, with no printk warnings.
+ Each write syscall must fully contain the sysctl value to be
+ written, and multiple writes on the same sysctl file descriptor
+ will rewrite the sysctl value, regardless of file position.
+ 0 - (default) Same behavior as above, but warn about processes that
+ perform writes to a sysctl file descriptor when the file position
+ is not 0.
+ 1 - Respect file position when writing sysctl strings. Multiple writes
+ will append to the sysctl value buffer. Anything past the max length
+ of the sysctl value buffer will be ignored. Writes to numeric sysctl
+ entries must always be at file position 0 and the value must be
+ fully contained in the buffer sent in the write syscall.
+
+==============================================================
+
tainted:
Non-zero if the kernel has been tainted. Numeric values, which
diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt
index dd9d0e33b443..5b6da0fb5fbf 100644
--- a/Documentation/sysctl/vm.txt
+++ b/Documentation/sysctl/vm.txt
@@ -772,16 +772,17 @@ This is value ORed together of
2 = Zone reclaim writes dirty pages out
4 = Zone reclaim swaps pages
-zone_reclaim_mode is set during bootup to 1 if it is determined that pages
-from remote zones will cause a measurable performance reduction. The
-page allocator will then reclaim easily reusable pages (those page
-cache pages that are currently not used) before allocating off node pages.
-
-It may be beneficial to switch off zone reclaim if the system is
-used for a file server and all of memory should be used for caching files
-from disk. In that case the caching effect is more important than
+zone_reclaim_mode is disabled by default. For file servers or workloads
+that benefit from having their data cached, zone_reclaim_mode should be
+left disabled as the caching effect is likely to be more important than
data locality.
+zone_reclaim may be enabled if it's known that the workload is partitioned
+such that each partition fits within a NUMA node and that accessing remote
+memory would cause a measurable performance reduction. The page allocator
+will then reclaim easily reusable pages (those page cache pages that are
+currently not used) before allocating off node pages.
+
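For example, on a suitably partitioned NUMA workload, reclaim plus dirty-page
writeout can be enabled by ORing the values from the table above:

	# echo 3 > /proc/sys/vm/zone_reclaim_mode
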
Allowing zone reclaim to write out pages stops processes that are
writing large amounts of data from dirtying pages on other nodes. Zone
reclaim will write out dirty pages if a zone fills up and so effectively
diff --git a/Documentation/vm/remap_file_pages.txt b/Documentation/vm/remap_file_pages.txt
new file mode 100644
index 000000000000..f609142f406a
--- /dev/null
+++ b/Documentation/vm/remap_file_pages.txt
@@ -0,0 +1,27 @@
+The remap_file_pages() system call is used to create a nonlinear mapping,
+that is, a mapping in which the pages of the file are mapped into a
+nonsequential order in memory. The advantage of using remap_file_pages()
+over using repeated calls to mmap(2) is that the former approach does not
+require the kernel to create additional VMA (Virtual Memory Area) data
+structures.
+
+Supporting nonlinear mappings requires a significant amount of non-trivial
+code in the kernel virtual memory subsystem, including hot paths. Also, to
+make nonlinear mappings work, the kernel needs a way to distinguish normal
+page table entries from entries with a file offset (pte_file); the kernel
+reserves a PTE flag for this purpose. PTE flags are a scarce resource,
+especially on some CPU architectures; it would be nice to free the flag up.
+
+Fortunately, there are not many users of remap_file_pages() in the wild.
+The only known user is an enterprise RDBMS implementation that uses the
+syscall on 32-bit systems to map files bigger than can linearly fit into
+the 32-bit virtual address space. This use-case is no longer critical since
+64-bit systems are widely available.
+
+The syscall is now deprecated and replaced with an emulation. The
+emulation creates new VMAs instead of nonlinear mappings. It will be
+slower for the rare users of remap_file_pages(), but the ABI is preserved.
+
+One side effect of the emulation (apart from performance) is that users can
+hit the vm.max_map_count limit more easily due to the additional VMAs. See
+the comment for DEFAULT_MAX_MAP_COUNT for more details on the limit.
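
A minimal userspace sketch of the (now emulated) syscall; "data.bin" is a
hypothetical file at least two pages long:

	#define _GNU_SOURCE
	#include <fcntl.h>
	#include <stdio.h>
	#include <sys/mman.h>
	#include <unistd.h>

	int main(void)
	{
		long pg = sysconf(_SC_PAGESIZE);
		int fd = open("data.bin", O_RDWR);
		char *p;

		if (fd < 0)
			return 1;
		p = mmap(NULL, 2 * pg, PROT_READ, MAP_SHARED, fd, 0);
		if (p == MAP_FAILED)
			return 1;
		/* place file page 1 at the start of the window; the mapping
		 * is now nonlinear (or emulated with an extra VMA, as above) */
		if (remap_file_pages(p, pg, 0, 1, 0))
			perror("remap_file_pages");
		return 0;
	}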
diff --git a/MAINTAINERS b/MAINTAINERS
index 7dd1ad4df316..58a4d9699a99 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -210,6 +210,13 @@ S: Supported
F: Documentation/scsi/aacraid.txt
F: drivers/scsi/aacraid/
+ABI/API
+L: linux-api@vger.kernel.org
+F: Documentation/ABI/
+F: include/linux/syscalls.h
+F: include/uapi/
+F: kernel/sys_ni.c
+
ABIT UGURU 1,2 HARDWARE MONITOR DRIVER
M: Hans de Goede <hdegoede@redhat.com>
L: lm-sensors@lm-sensors.org
@@ -4392,10 +4399,7 @@ F: drivers/scsi/ibmvscsi/
X: drivers/scsi/ibmvscsi/ibmvstgt.c
IBM ServeRAID RAID DRIVER
-P: Jack Hammer
-M: Dave Jeffery <ipslinux@adaptec.com>
-W: http://www.developer.ibm.com/welcome/netfinity/serveraid.html
-S: Supported
+S: Orphan
F: drivers/scsi/ips.*
ICH LPC AND GPIO DRIVER
diff --git a/arch/alpha/include/asm/Kbuild b/arch/alpha/include/asm/Kbuild
index 96e54bed5088..e858aa0ad8af 100644
--- a/arch/alpha/include/asm/Kbuild
+++ b/arch/alpha/include/asm/Kbuild
@@ -6,4 +6,5 @@ generic-y += exec.h
generic-y += hash.h
generic-y += mcs_spinlock.h
generic-y += preempt.h
+generic-y += scatterlist.h
generic-y += trace_clock.h
diff --git a/arch/alpha/include/asm/scatterlist.h b/arch/alpha/include/asm/scatterlist.h
deleted file mode 100644
index 017d7471c3c4..000000000000
--- a/arch/alpha/include/asm/scatterlist.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef _ALPHA_SCATTERLIST_H
-#define _ALPHA_SCATTERLIST_H
-
-#include <asm-generic/scatterlist.h>
-
-#endif /* !(_ALPHA_SCATTERLIST_H) */
diff --git a/arch/arc/kernel/troubleshoot.c b/arch/arc/kernel/troubleshoot.c
index 73a7450ee622..1badf9b84b51 100644
--- a/arch/arc/kernel/troubleshoot.c
+++ b/arch/arc/kernel/troubleshoot.c
@@ -86,12 +86,13 @@ static void show_faulting_vma(unsigned long address, char *buf)
unsigned long ino = 0;
dev_t dev = 0;
char *nm = buf;
+ struct mm_struct *active_mm = current->active_mm;
/* can't use print_vma_addr() yet as it doesn't check for
* non-inclusive vma
*/
-
- vma = find_vma(current->active_mm, address);
+ down_read(&active_mm->mmap_sem);
+ vma = find_vma(active_mm, address);
/* check against the find_vma( ) behaviour which returns the next VMA
* if the container VMA is not found
@@ -110,9 +111,10 @@ static void show_faulting_vma(unsigned long address, char *buf)
vma->vm_start < TASK_UNMAPPED_BASE ?
address : address - vma->vm_start,
nm, vma->vm_start, vma->vm_end);
- } else {
+ } else
pr_info(" @No matching VMA found\n");
- }
+
+ up_read(&active_mm->mmap_sem);
}
static void show_ecr_verbose(struct pt_regs *regs)
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 3f42ea8dbaea..b39196ef6131 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -83,6 +83,7 @@ config ARM
<http://www.arm.linux.org.uk/>.
config ARM_HAS_SG_CHAIN
+ select ARCH_HAS_SG_CHAIN
bool
config NEED_SG_DMA_LENGTH
diff --git a/arch/arm/include/asm/Kbuild b/arch/arm/include/asm/Kbuild
index 23e728ecf8ab..2d95820276fd 100644
--- a/arch/arm/include/asm/Kbuild
+++ b/arch/arm/include/asm/Kbuild
@@ -21,6 +21,7 @@ generic-y += parport.h
generic-y += poll.h
generic-y += preempt.h
generic-y += resource.h
+generic-y += scatterlist.h
generic-y += sections.h
generic-y += segment.h
generic-y += sembuf.h
diff --git a/arch/arm/include/asm/scatterlist.h b/arch/arm/include/asm/scatterlist.h
deleted file mode 100644
index cefdb8f898a1..000000000000
--- a/arch/arm/include/asm/scatterlist.h
+++ /dev/null
@@ -1,12 +0,0 @@
-#ifndef _ASMARM_SCATTERLIST_H
-#define _ASMARM_SCATTERLIST_H
-
-#ifdef CONFIG_ARM_HAS_SG_CHAIN
-#define ARCH_HAS_SG_CHAIN
-#endif
-
-#include <asm/memory.h>
-#include <asm/types.h>
-#include <asm-generic/scatterlist.h>
-
-#endif /* _ASMARM_SCATTERLIST_H */
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 853080cb94f3..0d5cb87d7c27 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -2,6 +2,7 @@ config ARM64
def_bool y
select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE
select ARCH_USE_CMPXCHG_LOCKREF
+ select ARCH_HAS_SG_CHAIN
select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
select ARCH_WANT_OPTIONAL_GPIOLIB
select ARCH_WANT_COMPAT_IPC_PARSE_VERSION
diff --git a/arch/arm64/boot/dts/apm-storm.dtsi b/arch/arm64/boot/dts/apm-storm.dtsi
index f8c40a66e65d..c5f0a47a1375 100644
--- a/arch/arm64/boot/dts/apm-storm.dtsi
+++ b/arch/arm64/boot/dts/apm-storm.dtsi
@@ -257,6 +257,19 @@
enable-offset = <0x0>;
enable-mask = <0x39>;
};
+
+ rtcclk: rtcclk@17000000 {
+ compatible = "apm,xgene-device-clock";
+ #clock-cells = <1>;
+ clocks = <&socplldiv2 0>;
+ reg = <0x0 0x17000000 0x0 0x2000>;
+ reg-names = "csr-reg";
+ csr-offset = <0xc>;
+ csr-mask = <0x2>;
+ enable-offset = <0x10>;
+ enable-mask = <0x2>;
+ clock-output-names = "rtcclk";
+ };
};
serial0: serial@1c020000 {
@@ -342,5 +355,13 @@
phys = <&phy3 0>;
phy-names = "sata-phy";
};
+
+ rtc: rtc@10510000 {
+ compatible = "apm,xgene-rtc";
+ reg = <0x0 0x10510000 0x0 0x400>;
+ interrupts = <0x0 0x46 0x4>;
+ #clock-cells = <1>;
+ clocks = <&rtcclk 0>;
+ };
};
};
diff --git a/arch/blackfin/include/asm/unistd.h b/arch/blackfin/include/asm/unistd.h
index c35414bdf7bd..c8c8ff9eff61 100644
--- a/arch/blackfin/include/asm/unistd.h
+++ b/arch/blackfin/include/asm/unistd.h
@@ -12,7 +12,6 @@
#define __ARCH_WANT_SYS_ALARM
#define __ARCH_WANT_SYS_GETHOSTNAME
#define __ARCH_WANT_SYS_PAUSE
-#define __ARCH_WANT_SYS_SGETMASK
#define __ARCH_WANT_SYS_TIME
#define __ARCH_WANT_SYS_FADVISE64
#define __ARCH_WANT_SYS_GETPGRP
diff --git a/arch/cris/include/asm/Kbuild b/arch/cris/include/asm/Kbuild
index afff5105909d..31742dfadff9 100644
--- a/arch/cris/include/asm/Kbuild
+++ b/arch/cris/include/asm/Kbuild
@@ -13,6 +13,7 @@ generic-y += linkage.h
generic-y += mcs_spinlock.h
generic-y += module.h
generic-y += preempt.h
+generic-y += scatterlist.h
generic-y += trace_clock.h
generic-y += vga.h
generic-y += xor.h
diff --git a/arch/cris/include/asm/scatterlist.h b/arch/cris/include/asm/scatterlist.h
deleted file mode 100644
index f11f8f40ec4a..000000000000
--- a/arch/cris/include/asm/scatterlist.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __ASM_CRIS_SCATTERLIST_H
-#define __ASM_CRIS_SCATTERLIST_H
-
-#include <asm-generic/scatterlist.h>
-
-#endif /* !(__ASM_CRIS_SCATTERLIST_H) */
diff --git a/arch/cris/include/asm/unistd.h b/arch/cris/include/asm/unistd.h
index 5cc7d1991e48..0f40fed1ba25 100644
--- a/arch/cris/include/asm/unistd.h
+++ b/arch/cris/include/asm/unistd.h
@@ -15,7 +15,6 @@
#define __ARCH_WANT_SYS_GETHOSTNAME
#define __ARCH_WANT_SYS_IPC
#define __ARCH_WANT_SYS_PAUSE
-#define __ARCH_WANT_SYS_SGETMASK
#define __ARCH_WANT_SYS_SIGNAL
#define __ARCH_WANT_SYS_TIME
#define __ARCH_WANT_SYS_UTIME
diff --git a/arch/frv/include/asm/Kbuild b/arch/frv/include/asm/Kbuild
index 87b95eb8aee5..5b73921b6e9d 100644
--- a/arch/frv/include/asm/Kbuild
+++ b/arch/frv/include/asm/Kbuild
@@ -5,4 +5,5 @@ generic-y += exec.h
generic-y += hash.h
generic-y += mcs_spinlock.h
generic-y += preempt.h
+generic-y += scatterlist.h
generic-y += trace_clock.h
diff --git a/arch/frv/include/asm/scatterlist.h b/arch/frv/include/asm/scatterlist.h
deleted file mode 100644
index 0e5eb3018468..000000000000
--- a/arch/frv/include/asm/scatterlist.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef _ASM_SCATTERLIST_H
-#define _ASM_SCATTERLIST_H
-
-#include <asm-generic/scatterlist.h>
-
-#endif /* !_ASM_SCATTERLIST_H */
diff --git a/arch/frv/include/asm/unistd.h b/arch/frv/include/asm/unistd.h
index 70ec7293dce7..17b5df8fc28a 100644
--- a/arch/frv/include/asm/unistd.h
+++ b/arch/frv/include/asm/unistd.h
@@ -13,7 +13,6 @@
/* #define __ARCH_WANT_SYS_GETHOSTNAME */
#define __ARCH_WANT_SYS_IPC
#define __ARCH_WANT_SYS_PAUSE
-/* #define __ARCH_WANT_SYS_SGETMASK */
/* #define __ARCH_WANT_SYS_SIGNAL */
#define __ARCH_WANT_SYS_TIME
#define __ARCH_WANT_SYS_UTIME
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index 12c3afee0f6f..43e7290fbccf 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -27,6 +27,7 @@ config IA64
select HAVE_MEMBLOCK
select HAVE_MEMBLOCK_NODE_MAP
select HAVE_VIRT_CPU_ACCOUNTING
+ select ARCH_HAS_SG_CHAIN
select VIRT_TO_BUS
select ARCH_DISCARD_MEMBLOCK
select GENERIC_IRQ_PROBE
diff --git a/arch/ia64/include/asm/Kbuild b/arch/ia64/include/asm/Kbuild
index 0da4aa2602ae..e8317d2d6c8d 100644
--- a/arch/ia64/include/asm/Kbuild
+++ b/arch/ia64/include/asm/Kbuild
@@ -5,5 +5,6 @@ generic-y += hash.h
generic-y += kvm_para.h
generic-y += mcs_spinlock.h
generic-y += preempt.h
+generic-y += scatterlist.h
generic-y += trace_clock.h
generic-y += vtime.h
diff --git a/arch/ia64/include/asm/scatterlist.h b/arch/ia64/include/asm/scatterlist.h
deleted file mode 100644
index 08fd93bff1db..000000000000
--- a/arch/ia64/include/asm/scatterlist.h
+++ /dev/null
@@ -1,7 +0,0 @@
-#ifndef _ASM_IA64_SCATTERLIST_H
-#define _ASM_IA64_SCATTERLIST_H
-
-#include <asm-generic/scatterlist.h>
-#define ARCH_HAS_SG_CHAIN
-
-#endif /* _ASM_IA64_SCATTERLIST_H */
diff --git a/arch/ia64/include/asm/topology.h b/arch/ia64/include/asm/topology.h
index 3202aa74e0d6..6437ca21f61b 100644
--- a/arch/ia64/include/asm/topology.h
+++ b/arch/ia64/include/asm/topology.h
@@ -21,7 +21,8 @@
#define PENALTY_FOR_NODE_WITH_CPUS 255
/*
- * Distance above which we begin to use zone reclaim
+ * Nodes within this distance are eligible for reclaim by zone_reclaim() when
+ * zone_reclaim_mode is enabled.
*/
#define RECLAIM_DISTANCE 15
diff --git a/arch/m32r/include/asm/Kbuild b/arch/m32r/include/asm/Kbuild
index 67779a74b62d..accc10a3dc78 100644
--- a/arch/m32r/include/asm/Kbuild
+++ b/arch/m32r/include/asm/Kbuild
@@ -6,4 +6,5 @@ generic-y += hash.h
generic-y += mcs_spinlock.h
generic-y += module.h
generic-y += preempt.h
+generic-y += scatterlist.h
generic-y += trace_clock.h
diff --git a/arch/m32r/include/asm/scatterlist.h b/arch/m32r/include/asm/scatterlist.h
deleted file mode 100644
index 7370b8b6243e..000000000000
--- a/arch/m32r/include/asm/scatterlist.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef _ASM_M32R_SCATTERLIST_H
-#define _ASM_M32R_SCATTERLIST_H
-
-#include <asm-generic/scatterlist.h>
-
-#endif /* _ASM_M32R_SCATTERLIST_H */
diff --git a/arch/m68k/include/asm/signal.h b/arch/m68k/include/asm/signal.h
index 214320b50384..8c8ce5e1ee0e 100644
--- a/arch/m68k/include/asm/signal.h
+++ b/arch/m68k/include/asm/signal.h
@@ -60,15 +60,6 @@ static inline int __gen_sigismember(sigset_t *set, int _sig)
__const_sigismember(set,sig) : \
__gen_sigismember(set,sig))
-static inline int sigfindinword(unsigned long word)
-{
- asm ("bfffo %1{#0,#0},%0"
- : "=d" (word)
- : "d" (word & -word)
- : "cc");
- return word ^ 31;
-}
-
#endif /* !CONFIG_CPU_HAS_NO_BITFIELDS */
#ifndef __uClinux__
diff --git a/arch/m68k/include/asm/unistd.h b/arch/m68k/include/asm/unistd.h
index 9d38b73989eb..6a43c160f50e 100644
--- a/arch/m68k/include/asm/unistd.h
+++ b/arch/m68k/include/asm/unistd.h
@@ -13,7 +13,6 @@
#define __ARCH_WANT_SYS_GETHOSTNAME
#define __ARCH_WANT_SYS_IPC
#define __ARCH_WANT_SYS_PAUSE
-#define __ARCH_WANT_SYS_SGETMASK
#define __ARCH_WANT_SYS_SIGNAL
#define __ARCH_WANT_SYS_TIME
#define __ARCH_WANT_SYS_UTIME
diff --git a/arch/m68k/kernel/sys_m68k.c b/arch/m68k/kernel/sys_m68k.c
index 3a480b3df0d6..d2263a0bdc3d 100644
--- a/arch/m68k/kernel/sys_m68k.c
+++ b/arch/m68k/kernel/sys_m68k.c
@@ -376,7 +376,6 @@ cache_flush_060 (unsigned long addr, int scope, int cache, unsigned long len)
asmlinkage int
sys_cacheflush (unsigned long addr, int scope, int cache, unsigned long len)
{
- struct vm_area_struct *vma;
int ret = -EINVAL;
if (scope < FLUSH_SCOPE_LINE || scope > FLUSH_SCOPE_ALL ||
@@ -389,16 +388,23 @@ sys_cacheflush (unsigned long addr, int scope, int cache, unsigned long len)
if (!capable(CAP_SYS_ADMIN))
goto out;
} else {
+ struct vm_area_struct *vma;
+ bool invalid;
+
+ /* Check for overflow. */
+ if (addr + len < addr)
+ goto out;
+
/*
* Verify that the specified address region actually belongs
* to this process.
*/
- vma = find_vma (current->mm, addr);
ret = -EINVAL;
- /* Check for overflow. */
- if (addr + len < addr)
- goto out;
- if (vma == NULL || addr < vma->vm_start || addr + len > vma->vm_end)
+ down_read(&current->mm->mmap_sem);
+ vma = find_vma(current->mm, addr);
+ invalid = !vma || addr < vma->vm_start || addr + len > vma->vm_end;
+ up_read(&current->mm->mmap_sem);
+ if (invalid)
goto out;
}
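The sys_cacheflush() rework above is also a locking fix: find_vma() may only be called with mmap_sem held for reading, and the overflow check has to happen before the lookup so an invalid range never reaches the VMA walk. A minimal sketch of the pattern, using a hypothetical helper that is not part of the patch:

static bool addr_range_is_mapped(struct mm_struct *mm,
				 unsigned long addr, unsigned long len)
{
	struct vm_area_struct *vma;
	bool mapped;

	if (addr + len < addr)		/* reject wrapping ranges up front */
		return false;

	down_read(&mm->mmap_sem);	/* find_vma() requires mmap_sem */
	vma = find_vma(mm, addr);
	mapped = vma && addr >= vma->vm_start && addr + len <= vma->vm_end;
	up_read(&mm->mmap_sem);		/* the result is only a snapshot */
	return mapped;
}

The mips traps.c and c-octeon.c hunks below apply the same rule to their find_vma() callers.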
diff --git a/arch/microblaze/include/asm/Kbuild b/arch/microblaze/include/asm/Kbuild
index c98ed95c0541..2ea655be809d 100644
--- a/arch/microblaze/include/asm/Kbuild
+++ b/arch/microblaze/include/asm/Kbuild
@@ -6,5 +6,6 @@ generic-y += exec.h
generic-y += hash.h
generic-y += mcs_spinlock.h
generic-y += preempt.h
+generic-y += scatterlist.h
generic-y += syscalls.h
generic-y += trace_clock.h
diff --git a/arch/microblaze/include/asm/scatterlist.h b/arch/microblaze/include/asm/scatterlist.h
deleted file mode 100644
index 35d786fe93ae..000000000000
--- a/arch/microblaze/include/asm/scatterlist.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/scatterlist.h>
diff --git a/arch/microblaze/include/asm/unistd.h b/arch/microblaze/include/asm/unistd.h
index b14232b6878f..fd56a8f66489 100644
--- a/arch/microblaze/include/asm/unistd.h
+++ b/arch/microblaze/include/asm/unistd.h
@@ -19,7 +19,6 @@
#define __ARCH_WANT_SYS_ALARM
#define __ARCH_WANT_SYS_GETHOSTNAME
#define __ARCH_WANT_SYS_PAUSE
-#define __ARCH_WANT_SYS_SGETMASK
#define __ARCH_WANT_SYS_SIGNAL
#define __ARCH_WANT_SYS_TIME
#define __ARCH_WANT_SYS_UTIME
diff --git a/arch/mips/dec/Makefile b/arch/mips/dec/Makefile
index 3d5d2c56de8d..bd74e05c90b0 100644
--- a/arch/mips/dec/Makefile
+++ b/arch/mips/dec/Makefile
@@ -3,7 +3,7 @@
#
obj-y := ecc-berr.o int-handler.o ioasic-irq.o kn01-berr.o \
- kn02-irq.o kn02xa-berr.o reset.o setup.o time.o
+ kn02-irq.o kn02xa-berr.o platform.o reset.o setup.o time.o
obj-$(CONFIG_TC) += tc.o
obj-$(CONFIG_CPU_HAS_WB) += wbflush.o
diff --git a/arch/mips/dec/platform.c b/arch/mips/dec/platform.c
new file mode 100644
index 000000000000..c7ac86af847a
--- /dev/null
+++ b/arch/mips/dec/platform.c
@@ -0,0 +1,44 @@
+/*
+ * DEC platform devices.
+ *
+ * Copyright (c) 2014 Maciej W. Rozycki
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/ioport.h>
+#include <linux/kernel.h>
+#include <linux/mc146818rtc.h>
+#include <linux/platform_device.h>
+
+static struct resource dec_rtc_resources[] = {
+ {
+ .name = "rtc",
+ .flags = IORESOURCE_MEM,
+ },
+};
+
+static struct cmos_rtc_board_info dec_rtc_info = {
+ .flags = CMOS_RTC_FLAGS_NOFREQ,
+ .address_space = 64,
+};
+
+static struct platform_device dec_rtc_device = {
+ .name = "rtc_cmos",
+ .id = PLATFORM_DEVID_NONE,
+ .dev.platform_data = &dec_rtc_info,
+ .resource = dec_rtc_resources,
+ .num_resources = ARRAY_SIZE(dec_rtc_resources),
+};
+
+static int __init dec_add_devices(void)
+{
+ dec_rtc_resources[0].start = RTC_PORT(0);
+ dec_rtc_resources[0].end = RTC_PORT(0) + dec_kn_slot_size - 1;
+ return platform_device_register(&dec_rtc_device);
+}
+
+device_initcall(dec_add_devices);
diff --git a/arch/mips/include/asm/unistd.h b/arch/mips/include/asm/unistd.h
index 413d6c612bec..e55813029d5a 100644
--- a/arch/mips/include/asm/unistd.h
+++ b/arch/mips/include/asm/unistd.h
@@ -29,7 +29,6 @@
#define __ARCH_WANT_SYS_GETHOSTNAME
#define __ARCH_WANT_SYS_IPC
#define __ARCH_WANT_SYS_PAUSE
-#define __ARCH_WANT_SYS_SGETMASK
#define __ARCH_WANT_SYS_UTIME
#define __ARCH_WANT_SYS_WAITPID
#define __ARCH_WANT_SYS_SOCKETCALL
diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c
index 074e857ced28..c51bd20cd081 100644
--- a/arch/mips/kernel/traps.c
+++ b/arch/mips/kernel/traps.c
@@ -712,10 +712,12 @@ int process_fpemu_return(int sig, void __user *fault_addr)
si.si_addr = fault_addr;
si.si_signo = sig;
if (sig == SIGSEGV) {
+ down_read(&current->mm->mmap_sem);
if (find_vma(current->mm, (unsigned long)fault_addr))
si.si_code = SEGV_ACCERR;
else
si.si_code = SEGV_MAPERR;
+ up_read(&current->mm->mmap_sem);
} else {
si.si_code = BUS_ADRERR;
}
diff --git a/arch/mips/mm/c-octeon.c b/arch/mips/mm/c-octeon.c
index f41a5c5b0865..05b1d7cf9514 100644
--- a/arch/mips/mm/c-octeon.c
+++ b/arch/mips/mm/c-octeon.c
@@ -137,8 +137,10 @@ static void octeon_flush_cache_sigtramp(unsigned long addr)
{
struct vm_area_struct *vma;
+ down_read(&current->mm->mmap_sem);
vma = find_vma(current->mm, addr);
octeon_flush_icache_all_cores(vma);
+ up_read(&current->mm->mmap_sem);
}
diff --git a/arch/mn10300/include/asm/Kbuild b/arch/mn10300/include/asm/Kbuild
index 654d5ba6e310..ecbd6676bd33 100644
--- a/arch/mn10300/include/asm/Kbuild
+++ b/arch/mn10300/include/asm/Kbuild
@@ -6,4 +6,5 @@ generic-y += exec.h
generic-y += hash.h
generic-y += mcs_spinlock.h
generic-y += preempt.h
+generic-y += scatterlist.h
generic-y += trace_clock.h
diff --git a/arch/mn10300/include/asm/scatterlist.h b/arch/mn10300/include/asm/scatterlist.h
deleted file mode 100644
index 7baa4006008a..000000000000
--- a/arch/mn10300/include/asm/scatterlist.h
+++ /dev/null
@@ -1,16 +0,0 @@
-/* MN10300 Scatterlist definitions
- *
- * Copyright (C) 2007 Matsushita Electric Industrial Co., Ltd.
- * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public Licence
- * as published by the Free Software Foundation; either version
- * 2 of the Licence, or (at your option) any later version.
- */
-#ifndef _ASM_SCATTERLIST_H
-#define _ASM_SCATTERLIST_H
-
-#include <asm-generic/scatterlist.h>
-
-#endif /* _ASM_SCATTERLIST_H */
diff --git a/arch/mn10300/include/asm/unistd.h b/arch/mn10300/include/asm/unistd.h
index 9d4e2d1ef90e..0522468f488b 100644
--- a/arch/mn10300/include/asm/unistd.h
+++ b/arch/mn10300/include/asm/unistd.h
@@ -26,7 +26,6 @@
#define __ARCH_WANT_SYS_GETHOSTNAME
#define __ARCH_WANT_SYS_IPC
#define __ARCH_WANT_SYS_PAUSE
-#define __ARCH_WANT_SYS_SGETMASK
#define __ARCH_WANT_SYS_SIGNAL
#define __ARCH_WANT_SYS_TIME
#define __ARCH_WANT_SYS_UTIME
diff --git a/arch/parisc/include/asm/unistd.h b/arch/parisc/include/asm/unistd.h
index 74d835820ee7..5f4c68daa261 100644
--- a/arch/parisc/include/asm/unistd.h
+++ b/arch/parisc/include/asm/unistd.h
@@ -145,7 +145,6 @@ type name(type1 arg1, type2 arg2, type3 arg3, type4 arg4, type5 arg5) \
#define __ARCH_WANT_SYS_ALARM
#define __ARCH_WANT_SYS_GETHOSTNAME
#define __ARCH_WANT_SYS_PAUSE
-#define __ARCH_WANT_SYS_SGETMASK
#define __ARCH_WANT_SYS_SIGNAL
#define __ARCH_WANT_SYS_TIME
#define __ARCH_WANT_COMPAT_SYS_TIME
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index e0998997943b..caece570d0b4 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -111,6 +111,7 @@ config PPC
select HAVE_DMA_API_DEBUG
select HAVE_OPROFILE
select HAVE_DEBUG_KMEMLEAK
+ select ARCH_HAS_SG_CHAIN
select GENERIC_ATOMIC64 if PPC32
select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE
select HAVE_PERF_EVENTS
diff --git a/arch/powerpc/include/asm/Kbuild b/arch/powerpc/include/asm/Kbuild
index 3fb1bc432f4f..7f23f162ce9c 100644
--- a/arch/powerpc/include/asm/Kbuild
+++ b/arch/powerpc/include/asm/Kbuild
@@ -4,5 +4,6 @@ generic-y += hash.h
generic-y += mcs_spinlock.h
generic-y += preempt.h
generic-y += rwsem.h
+generic-y += scatterlist.h
generic-y += trace_clock.h
generic-y += vtime.h
diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h
index 3ebb188c3ff5..d98c1ecc3266 100644
--- a/arch/powerpc/include/asm/pgtable.h
+++ b/arch/powerpc/include/asm/pgtable.h
@@ -44,6 +44,12 @@ static inline int pte_present(pte_t pte)
return pte_val(pte) & (_PAGE_PRESENT | _PAGE_NUMA);
}
+#define pte_present_nonuma pte_present_nonuma
+static inline int pte_present_nonuma(pte_t pte)
+{
+ return pte_val(pte) & (_PAGE_PRESENT);
+}
+
#define pte_numa pte_numa
static inline int pte_numa(pte_t pte)
{
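pte_present_nonuma() exists because automatic NUMA balancing marks hinting ptes with _PAGE_NUMA while clearing _PAGE_PRESENT, and pte_present() deliberately keeps reporting such ptes as present. Callers that must treat hinting ptes as absent (the x86 soft-dirty VM_BUG_ON conversions later in this merge, for instance) use the new helper. An illustrative truth table, assuming the powerpc flag definitions above:

/*
 * Illustration only, not from the patch:
 *
 *   pte flags set             pte_present()   pte_present_nonuma()
 *   _PAGE_PRESENT             true            true
 *   _PAGE_NUMA only           true            false
 *   neither (swap/none pte)   false           false
 */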
diff --git a/arch/powerpc/include/asm/scatterlist.h b/arch/powerpc/include/asm/scatterlist.h
deleted file mode 100644
index de1f620bd5c9..000000000000
--- a/arch/powerpc/include/asm/scatterlist.h
+++ /dev/null
@@ -1,17 +0,0 @@
-#ifndef _ASM_POWERPC_SCATTERLIST_H
-#define _ASM_POWERPC_SCATTERLIST_H
-/*
- * Copyright (C) 2001 PPC64 Team, IBM Corp
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <asm/dma.h>
-#include <asm-generic/scatterlist.h>
-
-#define ARCH_HAS_SG_CHAIN
-
-#endif /* _ASM_POWERPC_SCATTERLIST_H */
diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h
index c9202151079f..6c8a8c5a37a1 100644
--- a/arch/powerpc/include/asm/topology.h
+++ b/arch/powerpc/include/asm/topology.h
@@ -9,12 +9,8 @@ struct device_node;
#ifdef CONFIG_NUMA
/*
- * Before going off node we want the VM to try and reclaim from the local
- * node. It does this if the remote distance is larger than RECLAIM_DISTANCE.
- * With the default REMOTE_DISTANCE of 20 and the default RECLAIM_DISTANCE of
- * 20, we never reclaim and go off node straight away.
- *
- * To fix this we choose a smaller value of RECLAIM_DISTANCE.
+ * If zone_reclaim_mode is enabled, a RECLAIM_DISTANCE of 10 will mean that
+ * all zones on all nodes will be eligible for zone_reclaim().
*/
#define RECLAIM_DISTANCE 10
diff --git a/arch/powerpc/include/asm/unistd.h b/arch/powerpc/include/asm/unistd.h
index 4494f029b632..cadd550898f3 100644
--- a/arch/powerpc/include/asm/unistd.h
+++ b/arch/powerpc/include/asm/unistd.h
@@ -29,7 +29,6 @@
#define __ARCH_WANT_SYS_GETHOSTNAME
#define __ARCH_WANT_SYS_IPC
#define __ARCH_WANT_SYS_PAUSE
-#define __ARCH_WANT_SYS_SGETMASK
#define __ARCH_WANT_SYS_SIGNAL
#define __ARCH_WANT_SYS_TIME
#define __ARCH_WANT_SYS_UTIME
diff --git a/arch/powerpc/mm/dma-noncoherent.c b/arch/powerpc/mm/dma-noncoherent.c
index 7b6c10750179..d85e86aac7fb 100644
--- a/arch/powerpc/mm/dma-noncoherent.c
+++ b/arch/powerpc/mm/dma-noncoherent.c
@@ -33,6 +33,7 @@
#include <linux/export.h>
#include <asm/tlbflush.h>
+#include <asm/dma.h>
#include "mmu_decl.h"
diff --git a/arch/powerpc/mm/subpage-prot.c b/arch/powerpc/mm/subpage-prot.c
index 6c0b1f5f8d2c..fa9fb5b4c66c 100644
--- a/arch/powerpc/mm/subpage-prot.c
+++ b/arch/powerpc/mm/subpage-prot.c
@@ -134,7 +134,7 @@ static void subpage_prot_clear(unsigned long addr, unsigned long len)
static int subpage_walk_pmd_entry(pmd_t *pmd, unsigned long addr,
unsigned long end, struct mm_walk *walk)
{
- struct vm_area_struct *vma = walk->private;
+ struct vm_area_struct *vma = walk->vma;
split_huge_page_pmd(vma, addr, pmd);
return 0;
}
@@ -163,9 +163,7 @@ static void subpage_mark_vma_nohuge(struct mm_struct *mm, unsigned long addr,
if (vma->vm_start >= (addr + len))
break;
vma->vm_flags |= VM_NOHUGEPAGE;
- subpage_proto_walk.private = vma;
- walk_page_range(vma->vm_start, vma->vm_end,
- &subpage_proto_walk);
+ walk_page_vma(vma, &subpage_proto_walk);
vma = vma->vm_next;
}
}
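The subpage-prot conversion follows the reworked page-walker API: the core now publishes the current VMA in walk->vma, so it no longer needs to be smuggled through walk->private, and walk_page_vma() replaces the walk_page_range(vma->vm_start, vma->vm_end, ...) idiom. A condensed sketch under those assumptions (the wiring is illustrative, not lifted from the patch):

static int sketch_pmd_entry(pmd_t *pmd, unsigned long addr,
			    unsigned long end, struct mm_walk *walk)
{
	struct vm_area_struct *vma = walk->vma;	/* filled in by the core */

	split_huge_page_pmd(vma, addr, pmd);
	return 0;
}

static struct mm_walk sketch_walk = {
	.pmd_entry = sketch_pmd_entry,
	/* .mm is still set by the caller, as before */
};

static void sketch_mark_vma(struct vm_area_struct *vma)
{
	/* one call per VMA; replaces walk_page_range() plus .private */
	walk_page_vma(vma, &sketch_walk);
}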
diff --git a/arch/powerpc/platforms/44x/warp.c b/arch/powerpc/platforms/44x/warp.c
index 534574a97ec9..3a104284b338 100644
--- a/arch/powerpc/platforms/44x/warp.c
+++ b/arch/powerpc/platforms/44x/warp.c
@@ -25,6 +25,7 @@
#include <asm/time.h>
#include <asm/uic.h>
#include <asm/ppc4xx.h>
+#include <asm/dma.h>
static __initdata struct of_device_id warp_of_bus[] = {
diff --git a/arch/powerpc/platforms/52xx/efika.c b/arch/powerpc/platforms/52xx/efika.c
index 6e19b0ad5d26..3feffde9128d 100644
--- a/arch/powerpc/platforms/52xx/efika.c
+++ b/arch/powerpc/platforms/52xx/efika.c
@@ -13,6 +13,7 @@
#include <generated/utsrelease.h>
#include <linux/pci.h>
#include <linux/of.h>
+#include <asm/dma.h>
#include <asm/prom.h>
#include <asm/time.h>
#include <asm/machdep.h>
diff --git a/arch/powerpc/platforms/amigaone/setup.c b/arch/powerpc/platforms/amigaone/setup.c
index 03aabc0e16ac..2fe12046279e 100644
--- a/arch/powerpc/platforms/amigaone/setup.c
+++ b/arch/powerpc/platforms/amigaone/setup.c
@@ -24,6 +24,7 @@
#include <asm/i8259.h>
#include <asm/time.h>
#include <asm/udbg.h>
+#include <asm/dma.h>
extern void __flush_disable_L1(void);
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 009c58b575c0..f357d8e7ddce 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -146,6 +146,7 @@ config S390
select TTY
select VIRT_CPU_ACCOUNTING
select VIRT_TO_BUS
+ select ARCH_HAS_SG_CHAIN
config SCHED_OMIT_FRAME_POINTER
def_bool y
diff --git a/arch/s390/include/asm/Kbuild b/arch/s390/include/asm/Kbuild
index 57892a8a9055..b3fea0722ff1 100644
--- a/arch/s390/include/asm/Kbuild
+++ b/arch/s390/include/asm/Kbuild
@@ -4,4 +4,5 @@ generic-y += clkdev.h
generic-y += hash.h
generic-y += mcs_spinlock.h
generic-y += preempt.h
+generic-y += scatterlist.h
generic-y += trace_clock.h
diff --git a/arch/s390/include/asm/scatterlist.h b/arch/s390/include/asm/scatterlist.h
deleted file mode 100644
index 6d45ef6c12a7..000000000000
--- a/arch/s390/include/asm/scatterlist.h
+++ /dev/null
@@ -1,3 +0,0 @@
-#include <asm-generic/scatterlist.h>
-
-#define ARCH_HAS_SG_CHAIN
diff --git a/arch/score/include/asm/Kbuild b/arch/score/include/asm/Kbuild
index 2f947aba4bd4..aad209199f7e 100644
--- a/arch/score/include/asm/Kbuild
+++ b/arch/score/include/asm/Kbuild
@@ -8,5 +8,6 @@ generic-y += cputime.h
generic-y += hash.h
generic-y += mcs_spinlock.h
generic-y += preempt.h
+generic-y += scatterlist.h
generic-y += trace_clock.h
generic-y += xor.h
diff --git a/arch/score/include/asm/scatterlist.h b/arch/score/include/asm/scatterlist.h
deleted file mode 100644
index 9f533b8362c7..000000000000
--- a/arch/score/include/asm/scatterlist.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef _ASM_SCORE_SCATTERLIST_H
-#define _ASM_SCORE_SCATTERLIST_H
-
-#include <asm-generic/scatterlist.h>
-
-#endif /* _ASM_SCORE_SCATTERLIST_H */
diff --git a/arch/sh/include/asm/unistd.h b/arch/sh/include/asm/unistd.h
index e77816c4b9bc..126fe8340b22 100644
--- a/arch/sh/include/asm/unistd.h
+++ b/arch/sh/include/asm/unistd.h
@@ -11,7 +11,6 @@
# define __ARCH_WANT_SYS_GETHOSTNAME
# define __ARCH_WANT_SYS_IPC
# define __ARCH_WANT_SYS_PAUSE
-# define __ARCH_WANT_SYS_SGETMASK
# define __ARCH_WANT_SYS_SIGNAL
# define __ARCH_WANT_SYS_TIME
# define __ARCH_WANT_SYS_UTIME
diff --git a/arch/sh/kernel/hw_breakpoint.c b/arch/sh/kernel/hw_breakpoint.c
index f9173766ec4b..2197fc584186 100644
--- a/arch/sh/kernel/hw_breakpoint.c
+++ b/arch/sh/kernel/hw_breakpoint.c
@@ -52,7 +52,7 @@ int arch_install_hw_breakpoint(struct perf_event *bp)
int i;
for (i = 0; i < sh_ubc->num_events; i++) {
- struct perf_event **slot = &__get_cpu_var(bp_per_reg[i]);
+ struct perf_event **slot = this_cpu_ptr(&bp_per_reg[i]);
if (!*slot) {
*slot = bp;
@@ -84,7 +84,7 @@ void arch_uninstall_hw_breakpoint(struct perf_event *bp)
int i;
for (i = 0; i < sh_ubc->num_events; i++) {
- struct perf_event **slot = &__get_cpu_var(bp_per_reg[i]);
+ struct perf_event **slot = this_cpu_ptr(&bp_per_reg[i]);
if (*slot == bp) {
*slot = NULL;
diff --git a/arch/sh/kernel/kprobes.c b/arch/sh/kernel/kprobes.c
index 42b46e61a2d5..83acbf3f6de8 100644
--- a/arch/sh/kernel/kprobes.c
+++ b/arch/sh/kernel/kprobes.c
@@ -102,7 +102,7 @@ int __kprobes kprobe_handle_illslot(unsigned long pc)
void __kprobes arch_remove_kprobe(struct kprobe *p)
{
- struct kprobe *saved = &__get_cpu_var(saved_next_opcode);
+ struct kprobe *saved = this_cpu_ptr(&saved_next_opcode);
if (saved->addr) {
arch_disarm_kprobe(p);
@@ -111,7 +111,7 @@ void __kprobes arch_remove_kprobe(struct kprobe *p)
saved->addr = NULL;
saved->opcode = 0;
- saved = &__get_cpu_var(saved_next_opcode2);
+ saved = this_cpu_ptr(&saved_next_opcode2);
if (saved->addr) {
arch_disarm_kprobe(saved);
@@ -129,14 +129,14 @@ static void __kprobes save_previous_kprobe(struct kprobe_ctlblk *kcb)
static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb)
{
- __get_cpu_var(current_kprobe) = kcb->prev_kprobe.kp;
+ __this_cpu_write(current_kprobe, kcb->prev_kprobe.kp);
kcb->kprobe_status = kcb->prev_kprobe.status;
}
static void __kprobes set_current_kprobe(struct kprobe *p, struct pt_regs *regs,
struct kprobe_ctlblk *kcb)
{
- __get_cpu_var(current_kprobe) = p;
+ __this_cpu_write(current_kprobe, p);
}
/*
@@ -146,15 +146,15 @@ static void __kprobes set_current_kprobe(struct kprobe *p, struct pt_regs *regs,
*/
static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs)
{
- __get_cpu_var(saved_current_opcode).addr = (kprobe_opcode_t *)regs->pc;
+ __this_cpu_write(saved_current_opcode.addr, (kprobe_opcode_t *)regs->pc);
if (p != NULL) {
struct kprobe *op1, *op2;
arch_disarm_kprobe(p);
- op1 = &__get_cpu_var(saved_next_opcode);
- op2 = &__get_cpu_var(saved_next_opcode2);
+ op1 = this_cpu_ptr(&saved_next_opcode);
+ op2 = this_cpu_ptr(&saved_next_opcode2);
if (OPCODE_JSR(p->opcode) || OPCODE_JMP(p->opcode)) {
unsigned int reg_nr = ((p->opcode >> 8) & 0x000F);
@@ -249,7 +249,7 @@ static int __kprobes kprobe_handler(struct pt_regs *regs)
kcb->kprobe_status = KPROBE_REENTER;
return 1;
} else {
- p = __get_cpu_var(current_kprobe);
+ p = __this_cpu_read(current_kprobe);
if (p->break_handler && p->break_handler(p, regs)) {
goto ss_probe;
}
@@ -336,9 +336,9 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
continue;
if (ri->rp && ri->rp->handler) {
- __get_cpu_var(current_kprobe) = &ri->rp->kp;
+ __this_cpu_write(current_kprobe, &ri->rp->kp);
ri->rp->handler(ri, regs);
- __get_cpu_var(current_kprobe) = NULL;
+ __this_cpu_write(current_kprobe, NULL);
}
orig_ret_address = (unsigned long)ri->ret_addr;
@@ -383,19 +383,19 @@ static int __kprobes post_kprobe_handler(struct pt_regs *regs)
cur->post_handler(cur, regs, 0);
}
- p = &__get_cpu_var(saved_next_opcode);
+ p = this_cpu_ptr(&saved_next_opcode);
if (p->addr) {
arch_disarm_kprobe(p);
p->addr = NULL;
p->opcode = 0;
- addr = __get_cpu_var(saved_current_opcode).addr;
- __get_cpu_var(saved_current_opcode).addr = NULL;
+ addr = __this_cpu_read(saved_current_opcode.addr);
+ __this_cpu_write(saved_current_opcode.addr, NULL);
p = get_kprobe(addr);
arch_arm_kprobe(p);
- p = &__get_cpu_var(saved_next_opcode2);
+ p = this_cpu_ptr(&saved_next_opcode2);
if (p->addr) {
arch_disarm_kprobe(p);
p->addr = NULL;
@@ -511,7 +511,7 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self,
if (kprobe_handler(args->regs)) {
ret = NOTIFY_STOP;
} else {
- p = __get_cpu_var(current_kprobe);
+ p = __this_cpu_read(current_kprobe);
if (p->break_handler &&
p->break_handler(p, args->regs))
ret = NOTIFY_STOP;
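The sh changes above are mechanical applications of the this_cpu API that replaces the deprecated __get_cpu_var(). The conversion rules, summarized for a hypothetical per-CPU variable BAR (preemption is already disabled in all of these handlers, as the __this_cpu_* forms require):

/*
 *   ptr = &__get_cpu_var(BAR);       ->  ptr = this_cpu_ptr(&BAR);
 *   __get_cpu_var(BAR) = val;        ->  __this_cpu_write(BAR, val);
 *   val = __get_cpu_var(BAR);        ->  val = __this_cpu_read(BAR);
 *   __get_cpu_var(BAR.field) = val;  ->  __this_cpu_write(BAR.field, val);
 */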
diff --git a/arch/sh/kernel/localtimer.c b/arch/sh/kernel/localtimer.c
index 8bfc6dfa8b94..b880a7e2ace7 100644
--- a/arch/sh/kernel/localtimer.c
+++ b/arch/sh/kernel/localtimer.c
@@ -32,7 +32,7 @@ static DEFINE_PER_CPU(struct clock_event_device, local_clockevent);
*/
void local_timer_interrupt(void)
{
- struct clock_event_device *clk = &__get_cpu_var(local_clockevent);
+ struct clock_event_device *clk = this_cpu_ptr(&local_clockevent);
irq_enter();
clk->event_handler(clk);
diff --git a/arch/sh/kernel/perf_event.c b/arch/sh/kernel/perf_event.c
index b9cefebda55c..02331672b6db 100644
--- a/arch/sh/kernel/perf_event.c
+++ b/arch/sh/kernel/perf_event.c
@@ -227,7 +227,7 @@ again:
static void sh_pmu_stop(struct perf_event *event, int flags)
{
- struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
struct hw_perf_event *hwc = &event->hw;
int idx = hwc->idx;
@@ -245,7 +245,7 @@ static void sh_pmu_stop(struct perf_event *event, int flags)
static void sh_pmu_start(struct perf_event *event, int flags)
{
- struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
struct hw_perf_event *hwc = &event->hw;
int idx = hwc->idx;
@@ -262,7 +262,7 @@ static void sh_pmu_start(struct perf_event *event, int flags)
static void sh_pmu_del(struct perf_event *event, int flags)
{
- struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
sh_pmu_stop(event, PERF_EF_UPDATE);
__clear_bit(event->hw.idx, cpuc->used_mask);
@@ -272,7 +272,7 @@ static void sh_pmu_del(struct perf_event *event, int flags)
static int sh_pmu_add(struct perf_event *event, int flags)
{
- struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
struct hw_perf_event *hwc = &event->hw;
int idx = hwc->idx;
int ret = -EAGAIN;
diff --git a/arch/sh/kernel/smp.c b/arch/sh/kernel/smp.c
index 86a7936a980b..fc5acfc93c92 100644
--- a/arch/sh/kernel/smp.c
+++ b/arch/sh/kernel/smp.c
@@ -111,7 +111,7 @@ void play_dead_common(void)
irq_ctx_exit(raw_smp_processor_id());
mb();
- __get_cpu_var(cpu_state) = CPU_DEAD;
+ __this_cpu_write(cpu_state, CPU_DEAD);
local_irq_disable();
}
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index 29f2e988c56a..e1ea0ff154d7 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -42,6 +42,7 @@ config SPARC
select MODULES_USE_ELF_RELA
select ODD_RT_SIGACTION
select OLD_SIGSUSPEND
+ select ARCH_HAS_SG_CHAIN
config SPARC32
def_bool !64BIT
diff --git a/arch/sparc/include/asm/Kbuild b/arch/sparc/include/asm/Kbuild
index a45821818003..cdd1b447bb6c 100644
--- a/arch/sparc/include/asm/Kbuild
+++ b/arch/sparc/include/asm/Kbuild
@@ -15,6 +15,7 @@ generic-y += mcs_spinlock.h
generic-y += module.h
generic-y += mutex.h
generic-y += preempt.h
+generic-y += scatterlist.h
generic-y += serial.h
generic-y += trace_clock.h
generic-y += types.h
diff --git a/arch/sparc/include/asm/irq_64.h b/arch/sparc/include/asm/irq_64.h
index abf6afe82ca8..4f072b91b61b 100644
--- a/arch/sparc/include/asm/irq_64.h
+++ b/arch/sparc/include/asm/irq_64.h
@@ -89,7 +89,7 @@ static inline unsigned long get_softint(void)
return retval;
}
-void arch_trigger_all_cpu_backtrace(void);
+void arch_trigger_all_cpu_backtrace(bool);
#define arch_trigger_all_cpu_backtrace arch_trigger_all_cpu_backtrace
extern void *hardirq_stack[NR_CPUS];
diff --git a/arch/sparc/include/asm/scatterlist.h b/arch/sparc/include/asm/scatterlist.h
deleted file mode 100644
index 92bb638313f8..000000000000
--- a/arch/sparc/include/asm/scatterlist.h
+++ /dev/null
@@ -1,8 +0,0 @@
-#ifndef _SPARC_SCATTERLIST_H
-#define _SPARC_SCATTERLIST_H
-
-#include <asm-generic/scatterlist.h>
-
-#define ARCH_HAS_SG_CHAIN
-
-#endif /* !(_SPARC_SCATTERLIST_H) */
diff --git a/arch/sparc/include/asm/unistd.h b/arch/sparc/include/asm/unistd.h
index dfa53fdd5cbc..0aac1e8f2968 100644
--- a/arch/sparc/include/asm/unistd.h
+++ b/arch/sparc/include/asm/unistd.h
@@ -25,7 +25,6 @@
#define __ARCH_WANT_SYS_ALARM
#define __ARCH_WANT_SYS_GETHOSTNAME
#define __ARCH_WANT_SYS_PAUSE
-#define __ARCH_WANT_SYS_SGETMASK
#define __ARCH_WANT_SYS_SIGNAL
#define __ARCH_WANT_SYS_TIME
#define __ARCH_WANT_SYS_UTIME
diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c
index d7b4967f8fa6..9975a6dbca0a 100644
--- a/arch/sparc/kernel/process_64.c
+++ b/arch/sparc/kernel/process_64.c
@@ -239,7 +239,7 @@ static void __global_reg_poll(struct global_reg_snapshot *gp)
}
}
-void arch_trigger_all_cpu_backtrace(void)
+void arch_trigger_all_cpu_backtrace(bool include_self)
{
struct thread_info *tp = current_thread_info();
struct pt_regs *regs = get_irq_regs();
@@ -251,16 +251,22 @@ void arch_trigger_all_cpu_backtrace(void)
spin_lock_irqsave(&global_cpu_snapshot_lock, flags);
- memset(global_cpu_snapshot, 0, sizeof(global_cpu_snapshot));
-
this_cpu = raw_smp_processor_id();
- __global_reg_self(tp, regs, this_cpu);
+ memset(global_cpu_snapshot, 0, sizeof(global_cpu_snapshot));
+
+ if (include_self)
+ __global_reg_self(tp, regs, this_cpu);
smp_fetch_global_regs();
for_each_online_cpu(cpu) {
- struct global_reg_snapshot *gp = &global_cpu_snapshot[cpu].reg;
+ struct global_reg_snapshot *gp;
+
+ if (!include_self && cpu == this_cpu)
+ continue;
+
+ gp = &global_cpu_snapshot[cpu].reg;
__global_reg_poll(gp);
@@ -292,7 +298,7 @@ void arch_trigger_all_cpu_backtrace(void)
static void sysrq_handle_globreg(int key)
{
- arch_trigger_all_cpu_backtrace();
+ arch_trigger_all_cpu_backtrace(true);
}
static struct sysrq_key_op sparc_globalreg_op = {
diff --git a/arch/tile/mm/homecache.c b/arch/tile/mm/homecache.c
index 004ba568d93f..33294fdc402e 100644
--- a/arch/tile/mm/homecache.c
+++ b/arch/tile/mm/homecache.c
@@ -417,7 +417,7 @@ void __homecache_free_pages(struct page *page, unsigned int order)
if (put_page_testzero(page)) {
homecache_change_page_home(page, order, PAGE_HOME_HASH);
if (order == 0) {
- free_hot_cold_page(page, 0);
+ free_hot_cold_page(page, false);
} else {
init_page_count(page);
__free_pages(page, order);
diff --git a/arch/um/include/asm/Kbuild b/arch/um/include/asm/Kbuild
index a5e4b6068213..7bd64aa2e94a 100644
--- a/arch/um/include/asm/Kbuild
+++ b/arch/um/include/asm/Kbuild
@@ -21,6 +21,7 @@ generic-y += param.h
generic-y += pci.h
generic-y += percpu.h
generic-y += preempt.h
+generic-y += scatterlist.h
generic-y += sections.h
generic-y += switch_to.h
generic-y += topology.h
diff --git a/arch/unicore32/mm/ioremap.c b/arch/unicore32/mm/ioremap.c
index 13068ee22f33..bf012b2b71a9 100644
--- a/arch/unicore32/mm/ioremap.c
+++ b/arch/unicore32/mm/ioremap.c
@@ -144,11 +144,11 @@ void __iomem *__uc32_ioremap_pfn_caller(unsigned long pfn,
* Don't allow RAM to be mapped
*/
if (pfn_valid(pfn)) {
- printk(KERN_WARNING "BUG: Your driver calls ioremap() on\n"
+ WARN(1, "BUG: Your driver calls ioremap() on\n"
"system memory. This leads to architecturally\n"
"unpredictable behaviour, and ioremap() will fail in\n"
"the next kernel release. Please fix your driver.\n");
- WARN_ON(1);
+ return NULL;
}
type = get_mem_type(mtype);
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index e82ef3b93131..e7523ca269d0 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -26,7 +26,7 @@ config X86
select ARCH_MIGHT_HAVE_PC_SERIO
select HAVE_AOUT if X86_32
select HAVE_UNSTABLE_SCHED_CLOCK
- select ARCH_SUPPORTS_NUMA_BALANCING
+ select ARCH_SUPPORTS_NUMA_BALANCING if X86_64
select ARCH_SUPPORTS_INT128 if X86_64
select ARCH_WANTS_PROT_NUMA_PROT_NONE
select HAVE_IDE
@@ -41,7 +41,7 @@ config X86
select ARCH_WANT_OPTIONAL_GPIOLIB
select ARCH_WANT_FRAME_POINTERS
select HAVE_DMA_ATTRS
- select HAVE_DMA_CONTIGUOUS if !SWIOTLB
+ select HAVE_DMA_CONTIGUOUS
select HAVE_KRETPROBES
select GENERIC_EARLY_IOREMAP
select HAVE_OPTPROBES
@@ -96,6 +96,7 @@ config X86
select IRQ_FORCED_THREADING
select HAVE_BPF_JIT if X86_64
select HAVE_ARCH_TRANSPARENT_HUGEPAGE
+ select ARCH_HAS_SG_CHAIN
select CLKEVT_I8253
select ARCH_HAVE_NMI_SAFE_CMPXCHG
select GENERIC_IOMAP
@@ -105,7 +106,7 @@ config X86
select HAVE_ARCH_SECCOMP_FILTER
select BUILDTIME_EXTABLE_SORT
select GENERIC_CMOS_UPDATE
- select HAVE_ARCH_SOFT_DIRTY
+ select HAVE_ARCH_SOFT_DIRTY if X86_64
select CLOCKSOURCE_WATCHDOG
select GENERIC_CLOCKEVENTS
select ARCH_CLOCKSOURCE_DATA
diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild
index 3ca9762e1649..3bf000fab0ae 100644
--- a/arch/x86/include/asm/Kbuild
+++ b/arch/x86/include/asm/Kbuild
@@ -5,6 +5,7 @@ genhdr-y += unistd_64.h
genhdr-y += unistd_x32.h
generic-y += clkdev.h
-generic-y += early_ioremap.h
generic-y += cputime.h
+generic-y += early_ioremap.h
generic-y += mcs_spinlock.h
+generic-y += scatterlist.h
diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h
index cb6cfcd034cf..a80cbb88ea91 100644
--- a/arch/x86/include/asm/irq.h
+++ b/arch/x86/include/asm/irq.h
@@ -43,7 +43,7 @@ extern int vector_used_by_percpu_irq(unsigned int vector);
extern void init_ISA_irqs(void);
#ifdef CONFIG_X86_LOCAL_APIC
-void arch_trigger_all_cpu_backtrace(void);
+void arch_trigger_all_cpu_backtrace(bool);
#define arch_trigger_all_cpu_backtrace arch_trigger_all_cpu_backtrace
#endif
diff --git a/arch/x86/include/asm/pgtable-2level.h b/arch/x86/include/asm/pgtable-2level.h
index 0d193e234647..206a87fdd22d 100644
--- a/arch/x86/include/asm/pgtable-2level.h
+++ b/arch/x86/include/asm/pgtable-2level.h
@@ -62,66 +62,14 @@ static inline unsigned long pte_bitop(unsigned long value, unsigned int rightshi
return ((value >> rightshift) & mask) << leftshift;
}
-#ifdef CONFIG_MEM_SOFT_DIRTY
-
-/*
- * Bits _PAGE_BIT_PRESENT, _PAGE_BIT_FILE, _PAGE_BIT_SOFT_DIRTY and
- * _PAGE_BIT_PROTNONE are taken, split up the 28 bits of offset
- * into this range.
- */
-#define PTE_FILE_MAX_BITS 28
-#define PTE_FILE_SHIFT1 (_PAGE_BIT_PRESENT + 1)
-#define PTE_FILE_SHIFT2 (_PAGE_BIT_FILE + 1)
-#define PTE_FILE_SHIFT3 (_PAGE_BIT_PROTNONE + 1)
-#define PTE_FILE_SHIFT4 (_PAGE_BIT_SOFT_DIRTY + 1)
-#define PTE_FILE_BITS1 (PTE_FILE_SHIFT2 - PTE_FILE_SHIFT1 - 1)
-#define PTE_FILE_BITS2 (PTE_FILE_SHIFT3 - PTE_FILE_SHIFT2 - 1)
-#define PTE_FILE_BITS3 (PTE_FILE_SHIFT4 - PTE_FILE_SHIFT3 - 1)
-
-#define PTE_FILE_MASK1 ((1U << PTE_FILE_BITS1) - 1)
-#define PTE_FILE_MASK2 ((1U << PTE_FILE_BITS2) - 1)
-#define PTE_FILE_MASK3 ((1U << PTE_FILE_BITS3) - 1)
-
-#define PTE_FILE_LSHIFT2 (PTE_FILE_BITS1)
-#define PTE_FILE_LSHIFT3 (PTE_FILE_BITS1 + PTE_FILE_BITS2)
-#define PTE_FILE_LSHIFT4 (PTE_FILE_BITS1 + PTE_FILE_BITS2 + PTE_FILE_BITS3)
-
-static __always_inline pgoff_t pte_to_pgoff(pte_t pte)
-{
- return (pgoff_t)
- (pte_bitop(pte.pte_low, PTE_FILE_SHIFT1, PTE_FILE_MASK1, 0) +
- pte_bitop(pte.pte_low, PTE_FILE_SHIFT2, PTE_FILE_MASK2, PTE_FILE_LSHIFT2) +
- pte_bitop(pte.pte_low, PTE_FILE_SHIFT3, PTE_FILE_MASK3, PTE_FILE_LSHIFT3) +
- pte_bitop(pte.pte_low, PTE_FILE_SHIFT4, -1UL, PTE_FILE_LSHIFT4));
-}
-
-static __always_inline pte_t pgoff_to_pte(pgoff_t off)
-{
- return (pte_t){
- .pte_low =
- pte_bitop(off, 0, PTE_FILE_MASK1, PTE_FILE_SHIFT1) +
- pte_bitop(off, PTE_FILE_LSHIFT2, PTE_FILE_MASK2, PTE_FILE_SHIFT2) +
- pte_bitop(off, PTE_FILE_LSHIFT3, PTE_FILE_MASK3, PTE_FILE_SHIFT3) +
- pte_bitop(off, PTE_FILE_LSHIFT4, -1UL, PTE_FILE_SHIFT4) +
- _PAGE_FILE,
- };
-}
-
-#else /* CONFIG_MEM_SOFT_DIRTY */
-
/*
* Bits _PAGE_BIT_PRESENT, _PAGE_BIT_FILE and _PAGE_BIT_PROTNONE are taken,
* split up the 29 bits of offset into this range.
*/
#define PTE_FILE_MAX_BITS 29
#define PTE_FILE_SHIFT1 (_PAGE_BIT_PRESENT + 1)
-#if _PAGE_BIT_FILE < _PAGE_BIT_PROTNONE
#define PTE_FILE_SHIFT2 (_PAGE_BIT_FILE + 1)
#define PTE_FILE_SHIFT3 (_PAGE_BIT_PROTNONE + 1)
-#else
-#define PTE_FILE_SHIFT2 (_PAGE_BIT_PROTNONE + 1)
-#define PTE_FILE_SHIFT3 (_PAGE_BIT_FILE + 1)
-#endif
#define PTE_FILE_BITS1 (PTE_FILE_SHIFT2 - PTE_FILE_SHIFT1 - 1)
#define PTE_FILE_BITS2 (PTE_FILE_SHIFT3 - PTE_FILE_SHIFT2 - 1)
@@ -150,16 +98,9 @@ static __always_inline pte_t pgoff_to_pte(pgoff_t off)
};
}
-#endif /* CONFIG_MEM_SOFT_DIRTY */
-
/* Encode and de-code a swap entry */
-#if _PAGE_BIT_FILE < _PAGE_BIT_PROTNONE
#define SWP_TYPE_BITS (_PAGE_BIT_FILE - _PAGE_BIT_PRESENT - 1)
#define SWP_OFFSET_SHIFT (_PAGE_BIT_PROTNONE + 1)
-#else
-#define SWP_TYPE_BITS (_PAGE_BIT_PROTNONE - _PAGE_BIT_PRESENT - 1)
-#define SWP_OFFSET_SHIFT (_PAGE_BIT_FILE + 1)
-#endif
#define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > SWP_TYPE_BITS)
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index b459ddf27d64..0ec056012618 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -131,7 +131,8 @@ static inline int pte_exec(pte_t pte)
static inline int pte_special(pte_t pte)
{
- return pte_flags(pte) & _PAGE_SPECIAL;
+ return (pte_flags(pte) & (_PAGE_PRESENT|_PAGE_SPECIAL)) ==
+ (_PAGE_PRESENT|_PAGE_SPECIAL);
}
static inline unsigned long pte_pfn(pte_t pte)
@@ -296,6 +297,7 @@ static inline pmd_t pmd_mknotpresent(pmd_t pmd)
return pmd_clear_flags(pmd, _PAGE_PRESENT);
}
+#ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
static inline int pte_soft_dirty(pte_t pte)
{
return pte_flags(pte) & _PAGE_SOFT_DIRTY;
@@ -331,6 +333,8 @@ static inline int pte_file_soft_dirty(pte_t pte)
return pte_flags(pte) & _PAGE_SOFT_DIRTY;
}
+#endif /* CONFIG_HAVE_ARCH_SOFT_DIRTY */
+
/*
* Mask out unsupported bits in a present pgprot. Non-present pgprots
* can use those bits for other purposes, so leave them be.
@@ -452,6 +456,12 @@ static inline int pte_present(pte_t a)
_PAGE_NUMA);
}
+#define pte_present_nonuma pte_present_nonuma
+static inline int pte_present_nonuma(pte_t a)
+{
+ return pte_flags(a) & (_PAGE_PRESENT | _PAGE_PROTNONE);
+}
+
#define pte_accessible pte_accessible
static inline bool pte_accessible(struct mm_struct *mm, pte_t a)
{
@@ -858,23 +868,25 @@ static inline void update_mmu_cache_pmd(struct vm_area_struct *vma,
{
}
+#ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
static inline pte_t pte_swp_mksoft_dirty(pte_t pte)
{
- VM_BUG_ON(pte_present(pte));
+ VM_BUG_ON(pte_present_nonuma(pte));
return pte_set_flags(pte, _PAGE_SWP_SOFT_DIRTY);
}
static inline int pte_swp_soft_dirty(pte_t pte)
{
- VM_BUG_ON(pte_present(pte));
+ VM_BUG_ON(pte_present_nonuma(pte));
return pte_flags(pte) & _PAGE_SWP_SOFT_DIRTY;
}
static inline pte_t pte_swp_clear_soft_dirty(pte_t pte)
{
- VM_BUG_ON(pte_present(pte));
+ VM_BUG_ON(pte_present_nonuma(pte));
return pte_clear_flags(pte, _PAGE_SWP_SOFT_DIRTY);
}
+#endif
#include <asm-generic/pgtable.h>
#endif /* __ASSEMBLY__ */
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index e22c1dbf7feb..5be9063545d2 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -143,12 +143,12 @@ static inline int pgd_large(pgd_t pgd) { return 0; }
#define pte_unmap(pte) ((void)(pte))/* NOP */
/* Encode and de-code a swap entry */
-#if _PAGE_BIT_FILE < _PAGE_BIT_PROTNONE
#define SWP_TYPE_BITS (_PAGE_BIT_FILE - _PAGE_BIT_PRESENT - 1)
-#define SWP_OFFSET_SHIFT (_PAGE_BIT_PROTNONE + 1)
+#ifdef CONFIG_NUMA_BALANCING
+/* Automatic NUMA balancing needs to be distinguishable from swap entries */
+#define SWP_OFFSET_SHIFT (_PAGE_BIT_PROTNONE + 2)
#else
-#define SWP_TYPE_BITS (_PAGE_BIT_PROTNONE - _PAGE_BIT_PRESENT - 1)
-#define SWP_OFFSET_SHIFT (_PAGE_BIT_FILE + 1)
+#define SWP_OFFSET_SHIFT (_PAGE_BIT_PROTNONE + 1)
#endif
#define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > SWP_TYPE_BITS)
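With CONFIG_NUMA_BALANCING enabled, the swap offset now starts at _PAGE_BIT_PROTNONE + 2 rather than + 1, so a set _PAGE_NUMA bit can never alias part of a swap offset. Giving up one offset bit halves the encodable swap space, which is where the 16TB-to-8TB figure in the pgtable_types.h comment below comes from. Back-of-envelope check (illustrative, 4 KiB pages; the exact bit counts depend on the pte layout, the halving does not):

/*
 *   max swap = 2^offset_bits * PAGE_SIZE
 *   2^32 offsets * 4 KiB = 16 TiB    (old shift)
 *   2^31 offsets * 4 KiB =  8 TiB    (one bit fewer, NUMA balancing on)
 */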
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index eb3d44945133..f216963760e5 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -16,15 +16,26 @@
#define _PAGE_BIT_PSE 7 /* 4 MB (or 2MB) page */
#define _PAGE_BIT_PAT 7 /* on 4KB pages */
#define _PAGE_BIT_GLOBAL 8 /* Global TLB entry PPro+ */
-#define _PAGE_BIT_UNUSED1 9 /* available for programmer */
-#define _PAGE_BIT_IOMAP 10 /* flag used to indicate IO mapping */
-#define _PAGE_BIT_HIDDEN 11 /* hidden by kmemcheck */
+#define _PAGE_BIT_SOFTW1 9 /* available for programmer */
+#define _PAGE_BIT_SOFTW2 10 /* " */
+#define _PAGE_BIT_SOFTW3 11 /* " */
#define _PAGE_BIT_PAT_LARGE 12 /* On 2MB or 1GB pages */
-#define _PAGE_BIT_SPECIAL _PAGE_BIT_UNUSED1
-#define _PAGE_BIT_CPA_TEST _PAGE_BIT_UNUSED1
-#define _PAGE_BIT_SPLITTING _PAGE_BIT_UNUSED1 /* only valid on a PSE pmd */
+#define _PAGE_BIT_SPECIAL _PAGE_BIT_SOFTW1
+#define _PAGE_BIT_CPA_TEST _PAGE_BIT_SOFTW1
+#define _PAGE_BIT_SPLITTING _PAGE_BIT_SOFTW2 /* only valid on a PSE pmd */
+#define _PAGE_BIT_IOMAP _PAGE_BIT_SOFTW2 /* flag used to indicate IO mapping */
+#define _PAGE_BIT_HIDDEN _PAGE_BIT_SOFTW3 /* hidden by kmemcheck */
+#define _PAGE_BIT_SOFT_DIRTY _PAGE_BIT_SOFTW3 /* software dirty tracking */
#define _PAGE_BIT_NX 63 /* No execute: only valid after cpuid check */
+/*
+ * Swap offsets on configurations that allow automatic NUMA balancing use the
+ * bits after _PAGE_BIT_GLOBAL. To uniquely distinguish NUMA hinting PTEs from
+ * swap entries, we use the first bit after _PAGE_BIT_GLOBAL and shrink the
+ * maximum possible swap space from 16TB to 8TB.
+ */
+#define _PAGE_BIT_NUMA (_PAGE_BIT_GLOBAL+1)
+
/* If _PAGE_BIT_PRESENT is clear, we use these: */
/* - if the user mapped it with PROT_NONE; pte_present gives true */
#define _PAGE_BIT_PROTNONE _PAGE_BIT_GLOBAL
@@ -40,7 +51,7 @@
#define _PAGE_DIRTY (_AT(pteval_t, 1) << _PAGE_BIT_DIRTY)
#define _PAGE_PSE (_AT(pteval_t, 1) << _PAGE_BIT_PSE)
#define _PAGE_GLOBAL (_AT(pteval_t, 1) << _PAGE_BIT_GLOBAL)
-#define _PAGE_UNUSED1 (_AT(pteval_t, 1) << _PAGE_BIT_UNUSED1)
+#define _PAGE_SOFTW1 (_AT(pteval_t, 1) << _PAGE_BIT_SOFTW1)
#define _PAGE_IOMAP (_AT(pteval_t, 1) << _PAGE_BIT_IOMAP)
#define _PAGE_PAT (_AT(pteval_t, 1) << _PAGE_BIT_PAT)
#define _PAGE_PAT_LARGE (_AT(pteval_t, 1) << _PAGE_BIT_PAT_LARGE)
@@ -61,8 +72,6 @@
* they do not conflict with each other.
*/
-#define _PAGE_BIT_SOFT_DIRTY _PAGE_BIT_HIDDEN
-
#ifdef CONFIG_MEM_SOFT_DIRTY
#define _PAGE_SOFT_DIRTY (_AT(pteval_t, 1) << _PAGE_BIT_SOFT_DIRTY)
#else
@@ -70,6 +79,21 @@
#endif
/*
+ * _PAGE_NUMA distinguishes between a numa hinting minor fault and a page
+ * that is not present. The hinting fault gathers numa placement statistics
+ * (see pte_numa()). The bit is always zero when the PTE is not present.
+ *
+ * The bit picked must always be zero both when the pmd is present and
+ * when it is not, so that we don't lose information when we set it while
+ * atomically clearing the present bit.
+ */
+#ifdef CONFIG_NUMA_BALANCING
+#define _PAGE_NUMA (_AT(pteval_t, 1) << _PAGE_BIT_NUMA)
+#else
+#define _PAGE_NUMA (_AT(pteval_t, 0))
+#endif
+
+/*
* Tracking soft dirty bit when a page goes to a swap is tricky.
* We need a bit which can be stored in pte _and_ not conflict
* with swap entry format. On x86 bits 6 and 7 are *not* involved
@@ -94,26 +118,6 @@
#define _PAGE_FILE (_AT(pteval_t, 1) << _PAGE_BIT_FILE)
#define _PAGE_PROTNONE (_AT(pteval_t, 1) << _PAGE_BIT_PROTNONE)
-/*
- * _PAGE_NUMA indicates that this page will trigger a numa hinting
- * minor page fault to gather numa placement statistics (see
- * pte_numa()). The bit picked (8) is within the range between
- * _PAGE_FILE (6) and _PAGE_PROTNONE (8) bits. Therefore, it doesn't
- * require changes to the swp entry format because that bit is always
- * zero when the pte is not present.
- *
- * The bit picked must be always zero when the pmd is present and not
- * present, so that we don't lose information when we set it while
- * atomically clearing the present bit.
- *
- * Because we shared the same bit (8) with _PAGE_PROTNONE this can be
- * interpreted as _PAGE_NUMA only in places that _PAGE_PROTNONE
- * couldn't reach, like handle_mm_fault() (see access_error in
- * arch/x86/mm/fault.c, the vma protection must not be PROT_NONE for
- * handle_mm_fault() to be invoked).
- */
-#define _PAGE_NUMA _PAGE_PROTNONE
-
#define _PAGE_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | \
_PAGE_ACCESSED | _PAGE_DIRTY)
#define _KERNPG_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | \
@@ -122,8 +126,8 @@
/* Set of bits not changed in pte_modify */
#define _PAGE_CHG_MASK (PTE_PFN_MASK | _PAGE_PCD | _PAGE_PWT | \
_PAGE_SPECIAL | _PAGE_ACCESSED | _PAGE_DIRTY | \
- _PAGE_SOFT_DIRTY)
-#define _HPAGE_CHG_MASK (_PAGE_CHG_MASK | _PAGE_PSE)
+ _PAGE_SOFT_DIRTY | _PAGE_NUMA)
+#define _HPAGE_CHG_MASK (_PAGE_CHG_MASK | _PAGE_PSE | _PAGE_NUMA)
#define _PAGE_CACHE_MASK (_PAGE_PCD | _PAGE_PWT)
#define _PAGE_CACHE_WB (0)
diff --git a/arch/x86/include/asm/scatterlist.h b/arch/x86/include/asm/scatterlist.h
deleted file mode 100644
index 4240878b9d76..000000000000
--- a/arch/x86/include/asm/scatterlist.h
+++ /dev/null
@@ -1,8 +0,0 @@
-#ifndef _ASM_X86_SCATTERLIST_H
-#define _ASM_X86_SCATTERLIST_H
-
-#include <asm-generic/scatterlist.h>
-
-#define ARCH_HAS_SG_CHAIN
-
-#endif /* _ASM_X86_SCATTERLIST_H */
diff --git a/arch/x86/include/asm/signal.h b/arch/x86/include/asm/signal.h
index 35e67a457182..31eab867e6d3 100644
--- a/arch/x86/include/asm/signal.h
+++ b/arch/x86/include/asm/signal.h
@@ -92,12 +92,6 @@ static inline int __gen_sigismember(sigset_t *set, int _sig)
? __const_sigismember((set), (sig)) \
: __gen_sigismember((set), (sig)))
-static inline int sigfindinword(unsigned long word)
-{
- asm("bsfl %1,%0" : "=r"(word) : "rm"(word) : "cc");
- return word;
-}
-
struct pt_regs;
#else /* __i386__ */
diff --git a/arch/x86/include/asm/swiotlb.h b/arch/x86/include/asm/swiotlb.h
index 977f1761a25d..ab05d73e2bb7 100644
--- a/arch/x86/include/asm/swiotlb.h
+++ b/arch/x86/include/asm/swiotlb.h
@@ -29,4 +29,11 @@ static inline void pci_swiotlb_late_init(void)
static inline void dma_mark_clean(void *addr, size_t size) {}
+extern void *x86_swiotlb_alloc_coherent(struct device *hwdev, size_t size,
+ dma_addr_t *dma_handle, gfp_t flags,
+ struct dma_attrs *attrs);
+extern void x86_swiotlb_free_coherent(struct device *dev, size_t size,
+ void *vaddr, dma_addr_t dma_addr,
+ struct dma_attrs *attrs);
+
#endif /* _ASM_X86_SWIOTLB_H */
diff --git a/arch/x86/include/asm/unistd.h b/arch/x86/include/asm/unistd.h
index 3f556c6a0157..2b19caa4081c 100644
--- a/arch/x86/include/asm/unistd.h
+++ b/arch/x86/include/asm/unistd.h
@@ -41,7 +41,6 @@
# define __ARCH_WANT_SYS_OLD_GETRLIMIT
# define __ARCH_WANT_SYS_OLD_UNAME
# define __ARCH_WANT_SYS_PAUSE
-# define __ARCH_WANT_SYS_SGETMASK
# define __ARCH_WANT_SYS_SIGNAL
# define __ARCH_WANT_SYS_SIGPENDING
# define __ARCH_WANT_SYS_SIGPROCMASK
diff --git a/arch/x86/kernel/amd_gart_64.c b/arch/x86/kernel/amd_gart_64.c
index b574b295a2f9..8e3842fc8bea 100644
--- a/arch/x86/kernel/amd_gart_64.c
+++ b/arch/x86/kernel/amd_gart_64.c
@@ -512,7 +512,7 @@ gart_free_coherent(struct device *dev, size_t size, void *vaddr,
dma_addr_t dma_addr, struct dma_attrs *attrs)
{
gart_unmap_page(dev, dma_addr, size, DMA_BIDIRECTIONAL, NULL);
- free_pages((unsigned long)vaddr, get_order(size));
+ dma_generic_free_coherent(dev, size, vaddr, dma_addr, attrs);
}
static int gart_mapping_error(struct device *dev, dma_addr_t dma_addr)
diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c
index c3fcb5de5083..3dbfe1ef082b 100644
--- a/arch/x86/kernel/apic/hw_nmi.c
+++ b/arch/x86/kernel/apic/hw_nmi.c
@@ -33,31 +33,40 @@ static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly;
/* "in progress" flag of arch_trigger_all_cpu_backtrace */
static unsigned long backtrace_flag;
-void arch_trigger_all_cpu_backtrace(void)
+void arch_trigger_all_cpu_backtrace(bool include_self)
{
int i;
+ int cpu = get_cpu();
- if (test_and_set_bit(0, &backtrace_flag))
+ if (test_and_set_bit(0, &backtrace_flag)) {
/*
* If there is already a trigger_all_cpu_backtrace() in progress
* (backtrace_flag == 1), don't print a duplicate set of CPU dumps.
*/
+ put_cpu();
return;
+ }
cpumask_copy(to_cpumask(backtrace_mask), cpu_online_mask);
+ if (!include_self)
+ cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask));
- printk(KERN_INFO "sending NMI to all CPUs:\n");
- apic->send_IPI_all(NMI_VECTOR);
+ if (!cpumask_empty(to_cpumask(backtrace_mask))) {
+ pr_info("sending NMI to %s CPUs:\n", (include_self ? "all" : "other"));
+ apic->send_IPI_mask(to_cpumask(backtrace_mask), NMI_VECTOR);
+ }
/* Wait for up to 10 seconds for all CPUs to do the backtrace */
for (i = 0; i < 10 * 1000; i++) {
if (cpumask_empty(to_cpumask(backtrace_mask)))
break;
mdelay(1);
+ touch_softlockup_watchdog();
}
clear_bit(0, &backtrace_flag);
smp_mb__after_atomic();
+ put_cpu();
}
static int
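arch_trigger_all_cpu_backtrace() now takes a bool so a caller that is already printing its own stack can exclude the current CPU, and the hunk pins the CPU with get_cpu()/put_cpu() so that decision stays valid while the mask is built. A hedged sketch of a generic caller (the wrapper name is an assumption, not shown in this diff):

static inline bool try_all_cpu_backtrace(bool include_self)
{
#ifdef arch_trigger_all_cpu_backtrace
	arch_trigger_all_cpu_backtrace(include_self);
	return true;
#else
	return false;	/* this arch has no NMI backtrace hook */
#endif
}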
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index f7d0672481fd..a25e202bb319 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -97,12 +97,17 @@ void *dma_generic_alloc_coherent(struct device *dev, size_t size,
dma_mask = dma_alloc_coherent_mask(dev, flag);
- flag |= __GFP_ZERO;
+ flag &= ~__GFP_ZERO;
again:
page = NULL;
/* CMA can be used only in the context which permits sleeping */
- if (flag & __GFP_WAIT)
+ if (flag & __GFP_WAIT) {
page = dma_alloc_from_contiguous(dev, count, get_order(size));
+ if (page && page_to_phys(page) + size > dma_mask) {
+ dma_release_from_contiguous(dev, page, count);
+ page = NULL;
+ }
+ }
/* fallback */
if (!page)
page = alloc_pages_node(dev_to_node(dev), flag, get_order(size));
@@ -120,7 +125,7 @@ again:
return NULL;
}
-
+ memset(page_address(page), 0, size);
*dma_addr = addr;
return page_address(page);
}
diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c
index 6c483ba98b9c..77dd0ad58be4 100644
--- a/arch/x86/kernel/pci-swiotlb.c
+++ b/arch/x86/kernel/pci-swiotlb.c
@@ -14,7 +14,7 @@
#include <asm/iommu_table.h>
int swiotlb __read_mostly;
-static void *x86_swiotlb_alloc_coherent(struct device *hwdev, size_t size,
+void *x86_swiotlb_alloc_coherent(struct device *hwdev, size_t size,
dma_addr_t *dma_handle, gfp_t flags,
struct dma_attrs *attrs)
{
@@ -28,11 +28,14 @@ static void *x86_swiotlb_alloc_coherent(struct device *hwdev, size_t size,
return swiotlb_alloc_coherent(hwdev, size, dma_handle, flags);
}
-static void x86_swiotlb_free_coherent(struct device *dev, size_t size,
+void x86_swiotlb_free_coherent(struct device *dev, size_t size,
void *vaddr, dma_addr_t dma_addr,
struct dma_attrs *attrs)
{
- swiotlb_free_coherent(dev, size, vaddr, dma_addr);
+ if (is_swiotlb_buffer(dma_to_phys(dev, dma_addr)))
+ swiotlb_free_coherent(dev, size, vaddr, dma_addr);
+ else
+ dma_generic_free_coherent(dev, size, vaddr, dma_addr, attrs);
}
static struct dma_map_ops swiotlb_dma_ops = {
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 09c76d265550..78a0e6298922 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -1119,7 +1119,7 @@ void __init setup_arch(char **cmdline_p)
setup_real_mode();
memblock_set_current_limit(get_max_mapped());
- dma_contiguous_reserve(0);
+ dma_contiguous_reserve(max_pfn_mapped << PAGE_SHIFT);
/*
* NOTE: On x86-32, only from this point on, fixmaps are ready for use.
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 6f881842116c..3c7205d4de52 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -1231,17 +1231,43 @@ const char *arch_vma_name(struct vm_area_struct *vma)
return NULL;
}
-#ifdef CONFIG_X86_UV
-unsigned long memory_block_size_bytes(void)
+static unsigned long probe_memory_block_size(void)
{
+ /* start from 2g */
+ unsigned long bz = 1UL<<31;
+
+#ifdef CONFIG_X86_UV
if (is_uv_system()) {
printk(KERN_INFO "UV: memory block size 2GB\n");
return 2UL * 1024 * 1024 * 1024;
}
- return MIN_MEMORY_BLOCK_SIZE;
-}
#endif
+ /* less than 64g installed */
+ if ((max_pfn << PAGE_SHIFT) < (16UL << 32))
+ return MIN_MEMORY_BLOCK_SIZE;
+
+ /* find the largest power-of-2 block size that divides the end of memory */
+ while (bz > MIN_MEMORY_BLOCK_SIZE) {
+ if (!((max_pfn << PAGE_SHIFT) & (bz - 1)))
+ break;
+ bz >>= 1;
+ }
+
+ printk(KERN_DEBUG "memory block size : %ldMB\n", bz >> 20);
+
+ return bz;
+}
+
+static unsigned long memory_block_size_probed;
+unsigned long memory_block_size_bytes(void)
+{
+ if (!memory_block_size_probed)
+ memory_block_size_probed = probe_memory_block_size();
+
+ return memory_block_size_probed;
+}
+
#ifdef CONFIG_SPARSEMEM_VMEMMAP
/*
* Initialise the sparsemem vmemmap using huge-pages at the PMD level.
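probe_memory_block_size() picks the largest power-of-two block size, capped at 2 GiB, that exactly divides the end of memory; machines under 64 GiB keep MIN_MEMORY_BLOCK_SIZE. A worked example with hypothetical sizes:

/*
 *   end = 65 GiB:  over the 64 GiB threshold, so probe the tail:
 *     bz = 2 GiB -> 65 GiB is not a 2 GiB multiple, halve
 *     bz = 1 GiB -> divides 65 GiB exactly, use 1 GiB blocks
 *   end = 48 GiB:  under the threshold, MIN_MEMORY_BLOCK_SIZE
 */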
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index 1d045f9c390f..a32b706c401a 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -559,7 +559,7 @@ static void __init numa_clear_kernel_node_hotplug(void)
int i, nid;
nodemask_t numa_kernel_nodes = NODE_MASK_NONE;
unsigned long start, end;
- struct memblock_type *type = &memblock.reserved;
+ struct memblock_region *r;
/*
* At this time, all memory regions reserved by memblock are
@@ -573,8 +573,8 @@ static void __init numa_clear_kernel_node_hotplug(void)
}
/* Mark all kernel nodes. */
- for (i = 0; i < type->cnt; i++)
- node_set(type->regions[i].nid, numa_kernel_nodes);
+ for_each_memblock(reserved, r)
+ node_set(r->nid, numa_kernel_nodes);
/* Clear MEMBLOCK_HOTPLUG flag for memory in kernel nodes. */
for (i = 0; i < numa_meminfo.nr_blks; i++) {
diff --git a/arch/x86/mm/pageattr-test.c b/arch/x86/mm/pageattr-test.c
index 461bc8289024..6629f397b467 100644
--- a/arch/x86/mm/pageattr-test.c
+++ b/arch/x86/mm/pageattr-test.c
@@ -35,7 +35,7 @@ enum {
static int pte_testbit(pte_t pte)
{
- return pte_flags(pte) & _PAGE_UNUSED1;
+ return pte_flags(pte) & _PAGE_SOFTW1;
}
struct split_state {
diff --git a/arch/x86/pci/sta2x11-fixup.c b/arch/x86/pci/sta2x11-fixup.c
index 9d8a509c9730..5ceda85b8687 100644
--- a/arch/x86/pci/sta2x11-fixup.c
+++ b/arch/x86/pci/sta2x11-fixup.c
@@ -173,9 +173,7 @@ static void *sta2x11_swiotlb_alloc_coherent(struct device *dev,
{
void *vaddr;
- vaddr = dma_generic_alloc_coherent(dev, size, dma_handle, flags, attrs);
- if (!vaddr)
- vaddr = swiotlb_alloc_coherent(dev, size, dma_handle, flags);
+ vaddr = x86_swiotlb_alloc_coherent(dev, size, dma_handle, flags, attrs);
*dma_handle = p2a(*dma_handle, to_pci_dev(dev));
return vaddr;
}
@@ -183,7 +181,7 @@ static void *sta2x11_swiotlb_alloc_coherent(struct device *dev,
/* We have our own dma_ops: the same as swiotlb but from alloc (above) */
static struct dma_map_ops sta2x11_dma_ops = {
.alloc = sta2x11_swiotlb_alloc_coherent,
- .free = swiotlb_free_coherent,
+ .free = x86_swiotlb_free_coherent,
.map_page = swiotlb_map_page,
.unmap_page = swiotlb_unmap_page,
.map_sg = swiotlb_map_sg_attrs,
diff --git a/block/genhd.c b/block/genhd.c
index 791f41943132..7bd4372e8b6f 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -849,7 +849,7 @@ static int show_partition(struct seq_file *seqf, void *v)
char buf[BDEVNAME_SIZE];
/* Don't show non-partitionable removable devices or empty devices */
- if (!get_capacity(sgp) || (!disk_max_parts(sgp) &&
+ if (!get_capacity(sgp) || (!(disk_max_parts(sgp) > 1) &&
(sgp->flags & GENHD_FL_REMOVABLE)))
return 0;
if (sgp->flags & GENHD_FL_SUPPRESS_PARTITION_INFO)
diff --git a/drivers/base/Kconfig b/drivers/base/Kconfig
index 4b7b4522b64f..23b8726962af 100644
--- a/drivers/base/Kconfig
+++ b/drivers/base/Kconfig
@@ -258,7 +258,7 @@ endchoice
config CMA_ALIGNMENT
int "Maximum PAGE_SIZE order of alignment for contiguous buffers"
- range 4 9
+ range 4 12
default 8
help
DMA mapping framework by default aligns all buffers to the smallest
diff --git a/drivers/base/dma-contiguous.c b/drivers/base/dma-contiguous.c
index fe72bac96275..676f90b14fe9 100644
--- a/drivers/base/dma-contiguous.c
+++ b/drivers/base/dma-contiguous.c
@@ -60,11 +60,22 @@ struct cma *dma_contiguous_default_area;
*/
static const phys_addr_t size_bytes = CMA_SIZE_MBYTES * SZ_1M;
static phys_addr_t size_cmdline = -1;
+static phys_addr_t base_cmdline;
+static phys_addr_t limit_cmdline;
static int __init early_cma(char *p)
{
pr_debug("%s(%s)\n", __func__, p);
size_cmdline = memparse(p, &p);
+ if (*p != '@')
+ return 0;
+ base_cmdline = memparse(p + 1, &p);
+ if (*p != '-') {
+ limit_cmdline = base_cmdline + size_cmdline;
+ return 0;
+ }
+ limit_cmdline = memparse(p + 1, &p);
+
return 0;
}
early_param("cma", early_cma);
@@ -108,11 +119,18 @@ static inline __maybe_unused phys_addr_t cma_early_percent_memory(void)
void __init dma_contiguous_reserve(phys_addr_t limit)
{
phys_addr_t selected_size = 0;
+ phys_addr_t selected_base = 0;
+ phys_addr_t selected_limit = limit;
+ bool fixed = false;
pr_debug("%s(limit %08lx)\n", __func__, (unsigned long)limit);
if (size_cmdline != -1) {
selected_size = size_cmdline;
+ selected_base = base_cmdline;
+ selected_limit = min_not_zero(limit_cmdline, limit);
+ if (base_cmdline + size_cmdline == limit_cmdline)
+ fixed = true;
} else {
#ifdef CONFIG_CMA_SIZE_SEL_MBYTES
selected_size = size_bytes;
@@ -129,10 +147,12 @@ void __init dma_contiguous_reserve(phys_addr_t limit)
pr_debug("%s: reserving %ld MiB for global area\n", __func__,
(unsigned long)selected_size / SZ_1M);
- dma_contiguous_reserve_area(selected_size, 0, limit,
- &dma_contiguous_default_area);
+ dma_contiguous_reserve_area(selected_size, selected_base,
+ selected_limit,
+ &dma_contiguous_default_area,
+ fixed);
}
-};
+}
static DEFINE_MUTEX(cma_mutex);
@@ -189,15 +209,20 @@ core_initcall(cma_init_reserved_areas);
* @base: Base address of the reserved area optional, use 0 for any
* @limit: End address of the reserved memory (optional, 0 for any).
* @res_cma: Pointer to store the created cma region.
+ * @fixed: hint about where to place the reserved area
*
* This function reserves memory from the early allocator. It should be
* called by arch-specific code once the early allocator (memblock or bootmem)
* has been activated and all other subsystems have already allocated/reserved
* memory. This function allows the creation of custom reserved areas for specific
* devices.
+ *
+ * If @fixed is true, reserve a contiguous area at exactly @base. If false,
+ * reserve in range from @base to @limit.
*/
int __init dma_contiguous_reserve_area(phys_addr_t size, phys_addr_t base,
- phys_addr_t limit, struct cma **res_cma)
+ phys_addr_t limit, struct cma **res_cma,
+ bool fixed)
{
struct cma *cma = &cma_areas[cma_area_count];
phys_addr_t alignment;
@@ -223,18 +248,15 @@ int __init dma_contiguous_reserve_area(phys_addr_t size, phys_addr_t base,
limit &= ~(alignment - 1);
/* Reserve memory */
- if (base) {
+ if (base && fixed) {
if (memblock_is_region_reserved(base, size) ||
memblock_reserve(base, size) < 0) {
ret = -EBUSY;
goto err;
}
} else {
- /*
- * Use __memblock_alloc_base() since
- * memblock_alloc_base() panic()s.
- */
- phys_addr_t addr = __memblock_alloc_base(size, alignment, limit);
+ phys_addr_t addr = memblock_alloc_range(size, alignment, base,
+ limit);
if (!addr) {
ret = -ENOMEM;
goto err;
diff --git a/drivers/base/memory.c b/drivers/base/memory.c
index bece691cb5d9..89f752dd8465 100644
--- a/drivers/base/memory.c
+++ b/drivers/base/memory.c
@@ -118,16 +118,6 @@ static ssize_t show_mem_start_phys_index(struct device *dev,
return sprintf(buf, "%08lx\n", phys_index);
}
-static ssize_t show_mem_end_phys_index(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- struct memory_block *mem = to_memory_block(dev);
- unsigned long phys_index;
-
- phys_index = mem->end_section_nr / sections_per_block;
- return sprintf(buf, "%08lx\n", phys_index);
-}
-
/*
* Show whether the section of memory is likely to be hot-removable
*/
@@ -384,7 +374,6 @@ static ssize_t show_phys_device(struct device *dev,
}
static DEVICE_ATTR(phys_index, 0444, show_mem_start_phys_index, NULL);
-static DEVICE_ATTR(end_phys_index, 0444, show_mem_end_phys_index, NULL);
static DEVICE_ATTR(state, 0644, show_mem_state, store_mem_state);
static DEVICE_ATTR(phys_device, 0444, show_phys_device, NULL);
static DEVICE_ATTR(removable, 0444, show_mem_removable, NULL);
@@ -529,7 +518,6 @@ struct memory_block *find_memory_block(struct mem_section *section)
static struct attribute *memory_memblk_attrs[] = {
&dev_attr_phys_index.attr,
- &dev_attr_end_phys_index.attr,
&dev_attr_state.attr,
&dev_attr_phys_device.attr,
&dev_attr_removable.attr,
diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index e73b85cf0756..c7d138eca731 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -200,11 +200,11 @@ static int copy_to_brd_setup(struct brd_device *brd, sector_t sector, size_t n)
copy = min_t(size_t, n, PAGE_SIZE - offset);
if (!brd_insert_page(brd, sector))
- return -ENOMEM;
+ return -ENOSPC;
if (copy < n) {
sector += copy >> SECTOR_SHIFT;
if (!brd_insert_page(brd, sector))
- return -ENOMEM;
+ return -ENOSPC;
}
return 0;
}
@@ -360,6 +360,15 @@ out:
bio_endio(bio, err);
}
+static int brd_rw_page(struct block_device *bdev, sector_t sector,
+ struct page *page, int rw)
+{
+ struct brd_device *brd = bdev->bd_disk->private_data;
+ int err = brd_do_bvec(brd, page, PAGE_CACHE_SIZE, 0, rw, sector);
+ page_endio(page, rw & WRITE, err);
+ return err;
+}
+
#ifdef CONFIG_BLK_DEV_XIP
static int brd_direct_access(struct block_device *bdev, sector_t sector,
void **kaddr, unsigned long *pfn)
@@ -375,7 +384,7 @@ static int brd_direct_access(struct block_device *bdev, sector_t sector,
return -ERANGE;
page = brd_insert_page(brd, sector);
if (!page)
- return -ENOMEM;
+ return -ENOSPC;
*kaddr = page_address(page);
*pfn = page_to_pfn(page);
@@ -419,6 +428,7 @@ static int brd_ioctl(struct block_device *bdev, fmode_t mode,
static const struct block_device_operations brd_fops = {
.owner = THIS_MODULE,
+ .rw_page = brd_rw_page,
.ioctl = brd_ioctl,
#ifdef CONFIG_BLK_DEV_XIP
.direct_access = brd_direct_access,
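The new ->rw_page hook lets a single page be read or written without allocating a bio. A rough sketch of a caller, assuming the bdev_read_page() helper added earlier in this series (it returns -EOPNOTSUPP when the driver provides no ->rw_page); the helper name read_one_page is hypothetical:

    /* Synchronously read one page through ->rw_page when available. */
    static int read_one_page(struct block_device *bdev, sector_t sector,
                             struct page *page)
    {
        return bdev_read_page(bdev, sector, page);
    }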
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index 9849b5233bf4..48eccb350180 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -572,10 +572,10 @@ static void zram_bio_discard(struct zram *zram, u32 index,
* skipping this logical block is appropriate here.
*/
if (offset) {
- if (n < offset)
+ if (n <= (PAGE_SIZE - offset))
return;
- n -= offset;
+ n -= (PAGE_SIZE - offset);
index++;
}
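A worked example of the corrected skip arithmetic, as a standalone check with PAGE_SIZE assumed to be 4096 (illustrative values, not kernel code):

    #include <assert.h>
    #define PAGE_SIZE 4096UL

    int main(void)
    {
        unsigned long offset = 1000, n = 5000, index = 0;

        if (offset) {
            if (n <= PAGE_SIZE - offset)    /* no whole page covered */
                return 0;
            n -= PAGE_SIZE - offset;        /* skip the partial first page */
            index++;
        }
        /* 1904 bytes actually remain past the first page boundary;
         * the old "n -= offset" arithmetic would have claimed 4000 */
        assert(n == 1904 && index == 1);
        return 0;
    }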
diff --git a/drivers/gpu/drm/exynos/exynos_drm_g2d.c b/drivers/gpu/drm/exynos/exynos_drm_g2d.c
index 6c1885eedfdf..800158714473 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_g2d.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_g2d.c
@@ -467,14 +467,17 @@ static dma_addr_t *g2d_userptr_get_dma_addr(struct drm_device *drm_dev,
goto err_free;
}
+ down_read(&current->mm->mmap_sem);
vma = find_vma(current->mm, userptr);
if (!vma) {
+ up_read(&current->mm->mmap_sem);
DRM_ERROR("failed to get vm region.\n");
ret = -EFAULT;
goto err_free_pages;
}
if (vma->vm_end < userptr + size) {
+ up_read(&current->mm->mmap_sem);
DRM_ERROR("vma is too small.\n");
ret = -EFAULT;
goto err_free_pages;
@@ -482,6 +485,7 @@ static dma_addr_t *g2d_userptr_get_dma_addr(struct drm_device *drm_dev,
g2d_userptr->vma = exynos_gem_get_vma(vma);
if (!g2d_userptr->vma) {
+ up_read(&current->mm->mmap_sem);
DRM_ERROR("failed to copy vma.\n");
ret = -ENOMEM;
goto err_free_pages;
@@ -492,10 +496,12 @@ static dma_addr_t *g2d_userptr_get_dma_addr(struct drm_device *drm_dev,
ret = exynos_gem_get_pages_from_userptr(start & PAGE_MASK,
npages, pages, vma);
if (ret < 0) {
+ up_read(&current->mm->mmap_sem);
DRM_ERROR("failed to get user pages from userptr.\n");
goto err_put_vma;
}
+ up_read(&current->mm->mmap_sem);
g2d_userptr->pages = pages;
sgt = kzalloc(sizeof(*sgt), GFP_KERNEL);
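The pattern being enforced here is the usual one: the find_vma() lookup and every subsequent use of the returned vma must stay under mmap_sem. In outline (a sketch of the locking shape, not the full driver path):

    down_read(&current->mm->mmap_sem);
    vma = find_vma(current->mm, userptr);
    if (!vma || vma->vm_end < userptr + size) {
        up_read(&current->mm->mmap_sem);
        return -EFAULT;
    }
    /* copy the vma and pin the user pages while still locked */
    up_read(&current->mm->mmap_sem);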
diff --git a/drivers/input/Kconfig b/drivers/input/Kconfig
index a11ff74a5127..9eac8de9e8b7 100644
--- a/drivers/input/Kconfig
+++ b/drivers/input/Kconfig
@@ -178,6 +178,15 @@ comment "Input Device Drivers"
source "drivers/input/keyboard/Kconfig"
+config INPUT_LEDS
+ bool "LED Support"
+ depends on LEDS_CLASS = INPUT || LEDS_CLASS = y
+ select LEDS_TRIGGERS
+ default y
+ help
+ This option enables support for LEDs on keyboards managed
+ by the input layer.
+
source "drivers/input/mouse/Kconfig"
source "drivers/input/joystick/Kconfig"
diff --git a/drivers/input/Makefile b/drivers/input/Makefile
index 5ca3f631497f..2ab5f3336da5 100644
--- a/drivers/input/Makefile
+++ b/drivers/input/Makefile
@@ -6,6 +6,9 @@
obj-$(CONFIG_INPUT) += input-core.o
input-core-y := input.o input-compat.o input-mt.o ff-core.o
+ifeq ($(CONFIG_INPUT_LEDS),y)
+input-core-y += leds.o
+endif
obj-$(CONFIG_INPUT_FF_MEMLESS) += ff-memless.o
obj-$(CONFIG_INPUT_POLLDEV) += input-polldev.o
diff --git a/drivers/input/input.c b/drivers/input/input.c
index 1c4c0db05550..3b9284b18e70 100644
--- a/drivers/input/input.c
+++ b/drivers/input/input.c
@@ -708,6 +708,9 @@ static void input_disconnect_device(struct input_dev *dev)
handle->open = 0;
spin_unlock_irq(&dev->event_lock);
+
+ if (is_event_supported(EV_LED, dev->evbit, EV_MAX))
+ input_led_disconnect(dev);
}
/**
@@ -2134,6 +2137,9 @@ int input_register_device(struct input_dev *dev)
list_add_tail(&dev->node, &input_dev_list);
+ if (is_event_supported(EV_LED, dev->evbit, EV_MAX))
+ input_led_connect(dev);
+
list_for_each_entry(handler, &input_handler_list, node)
input_attach_handler(dev, handler);
diff --git a/drivers/input/leds.c b/drivers/input/leds.c
new file mode 100644
index 000000000000..985fa7ebeec7
--- /dev/null
+++ b/drivers/input/leds.c
@@ -0,0 +1,249 @@
+/*
+ * LED support for the input layer
+ *
+ * Copyright 2010-2014 Samuel Thibault <samuel.thibault@ens-lyon.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/leds.h>
+#include <linux/input.h>
+
+/*
+ * Keyboard LEDs are propagated by default like the following example:
+ *
+ * VT keyboard numlock trigger
+ * -> vt::numl VT LED
+ * -> vt-numl VT trigger
+ * -> per-device inputX::numl LED
+ *
+ * Userland can, however, choose the trigger for the vt::numl LED, or
+ * independently choose the trigger for any inputX::numl LED.
+ *
+ * VT LED classes and triggers are registered on demand, according to
+ * the existing LED devices.
+ */
+
+/* Handler for VT LEDs, just triggers the corresponding VT trigger. */
+static void vt_led_set(struct led_classdev *cdev,
+ enum led_brightness brightness);
+static struct led_classdev vt_leds[LED_CNT] = {
+#define DEFINE_INPUT_LED(vt_led, nam, deftrig) \
+ [vt_led] = { \
+ .name = "vt::"nam, \
+ .max_brightness = 1, \
+ .brightness_set = vt_led_set, \
+ .default_trigger = deftrig, \
+ }
+/* Default triggers for the VT LEDs just correspond to the legacy
+ * usage. */
+ DEFINE_INPUT_LED(LED_NUML, "numl", "kbd-numlock"),
+ DEFINE_INPUT_LED(LED_CAPSL, "capsl", "kbd-capslock"),
+ DEFINE_INPUT_LED(LED_SCROLLL, "scrolll", "kbd-scrollock"),
+ DEFINE_INPUT_LED(LED_COMPOSE, "compose", NULL),
+ DEFINE_INPUT_LED(LED_KANA, "kana", "kbd-kanalock"),
+ DEFINE_INPUT_LED(LED_SLEEP, "sleep", NULL),
+ DEFINE_INPUT_LED(LED_SUSPEND, "suspend", NULL),
+ DEFINE_INPUT_LED(LED_MUTE, "mute", NULL),
+ DEFINE_INPUT_LED(LED_MISC, "misc", NULL),
+ DEFINE_INPUT_LED(LED_MAIL, "mail", NULL),
+ DEFINE_INPUT_LED(LED_CHARGING, "charging", NULL),
+};
+static const char *const vt_led_names[LED_CNT] = {
+ [LED_NUML] = "numl",
+ [LED_CAPSL] = "capsl",
+ [LED_SCROLLL] = "scrolll",
+ [LED_COMPOSE] = "compose",
+ [LED_KANA] = "kana",
+ [LED_SLEEP] = "sleep",
+ [LED_SUSPEND] = "suspend",
+ [LED_MUTE] = "mute",
+ [LED_MISC] = "misc",
+ [LED_MAIL] = "mail",
+ [LED_CHARGING] = "charging",
+};
+/* Handler for hotplug initialization */
+static void vt_led_trigger_activate(struct led_classdev *cdev);
+/* VT triggers */
+static struct led_trigger vt_led_triggers[LED_CNT] = {
+#define DEFINE_INPUT_LED_TRIGGER(vt_led, nam) \
+ [vt_led] = { \
+ .name = "vt-"nam, \
+ .activate = vt_led_trigger_activate, \
+ }
+ DEFINE_INPUT_LED_TRIGGER(LED_NUML, "numl"),
+ DEFINE_INPUT_LED_TRIGGER(LED_CAPSL, "capsl"),
+ DEFINE_INPUT_LED_TRIGGER(LED_SCROLLL, "scrolll"),
+ DEFINE_INPUT_LED_TRIGGER(LED_COMPOSE, "compose"),
+ DEFINE_INPUT_LED_TRIGGER(LED_KANA, "kana"),
+ DEFINE_INPUT_LED_TRIGGER(LED_SLEEP, "sleep"),
+ DEFINE_INPUT_LED_TRIGGER(LED_SUSPEND, "suspend"),
+ DEFINE_INPUT_LED_TRIGGER(LED_MUTE, "mute"),
+ DEFINE_INPUT_LED_TRIGGER(LED_MISC, "misc"),
+ DEFINE_INPUT_LED_TRIGGER(LED_MAIL, "mail"),
+ DEFINE_INPUT_LED_TRIGGER(LED_CHARGING, "charging"),
+};
+
+/* Lock for registration coherency */
+static DEFINE_MUTEX(vt_led_registered_lock);
+
+/* Which VT LED classes and triggers are registered */
+static unsigned long vt_led_registered[BITS_TO_LONGS(LED_CNT)];
+
+/* Number of input devices having each LED */
+static int vt_led_references[LED_CNT];
+
+/* VT LED state change, tell the VT trigger. */
+static void vt_led_set(struct led_classdev *cdev,
+ enum led_brightness brightness)
+{
+ int led = cdev - vt_leds;
+
+ led_trigger_event(&vt_led_triggers[led], !!brightness);
+}
+
+/* LED state change for some keyboard, notify that keyboard. */
+static void perdevice_input_led_set(struct led_classdev *cdev,
+ enum led_brightness brightness)
+{
+ struct input_dev *dev;
+ struct led_classdev *leds;
+ int led;
+
+ dev = cdev->dev->platform_data;
+ if (!dev)
+ /* Still initializing */
+ return;
+ leds = dev->leds;
+ led = cdev - leds;
+
+ input_event(dev, EV_LED, led, !!brightness);
+ input_event(dev, EV_SYN, SYN_REPORT, 0);
+}
+
+/* Keyboard hotplug, initialize its LED status */
+static void vt_led_trigger_activate(struct led_classdev *cdev)
+{
+ struct led_trigger *trigger = cdev->trigger;
+ int led = trigger - vt_led_triggers;
+
+ if (cdev->brightness_set)
+ cdev->brightness_set(cdev, vt_leds[led].brightness);
+}
+
+/* Free the LED data of an input device; used on connect failure and at disconnection. */
+static void input_led_delete(struct input_dev *dev)
+{
+ if (dev) {
+ struct led_classdev *leds = dev->leds;
+ if (leds) {
+ int i;
+ for (i = 0; i < LED_CNT; i++)
+ kfree(leds[i].name);
+ kfree(leds);
+ dev->leds = NULL;
+ }
+ }
+}
+
+/* A new input device with potential LEDs to connect. */
+int input_led_connect(struct input_dev *dev)
+{
+ int i, error = 0;
+ struct led_classdev *leds;
+
+ dev->leds = leds = kcalloc(LED_CNT, sizeof(*leds), GFP_KERNEL);
+ if (!dev->leds)
+ return -ENOMEM;
+
+ /* lazily register missing VT LEDs */
+ mutex_lock(&vt_led_registered_lock);
+ for (i = 0; i < LED_CNT; i++)
+ if (vt_leds[i].name && test_bit(i, dev->ledbit)) {
+ if (!vt_led_references[i]) {
+ led_trigger_register(&vt_led_triggers[i]);
+ /* This keyboard is the first to have LED i,
+ * so try to register it */
+ if (!led_classdev_register(NULL, &vt_leds[i]))
+ vt_led_references[i] = 1;
+ else
+ led_trigger_unregister(&vt_led_triggers[i]);
+ } else
+ vt_led_references[i]++;
+ }
+ mutex_unlock(&vt_led_registered_lock);
+
+ /* and register this device's LEDs */
+ for (i = 0; i < LED_CNT; i++)
+ if (vt_leds[i].name && test_bit(i, dev->ledbit)) {
+ leds[i].name = kasprintf(GFP_KERNEL, "%s::%s",
+ dev_name(&dev->dev),
+ vt_led_names[i]);
+ if (!leds[i].name) {
+ error = -ENOMEM;
+ goto err;
+ }
+ leds[i].max_brightness = 1;
+ leds[i].brightness_set = perdevice_input_led_set;
+ leds[i].default_trigger = vt_led_triggers[i].name;
+ }
+
+ /* No issue so far, we can register for real. */
+ for (i = 0; i < LED_CNT; i++)
+ if (leds[i].name) {
+ led_classdev_register(&dev->dev, &leds[i]);
+ leds[i].dev->platform_data = dev;
+ perdevice_input_led_set(&leds[i],
+ vt_leds[i].brightness);
+ }
+
+ return 0;
+
+err:
+ input_led_delete(dev);
+ return error;
+}
+
+/*
+ * Disconnected input device. Clean it, and deregister now-useless VT LEDs
+ * and triggers.
+ */
+void input_led_disconnect(struct input_dev *dev)
+{
+ int i;
+ struct led_classdev *leds = dev->leds;
+
+ for (i = 0; i < LED_CNT; i++)
+ if (leds[i].name)
+ led_classdev_unregister(&leds[i]);
+
+ input_led_delete(dev);
+
+ mutex_lock(&vt_led_registered_lock);
+ for (i = 0; i < LED_CNT; i++) {
+ if (!vt_leds[i].name || !test_bit(i, dev->ledbit))
+ continue;
+
+ vt_led_references[i]--;
+ if (vt_led_references[i]) {
+ /* Still some devices needing it */
+ continue;
+ }
+
+ led_classdev_unregister(&vt_leds[i]);
+ led_trigger_unregister(&vt_led_triggers[i]);
+ clear_bit(i, vt_led_registered);
+ }
+ mutex_unlock(&vt_led_registered_lock);
+}
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("User LED support for input layer");
+MODULE_AUTHOR("Samuel Thibault <samuel.thibault@ens-lyon.org>");
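For a keyboard that registers as, say, input3 (the device number is illustrative), the chain described at the top of the file surfaces in sysfs roughly as follows:

    /*
     *   /sys/class/leds/vt::numl      VT-wide LED, default trigger "kbd-numlock"
     *   /sys/class/leds/input3::numl  per-device LED, default trigger "vt-numl"
     *
     * Writing a different trigger name to either LED's "trigger" attribute
     * rewires that LED independently of the rest of the chain.
     */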
diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index f256ffc02e29..6bb32773c3ac 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -39,6 +39,7 @@
#include <linux/dmi.h>
#include <linux/pci-ats.h>
#include <linux/memblock.h>
+#include <linux/dma-contiguous.h>
#include <asm/irq_remapping.h>
#include <asm/cacheflush.h>
#include <asm/iommu.h>
@@ -3193,7 +3194,7 @@ static void *intel_alloc_coherent(struct device *dev, size_t size,
dma_addr_t *dma_handle, gfp_t flags,
struct dma_attrs *attrs)
{
- void *vaddr;
+ struct page *page = NULL;
int order;
size = PAGE_ALIGN(size);
@@ -3208,17 +3209,31 @@ static void *intel_alloc_coherent(struct device *dev, size_t size,
flags |= GFP_DMA32;
}
- vaddr = (void *)__get_free_pages(flags, order);
- if (!vaddr)
+ if (flags & __GFP_WAIT) {
+ unsigned int count = size >> PAGE_SHIFT;
+
+ page = dma_alloc_from_contiguous(dev, count, order);
+ if (page && iommu_no_mapping(dev) &&
+ page_to_phys(page) + size > dev->coherent_dma_mask) {
+ dma_release_from_contiguous(dev, page, count);
+ page = NULL;
+ }
+ }
+
+ if (!page)
+ page = alloc_pages(flags, order);
+ if (!page)
return NULL;
- memset(vaddr, 0, size);
+ memset(page_address(page), 0, size);
- *dma_handle = __intel_map_single(dev, virt_to_bus(vaddr), size,
+ *dma_handle = __intel_map_single(dev, page_to_phys(page), size,
DMA_BIDIRECTIONAL,
dev->coherent_dma_mask);
if (*dma_handle)
- return vaddr;
- free_pages((unsigned long)vaddr, order);
+ return page_address(page);
+ if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
+ __free_pages(page, order);
+
return NULL;
}
@@ -3226,12 +3241,14 @@ static void intel_free_coherent(struct device *dev, size_t size, void *vaddr,
dma_addr_t dma_handle, struct dma_attrs *attrs)
{
int order;
+ struct page *page = virt_to_page(vaddr);
size = PAGE_ALIGN(size);
order = get_order(size);
intel_unmap_page(dev, dma_handle, size, DMA_BIDIRECTIONAL, NULL);
- free_pages((unsigned long)vaddr, order);
+ if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
+ __free_pages(page, order);
}
static void intel_unmap_sg(struct device *dev, struct scatterlist *sglist,
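The alloc and free paths must stay symmetric: dma_release_from_contiguous() returns false when the page did not come from the CMA region, which is what makes the alloc_pages() fallback safe. In outline:

    /* allocate: CMA only when the caller may sleep, buddy as fallback */
    if (flags & __GFP_WAIT)
        page = dma_alloc_from_contiguous(dev, count, order);
    if (!page)
        page = alloc_pages(flags, order);

    /* free: try CMA first; false means it was a buddy page */
    if (!dma_release_from_contiguous(dev, page, count))
        __free_pages(page, order);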
diff --git a/drivers/leds/Kconfig b/drivers/leds/Kconfig
index c71122f9c8e8..1e81d7a09a64 100644
--- a/drivers/leds/Kconfig
+++ b/drivers/leds/Kconfig
@@ -11,9 +11,6 @@ menuconfig NEW_LEDS
Say Y to enable Linux LED support. This allows control of supported
LEDs from both userspace and optionally, by kernel events (triggers).
- This is not related to standard keyboard LEDs which are controlled
- via the input system.
-
if NEW_LEDS
config LEDS_CLASS
diff --git a/drivers/misc/ti-st/st_core.c b/drivers/misc/ti-st/st_core.c
index 1972d57aadb3..e7fbc08a0627 100644
--- a/drivers/misc/ti-st/st_core.c
+++ b/drivers/misc/ti-st/st_core.c
@@ -342,7 +342,7 @@ void st_int_recv(void *disc_data,
/* Unknown packet? */
default:
type = *ptr;
- if (st_gdata->list[type] == NULL) {
+ if (type >= ST_MAX_CHANNELS || st_gdata->list[type] == NULL) {
pr_err("chip/interface misbehavior dropping"
" frame starting with 0x%02x", type);
goto done;
diff --git a/drivers/net/irda/donauboe.c b/drivers/net/irda/donauboe.c
index 768dfe9a9315..6d3e2093bf7f 100644
--- a/drivers/net/irda/donauboe.c
+++ b/drivers/net/irda/donauboe.c
@@ -1755,17 +1755,4 @@ static struct pci_driver donauboe_pci_driver = {
.resume = toshoboe_wakeup
};
-static int __init
-donauboe_init (void)
-{
- return pci_register_driver(&donauboe_pci_driver);
-}
-
-static void __exit
-donauboe_cleanup (void)
-{
- pci_unregister_driver(&donauboe_pci_driver);
-}
-
-module_init(donauboe_init);
-module_exit(donauboe_cleanup);
+module_pci_driver(donauboe_pci_driver);
diff --git a/drivers/rapidio/devices/tsi721.c b/drivers/rapidio/devices/tsi721.c
index 1753dc693c15..2ca1a0b3ad57 100644
--- a/drivers/rapidio/devices/tsi721.c
+++ b/drivers/rapidio/devices/tsi721.c
@@ -768,15 +768,10 @@ static int tsi721_enable_msix(struct tsi721_device *priv)
}
#endif /* CONFIG_RAPIDIO_DMA_ENGINE */
- err = pci_enable_msix(priv->pdev, entries, ARRAY_SIZE(entries));
+ err = pci_enable_msix_exact(priv->pdev, entries, ARRAY_SIZE(entries));
if (err) {
- if (err > 0)
- dev_info(&priv->pdev->dev,
- "Only %d MSI-X vectors available, "
- "not using MSI-X\n", err);
- else
- dev_err(&priv->pdev->dev,
- "Failed to enable MSI-X (err=%d)\n", err);
+ dev_err(&priv->pdev->dev,
+ "Failed to enable MSI-X (err=%d)\n", err);
return err;
}
diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig
index 2e565f8e5165..91dbe0c92fdf 100644
--- a/drivers/rtc/Kconfig
+++ b/drivers/rtc/Kconfig
@@ -386,12 +386,12 @@ config RTC_DRV_PCF8583
will be called rtc-pcf8583.
config RTC_DRV_M41T80
- tristate "ST M41T62/65/M41T80/81/82/83/84/85/87"
+ tristate "ST M41T62/65/M41T80/81/82/83/84/85/87 and compatible"
help
If you say Y here you will get support for the ST M41T60
and M41T80 RTC chips series. Currently, the following chips are
supported: M41T62, M41T65, M41T80, M41T81, M41T82, M41T83, M41ST84,
- M41ST85, and M41ST87.
+ M41ST85, M41ST87, and MicroCrystal RV4162.
This driver can also be built as a module. If so, the module
will be called rtc-m41t80.
@@ -573,6 +573,16 @@ config RTC_DRV_DS1305
This driver can also be built as a module. If so, the module
will be called rtc-ds1305.
+config RTC_DRV_DS1343
+ tristate "Dallas/Maxim DS1343/DS1344"
+ help
+ If you say yes here you get support for the
+ Dallas/Maxim DS1343 and DS1344 real time clock chips.
+ Support for the trickle charger and alarm is provided.
+
+ This driver can also be built as a module. If so, the module
+ will be called rtc-ds1343.
+
config RTC_DRV_DS1347
tristate "Dallas/Maxim DS1347"
help
@@ -650,6 +660,14 @@ config RTC_DRV_RX4581
This driver can also be built as a module. If so the module
will be called rtc-rx4581.
+config RTC_DRV_MCP795
+ tristate "Microchip MCP795"
+ help
+ If you say yes here you will get support for the Microchip MCP795.
+
+ This driver can also be built as a module. If so the module
+ will be called rtc-mcp795.
+
endif # SPI_MASTER
comment "Platform RTC drivers"
@@ -758,6 +776,16 @@ config RTC_DRV_DA9055
This driver can also be built as a module. If so, the module
will be called rtc-da9055
+config RTC_DRV_DA9063
+ tristate "Dialog Semiconductor DA9063 RTC"
+ depends on MFD_DA9063
+ help
+ If you say yes here you will get support for the RTC subsystem
+ of the Dialog Semiconductor DA9063.
+
+ This driver can also be built as a module. If so, the module
+ will be called "rtc-da9063".
+
config RTC_DRV_EFI
tristate "EFI RTC"
depends on IA64
@@ -1327,6 +1355,15 @@ config RTC_DRV_MOXART
This driver can also be built as a module. If so, the module
will be called rtc-moxart
+config RTC_DRV_XGENE
+ tristate "APM X-Gene RTC"
+ help
+ If you say yes here you get support for the APM X-Gene SoC real time
+ clock.
+
+ This driver can also be built as a module. If so, the module
+ will be called "rtc-xgene".
+
comment "HID Sensor RTC drivers"
config RTC_DRV_HID_SENSOR_TIME
diff --git a/drivers/rtc/Makefile b/drivers/rtc/Makefile
index 40a09915c8f6..70347d041d10 100644
--- a/drivers/rtc/Makefile
+++ b/drivers/rtc/Makefile
@@ -32,6 +32,7 @@ obj-$(CONFIG_RTC_DRV_CMOS) += rtc-cmos.o
obj-$(CONFIG_RTC_DRV_COH901331) += rtc-coh901331.o
obj-$(CONFIG_RTC_DRV_DA9052) += rtc-da9052.o
obj-$(CONFIG_RTC_DRV_DA9055) += rtc-da9055.o
+obj-$(CONFIG_RTC_DRV_DA9063) += rtc-da9063.o
obj-$(CONFIG_RTC_DRV_DAVINCI) += rtc-davinci.o
obj-$(CONFIG_RTC_DRV_DM355EVM) += rtc-dm355evm.o
obj-$(CONFIG_RTC_DRV_VRTC) += rtc-mrst.o
@@ -40,6 +41,7 @@ obj-$(CONFIG_RTC_DRV_DS1286) += rtc-ds1286.o
obj-$(CONFIG_RTC_DRV_DS1302) += rtc-ds1302.o
obj-$(CONFIG_RTC_DRV_DS1305) += rtc-ds1305.o
obj-$(CONFIG_RTC_DRV_DS1307) += rtc-ds1307.o
+obj-$(CONFIG_RTC_DRV_DS1343) += rtc-ds1343.o
obj-$(CONFIG_RTC_DRV_DS1347) += rtc-ds1347.o
obj-$(CONFIG_RTC_DRV_DS1374) += rtc-ds1374.o
obj-$(CONFIG_RTC_DRV_DS1390) += rtc-ds1390.o
@@ -80,6 +82,7 @@ obj-$(CONFIG_RTC_DRV_MAX8997) += rtc-max8997.o
obj-$(CONFIG_RTC_DRV_MAX6902) += rtc-max6902.o
obj-$(CONFIG_RTC_DRV_MAX77686) += rtc-max77686.o
obj-$(CONFIG_RTC_DRV_MC13XXX) += rtc-mc13xxx.o
+obj-$(CONFIG_RTC_DRV_MCP795) += rtc-mcp795.o
obj-$(CONFIG_RTC_DRV_MSM6242) += rtc-msm6242.o
obj-$(CONFIG_RTC_DRV_MPC5121) += rtc-mpc5121.o
obj-$(CONFIG_RTC_DRV_MV) += rtc-mv.o
@@ -135,5 +138,6 @@ obj-$(CONFIG_RTC_DRV_VT8500) += rtc-vt8500.o
obj-$(CONFIG_RTC_DRV_WM831X) += rtc-wm831x.o
obj-$(CONFIG_RTC_DRV_WM8350) += rtc-wm8350.o
obj-$(CONFIG_RTC_DRV_X1205) += rtc-x1205.o
+obj-$(CONFIG_RTC_DRV_XGENE) += rtc-xgene.o
obj-$(CONFIG_RTC_DRV_SIRFSOC) += rtc-sirfsoc.o
obj-$(CONFIG_RTC_DRV_MOXART) += rtc-moxart.o
diff --git a/drivers/rtc/interface.c b/drivers/rtc/interface.c
index c2eff6082363..5813fa52c3d4 100644
--- a/drivers/rtc/interface.c
+++ b/drivers/rtc/interface.c
@@ -292,7 +292,8 @@ int __rtc_read_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm)
dev_dbg(&rtc->dev, "alarm rollover: %s\n", "year");
do {
alarm->time.tm_year++;
- } while (rtc_valid_tm(&alarm->time) != 0);
+ } while (!is_leap_year(alarm->time.tm_year + 1900)
+ && rtc_valid_tm(&alarm->time) != 0);
break;
default:
@@ -300,7 +301,16 @@ int __rtc_read_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm)
}
done:
- return 0;
+ err = rtc_valid_tm(&alarm->time);
+
+ if (err) {
+ dev_warn(&rtc->dev, "invalid alarm value: %d-%d-%d %d:%d:%d\n",
+ alarm->time.tm_year + 1900, alarm->time.tm_mon + 1,
+ alarm->time.tm_mday, alarm->time.tm_hour, alarm->time.tm_min,
+ alarm->time.tm_sec);
+ }
+
+ return err;
}
int rtc_read_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm)
diff --git a/drivers/rtc/rtc-88pm860x.c b/drivers/rtc/rtc-88pm860x.c
index 816504846cdd..0c6add1a38dc 100644
--- a/drivers/rtc/rtc-88pm860x.c
+++ b/drivers/rtc/rtc-88pm860x.c
@@ -293,7 +293,7 @@ static int pm860x_rtc_dt_init(struct platform_device *pdev,
int ret;
if (!np)
return -ENODEV;
- np = of_find_node_by_name(np, "rtc");
+ np = of_get_child_by_name(np, "rtc");
if (!np) {
dev_err(&pdev->dev, "failed to find rtc node\n");
return -ENODEV;
@@ -301,6 +301,7 @@ static int pm860x_rtc_dt_init(struct platform_device *pdev,
ret = of_property_read_u32(np, "marvell,88pm860x-vrtc", &info->vrtc);
if (ret)
info->vrtc = 0;
+ of_node_put(np);
return 0;
}
#else
diff --git a/drivers/rtc/rtc-at91rm9200.c b/drivers/rtc/rtc-at91rm9200.c
index 3281c90691c3..e3fe54ce072a 100644
--- a/drivers/rtc/rtc-at91rm9200.c
+++ b/drivers/rtc/rtc-at91rm9200.c
@@ -48,11 +48,13 @@ struct at91_rtc_config {
static const struct at91_rtc_config *at91_rtc_config;
static DECLARE_COMPLETION(at91_rtc_updated);
+static DECLARE_COMPLETION(at91_rtc_wait_upd_rdy);
static unsigned int at91_alarm_year = AT91_RTC_EPOCH;
static void __iomem *at91_rtc_regs;
static int irq;
static DEFINE_SPINLOCK(at91_rtc_lock);
static u32 at91_rtc_shadow_imr;
+static bool at91_rtc_upd_rdy;
static void at91_rtc_write_ier(u32 mask)
{
@@ -161,6 +163,8 @@ static int at91_rtc_settime(struct device *dev, struct rtc_time *tm)
1900 + tm->tm_year, tm->tm_mon, tm->tm_mday,
tm->tm_hour, tm->tm_min, tm->tm_sec);
+ wait_for_completion(&at91_rtc_wait_upd_rdy);
+
/* Stop Time/Calendar from counting */
cr = at91_rtc_read(AT91_RTC_CR);
at91_rtc_write(AT91_RTC_CR, cr | AT91_RTC_UPDCAL | AT91_RTC_UPDTIM);
@@ -183,6 +187,7 @@ static int at91_rtc_settime(struct device *dev, struct rtc_time *tm)
/* Restart Time/Calendar */
cr = at91_rtc_read(AT91_RTC_CR);
+ at91_rtc_upd_rdy = 0;
at91_rtc_write(AT91_RTC_CR, cr & ~(AT91_RTC_UPDCAL | AT91_RTC_UPDTIM));
return 0;
@@ -290,8 +295,13 @@ static irqreturn_t at91_rtc_interrupt(int irq, void *dev_id)
if (rtsr) { /* this interrupt is shared! Is it ours? */
if (rtsr & AT91_RTC_ALARM)
events |= (RTC_AF | RTC_IRQF);
- if (rtsr & AT91_RTC_SECEV)
+ if (rtsr & AT91_RTC_SECEV) {
events |= (RTC_UF | RTC_IRQF);
+ if (!at91_rtc_upd_rdy) {
+ at91_rtc_upd_rdy = 1;
+ complete(&at91_rtc_wait_upd_rdy);
+ }
+ }
if (rtsr & AT91_RTC_ACKUPD)
complete(&at91_rtc_updated);
@@ -413,6 +423,8 @@ static int __init at91_rtc_probe(struct platform_device *pdev)
return PTR_ERR(rtc);
platform_set_drvdata(pdev, rtc);
+ /* Enable 1Hz events */
+ at91_rtc_write_ier(AT91_RTC_SECEV);
dev_info(&pdev->dev, "AT91 Real Time Clock driver.\n");
return 0;
}
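In outline, the handshake the new completion implements (timing follows the hardware's one-second update cycle, as the patch assumes):

    /* settime A: stops the counters, writes the time, restarts them and
     *            clears at91_rtc_upd_rdy;
     * next SECEV irq: sets at91_rtc_upd_rdy and completes
     *            at91_rtc_wait_upd_rdy;
     * settime B: wait_for_completion() therefore blocks until the first
     *            update after A has landed, instead of corrupting it. */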
diff --git a/drivers/rtc/rtc-bfin.c b/drivers/rtc/rtc-bfin.c
index 0c53f452849d..fe4bdb06a55a 100644
--- a/drivers/rtc/rtc-bfin.c
+++ b/drivers/rtc/rtc-bfin.c
@@ -346,7 +346,7 @@ static int bfin_rtc_probe(struct platform_device *pdev)
{
struct bfin_rtc *rtc;
struct device *dev = &pdev->dev;
- int ret = 0;
+ int ret;
unsigned long timeout = jiffies + HZ;
dev_dbg_stamp(dev);
@@ -361,16 +361,17 @@ static int bfin_rtc_probe(struct platform_device *pdev)
/* Register our RTC with the RTC framework */
rtc->rtc_dev = devm_rtc_device_register(dev, pdev->name, &bfin_rtc_ops,
THIS_MODULE);
- if (unlikely(IS_ERR(rtc->rtc_dev))) {
- ret = PTR_ERR(rtc->rtc_dev);
- goto err;
- }
+ if (unlikely(IS_ERR(rtc->rtc_dev)))
+ return PTR_ERR(rtc->rtc_dev);
/* Grab the IRQ and init the hardware */
ret = devm_request_irq(dev, IRQ_RTC, bfin_rtc_interrupt, 0,
pdev->name, dev);
if (unlikely(ret))
- goto err;
+ dev_err(&pdev->dev,
+ "unable to request IRQ; alarm won't work, "
+ "and writes will be delayed\n");
+
/* sometimes the bootloader touched things, but the write complete was not
* enabled, so let's just do a quick timeout here since the IRQ will not fire ...
*/
@@ -381,9 +382,6 @@ static int bfin_rtc_probe(struct platform_device *pdev)
bfin_write_RTC_SWCNT(0);
return 0;
-
-err:
- return ret;
}
static int bfin_rtc_remove(struct platform_device *pdev)
diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c
index 0963c9309c74..b0e4a3eb33c7 100644
--- a/drivers/rtc/rtc-cmos.c
+++ b/drivers/rtc/rtc-cmos.c
@@ -647,6 +647,7 @@ cmos_do_probe(struct device *dev, struct resource *ports, int rtc_irq)
int retval = 0;
unsigned char rtc_control;
unsigned address_space;
+ u32 flags = 0;
/* there can be only one ... */
if (cmos_rtc.dev)
@@ -660,9 +661,12 @@ cmos_do_probe(struct device *dev, struct resource *ports, int rtc_irq)
* REVISIT non-x86 systems may instead use memory space resources
* (needing ioremap etc), not i/o space resources like this ...
*/
- ports = request_region(ports->start,
- resource_size(ports),
- driver_name);
+ if (RTC_IOMAPPED)
+ ports = request_region(ports->start, resource_size(ports),
+ driver_name);
+ else
+ ports = request_mem_region(ports->start, resource_size(ports),
+ driver_name);
if (!ports) {
dev_dbg(dev, "i/o registers already in use\n");
return -EBUSY;
@@ -699,6 +703,11 @@ cmos_do_probe(struct device *dev, struct resource *ports, int rtc_irq)
* expect CMOS_READ and friends to handle.
*/
if (info) {
+ if (info->flags)
+ flags = info->flags;
+ if (info->address_space)
+ address_space = info->address_space;
+
if (info->rtc_day_alarm && info->rtc_day_alarm < 128)
cmos_rtc.day_alrm = info->rtc_day_alarm;
if (info->rtc_mon_alarm && info->rtc_mon_alarm < 128)
@@ -726,18 +735,21 @@ cmos_do_probe(struct device *dev, struct resource *ports, int rtc_irq)
spin_lock_irq(&rtc_lock);
- /* force periodic irq to CMOS reset default of 1024Hz;
- *
- * REVISIT it's been reported that at least one x86_64 ALI mobo
- * doesn't use 32KHz here ... for portability we might need to
- * do something about other clock frequencies.
- */
- cmos_rtc.rtc->irq_freq = 1024;
- hpet_set_periodic_freq(cmos_rtc.rtc->irq_freq);
- CMOS_WRITE(RTC_REF_CLCK_32KHZ | 0x06, RTC_FREQ_SELECT);
+ if (!(flags & CMOS_RTC_FLAGS_NOFREQ)) {
+ /* force periodic irq to CMOS reset default of 1024Hz;
+ *
+ * REVISIT it's been reported that at least one x86_64 ALI
+ * mobo doesn't use 32KHz here ... for portability we might
+ * need to do something about other clock frequencies.
+ */
+ cmos_rtc.rtc->irq_freq = 1024;
+ hpet_set_periodic_freq(cmos_rtc.rtc->irq_freq);
+ CMOS_WRITE(RTC_REF_CLCK_32KHZ | 0x06, RTC_FREQ_SELECT);
+ }
/* disable irqs */
- cmos_irq_disable(&cmos_rtc, RTC_PIE | RTC_AIE | RTC_UIE);
+ if (is_valid_irq(rtc_irq))
+ cmos_irq_disable(&cmos_rtc, RTC_PIE | RTC_AIE | RTC_UIE);
rtc_control = CMOS_READ(RTC_CONTROL);
@@ -802,14 +814,18 @@ cleanup1:
cmos_rtc.dev = NULL;
rtc_device_unregister(cmos_rtc.rtc);
cleanup0:
- release_region(ports->start, resource_size(ports));
+ if (RTC_IOMAPPED)
+ release_region(ports->start, resource_size(ports));
+ else
+ release_mem_region(ports->start, resource_size(ports));
return retval;
}
-static void cmos_do_shutdown(void)
+static void cmos_do_shutdown(int rtc_irq)
{
spin_lock_irq(&rtc_lock);
- cmos_irq_disable(&cmos_rtc, RTC_IRQMASK);
+ if (is_valid_irq(rtc_irq))
+ cmos_irq_disable(&cmos_rtc, RTC_IRQMASK);
spin_unlock_irq(&rtc_lock);
}
@@ -818,7 +834,7 @@ static void __exit cmos_do_remove(struct device *dev)
struct cmos_rtc *cmos = dev_get_drvdata(dev);
struct resource *ports;
- cmos_do_shutdown();
+ cmos_do_shutdown(cmos->irq);
sysfs_remove_bin_file(&dev->kobj, &nvram);
@@ -831,7 +847,10 @@ static void __exit cmos_do_remove(struct device *dev)
cmos->rtc = NULL;
ports = cmos->iomem;
- release_region(ports->start, resource_size(ports));
+ if (RTC_IOMAPPED)
+ release_region(ports->start, resource_size(ports));
+ else
+ release_mem_region(ports->start, resource_size(ports));
cmos->iomem = NULL;
cmos->dev = NULL;
@@ -1065,10 +1084,13 @@ static void __exit cmos_pnp_remove(struct pnp_dev *pnp)
static void cmos_pnp_shutdown(struct pnp_dev *pnp)
{
- if (system_state == SYSTEM_POWER_OFF && !cmos_poweroff(&pnp->dev))
+ struct device *dev = &pnp->dev;
+ struct cmos_rtc *cmos = dev_get_drvdata(dev);
+
+ if (system_state == SYSTEM_POWER_OFF && !cmos_poweroff(dev))
return;
- cmos_do_shutdown();
+ cmos_do_shutdown(cmos->irq);
}
static const struct pnp_device_id rtc_ids[] = {
@@ -1143,11 +1165,21 @@ static inline void cmos_of_init(struct platform_device *pdev) {}
static int __init cmos_platform_probe(struct platform_device *pdev)
{
+ struct resource *resource;
+ int irq;
+
cmos_of_init(pdev);
cmos_wake_setup(&pdev->dev);
- return cmos_do_probe(&pdev->dev,
- platform_get_resource(pdev, IORESOURCE_IO, 0),
- platform_get_irq(pdev, 0));
+
+ if (RTC_IOMAPPED)
+ resource = platform_get_resource(pdev, IORESOURCE_IO, 0);
+ else
+ resource = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ irq = platform_get_irq(pdev, 0);
+ if (irq < 0)
+ irq = -1;
+
+ return cmos_do_probe(&pdev->dev, resource, irq);
}
static int __exit cmos_platform_remove(struct platform_device *pdev)
@@ -1158,10 +1190,13 @@ static int __exit cmos_platform_remove(struct platform_device *pdev)
static void cmos_platform_shutdown(struct platform_device *pdev)
{
- if (system_state == SYSTEM_POWER_OFF && !cmos_poweroff(&pdev->dev))
+ struct device *dev = &pdev->dev;
+ struct cmos_rtc *cmos = dev_get_drvdata(dev);
+
+ if (system_state == SYSTEM_POWER_OFF && !cmos_poweroff(dev))
return;
- cmos_do_shutdown();
+ cmos_do_shutdown(cmos->irq);
}
/* work with hotplug and coldplug */
diff --git a/drivers/rtc/rtc-da9052.c b/drivers/rtc/rtc-da9052.c
index a1cbf64242a5..e5c9486cf452 100644
--- a/drivers/rtc/rtc-da9052.c
+++ b/drivers/rtc/rtc-da9052.c
@@ -20,28 +20,28 @@
#include <linux/mfd/da9052/da9052.h>
#include <linux/mfd/da9052/reg.h>
-#define rtc_err(da9052, fmt, ...) \
- dev_err(da9052->dev, "%s: " fmt, __func__, ##__VA_ARGS__)
+#define rtc_err(rtc, fmt, ...) \
+ dev_err(rtc->da9052->dev, "%s: " fmt, __func__, ##__VA_ARGS__)
struct da9052_rtc {
struct rtc_device *rtc;
struct da9052 *da9052;
};
-static int da9052_rtc_enable_alarm(struct da9052 *da9052, bool enable)
+static int da9052_rtc_enable_alarm(struct da9052_rtc *rtc, bool enable)
{
int ret;
if (enable) {
- ret = da9052_reg_update(da9052, DA9052_ALARM_Y_REG,
- DA9052_ALARM_Y_ALARM_ON,
- DA9052_ALARM_Y_ALARM_ON);
+ ret = da9052_reg_update(rtc->da9052, DA9052_ALARM_Y_REG,
+ DA9052_ALARM_Y_ALARM_ON | DA9052_ALARM_Y_TICK_ON,
+ DA9052_ALARM_Y_ALARM_ON);
if (ret != 0)
- rtc_err(da9052, "Failed to enable ALM: %d\n", ret);
+ rtc_err(rtc, "Failed to enable ALM: %d\n", ret);
} else {
- ret = da9052_reg_update(da9052, DA9052_ALARM_Y_REG,
- DA9052_ALARM_Y_ALARM_ON, 0);
+ ret = da9052_reg_update(rtc->da9052, DA9052_ALARM_Y_REG,
+ DA9052_ALARM_Y_ALARM_ON | DA9052_ALARM_Y_TICK_ON, 0);
if (ret != 0)
- rtc_err(da9052, "Write error: %d\n", ret);
+ rtc_err(rtc, "Write error: %d\n", ret);
}
return ret;
}
@@ -49,31 +49,20 @@ static int da9052_rtc_enable_alarm(struct da9052 *da9052, bool enable)
static irqreturn_t da9052_rtc_irq(int irq, void *data)
{
struct da9052_rtc *rtc = data;
- int ret;
- ret = da9052_reg_read(rtc->da9052, DA9052_ALARM_MI_REG);
- if (ret < 0) {
- rtc_err(rtc->da9052, "Read error: %d\n", ret);
- return IRQ_NONE;
- }
-
- if (ret & DA9052_ALARMMI_ALARMTYPE) {
- da9052_rtc_enable_alarm(rtc->da9052, 0);
- rtc_update_irq(rtc->rtc, 1, RTC_IRQF | RTC_AF);
- } else
- rtc_update_irq(rtc->rtc, 1, RTC_IRQF | RTC_PF);
+ rtc_update_irq(rtc->rtc, 1, RTC_IRQF | RTC_AF);
return IRQ_HANDLED;
}
-static int da9052_read_alarm(struct da9052 *da9052, struct rtc_time *rtc_tm)
+static int da9052_read_alarm(struct da9052_rtc *rtc, struct rtc_time *rtc_tm)
{
int ret;
uint8_t v[5];
- ret = da9052_group_read(da9052, DA9052_ALARM_MI_REG, 5, v);
+ ret = da9052_group_read(rtc->da9052, DA9052_ALARM_MI_REG, 5, v);
if (ret != 0) {
- rtc_err(da9052, "Failed to group read ALM: %d\n", ret);
+ rtc_err(rtc, "Failed to group read ALM: %d\n", ret);
return ret;
}
@@ -84,23 +73,33 @@ static int da9052_read_alarm(struct da9052 *da9052, struct rtc_time *rtc_tm)
rtc_tm->tm_min = v[0] & DA9052_RTC_MIN;
ret = rtc_valid_tm(rtc_tm);
- if (ret != 0)
- return ret;
return ret;
}
-static int da9052_set_alarm(struct da9052 *da9052, struct rtc_time *rtc_tm)
+static int da9052_set_alarm(struct da9052_rtc *rtc, struct rtc_time *rtc_tm)
{
+ struct da9052 *da9052 = rtc->da9052;
+ unsigned long alm_time;
int ret;
uint8_t v[3];
+ ret = rtc_tm_to_time(rtc_tm, &alm_time);
+ if (ret != 0)
+ return ret;
+
+ if (rtc_tm->tm_sec > 0) {
+ alm_time += 60 - rtc_tm->tm_sec;
+ rtc_time_to_tm(alm_time, rtc_tm);
+ }
+ BUG_ON(rtc_tm->tm_sec); /* it will cause repeated irqs if not zero */
+
rtc_tm->tm_year -= 100;
rtc_tm->tm_mon += 1;
ret = da9052_reg_update(da9052, DA9052_ALARM_MI_REG,
DA9052_RTC_MIN, rtc_tm->tm_min);
if (ret != 0) {
- rtc_err(da9052, "Failed to write ALRM MIN: %d\n", ret);
+ rtc_err(rtc, "Failed to write ALRM MIN: %d\n", ret);
return ret;
}
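The round-up above exists because the DA9052 alarm registers have one-minute resolution; truncating the seconds could schedule an alarm in the past. With illustrative numbers:

    /* requested 12:30:45: alm_time += 60 - 45, so the alarm fires at
     * 12:31:00; a request already on a minute boundary is untouched,
     * and the BUG_ON() asserts tm_sec is zero from here on */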
@@ -115,22 +114,22 @@ static int da9052_set_alarm(struct da9052 *da9052, struct rtc_time *rtc_tm)
ret = da9052_reg_update(da9052, DA9052_ALARM_Y_REG,
DA9052_RTC_YEAR, rtc_tm->tm_year);
if (ret != 0)
- rtc_err(da9052, "Failed to write ALRM YEAR: %d\n", ret);
+ rtc_err(rtc, "Failed to write ALRM YEAR: %d\n", ret);
return ret;
}
-static int da9052_rtc_get_alarm_status(struct da9052 *da9052)
+static int da9052_rtc_get_alarm_status(struct da9052_rtc *rtc)
{
int ret;
- ret = da9052_reg_read(da9052, DA9052_ALARM_Y_REG);
+ ret = da9052_reg_read(rtc->da9052, DA9052_ALARM_Y_REG);
if (ret < 0) {
- rtc_err(da9052, "Failed to read ALM: %d\n", ret);
+ rtc_err(rtc, "Failed to read ALM: %d\n", ret);
return ret;
}
- ret &= DA9052_ALARM_Y_ALARM_ON;
- return (ret > 0) ? 1 : 0;
+
+ return !!(ret & DA9052_ALARM_Y_ALARM_ON);
}
static int da9052_rtc_read_time(struct device *dev, struct rtc_time *rtc_tm)
@@ -141,7 +140,7 @@ static int da9052_rtc_read_time(struct device *dev, struct rtc_time *rtc_tm)
ret = da9052_group_read(rtc->da9052, DA9052_COUNT_S_REG, 6, v);
if (ret < 0) {
- rtc_err(rtc->da9052, "Failed to read RTC time : %d\n", ret);
+ rtc_err(rtc, "Failed to read RTC time : %d\n", ret);
return ret;
}
@@ -153,18 +152,14 @@ static int da9052_rtc_read_time(struct device *dev, struct rtc_time *rtc_tm)
rtc_tm->tm_sec = v[0] & DA9052_RTC_SEC;
ret = rtc_valid_tm(rtc_tm);
- if (ret != 0) {
- rtc_err(rtc->da9052, "rtc_valid_tm failed: %d\n", ret);
- return ret;
- }
-
- return 0;
+ return ret;
}
static int da9052_rtc_set_time(struct device *dev, struct rtc_time *tm)
{
struct da9052_rtc *rtc;
uint8_t v[6];
+ int ret;
rtc = dev_get_drvdata(dev);
@@ -175,7 +170,10 @@ static int da9052_rtc_set_time(struct device *dev, struct rtc_time *tm)
v[4] = tm->tm_mon + 1;
v[5] = tm->tm_year - 100;
- return da9052_group_write(rtc->da9052, DA9052_COUNT_S_REG, 6, v);
+ ret = da9052_group_write(rtc->da9052, DA9052_COUNT_S_REG, 6, v);
+ if (ret < 0)
+ rtc_err(rtc, "failed to set RTC time: %d\n", ret);
+ return ret;
}
static int da9052_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm)
@@ -184,13 +182,13 @@ static int da9052_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm)
struct rtc_time *tm = &alrm->time;
struct da9052_rtc *rtc = dev_get_drvdata(dev);
- ret = da9052_read_alarm(rtc->da9052, tm);
-
- if (ret)
+ ret = da9052_read_alarm(rtc, tm);
+ if (ret < 0) {
+ rtc_err(rtc, "failed to read RTC alarm: %d\n", ret);
return ret;
+ }
- alrm->enabled = da9052_rtc_get_alarm_status(rtc->da9052);
-
+ alrm->enabled = da9052_rtc_get_alarm_status(rtc);
return 0;
}
@@ -200,16 +198,15 @@ static int da9052_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm)
struct rtc_time *tm = &alrm->time;
struct da9052_rtc *rtc = dev_get_drvdata(dev);
- ret = da9052_rtc_enable_alarm(rtc->da9052, 0);
+ ret = da9052_rtc_enable_alarm(rtc, 0);
if (ret < 0)
return ret;
- ret = da9052_set_alarm(rtc->da9052, tm);
- if (ret)
+ ret = da9052_set_alarm(rtc, tm);
+ if (ret < 0)
return ret;
- ret = da9052_rtc_enable_alarm(rtc->da9052, 1);
-
+ ret = da9052_rtc_enable_alarm(rtc, 1);
return ret;
}
@@ -217,7 +214,7 @@ static int da9052_rtc_alarm_irq_enable(struct device *dev, unsigned int enabled)
{
struct da9052_rtc *rtc = dev_get_drvdata(dev);
- return da9052_rtc_enable_alarm(rtc->da9052, enabled);
+ return da9052_rtc_enable_alarm(rtc, enabled);
}
static const struct rtc_class_ops da9052_rtc_ops = {
@@ -239,10 +236,23 @@ static int da9052_rtc_probe(struct platform_device *pdev)
rtc->da9052 = dev_get_drvdata(pdev->dev.parent);
platform_set_drvdata(pdev, rtc);
+
+ ret = da9052_reg_write(rtc->da9052, DA9052_BBAT_CONT_REG, 0xFE);
+ if (ret < 0) {
+ rtc_err(rtc,
+ "Failed to setup RTC battery charging: %d\n", ret);
+ return ret;
+ }
+
+ ret = da9052_reg_update(rtc->da9052, DA9052_ALARM_Y_REG,
+ DA9052_ALARM_Y_TICK_ON, 0);
+ if (ret != 0)
+ rtc_err(rtc, "Failed to disable TICKS: %d\n", ret);
+
ret = da9052_request_irq(rtc->da9052, DA9052_IRQ_ALARM, "ALM",
da9052_rtc_irq, rtc);
if (ret != 0) {
- rtc_err(rtc->da9052, "irq registration failed: %d\n", ret);
+ rtc_err(rtc, "irq registration failed: %d\n", ret);
return ret;
}
@@ -261,7 +271,7 @@ static struct platform_driver da9052_rtc_driver = {
module_platform_driver(da9052_rtc_driver);
-MODULE_AUTHOR("David Dajun Chen <dchen@diasemi.com>");
+MODULE_AUTHOR("Anthony Olech <Anthony.Olech@diasemi.com>");
MODULE_DESCRIPTION("RTC driver for Dialog DA9052 PMIC");
MODULE_LICENSE("GPL");
MODULE_ALIAS("platform:da9052-rtc");
diff --git a/drivers/rtc/rtc-da9063.c b/drivers/rtc/rtc-da9063.c
new file mode 100644
index 000000000000..595393098b09
--- /dev/null
+++ b/drivers/rtc/rtc-da9063.c
@@ -0,0 +1,333 @@
+/* rtc-da9063.c - Real time clock device driver for DA9063
+ * Copyright (C) 2013-14 Dialog Semiconductor Ltd.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Library General Public License for more details.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/platform_device.h>
+#include <linux/interrupt.h>
+#include <linux/rtc.h>
+#include <linux/slab.h>
+#include <linux/delay.h>
+#include <linux/regmap.h>
+#include <linux/mfd/da9063/registers.h>
+#include <linux/mfd/da9063/core.h>
+
+#define YEARS_TO_DA9063(year) ((year) - 100)
+#define MONTHS_TO_DA9063(month) ((month) + 1)
+#define YEARS_FROM_DA9063(year) ((year) + 100)
+#define MONTHS_FROM_DA9063(month) ((month) - 1)
+
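+/* Example: struct rtc_time counts years from 1900 and months 0-11, while
+ * the DA9063 counts years from 2000 and months 1-12, so May 2014 is
+ * tm_year == 114 / tm_mon == 4 in software but 14 / 5 in the registers. */
+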
+#define RTC_DATA_LEN (DA9063_REG_COUNT_Y - DA9063_REG_COUNT_S + 1)
+#define RTC_SEC 0
+#define RTC_MIN 1
+#define RTC_HOUR 2
+#define RTC_DAY 3
+#define RTC_MONTH 4
+#define RTC_YEAR 5
+
+struct da9063_rtc {
+ struct rtc_device *rtc_dev;
+ struct da9063 *hw;
+ struct rtc_time alarm_time;
+ bool rtc_sync;
+};
+
+static void da9063_data_to_tm(u8 *data, struct rtc_time *tm)
+{
+ tm->tm_sec = data[RTC_SEC] & DA9063_COUNT_SEC_MASK;
+ tm->tm_min = data[RTC_MIN] & DA9063_COUNT_MIN_MASK;
+ tm->tm_hour = data[RTC_HOUR] & DA9063_COUNT_HOUR_MASK;
+ tm->tm_mday = data[RTC_DAY] & DA9063_COUNT_DAY_MASK;
+ tm->tm_mon = MONTHS_FROM_DA9063(data[RTC_MONTH] &
+ DA9063_COUNT_MONTH_MASK);
+ tm->tm_year = YEARS_FROM_DA9063(data[RTC_YEAR] &
+ DA9063_COUNT_YEAR_MASK);
+}
+
+static void da9063_tm_to_data(struct rtc_time *tm, u8 *data)
+{
+ data[RTC_SEC] &= ~DA9063_COUNT_SEC_MASK;
+ data[RTC_SEC] |= tm->tm_sec & DA9063_COUNT_SEC_MASK;
+
+ data[RTC_MIN] &= ~DA9063_COUNT_MIN_MASK;
+ data[RTC_MIN] |= tm->tm_min & DA9063_COUNT_MIN_MASK;
+
+ data[RTC_HOUR] &= ~DA9063_COUNT_HOUR_MASK;
+ data[RTC_HOUR] |= tm->tm_hour & DA9063_COUNT_HOUR_MASK;
+
+ data[RTC_DAY] &= ~DA9063_COUNT_DAY_MASK;
+ data[RTC_DAY] |= tm->tm_mday & DA9063_COUNT_DAY_MASK;
+
+ data[RTC_MONTH] &= ~DA9063_COUNT_MONTH_MASK;
+ data[RTC_MONTH] |= MONTHS_TO_DA9063(tm->tm_mon) &
+ DA9063_COUNT_MONTH_MASK;
+
+ data[RTC_YEAR] &= ~DA9063_COUNT_YEAR_MASK;
+ data[RTC_YEAR] |= YEARS_TO_DA9063(tm->tm_year) &
+ DA9063_COUNT_YEAR_MASK;
+}
+
+static int da9063_rtc_stop_alarm(struct device *dev)
+{
+ struct da9063_rtc *rtc = dev_get_drvdata(dev);
+
+ return regmap_update_bits(rtc->hw->regmap, DA9063_REG_ALARM_Y,
+ DA9063_ALARM_ON, 0);
+}
+
+static int da9063_rtc_start_alarm(struct device *dev)
+{
+ struct da9063_rtc *rtc = dev_get_drvdata(dev);
+
+ return regmap_update_bits(rtc->hw->regmap, DA9063_REG_ALARM_Y,
+ DA9063_ALARM_ON, DA9063_ALARM_ON);
+}
+
+static int da9063_rtc_read_time(struct device *dev, struct rtc_time *tm)
+{
+ struct da9063_rtc *rtc = dev_get_drvdata(dev);
+ unsigned long tm_secs;
+ unsigned long al_secs;
+ u8 data[RTC_DATA_LEN];
+ int ret;
+
+ ret = regmap_bulk_read(rtc->hw->regmap, DA9063_REG_COUNT_S,
+ data, RTC_DATA_LEN);
+ if (ret < 0) {
+ dev_err(dev, "Failed to read RTC time data: %d\n", ret);
+ return ret;
+ }
+
+ if (!(data[RTC_SEC] & DA9063_RTC_READ)) {
+ dev_dbg(dev, "RTC not yet ready to be read by the host\n");
+ return -EINVAL;
+ }
+
+ da9063_data_to_tm(data, tm);
+
+ rtc_tm_to_time(tm, &tm_secs);
+ rtc_tm_to_time(&rtc->alarm_time, &al_secs);
+
+ /* handle the rtc synchronisation delay */
+ if (rtc->rtc_sync && al_secs - tm_secs == 1)
+ memcpy(tm, &rtc->alarm_time, sizeof(struct rtc_time));
+ else
+ rtc->rtc_sync = false;
+
+ return rtc_valid_tm(tm);
+}
+
+static int da9063_rtc_set_time(struct device *dev, struct rtc_time *tm)
+{
+ struct da9063_rtc *rtc = dev_get_drvdata(dev);
+ u8 data[RTC_DATA_LEN];
+ int ret;
+
+ da9063_tm_to_data(tm, data);
+ ret = regmap_bulk_write(rtc->hw->regmap, DA9063_REG_COUNT_S,
+ data, RTC_DATA_LEN);
+ if (ret < 0)
+ dev_err(dev, "Failed to set RTC time data: %d\n", ret);
+
+ return ret;
+}
+
+static int da9063_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm)
+{
+ struct da9063_rtc *rtc = dev_get_drvdata(dev);
+ u8 data[RTC_DATA_LEN];
+ int ret;
+ unsigned int val;
+
+ ret = regmap_bulk_read(rtc->hw->regmap, DA9063_REG_ALARM_S,
+ &data[RTC_SEC], RTC_DATA_LEN);
+ if (ret < 0)
+ return ret;
+
+ da9063_data_to_tm(data, &alrm->time);
+
+ alrm->enabled = !!(data[RTC_YEAR] & DA9063_ALARM_ON);
+
+ ret = regmap_read(rtc->hw->regmap, DA9063_REG_EVENT_A, &val);
+ if (ret < 0)
+ return ret;
+
+ alrm->pending = !!(val & DA9063_E_ALARM);
+
+ return 0;
+}
+
+static int da9063_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm)
+{
+ struct da9063_rtc *rtc = dev_get_drvdata(dev);
+ u8 data[RTC_DATA_LEN];
+ int ret;
+
+ da9063_tm_to_data(&alrm->time, data);
+
+ ret = da9063_rtc_stop_alarm(dev);
+ if (ret < 0) {
+ dev_err(dev, "Failed to stop alarm: %d\n", ret);
+ return ret;
+ }
+
+ ret = regmap_bulk_write(rtc->hw->regmap, DA9063_REG_ALARM_S,
+ data, RTC_DATA_LEN);
+ if (ret < 0) {
+ dev_err(dev, "Failed to write alarm: %d\n", ret);
+ return ret;
+ }
+
+ rtc->alarm_time = alrm->time;
+
+ if (alrm->enabled) {
+ ret = da9063_rtc_start_alarm(dev);
+ if (ret < 0) {
+ dev_err(dev, "Failed to start alarm: %d\n", ret);
+ return ret;
+ }
+ }
+
+ return ret;
+}
+
+static int da9063_rtc_alarm_irq_enable(struct device *dev, unsigned int enabled)
+{
+ if (enabled)
+ return da9063_rtc_start_alarm(dev);
+ else
+ return da9063_rtc_stop_alarm(dev);
+}
+
+static irqreturn_t da9063_alarm_event(int irq, void *data)
+{
+ struct da9063_rtc *rtc = data;
+
+ regmap_update_bits(rtc->hw->regmap, DA9063_REG_ALARM_Y,
+ DA9063_ALARM_ON, 0);
+
+ rtc->rtc_sync = true;
+ rtc_update_irq(rtc->rtc_dev, 1, RTC_IRQF | RTC_AF);
+
+ return IRQ_HANDLED;
+}
+
+static const struct rtc_class_ops da9063_rtc_ops = {
+ .read_time = da9063_rtc_read_time,
+ .set_time = da9063_rtc_set_time,
+ .read_alarm = da9063_rtc_read_alarm,
+ .set_alarm = da9063_rtc_set_alarm,
+ .alarm_irq_enable = da9063_rtc_alarm_irq_enable,
+};
+
+static int da9063_rtc_probe(struct platform_device *pdev)
+{
+ struct da9063 *da9063 = dev_get_drvdata(pdev->dev.parent);
+ struct da9063_rtc *rtc;
+ int irq_alarm;
+ u8 data[RTC_DATA_LEN];
+ int ret;
+
+ ret = regmap_update_bits(da9063->regmap, DA9063_REG_CONTROL_E,
+ DA9063_RTC_EN, DA9063_RTC_EN);
+ if (ret < 0) {
+ dev_err(&pdev->dev, "Failed to enable RTC\n");
+ goto err;
+ }
+
+ ret = regmap_update_bits(da9063->regmap, DA9063_REG_EN_32K,
+ DA9063_CRYSTAL, DA9063_CRYSTAL);
+ if (ret < 0) {
+ dev_err(&pdev->dev, "Failed to run 32kHz oscillator\n");
+ goto err;
+ }
+
+ ret = regmap_update_bits(da9063->regmap, DA9063_REG_ALARM_S,
+ DA9063_ALARM_STATUS_TICK | DA9063_ALARM_STATUS_ALARM,
+ 0);
+ if (ret < 0) {
+ dev_err(&pdev->dev, "Failed to access RTC alarm register\n");
+ goto err;
+ }
+
+ ret = regmap_update_bits(da9063->regmap, DA9063_REG_ALARM_S,
+ DA9063_ALARM_STATUS_ALARM,
+ DA9063_ALARM_STATUS_ALARM);
+ if (ret < 0) {
+ dev_err(&pdev->dev, "Failed to access RTC alarm register\n");
+ goto err;
+ }
+
+ ret = regmap_update_bits(da9063->regmap, DA9063_REG_ALARM_Y,
+ DA9063_TICK_ON, 0);
+ if (ret < 0) {
+ dev_err(&pdev->dev, "Failed to disable TICKs\n");
+ goto err;
+ }
+
+ ret = regmap_bulk_read(da9063->regmap, DA9063_REG_ALARM_S,
+ data, RTC_DATA_LEN);
+ if (ret < 0) {
+ dev_err(&pdev->dev, "Failed to read initial alarm data: %d\n",
+ ret);
+ goto err;
+ }
+
+ rtc = devm_kzalloc(&pdev->dev, sizeof(*rtc), GFP_KERNEL);
+ if (!rtc)
+ return -ENOMEM;
+
+ platform_set_drvdata(pdev, rtc);
+
+ irq_alarm = platform_get_irq_byname(pdev, "ALARM");
+ ret = devm_request_threaded_irq(&pdev->dev, irq_alarm, NULL,
+ da9063_alarm_event,
+ IRQF_TRIGGER_LOW | IRQF_ONESHOT,
+ "ALARM", rtc);
+ if (ret) {
+ dev_err(&pdev->dev, "Failed to request ALARM IRQ %d: %d\n",
+ irq_alarm, ret);
+ goto err;
+ }
+
+ rtc->hw = da9063;
+ rtc->rtc_dev = devm_rtc_device_register(&pdev->dev, DA9063_DRVNAME_RTC,
+ &da9063_rtc_ops, THIS_MODULE);
+ if (IS_ERR(rtc->rtc_dev))
+ return PTR_ERR(rtc->rtc_dev);
+
+ da9063_data_to_tm(data, &rtc->alarm_time);
+ rtc->rtc_sync = false;
+err:
+ return ret;
+}
+
+static struct platform_driver da9063_rtc_driver = {
+ .probe = da9063_rtc_probe,
+ .driver = {
+ .name = DA9063_DRVNAME_RTC,
+ .owner = THIS_MODULE,
+ },
+};
+
+module_platform_driver(da9063_rtc_driver);
+
+MODULE_AUTHOR("S Twiss <stwiss.opensource@diasemi.com>");
+MODULE_DESCRIPTION("Real time clock device driver for Dialog DA9063");
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS("platform:" DA9063_DRVNAME_RTC);
diff --git a/drivers/rtc/rtc-ds1343.c b/drivers/rtc/rtc-ds1343.c
new file mode 100644
index 000000000000..8ccc75021175
--- /dev/null
+++ b/drivers/rtc/rtc-ds1343.c
@@ -0,0 +1,679 @@
+/* rtc-ds1343.c
+ *
+ * Driver for Dallas Semiconductor DS1343 Low Current, SPI Compatible
+ * Real Time Clock
+ *
+ * Author : Raghavendra Chandra Ganiga <ravi23ganiga@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/interrupt.h>
+#include <linux/device.h>
+#include <linux/spi/spi.h>
+#include <linux/regmap.h>
+#include <linux/rtc.h>
+#include <linux/bcd.h>
+#include <linux/pm.h>
+#include <linux/slab.h>
+
+#define DS1343_DRV_VERSION "01.00"
+#define DALLAS_MAXIM_DS1343 0
+#define DALLAS_MAXIM_DS1344 1
+
+/* RTC DS1343 Registers */
+#define DS1343_SECONDS_REG 0x00
+#define DS1343_MINUTES_REG 0x01
+#define DS1343_HOURS_REG 0x02
+#define DS1343_DAY_REG 0x03
+#define DS1343_DATE_REG 0x04
+#define DS1343_MONTH_REG 0x05
+#define DS1343_YEAR_REG 0x06
+#define DS1343_ALM0_SEC_REG 0x07
+#define DS1343_ALM0_MIN_REG 0x08
+#define DS1343_ALM0_HOUR_REG 0x09
+#define DS1343_ALM0_DAY_REG 0x0A
+#define DS1343_ALM1_SEC_REG 0x0B
+#define DS1343_ALM1_MIN_REG 0x0C
+#define DS1343_ALM1_HOUR_REG 0x0D
+#define DS1343_ALM1_DAY_REG 0x0E
+#define DS1343_CONTROL_REG 0x0F
+#define DS1343_STATUS_REG 0x10
+#define DS1343_TRICKLE_REG 0x11
+
+/* DS1343 Control Registers bits */
+#define DS1343_EOSC 0x80
+#define DS1343_DOSF 0x20
+#define DS1343_EGFIL 0x10
+#define DS1343_SQW 0x08
+#define DS1343_INTCN 0x04
+#define DS1343_A1IE 0x02
+#define DS1343_A0IE 0x01
+
+/* DS1343 Status Registers bits */
+#define DS1343_OSF 0x80
+#define DS1343_IRQF1 0x02
+#define DS1343_IRQF0 0x01
+
+/* DS1343 Trickle Charger Registers bits */
+#define DS1343_TRICKLE_MAGIC 0xa0
+#define DS1343_TRICKLE_DS1 0x08
+#define DS1343_TRICKLE_1K 0x01
+#define DS1343_TRICKLE_2K 0x02
+#define DS1343_TRICKLE_4K 0x03
+
+static const struct spi_device_id ds1343_id[] = {
+ { "ds1343", DALLAS_MAXIM_DS1343 },
+ { "ds1344", DALLAS_MAXIM_DS1344 },
+ { }
+};
+MODULE_DEVICE_TABLE(spi, ds1343_id);
+
+struct ds1343_priv {
+ struct spi_device *spi;
+ struct rtc_device *rtc;
+ struct regmap *map;
+ struct mutex mutex;
+ unsigned int irqen;
+ int alarm_sec;
+ int alarm_min;
+ int alarm_hour;
+ int alarm_mday;
+};
+
+static int ds1343_ioctl(struct device *dev, unsigned int cmd, unsigned long arg)
+{
+ struct ds1343_priv *priv = dev_get_drvdata(dev); /* used by RTC_SET_CHARGE below */
+
+ switch (cmd) {
+#ifdef RTC_SET_CHARGE
+ case RTC_SET_CHARGE:
+ {
+ int val;
+
+ if (copy_from_user(&val, (int __user *)arg, sizeof(int)))
+ return -EFAULT;
+
+ return regmap_write(priv->map, DS1343_TRICKLE_REG, val);
+ }
+ break;
+#endif
+ }
+
+ return -ENOIOCTLCMD;
+}
+
+static ssize_t ds1343_show_glitchfilter(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct ds1343_priv *priv = dev_get_drvdata(dev);
+ int glitch_filt_status, data;
+
+ regmap_read(priv->map, DS1343_CONTROL_REG, &data);
+
+ glitch_filt_status = !!(data & DS1343_EGFIL);
+
+ if (glitch_filt_status)
+ return sprintf(buf, "enabled\n");
+ else
+ return sprintf(buf, "disabled\n");
+}
+
+static ssize_t ds1343_store_glitchfilter(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct ds1343_priv *priv = dev_get_drvdata(dev);
+ int data;
+
+ regmap_read(priv->map, DS1343_CONTROL_REG, &data);
+
+ if (strncmp(buf, "enabled", 7) == 0)
+ data |= DS1343_EGFIL;
+
+ else if (strncmp(buf, "disabled", 8) == 0)
+ data &= ~(DS1343_EGFIL);
+
+ else
+ return -EINVAL;
+
+ regmap_write(priv->map, DS1343_CONTROL_REG, data);
+
+ return count;
+}
+
+static DEVICE_ATTR(glitch_filter, S_IRUGO | S_IWUSR, ds1343_show_glitchfilter,
+ ds1343_store_glitchfilter);
+
+static ssize_t ds1343_show_alarmstatus(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct ds1343_priv *priv = dev_get_drvdata(dev);
+ int alarmstatus, data;
+
+ regmap_read(priv->map, DS1343_CONTROL_REG, &data);
+
+ alarmstatus = !!(data & DS1343_A0IE);
+
+ if (alarmstatus)
+ return sprintf(buf, "enabled\n");
+ else
+ return sprintf(buf, "disabled\n");
+}
+
+static DEVICE_ATTR(alarm_status, S_IRUGO, ds1343_show_alarmstatus, NULL);
+
+static ssize_t ds1343_show_alarmmode(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct ds1343_priv *priv = dev_get_drvdata(dev);
+ int alarm_mode, data;
+ char *alarm_str;
+
+ regmap_read(priv->map, DS1343_ALM0_SEC_REG, &data);
+ alarm_mode = (data & 0x80) >> 4;
+
+ regmap_read(priv->map, DS1343_ALM0_MIN_REG, &data);
+ alarm_mode |= (data & 0x80) >> 5;
+
+ regmap_read(priv->map, DS1343_ALM0_HOUR_REG, &data);
+ alarm_mode |= (data & 0x80) >> 6;
+
+ regmap_read(priv->map, DS1343_ALM0_DAY_REG, &data);
+ alarm_mode |= (data & 0x80) >> 7;
+
+ switch (alarm_mode) {
+ case 15:
+ alarm_str = "each second";
+ break;
+
+ case 7:
+ alarm_str = "seconds match";
+ break;
+
+ case 3:
+ alarm_str = "minutes and seconds match";
+ break;
+
+ case 1:
+ alarm_str = "hours, minutes and seconds match";
+ break;
+
+ case 0:
+ alarm_str = "day, hours, minutes and seconds match";
+ break;
+
+ default:
+ alarm_str = "invalid";
+ break;
+ }
+
+ return sprintf(buf, "%s\n", alarm_str);
+}
+
+static DEVICE_ATTR(alarm_mode, S_IRUGO, ds1343_show_alarmmode, NULL);
+
+static ssize_t ds1343_show_tricklecharger(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct ds1343_priv *priv = dev_get_drvdata(dev);
+ int data;
+ char *diodes = "disabled", *resistors = " ";
+
+ regmap_read(priv->map, DS1343_TRICKLE_REG, &data);
+
+ if ((data & 0xf0) == DS1343_TRICKLE_MAGIC) {
+ switch (data & 0x0c) {
+ case DS1343_TRICKLE_DS1:
+ diodes = "one diode,";
+ break;
+
+ default:
+ diodes = "no diode,";
+ break;
+ }
+
+ switch (data & 0x03) {
+ case DS1343_TRICKLE_1K:
+ resistors = "1k Ohm";
+ break;
+
+ case DS1343_TRICKLE_2K:
+ resistors = "2k Ohm";
+ break;
+
+ case DS1343_TRICKLE_4K:
+ resistors = "4k Ohm";
+ break;
+
+ default:
+ diodes = "disabled";
+ break;
+ }
+ }
+
+ return sprintf(buf, "%s %s\n", diodes, resistors);
+}
+
+static DEVICE_ATTR(trickle_charger, S_IRUGO, ds1343_show_tricklecharger, NULL);
+
+static int ds1343_sysfs_register(struct device *dev)
+{
+ int err;
+
+ err = device_create_file(dev, &dev_attr_glitch_filter);
+ if (err)
+ return err;
+
+ err = device_create_file(dev, &dev_attr_alarm_status);
+ if (err)
+ goto error1;
+
+ err = device_create_file(dev, &dev_attr_alarm_mode);
+ if (err)
+ goto error2;
+
+ err = device_create_file(dev, &dev_attr_trickle_charger);
+ if (!err)
+ return err;
+
+ device_remove_file(dev, &dev_attr_alarm_mode);
+
+error2:
+ device_remove_file(dev, &dev_attr_alarm_status);
+
+error1:
+ device_remove_file(dev, &dev_attr_glitch_filter);
+
+ return err;
+}
+
+static void ds1343_sysfs_unregister(struct device *dev)
+{
+ device_remove_file(dev, &dev_attr_glitch_filter);
+ device_remove_file(dev, &dev_attr_alarm_status);
+ device_remove_file(dev, &dev_attr_alarm_mode);
+ device_remove_file(dev, &dev_attr_trickle_charger);
+}
+
+static int ds1343_read_time(struct device *dev, struct rtc_time *dt)
+{
+ struct ds1343_priv *priv = dev_get_drvdata(dev);
+ unsigned char buf[7];
+ int res;
+
+ res = regmap_bulk_read(priv->map, DS1343_SECONDS_REG, buf, 7);
+ if (res)
+ return res;
+
+ dt->tm_sec = bcd2bin(buf[0]);
+ dt->tm_min = bcd2bin(buf[1]);
+ dt->tm_hour = bcd2bin(buf[2] & 0x3F);
+ dt->tm_wday = bcd2bin(buf[3]) - 1;
+ dt->tm_mday = bcd2bin(buf[4]);
+ dt->tm_mon = bcd2bin(buf[5] & 0x1F) - 1;
+ dt->tm_year = bcd2bin(buf[6]) + 100; /* year offset from 1900 */
+
+ return rtc_valid_tm(dt);
+}
+
+static int ds1343_set_time(struct device *dev, struct rtc_time *dt)
+{
+ struct ds1343_priv *priv = dev_get_drvdata(dev);
+ int res;
+
+ res = regmap_write(priv->map, DS1343_SECONDS_REG,
+ bin2bcd(dt->tm_sec));
+ if (res)
+ return res;
+
+ res = regmap_write(priv->map, DS1343_MINUTES_REG,
+ bin2bcd(dt->tm_min));
+ if (res)
+ return res;
+
+ res = regmap_write(priv->map, DS1343_HOURS_REG,
+ bin2bcd(dt->tm_hour) & 0x3F);
+ if (res)
+ return res;
+
+ res = regmap_write(priv->map, DS1343_DAY_REG,
+ bin2bcd(dt->tm_wday + 1));
+ if (res)
+ return res;
+
+ res = regmap_write(priv->map, DS1343_DATE_REG,
+ bin2bcd(dt->tm_mday));
+ if (res)
+ return res;
+
+ res = regmap_write(priv->map, DS1343_MONTH_REG,
+ bin2bcd(dt->tm_mon + 1));
+ if (res)
+ return res;
+
+ dt->tm_year %= 100;
+
+ res = regmap_write(priv->map, DS1343_YEAR_REG,
+ bin2bcd(dt->tm_year));
+ if (res)
+ return res;
+
+ return 0;
+}
+
+static int ds1343_update_alarm(struct device *dev)
+{
+ struct ds1343_priv *priv = dev_get_drvdata(dev);
+ unsigned int control, stat;
+ unsigned char buf[4];
+ int res = 0;
+
+ res = regmap_read(priv->map, DS1343_CONTROL_REG, &control);
+ if (res)
+ return res;
+
+ res = regmap_read(priv->map, DS1343_STATUS_REG, &stat);
+ if (res)
+ return res;
+
+ control &= ~(DS1343_A0IE);
+ stat &= ~(DS1343_IRQF0);
+
+ res = regmap_write(priv->map, DS1343_CONTROL_REG, control);
+ if (res)
+ return res;
+
+ res = regmap_write(priv->map, DS1343_STATUS_REG, stat);
+ if (res)
+ return res;
+
+ buf[0] = priv->alarm_sec < 0 || (priv->irqen & RTC_UF) ?
+ 0x80 : bin2bcd(priv->alarm_sec) & 0x7F;
+ buf[1] = priv->alarm_min < 0 || (priv->irqen & RTC_UF) ?
+ 0x80 : bin2bcd(priv->alarm_min) & 0x7F;
+ buf[2] = priv->alarm_hour < 0 || (priv->irqen & RTC_UF) ?
+ 0x80 : bin2bcd(priv->alarm_hour) & 0x3F;
+ buf[3] = priv->alarm_mday < 0 || (priv->irqen & RTC_UF) ?
+ 0x80 : bin2bcd(priv->alarm_mday) & 0x7F;
+
+ res = regmap_bulk_write(priv->map, DS1343_ALM0_SEC_REG, buf, 4);
+ if (res)
+ return res;
+
+ if (priv->irqen) {
+ control |= DS1343_A0IE;
+ res = regmap_write(priv->map, DS1343_CONTROL_REG, control);
+ }
+
+ return res;
+}
+
+static int ds1343_read_alarm(struct device *dev, struct rtc_wkalrm *alarm)
+{
+ struct ds1343_priv *priv = dev_get_drvdata(dev);
+ struct spi_device *spi = priv->spi;
+ int res = 0;
+ unsigned int stat;
+
+ if (spi->irq <= 0)
+ return -EINVAL;
+
+ mutex_lock(&priv->mutex);
+
+ res = regmap_read(priv->map, DS1343_STATUS_REG, &stat);
+ if (res)
+ goto out;
+
+ alarm->enabled = !!(priv->irqen & RTC_AF);
+ alarm->pending = !!(stat & DS1343_IRQF0);
+
+ alarm->time.tm_sec = priv->alarm_sec < 0 ? 0 : priv->alarm_sec;
+ alarm->time.tm_min = priv->alarm_min < 0 ? 0 : priv->alarm_min;
+ alarm->time.tm_hour = priv->alarm_hour < 0 ? 0 : priv->alarm_hour;
+ alarm->time.tm_mday = priv->alarm_mday < 0 ? 0 : priv->alarm_mday;
+
+ alarm->time.tm_mon = -1;
+ alarm->time.tm_year = -1;
+ alarm->time.tm_wday = -1;
+ alarm->time.tm_yday = -1;
+ alarm->time.tm_isdst = -1;
+
+out:
+ mutex_unlock(&priv->mutex);
+ return res;
+}
+
+static int ds1343_set_alarm(struct device *dev, struct rtc_wkalrm *alarm)
+{
+ struct ds1343_priv *priv = dev_get_drvdata(dev);
+ struct spi_device *spi = priv->spi;
+ int res = 0;
+
+ if (spi->irq <= 0)
+ return -EINVAL;
+
+ mutex_lock(&priv->mutex);
+
+ priv->alarm_sec = alarm->time.tm_sec;
+ priv->alarm_min = alarm->time.tm_min;
+ priv->alarm_hour = alarm->time.tm_hour;
+ priv->alarm_mday = alarm->time.tm_mday;
+
+ if (alarm->enabled)
+ priv->irqen |= RTC_AF;
+
+ res = ds1343_update_alarm(dev);
+
+ mutex_unlock(&priv->mutex);
+
+ return res;
+}
+
+static int ds1343_alarm_irq_enable(struct device *dev, unsigned int enabled)
+{
+ struct ds1343_priv *priv = dev_get_drvdata(dev);
+ struct spi_device *spi = priv->spi;
+ int res = 0;
+
+ if (spi->irq <= 0)
+ return -EINVAL;
+
+ mutex_lock(&priv->mutex);
+
+ if (enabled)
+ priv->irqen |= RTC_AF;
+ else
+ priv->irqen &= ~RTC_AF;
+
+ res = ds1343_update_alarm(dev);
+
+ mutex_unlock(&priv->mutex);
+
+ return res;
+}
+
+static irqreturn_t ds1343_thread(int irq, void *dev_id)
+{
+ struct ds1343_priv *priv = dev_id;
+ unsigned int stat, control;
+ int res = 0;
+
+ mutex_lock(&priv->mutex);
+
+ res = regmap_read(priv->map, DS1343_STATUS_REG, &stat);
+ if (res)
+ goto out;
+
+ if (stat & DS1343_IRQF0) {
+ stat &= ~DS1343_IRQF0;
+ regmap_write(priv->map, DS1343_STATUS_REG, stat);
+
+ res = regmap_read(priv->map, DS1343_CONTROL_REG, &control);
+ if (res)
+ goto out;
+
+ control &= ~DS1343_A0IE;
+ regmap_write(priv->map, DS1343_CONTROL_REG, control);
+
+ rtc_update_irq(priv->rtc, 1, RTC_AF | RTC_IRQF);
+ }
+
+out:
+ mutex_unlock(&priv->mutex);
+ return IRQ_HANDLED;
+}
+
+static const struct rtc_class_ops ds1343_rtc_ops = {
+ .ioctl = ds1343_ioctl,
+ .read_time = ds1343_read_time,
+ .set_time = ds1343_set_time,
+ .read_alarm = ds1343_read_alarm,
+ .set_alarm = ds1343_set_alarm,
+ .alarm_irq_enable = ds1343_alarm_irq_enable,
+};
+
+static int ds1343_probe(struct spi_device *spi)
+{
+ struct ds1343_priv *priv;
+ struct regmap_config config;
+ unsigned int data;
+ int res;
+
+ memset(&config, 0, sizeof(config));
+ config.reg_bits = 8;
+ config.val_bits = 8;
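+ /* the DS1343 addresses writes at (register | 0x80) */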
+ config.write_flag_mask = 0x80;
+
+ priv = devm_kzalloc(&spi->dev, sizeof(struct ds1343_priv), GFP_KERNEL);
+ if (!priv)
+ return -ENOMEM;
+
+ priv->spi = spi;
+ mutex_init(&priv->mutex);
+
+ /*
+ * The DS1343 works in SPI mode 3 and
+ * its chip select is active high.
+ */
+ spi->mode = SPI_MODE_3 | SPI_CS_HIGH;
+ spi->bits_per_word = 8;
+ res = spi_setup(spi);
+ if (res)
+ return res;
+
+ spi_set_drvdata(spi, priv);
+
+ priv->map = devm_regmap_init_spi(spi, &config);
+
+ if (IS_ERR(priv->map)) {
+ dev_err(&spi->dev, "spi regmap init failed for rtc ds1343\n");
+ return PTR_ERR(priv->map);
+ }
+
+ res = regmap_read(priv->map, DS1343_SECONDS_REG, &data);
+ if (res)
+ return res;
+
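+ /* make sure the oscillator runs and both alarms start out disabled */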
+ regmap_read(priv->map, DS1343_CONTROL_REG, &data);
+ data |= DS1343_INTCN;
+ data &= ~(DS1343_EOSC | DS1343_A1IE | DS1343_A0IE);
+ regmap_write(priv->map, DS1343_CONTROL_REG, data);
+
+ regmap_read(priv->map, DS1343_STATUS_REG, &data);
+ data &= ~(DS1343_OSF | DS1343_IRQF1 | DS1343_IRQF0);
+ regmap_write(priv->map, DS1343_STATUS_REG, data);
+
+ priv->rtc = devm_rtc_device_register(&spi->dev, "ds1343",
+ &ds1343_rtc_ops, THIS_MODULE);
+ if (IS_ERR(priv->rtc)) {
+ dev_err(&spi->dev, "unable to register rtc ds1343\n");
+ return PTR_ERR(priv->rtc);
+ }
+
+ if (spi->irq > 0) {
+ res = devm_request_threaded_irq(&spi->dev, spi->irq, NULL,
+ ds1343_thread,
+ IRQF_NO_SUSPEND | IRQF_ONESHOT,
+ "ds1343", priv);
+ if (res) {
+ dev_err(&spi->dev,
+ "unable to request irq for rtc ds1343\n");
+ return res;
+ }
+
+ device_set_wakeup_capable(&spi->dev, 1);
+ }
+
+ res = ds1343_sysfs_register(&spi->dev);
+ if (res)
+ dev_err(&spi->dev,
+ "unable to create sysfs entries for rtc ds1343\n");
+
+ return 0;
+}
+
+static int ds1343_remove(struct spi_device *spi)
+{
+ struct ds1343_priv *priv = spi_get_drvdata(spi);
+
+ if (spi->irq > 0) {
+ mutex_lock(&priv->mutex);
+ priv->irqen &= ~RTC_AF;
+ mutex_unlock(&priv->mutex);
+
+ devm_free_irq(&spi->dev, spi->irq, priv);
+ }
+
+ spi_set_drvdata(spi, NULL);
+
+ ds1343_sysfs_unregister(&spi->dev);
+
+ return 0;
+}
+
+#ifdef CONFIG_PM_SLEEP
+
+static int ds1343_suspend(struct device *dev)
+{
+ struct spi_device *spi = to_spi_device(dev);
+
+ if (spi->irq > 0 && device_may_wakeup(dev))
+ enable_irq_wake(spi->irq);
+
+ return 0;
+}
+
+static int ds1343_resume(struct device *dev)
+{
+ struct spi_device *spi = to_spi_device(dev);
+
+ if (spi->irq > 0 && device_may_wakeup(dev))
+ disable_irq_wake(spi->irq);
+
+ return 0;
+}
+
+#endif
+
+static SIMPLE_DEV_PM_OPS(ds1343_pm, ds1343_suspend, ds1343_resume);
+
+static struct spi_driver ds1343_driver = {
+ .driver = {
+ .name = "ds1343",
+ .owner = THIS_MODULE,
+ .pm = &ds1343_pm,
+ },
+ .probe = ds1343_probe,
+ .remove = ds1343_remove,
+ .id_table = ds1343_id,
+};
+
+module_spi_driver(ds1343_driver);
+
+MODULE_DESCRIPTION("DS1343 RTC SPI Driver");
+MODULE_AUTHOR("Raghavendra Chandra Ganiga <ravi23ganiga@gmail.com>");
+MODULE_LICENSE("GPL v2");
+MODULE_VERSION(DS1343_DRV_VERSION);
diff --git a/drivers/rtc/rtc-ds1742.c b/drivers/rtc/rtc-ds1742.c
index 942103dac30f..c6b2191a4128 100644
--- a/drivers/rtc/rtc-ds1742.c
+++ b/drivers/rtc/rtc-ds1742.c
@@ -219,7 +219,7 @@ static int ds1742_rtc_remove(struct platform_device *pdev)
return 0;
}
-static struct of_device_id __maybe_unused ds1742_rtc_of_match[] = {
+static const struct of_device_id __maybe_unused ds1742_rtc_of_match[] = {
{ .compatible = "maxim,ds1742", },
{ }
};
diff --git a/drivers/rtc/rtc-efi.c b/drivers/rtc/rtc-efi.c
index 797aa0252ba9..c4c38431012e 100644
--- a/drivers/rtc/rtc-efi.c
+++ b/drivers/rtc/rtc-efi.c
@@ -35,7 +35,7 @@ static inline int
compute_yday(efi_time_t *eft)
{
/* efi_time_t.month is in the range [1-12], so we need -1 */
- return rtc_year_days(eft->day - 1, eft->month - 1, eft->year);
+ return rtc_year_days(eft->day, eft->month - 1, eft->year);
}
/*
* returns day of the week [0-6] 0=Sunday
diff --git a/drivers/rtc/rtc-hym8563.c b/drivers/rtc/rtc-hym8563.c
index e5f13c4310fe..e3290abbb03e 100644
--- a/drivers/rtc/rtc-hym8563.c
+++ b/drivers/rtc/rtc-hym8563.c
@@ -585,7 +585,7 @@ static const struct i2c_device_id hym8563_id[] = {
};
MODULE_DEVICE_TABLE(i2c, hym8563_id);
-static struct of_device_id hym8563_dt_idtable[] = {
+static const struct of_device_id hym8563_dt_idtable[] = {
{ .compatible = "haoyu,hym8563" },
{},
};
diff --git a/drivers/rtc/rtc-isl12057.c b/drivers/rtc/rtc-isl12057.c
index 41bd76aaff76..455b601d731d 100644
--- a/drivers/rtc/rtc-isl12057.c
+++ b/drivers/rtc/rtc-isl12057.c
@@ -278,7 +278,7 @@ static int isl12057_probe(struct i2c_client *client,
}
#ifdef CONFIG_OF
-static struct of_device_id isl12057_dt_match[] = {
+static const struct of_device_id isl12057_dt_match[] = {
{ .compatible = "isl,isl12057" },
{ },
};
diff --git a/drivers/rtc/rtc-m41t80.c b/drivers/rtc/rtc-m41t80.c
index a5248aa1abf1..7ff7427c2e6a 100644
--- a/drivers/rtc/rtc-m41t80.c
+++ b/drivers/rtc/rtc-m41t80.c
@@ -66,8 +66,6 @@
#define M41T80_FEATURE_WD (1 << 3) /* Extra watchdog resolution */
#define M41T80_FEATURE_SQ_ALT (1 << 4) /* RSx bits are in reg 4 */
-#define DRV_VERSION "0.05"
-
static DEFINE_MUTEX(m41t80_rtc_mutex);
static const struct i2c_device_id m41t80_id[] = {
{ "m41t62", M41T80_FEATURE_SQ | M41T80_FEATURE_SQ_ALT },
@@ -80,6 +78,7 @@ static const struct i2c_device_id m41t80_id[] = {
{ "m41st84", M41T80_FEATURE_HT | M41T80_FEATURE_BL | M41T80_FEATURE_SQ },
{ "m41st85", M41T80_FEATURE_HT | M41T80_FEATURE_BL | M41T80_FEATURE_SQ },
{ "m41st87", M41T80_FEATURE_HT | M41T80_FEATURE_BL | M41T80_FEATURE_SQ },
+ { "rv4162", M41T80_FEATURE_SQ | M41T80_FEATURE_WD | M41T80_FEATURE_SQ_ALT },
{ }
};
MODULE_DEVICE_TABLE(i2c, m41t80_id);
@@ -232,7 +231,7 @@ static ssize_t m41t80_sysfs_show_flags(struct device *dev,
val = i2c_smbus_read_byte_data(client, M41T80_REG_FLAGS);
if (val < 0)
- return -EIO;
+ return val;
return sprintf(buf, "%#x\n", val);
}
static DEVICE_ATTR(flags, S_IRUGO, m41t80_sysfs_show_flags, NULL);
@@ -252,7 +251,7 @@ static ssize_t m41t80_sysfs_show_sqwfreq(struct device *dev,
reg_sqw = M41T80_REG_WDAY;
val = i2c_smbus_read_byte_data(client, reg_sqw);
if (val < 0)
- return -EIO;
+ return val;
val = (val >> 4) & 0xf;
switch (val) {
case 0:
@@ -271,7 +270,7 @@ static ssize_t m41t80_sysfs_set_sqwfreq(struct device *dev,
{
struct i2c_client *client = to_i2c_client(dev);
struct m41t80_data *clientdata = i2c_get_clientdata(client);
- int almon, sqw, reg_sqw;
+ int almon, sqw, reg_sqw, rc;
int val = simple_strtoul(buf, NULL, 0);
if (!(clientdata->features & M41T80_FEATURE_SQ))
@@ -291,21 +290,30 @@ static ssize_t m41t80_sysfs_set_sqwfreq(struct device *dev,
/* disable SQW, set SQW frequency & re-enable */
almon = i2c_smbus_read_byte_data(client, M41T80_REG_ALARM_MON);
if (almon < 0)
- return -EIO;
+ return almon;
reg_sqw = M41T80_REG_SQW;
if (clientdata->features & M41T80_FEATURE_SQ_ALT)
reg_sqw = M41T80_REG_WDAY;
sqw = i2c_smbus_read_byte_data(client, reg_sqw);
if (sqw < 0)
- return -EIO;
+ return sqw;
sqw = (sqw & 0x0f) | (val << 4);
- if (i2c_smbus_write_byte_data(client, M41T80_REG_ALARM_MON,
- almon & ~M41T80_ALMON_SQWE) < 0 ||
- i2c_smbus_write_byte_data(client, reg_sqw, sqw) < 0)
- return -EIO;
- if (val && i2c_smbus_write_byte_data(client, M41T80_REG_ALARM_MON,
- almon | M41T80_ALMON_SQWE) < 0)
- return -EIO;
+
+ rc = i2c_smbus_write_byte_data(client, M41T80_REG_ALARM_MON,
+ almon & ~M41T80_ALMON_SQWE);
+ if (rc < 0)
+ return rc;
+
+ if (val) {
+ rc = i2c_smbus_write_byte_data(client, reg_sqw, sqw);
+ if (rc < 0)
+ return rc;
+
+ rc = i2c_smbus_write_byte_data(client, M41T80_REG_ALARM_MON,
+ almon | M41T80_ALMON_SQWE);
+ if (rc < 0)
+ return rc;
+ }
return count;
}
static DEVICE_ATTR(sqwfreq, S_IRUGO | S_IWUSR,
@@ -629,40 +637,28 @@ static int m41t80_probe(struct i2c_client *client,
struct m41t80_data *clientdata = NULL;
if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C
- | I2C_FUNC_SMBUS_BYTE_DATA)) {
- rc = -ENODEV;
- goto exit;
- }
-
- dev_info(&client->dev,
- "chip found, driver version " DRV_VERSION "\n");
+ | I2C_FUNC_SMBUS_BYTE_DATA))
+ return -ENODEV;
clientdata = devm_kzalloc(&client->dev, sizeof(*clientdata),
GFP_KERNEL);
- if (!clientdata) {
- rc = -ENOMEM;
- goto exit;
- }
+ if (!clientdata)
+ return -ENOMEM;
clientdata->features = id->driver_data;
i2c_set_clientdata(client, clientdata);
rtc = devm_rtc_device_register(&client->dev, client->name,
&m41t80_rtc_ops, THIS_MODULE);
- if (IS_ERR(rtc)) {
- rc = PTR_ERR(rtc);
- rtc = NULL;
- goto exit;
- }
+ if (IS_ERR(rtc))
+ return PTR_ERR(rtc);
clientdata->rtc = rtc;
/* Make sure HT (Halt Update) bit is cleared */
rc = i2c_smbus_read_byte_data(client, M41T80_REG_ALARM_HOUR);
- if (rc < 0)
- goto ht_err;
- if (rc & M41T80_ALHOUR_HT) {
+ if (rc >= 0 && rc & M41T80_ALHOUR_HT) {
if (clientdata->features & M41T80_FEATURE_HT) {
m41t80_get_datetime(client, &tm);
dev_info(&client->dev, "HT bit was set!\n");
@@ -673,53 +669,44 @@ static int m41t80_probe(struct i2c_client *client,
tm.tm_mon + 1, tm.tm_mday, tm.tm_hour,
tm.tm_min, tm.tm_sec);
}
- if (i2c_smbus_write_byte_data(client,
- M41T80_REG_ALARM_HOUR,
- rc & ~M41T80_ALHOUR_HT) < 0)
- goto ht_err;
+ rc = i2c_smbus_write_byte_data(client, M41T80_REG_ALARM_HOUR,
+ rc & ~M41T80_ALHOUR_HT);
+ }
+
+ if (rc < 0) {
+ dev_err(&client->dev, "Can't clear HT bit\n");
+ return rc;
}
/* Make sure ST (stop) bit is cleared */
rc = i2c_smbus_read_byte_data(client, M41T80_REG_SEC);
- if (rc < 0)
- goto st_err;
- if (rc & M41T80_SEC_ST) {
- if (i2c_smbus_write_byte_data(client, M41T80_REG_SEC,
- rc & ~M41T80_SEC_ST) < 0)
- goto st_err;
+ if (rc >= 0 && rc & M41T80_SEC_ST)
+ rc = i2c_smbus_write_byte_data(client, M41T80_REG_SEC,
+ rc & ~M41T80_SEC_ST);
+ if (rc < 0) {
+ dev_err(&client->dev, "Can't clear ST bit\n");
+ return rc;
}
rc = m41t80_sysfs_register(&client->dev);
if (rc)
- goto exit;
+ return rc;
#ifdef CONFIG_RTC_DRV_M41T80_WDT
if (clientdata->features & M41T80_FEATURE_HT) {
save_client = client;
rc = misc_register(&wdt_dev);
if (rc)
- goto exit;
+ return rc;
rc = register_reboot_notifier(&wdt_notifier);
if (rc) {
misc_deregister(&wdt_dev);
- goto exit;
+ return rc;
}
}
#endif
return 0;
-
-st_err:
- rc = -EIO;
- dev_err(&client->dev, "Can't clear ST bit\n");
- goto exit;
-ht_err:
- rc = -EIO;
- dev_err(&client->dev, "Can't clear HT bit\n");
- goto exit;
-
-exit:
- return rc;
}
static int m41t80_remove(struct i2c_client *client)
@@ -750,4 +737,3 @@ module_i2c_driver(m41t80_driver);
MODULE_AUTHOR("Alexander Bigga <ab@mycable.de>");
MODULE_DESCRIPTION("ST Microelectronics M41T80 series RTC I2C Client Driver");
MODULE_LICENSE("GPL");
-MODULE_VERSION(DRV_VERSION);
diff --git a/drivers/rtc/rtc-mcp795.c b/drivers/rtc/rtc-mcp795.c
new file mode 100644
index 000000000000..34295bf00416
--- /dev/null
+++ b/drivers/rtc/rtc-mcp795.c
@@ -0,0 +1,199 @@
+/*
+ * SPI Driver for Microchip MCP795 RTC
+ *
+ * Copyright (C) Josef Gajdusek <atx@atx.name>
+ *
+ * based on other Linux RTC drivers
+ *
+ * Device datasheet:
+ * http://ww1.microchip.com/downloads/en/DeviceDoc/22280A.pdf
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/device.h>
+#include <linux/printk.h>
+#include <linux/spi/spi.h>
+#include <linux/rtc.h>
+
+/* MCP795 Instructions, see datasheet table 3-1 */
+#define MCP795_EEREAD 0x03
+#define MCP795_EEWRITE 0x02
+#define MCP795_EEWRDI 0x04
+#define MCP795_EEWREN 0x06
+#define MCP795_SRREAD 0x05
+#define MCP795_SRWRITE 0x01
+#define MCP795_READ 0x13
+#define MCP795_WRITE 0x12
+#define MCP795_UNLOCK 0x14
+#define MCP795_IDWRITE 0x32
+#define MCP795_IDREAD 0x33
+#define MCP795_CLRWDT 0x44
+#define MCP795_CLRRAM 0x54
+
+#define MCP795_ST_BIT 0x80
+#define MCP795_24_BIT 0x40
+
+static int mcp795_rtcc_read(struct device *dev, u8 addr, u8 *buf, u8 count)
+{
+ struct spi_device *spi = to_spi_device(dev);
+ int ret;
+ u8 tx[2];
+
+ tx[0] = MCP795_READ;
+ tx[1] = addr;
+ ret = spi_write_then_read(spi, tx, sizeof(tx), buf, count);
+
+ if (ret)
+ dev_err(dev, "Failed reading %d bytes from address %x.\n",
+ count, addr);
+
+ return ret;
+}
+
+static int mcp795_rtcc_write(struct device *dev, u8 addr, u8 *data, u8 count)
+{
+ struct spi_device *spi = to_spi_device(dev);
+ int ret;
+ u8 tx[2 + count];
+
+ tx[0] = MCP795_WRITE;
+ tx[1] = addr;
+ memcpy(&tx[2], data, count);
+
+ ret = spi_write(spi, tx, 2 + count);
+
+ if (ret)
+ dev_err(dev, "Failed to write %d bytes to address %x.\n",
+ count, addr);
+
+ return ret;
+}
+
+static int mcp795_rtcc_set_bits(struct device *dev, u8 addr, u8 mask, u8 state)
+{
+ int ret;
+ u8 tmp;
+
+ ret = mcp795_rtcc_read(dev, addr, &tmp, 1);
+ if (ret)
+ return ret;
+
+ if ((tmp & mask) != state) {
+ tmp = (tmp & ~mask) | state;
+ ret = mcp795_rtcc_write(dev, addr, &tmp, 1);
+ }
+
+ return ret;
+}
+
+static int mcp795_set_time(struct device *dev, struct rtc_time *tim)
+{
+ int ret;
+ u8 data[7];
+
+ /* Read first, so we can leave config bits untouched */
+ ret = mcp795_rtcc_read(dev, 0x01, data, sizeof(data));
+
+ if (ret)
+ return ret;
+
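+ /* pack each field as BCD by hand; this open-codes bin2bcd() from <linux/bcd.h> */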
+ data[0] = (data[0] & 0x80) | ((tim->tm_sec / 10) << 4) | (tim->tm_sec % 10);
+ data[1] = (data[1] & 0x80) | ((tim->tm_min / 10) << 4) | (tim->tm_min % 10);
+ data[2] = ((tim->tm_hour / 10) << 4) | (tim->tm_hour % 10);
+ data[4] = ((tim->tm_mday / 10) << 4) | (tim->tm_mday % 10);
+ /* tm_mon is 0-11, while the RTC stores the month as 1-12 */
+ data[5] = (data[5] & 0x10) | (((tim->tm_mon + 1) / 10) << 4) |
+ ((tim->tm_mon + 1) % 10);
+
+ if (tim->tm_year >= 100)
+ tim->tm_year -= 100;
+
+ data[6] = ((tim->tm_year / 10) << 4) | (tim->tm_year % 10);
+
+ ret = mcp795_rtcc_write(dev, 0x01, data, sizeof(data));
+
+ if (ret)
+ return ret;
+
+ dev_dbg(dev, "Set mcp795: %04d-%02d-%02d %02d:%02d:%02d\n",
+ tim->tm_year + 1900, tim->tm_mon + 1, tim->tm_mday,
+ tim->tm_hour, tim->tm_min, tim->tm_sec);
+
+ return 0;
+}
+
+static int mcp795_read_time(struct device *dev, struct rtc_time *tim)
+{
+ int ret;
+ u8 data[7];
+
+ ret = mcp795_rtcc_read(dev, 0x01, data, sizeof(data));
+
+ if (ret)
+ return ret;
+
+ tim->tm_sec = ((data[0] & 0x70) >> 4) * 10 + (data[0] & 0x0f);
+ tim->tm_min = ((data[1] & 0x70) >> 4) * 10 + (data[1] & 0x0f);
+ tim->tm_hour = ((data[2] & 0x30) >> 4) * 10 + (data[2] & 0x0f);
+ tim->tm_mday = ((data[4] & 0x30) >> 4) * 10 + (data[4] & 0x0f);
+ tim->tm_mon = ((data[5] & 0x10) >> 4) * 10 + (data[5] & 0x0f) - 1;
+ tim->tm_year = ((data[6] & 0xf0) >> 4) * 10 + (data[6] & 0x0f) + 100; /* Assume we are in 20xx */
+
+ dev_dbg(dev, "Read from mcp795: %04d-%02d-%02d %02d:%02d:%02d\n",
+ tim->tm_year + 1900, tim->tm_mon + 1, tim->tm_mday,
+ tim->tm_hour, tim->tm_min, tim->tm_sec);
+
+ return rtc_valid_tm(tim);
+}
+
+static const struct rtc_class_ops mcp795_rtc_ops = {
+ .read_time = mcp795_read_time,
+ .set_time = mcp795_set_time
+};
+
+static int mcp795_probe(struct spi_device *spi)
+{
+ struct rtc_device *rtc;
+ int ret;
+
+ spi->mode = SPI_MODE_0;
+ spi->bits_per_word = 8;
+ ret = spi_setup(spi);
+ if (ret) {
+ dev_err(&spi->dev, "Unable to setup SPI\n");
+ return ret;
+ }
+
+ /* Start the oscillator */
+ mcp795_rtcc_set_bits(&spi->dev, 0x01, MCP795_ST_BIT, MCP795_ST_BIT);
+ /* Clear the 12 hour mode flag */
+ mcp795_rtcc_set_bits(&spi->dev, 0x03, MCP795_24_BIT, 0);
+
+ rtc = devm_rtc_device_register(&spi->dev, "rtc-mcp795",
+ &mcp795_rtc_ops, THIS_MODULE);
+ if (IS_ERR(rtc))
+ return PTR_ERR(rtc);
+
+ spi_set_drvdata(spi, rtc);
+
+ return 0;
+}
+
+static struct spi_driver mcp795_driver = {
+ .driver = {
+ .name = "rtc-mcp795",
+ .owner = THIS_MODULE,
+ },
+ .probe = mcp795_probe,
+};
+
+module_spi_driver(mcp795_driver);
+
+MODULE_DESCRIPTION("MCP795 RTC SPI Driver");
+MODULE_AUTHOR("Josef Gajdusek <atx@atx.name>");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("spi:mcp795");
diff --git a/drivers/rtc/rtc-mv.c b/drivers/rtc/rtc-mv.c
index d15a999363fc..6aaec2fc7c0d 100644
--- a/drivers/rtc/rtc-mv.c
+++ b/drivers/rtc/rtc-mv.c
@@ -319,7 +319,7 @@ static int __exit mv_rtc_remove(struct platform_device *pdev)
}
#ifdef CONFIG_OF
-static struct of_device_id rtc_mv_of_match_table[] = {
+static const struct of_device_id rtc_mv_of_match_table[] = {
{ .compatible = "marvell,orion-rtc", },
{}
};
diff --git a/drivers/rtc/rtc-omap.c b/drivers/rtc/rtc-omap.c
index 26de5f8c2ae4..03bce134414c 100644
--- a/drivers/rtc/rtc-omap.c
+++ b/drivers/rtc/rtc-omap.c
@@ -73,43 +73,52 @@
#define OMAP_RTC_IRQWAKEEN 0x7c
/* OMAP_RTC_CTRL_REG bit fields: */
-#define OMAP_RTC_CTRL_SPLIT (1<<7)
-#define OMAP_RTC_CTRL_DISABLE (1<<6)
-#define OMAP_RTC_CTRL_SET_32_COUNTER (1<<5)
-#define OMAP_RTC_CTRL_TEST (1<<4)
-#define OMAP_RTC_CTRL_MODE_12_24 (1<<3)
-#define OMAP_RTC_CTRL_AUTO_COMP (1<<2)
-#define OMAP_RTC_CTRL_ROUND_30S (1<<1)
-#define OMAP_RTC_CTRL_STOP (1<<0)
+#define OMAP_RTC_CTRL_SPLIT BIT(7)
+#define OMAP_RTC_CTRL_DISABLE BIT(6)
+#define OMAP_RTC_CTRL_SET_32_COUNTER BIT(5)
+#define OMAP_RTC_CTRL_TEST BIT(4)
+#define OMAP_RTC_CTRL_MODE_12_24 BIT(3)
+#define OMAP_RTC_CTRL_AUTO_COMP BIT(2)
+#define OMAP_RTC_CTRL_ROUND_30S BIT(1)
+#define OMAP_RTC_CTRL_STOP BIT(0)
/* OMAP_RTC_STATUS_REG bit fields: */
-#define OMAP_RTC_STATUS_POWER_UP (1<<7)
-#define OMAP_RTC_STATUS_ALARM (1<<6)
-#define OMAP_RTC_STATUS_1D_EVENT (1<<5)
-#define OMAP_RTC_STATUS_1H_EVENT (1<<4)
-#define OMAP_RTC_STATUS_1M_EVENT (1<<3)
-#define OMAP_RTC_STATUS_1S_EVENT (1<<2)
-#define OMAP_RTC_STATUS_RUN (1<<1)
-#define OMAP_RTC_STATUS_BUSY (1<<0)
+#define OMAP_RTC_STATUS_POWER_UP BIT(7)
+#define OMAP_RTC_STATUS_ALARM BIT(6)
+#define OMAP_RTC_STATUS_1D_EVENT BIT(5)
+#define OMAP_RTC_STATUS_1H_EVENT BIT(4)
+#define OMAP_RTC_STATUS_1M_EVENT BIT(3)
+#define OMAP_RTC_STATUS_1S_EVENT BIT(2)
+#define OMAP_RTC_STATUS_RUN BIT(1)
+#define OMAP_RTC_STATUS_BUSY BIT(0)
/* OMAP_RTC_INTERRUPTS_REG bit fields: */
-#define OMAP_RTC_INTERRUPTS_IT_ALARM (1<<3)
-#define OMAP_RTC_INTERRUPTS_IT_TIMER (1<<2)
+#define OMAP_RTC_INTERRUPTS_IT_ALARM BIT(3)
+#define OMAP_RTC_INTERRUPTS_IT_TIMER BIT(2)
+
+/* OMAP_RTC_OSC_REG bit fields: */
+#define OMAP_RTC_OSC_32KCLK_EN BIT(6)
/* OMAP_RTC_IRQWAKEEN bit fields: */
-#define OMAP_RTC_IRQWAKEEN_ALARM_WAKEEN (1<<1)
+#define OMAP_RTC_IRQWAKEEN_ALARM_WAKEEN BIT(1)
/* OMAP_RTC_KICKER values */
#define KICK0_VALUE 0x83e70b13
#define KICK1_VALUE 0x95a4f1e0
-#define OMAP_RTC_HAS_KICKER 0x1
+#define OMAP_RTC_HAS_KICKER BIT(0)
/*
* A few RTC IP revisions have a special WAKE-EN register to enable
* wakeup generation for the alarm event.
*/
-#define OMAP_RTC_HAS_IRQWAKEEN 0x2
+#define OMAP_RTC_HAS_IRQWAKEEN BIT(1)
+
+/*
+ * Some RTC IP revisions (like those in AM335x and DRA7x) need
+ * the 32KHz clock to be explicitly enabled.
+ */
+#define OMAP_RTC_HAS_32KCLK_EN BIT(2)
static void __iomem *rtc_base;
@@ -319,7 +328,8 @@ static struct platform_device_id omap_rtc_devtype[] = {
},
[OMAP_RTC_DATA_AM3352_IDX] = {
.name = "am3352-rtc",
- .driver_data = OMAP_RTC_HAS_KICKER | OMAP_RTC_HAS_IRQWAKEEN,
+ .driver_data = OMAP_RTC_HAS_KICKER | OMAP_RTC_HAS_IRQWAKEEN |
+ OMAP_RTC_HAS_32KCLK_EN,
},
[OMAP_RTC_DATA_DA830_IDX] = {
.name = "da830-rtc",
@@ -352,6 +362,12 @@ static int __init omap_rtc_probe(struct platform_device *pdev)
if (of_id)
pdev->id_entry = of_id->data;
+ id_entry = platform_get_device_id(pdev);
+ if (!id_entry) {
+ dev_err(&pdev->dev, "no matching device entry\n");
+ return -ENODEV;
+ }
+
omap_rtc_timer = platform_get_irq(pdev, 0);
if (omap_rtc_timer <= 0) {
pr_debug("%s: no update irq?\n", pdev->name);
@@ -373,8 +389,7 @@ static int __init omap_rtc_probe(struct platform_device *pdev)
pm_runtime_enable(&pdev->dev);
pm_runtime_get_sync(&pdev->dev);
- id_entry = platform_get_device_id(pdev);
- if (id_entry && (id_entry->driver_data & OMAP_RTC_HAS_KICKER)) {
+ if (id_entry->driver_data & OMAP_RTC_HAS_KICKER) {
rtc_writel(KICK0_VALUE, OMAP_RTC_KICK0_REG);
rtc_writel(KICK1_VALUE, OMAP_RTC_KICK1_REG);
}
@@ -393,6 +408,10 @@ static int __init omap_rtc_probe(struct platform_device *pdev)
*/
rtc_write(0, OMAP_RTC_INTERRUPTS_REG);
+ /* enable RTC functional clock */
+ if (id_entry->driver_data & OMAP_RTC_HAS_32KCLK_EN)
+ rtc_writel(OMAP_RTC_OSC_32KCLK_EN, OMAP_RTC_OSC_REG);
+
/* clear old status */
reg = rtc_read(OMAP_RTC_STATUS_REG);
if (reg & (u8) OMAP_RTC_STATUS_POWER_UP) {
@@ -452,7 +471,7 @@ static int __init omap_rtc_probe(struct platform_device *pdev)
return 0;
fail0:
- if (id_entry && (id_entry->driver_data & OMAP_RTC_HAS_KICKER))
+ if (id_entry->driver_data & OMAP_RTC_HAS_KICKER)
rtc_writel(0, OMAP_RTC_KICK0_REG);
pm_runtime_put_sync(&pdev->dev);
pm_runtime_disable(&pdev->dev);
@@ -469,7 +488,7 @@ static int __exit omap_rtc_remove(struct platform_device *pdev)
/* leave rtc running, but disable irqs */
rtc_write(0, OMAP_RTC_INTERRUPTS_REG);
- if (id_entry && (id_entry->driver_data & OMAP_RTC_HAS_KICKER))
+ if (id_entry->driver_data & OMAP_RTC_HAS_KICKER)
rtc_writel(0, OMAP_RTC_KICK0_REG);
/* Disable the clock/module */
diff --git a/drivers/rtc/rtc-palmas.c b/drivers/rtc/rtc-palmas.c
index c360d62fb3f6..4dfe2d793fa3 100644
--- a/drivers/rtc/rtc-palmas.c
+++ b/drivers/rtc/rtc-palmas.c
@@ -352,7 +352,7 @@ static SIMPLE_DEV_PM_OPS(palmas_rtc_pm_ops, palmas_rtc_suspend,
palmas_rtc_resume);
#ifdef CONFIG_OF
-static struct of_device_id of_palmas_rtc_match[] = {
+static const struct of_device_id of_palmas_rtc_match[] = {
{ .compatible = "ti,palmas-rtc"},
{ },
};
diff --git a/drivers/rtc/rtc-puv3.c b/drivers/rtc/rtc-puv3.c
index 1ecfe3bd92ac..1cff2a21db67 100644
--- a/drivers/rtc/rtc-puv3.c
+++ b/drivers/rtc/rtc-puv3.c
@@ -71,7 +71,7 @@ static int puv3_rtc_setpie(struct device *dev, int enabled)
{
unsigned int tmp;
- dev_debug(dev, "%s: pie=%d\n", __func__, enabled);
+ dev_dbg(dev, "%s: pie=%d\n", __func__, enabled);
spin_lock_irq(&puv3_rtc_pie_lock);
tmp = readl(RTC_RTSR) & ~RTC_RTSR_HZE;
@@ -140,7 +140,7 @@ static int puv3_rtc_setalarm(struct device *dev, struct rtc_wkalrm *alrm)
rtc_tm_to_time(tm, &rtcalarm_count);
writel(rtcalarm_count, RTC_RTAR);
- puv3_rtc_setaie(&dev->dev, alrm->enabled);
+ puv3_rtc_setaie(dev, alrm->enabled);
if (alrm->enabled)
enable_irq_wake(puv3_rtc_alarmno);
diff --git a/drivers/rtc/rtc-xgene.c b/drivers/rtc/rtc-xgene.c
new file mode 100644
index 000000000000..14129cc85bdb
--- /dev/null
+++ b/drivers/rtc/rtc-xgene.c
@@ -0,0 +1,278 @@
+/*
+ * APM X-Gene SoC Real Time Clock Driver
+ *
+ * Copyright (c) 2014, Applied Micro Circuits Corporation
+ * Author: Rameshwar Prasad Sahu <rsahu@apm.com>
+ * Loc Ho <lho@apm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/io.h>
+#include <linux/slab.h>
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/rtc.h>
+
+/* RTC CSR Registers */
+#define RTC_CCVR 0x00
+#define RTC_CMR 0x04
+#define RTC_CLR 0x08
+#define RTC_CCR 0x0C
+#define RTC_CCR_IE BIT(0)
+#define RTC_CCR_MASK BIT(1)
+#define RTC_CCR_EN BIT(2)
+#define RTC_CCR_WEN BIT(3)
+#define RTC_STAT 0x10
+#define RTC_STAT_BIT BIT(0)
+#define RTC_RSTAT 0x14
+#define RTC_EOI 0x18
+#define RTC_VER 0x1C
+
+struct xgene_rtc_dev {
+ struct rtc_device *rtc;
+ struct device *dev;
+ unsigned long alarm_time;
+ void __iomem *csr_base;
+ struct clk *clk;
+ unsigned int irq_wake;
+};
+
+static int xgene_rtc_read_time(struct device *dev, struct rtc_time *tm)
+{
+ struct xgene_rtc_dev *pdata = dev_get_drvdata(dev);
+
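+ /* RTC_CCVR is a free-running seconds counter, seeded via RTC_CLR */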
+ rtc_time_to_tm(readl(pdata->csr_base + RTC_CCVR), tm);
+ return rtc_valid_tm(tm);
+}
+
+static int xgene_rtc_set_mmss(struct device *dev, unsigned long secs)
+{
+ struct xgene_rtc_dev *pdata = dev_get_drvdata(dev);
+
+ /*
+ * NOTE: After the following write, the new value is only reflected
+ * in RTC_CCVR after one update cycle of 1 second.
+ */
+ writel((u32) secs, pdata->csr_base + RTC_CLR);
+ readl(pdata->csr_base + RTC_CLR); /* Force a barrier */
+
+ return 0;
+}
+
+static int xgene_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm)
+{
+ struct xgene_rtc_dev *pdata = dev_get_drvdata(dev);
+
+ rtc_time_to_tm(pdata->alarm_time, &alrm->time);
+ alrm->enabled = readl(pdata->csr_base + RTC_CCR) & RTC_CCR_IE;
+
+ return 0;
+}
+
+static int xgene_rtc_alarm_irq_enable(struct device *dev, u32 enabled)
+{
+ struct xgene_rtc_dev *pdata = dev_get_drvdata(dev);
+ u32 ccr;
+
+ ccr = readl(pdata->csr_base + RTC_CCR);
+ if (enabled) {
+ ccr &= ~RTC_CCR_MASK;
+ ccr |= RTC_CCR_IE;
+ } else {
+ ccr &= ~RTC_CCR_IE;
+ ccr |= RTC_CCR_MASK;
+ }
+ writel(ccr, pdata->csr_base + RTC_CCR);
+
+ return 0;
+}
+
+static int xgene_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm)
+{
+ struct xgene_rtc_dev *pdata = dev_get_drvdata(dev);
+ unsigned long rtc_time;
+ unsigned long alarm_time;
+
+ rtc_time = readl(pdata->csr_base + RTC_CCVR);
+ rtc_tm_to_time(&alrm->time, &alarm_time);
+
+ pdata->alarm_time = alarm_time;
+ writel((u32) pdata->alarm_time, pdata->csr_base + RTC_CMR);
+
+ xgene_rtc_alarm_irq_enable(dev, alrm->enabled);
+
+ return 0;
+}
+
+static const struct rtc_class_ops xgene_rtc_ops = {
+ .read_time = xgene_rtc_read_time,
+ .set_mmss = xgene_rtc_set_mmss,
+ .read_alarm = xgene_rtc_read_alarm,
+ .set_alarm = xgene_rtc_set_alarm,
+ .alarm_irq_enable = xgene_rtc_alarm_irq_enable,
+};
+
+static irqreturn_t xgene_rtc_interrupt(int irq, void *id)
+{
+ struct xgene_rtc_dev *pdata = (struct xgene_rtc_dev *) id;
+
+ /* Check if interrupt asserted */
+ if (!(readl(pdata->csr_base + RTC_STAT) & RTC_STAT_BIT))
+ return IRQ_NONE;
+
+ /* Clear interrupt */
+ readl(pdata->csr_base + RTC_EOI);
+
+ rtc_update_irq(pdata->rtc, 1, RTC_IRQF | RTC_AF);
+
+ return IRQ_HANDLED;
+}
+
+static int xgene_rtc_probe(struct platform_device *pdev)
+{
+ struct xgene_rtc_dev *pdata;
+ struct resource *res;
+ int ret;
+ int irq;
+
+ pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL);
+ if (!pdata)
+ return -ENOMEM;
+ platform_set_drvdata(pdev, pdata);
+ pdata->dev = &pdev->dev;
+
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ pdata->csr_base = devm_ioremap_resource(&pdev->dev, res);
+ if (IS_ERR(pdata->csr_base))
+ return PTR_ERR(pdata->csr_base);
+
+ irq = platform_get_irq(pdev, 0);
+ if (irq < 0) {
+ dev_err(&pdev->dev, "No IRQ resource\n");
+ return irq;
+ }
+ ret = devm_request_irq(&pdev->dev, irq, xgene_rtc_interrupt, 0,
+ dev_name(&pdev->dev), pdata);
+ if (ret) {
+ dev_err(&pdev->dev, "Could not request IRQ\n");
+ return ret;
+ }
+
+ pdata->clk = devm_clk_get(&pdev->dev, NULL);
+ if (IS_ERR(pdata->clk)) {
+ dev_err(&pdev->dev, "Couldn't get the clock for RTC\n");
+ return -ENODEV;
+ }
+ clk_prepare_enable(pdata->clk);
+
+ /* Turn on the clock and the crystal */
+ writel(RTC_CCR_EN, pdata->csr_base + RTC_CCR);
+
+ device_init_wakeup(&pdev->dev, 1);
+
+ pdata->rtc = devm_rtc_device_register(&pdev->dev, pdev->name,
+ &xgene_rtc_ops, THIS_MODULE);
+ if (IS_ERR(pdata->rtc)) {
+ clk_disable_unprepare(pdata->clk);
+ return PTR_ERR(pdata->rtc);
+ }
+
+ /* HW does not support updates faster than 1 second */
+ pdata->rtc->uie_unsupported = 1;
+
+ return 0;
+}
+
+static int xgene_rtc_remove(struct platform_device *pdev)
+{
+ struct xgene_rtc_dev *pdata = platform_get_drvdata(pdev);
+
+ xgene_rtc_alarm_irq_enable(&pdev->dev, 0);
+ device_init_wakeup(&pdev->dev, 0);
+ clk_disable_unprepare(pdata->clk);
+ return 0;
+}
+
+#ifdef CONFIG_PM_SLEEP
+static int xgene_rtc_suspend(struct device *dev)
+{
+ struct platform_device *pdev = to_platform_device(dev);
+ struct xgene_rtc_dev *pdata = platform_get_drvdata(pdev);
+ int irq;
+
+ irq = platform_get_irq(pdev, 0);
+ if (device_may_wakeup(&pdev->dev)) {
+ if (!enable_irq_wake(irq))
+ pdata->irq_wake = 1;
+ } else {
+ xgene_rtc_alarm_irq_enable(dev, 0);
+ clk_disable(pdata->clk);
+ }
+
+ return 0;
+}
+
+static int xgene_rtc_resume(struct device *dev)
+{
+ struct platform_device *pdev = to_platform_device(dev);
+ struct xgene_rtc_dev *pdata = platform_get_drvdata(pdev);
+ int irq;
+
+ irq = platform_get_irq(pdev, 0);
+ if (device_may_wakeup(&pdev->dev)) {
+ if (pdata->irq_wake) {
+ disable_irq_wake(irq);
+ pdata->irq_wake = 0;
+ }
+ } else {
+ clk_enable(pdata->clk);
+ xgene_rtc_alarm_irq_enable(dev, 1);
+ }
+
+ return 0;
+}
+#endif
+
+static SIMPLE_DEV_PM_OPS(xgene_rtc_pm_ops, xgene_rtc_suspend, xgene_rtc_resume);
+
+#ifdef CONFIG_OF
+static const struct of_device_id xgene_rtc_of_match[] = {
+ {.compatible = "apm,xgene-rtc" },
+ { }
+};
+MODULE_DEVICE_TABLE(of, xgene_rtc_of_match);
+#endif
+
+static struct platform_driver xgene_rtc_driver = {
+ .probe = xgene_rtc_probe,
+ .remove = xgene_rtc_remove,
+ .driver = {
+ .owner = THIS_MODULE,
+ .name = "xgene-rtc",
+ .pm = &xgene_rtc_pm_ops,
+ .of_match_table = of_match_ptr(xgene_rtc_of_match),
+ },
+};
+
+module_platform_driver(xgene_rtc_driver);
+
+MODULE_DESCRIPTION("APM X-Gene SoC RTC driver");
+MODULE_AUTHOR("Rameshwar Sahu <rsahu@apm.com>");
+MODULE_LICENSE("GPL");
diff --git a/drivers/tty/Kconfig b/drivers/tty/Kconfig
index b24aa010f68c..65cd80bf9aec 100644
--- a/drivers/tty/Kconfig
+++ b/drivers/tty/Kconfig
@@ -13,6 +13,10 @@ config VT
bool "Virtual terminal" if EXPERT
depends on !S390 && !UML
select INPUT
+ select NEW_LEDS
+ select LEDS_CLASS
+ select LEDS_TRIGGERS
+ select INPUT_LEDS
default y
---help---
If you say Y here, you will get support for terminal devices with
diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c
index ce396ecdf412..38d5f9ae1cc1 100644
--- a/drivers/tty/sysrq.c
+++ b/drivers/tty/sysrq.c
@@ -46,6 +46,7 @@
#include <linux/jiffies.h>
#include <linux/syscalls.h>
#include <linux/of.h>
+#include <linux/rcupdate.h>
#include <asm/ptrace.h>
#include <asm/irq_regs.h>
@@ -510,9 +511,9 @@ void __handle_sysrq(int key, bool check_mask)
struct sysrq_key_op *op_p;
int orig_log_level;
int i;
- unsigned long flags;
- spin_lock_irqsave(&sysrq_key_table_lock, flags);
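+ /*
+ * rcu_sysrq_start() suppresses RCU CPU stall warnings while a
+ * possibly long-running sysrq handler executes; rcu_sysrq_end()
+ * re-arms them.
+ */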
+ rcu_sysrq_start();
+ rcu_read_lock();
/*
* Raise the apparent loglevel to maximum so that the sysrq header
* is shown to provide the user with positive feedback. We do not
@@ -554,7 +555,8 @@ void __handle_sysrq(int key, bool check_mask)
printk("\n");
console_loglevel = orig_log_level;
}
- spin_unlock_irqrestore(&sysrq_key_table_lock, flags);
+ rcu_read_unlock();
+ rcu_sysrq_end();
}
void handle_sysrq(int key)
@@ -1043,16 +1045,23 @@ static int __sysrq_swap_key_ops(int key, struct sysrq_key_op *insert_op_p,
struct sysrq_key_op *remove_op_p)
{
int retval;
- unsigned long flags;
- spin_lock_irqsave(&sysrq_key_table_lock, flags);
+ spin_lock(&sysrq_key_table_lock);
if (__sysrq_get_key_op(key) == remove_op_p) {
__sysrq_put_key_op(key, insert_op_p);
retval = 0;
} else {
retval = -1;
}
- spin_unlock_irqrestore(&sysrq_key_table_lock, flags);
+ spin_unlock(&sysrq_key_table_lock);
+
+ /*
+ * A concurrent __handle_sysrq either got the old op or the new op.
+ * Wait for it to go away before returning, so the code for an old
+ * op is not freed (e.g. on module unload) while it is in use.
+ */
+ synchronize_rcu();
+
return retval;
}
diff --git a/drivers/tty/vt/keyboard.c b/drivers/tty/vt/keyboard.c
index d0e3a4497707..d6ecfc9e734f 100644
--- a/drivers/tty/vt/keyboard.c
+++ b/drivers/tty/vt/keyboard.c
@@ -33,6 +33,7 @@
#include <linux/string.h>
#include <linux/init.h>
#include <linux/slab.h>
+#include <linux/leds.h>
#include <linux/kbd_kern.h>
#include <linux/kbd_diacr.h>
@@ -130,6 +131,7 @@ static char rep; /* flag telling character repeat */
static int shift_state = 0;
static unsigned char ledstate = 0xff; /* undefined */
+static unsigned char lockstate = 0xff; /* undefined */
static unsigned char ledioctl;
/*
@@ -961,6 +963,41 @@ static void k_brl(struct vc_data *vc, unsigned char value, char up_flag)
}
}
+/* We route VT keyboard "leds" through triggers */
+static void kbd_ledstate_trigger_activate(struct led_classdev *cdev);
+
+static struct led_trigger ledtrig_ledstate[] = {
+#define DEFINE_LEDSTATE_TRIGGER(kbd_led, nam) \
+ [kbd_led] = { \
+ .name = nam, \
+ .activate = kbd_ledstate_trigger_activate, \
+ }
+ DEFINE_LEDSTATE_TRIGGER(VC_SCROLLOCK, "kbd-scrollock"),
+ DEFINE_LEDSTATE_TRIGGER(VC_NUMLOCK, "kbd-numlock"),
+ DEFINE_LEDSTATE_TRIGGER(VC_CAPSLOCK, "kbd-capslock"),
+ DEFINE_LEDSTATE_TRIGGER(VC_KANALOCK, "kbd-kanalock"),
+#undef DEFINE_LEDSTATE_TRIGGER
+};
+
+static void kbd_lockstate_trigger_activate(struct led_classdev *cdev);
+
+static struct led_trigger ledtrig_lockstate[] = {
+#define DEFINE_LOCKSTATE_TRIGGER(kbd_led, nam) \
+ [kbd_led] = { \
+ .name = nam, \
+ .activate = kbd_lockstate_trigger_activate, \
+ }
+ DEFINE_LOCKSTATE_TRIGGER(VC_SHIFTLOCK, "kbd-shiftlock"),
+ DEFINE_LOCKSTATE_TRIGGER(VC_ALTGRLOCK, "kbd-altgrlock"),
+ DEFINE_LOCKSTATE_TRIGGER(VC_CTRLLOCK, "kbd-ctrllock"),
+ DEFINE_LOCKSTATE_TRIGGER(VC_ALTLOCK, "kbd-altlock"),
+ DEFINE_LOCKSTATE_TRIGGER(VC_SHIFTLLOCK, "kbd-shiftllock"),
+ DEFINE_LOCKSTATE_TRIGGER(VC_SHIFTRLOCK, "kbd-shiftrlock"),
+ DEFINE_LOCKSTATE_TRIGGER(VC_CTRLLLOCK, "kbd-ctrlllock"),
+ DEFINE_LOCKSTATE_TRIGGER(VC_CTRLRLOCK, "kbd-ctrlrlock"),
+#undef DEFINE_LOCKSTATE_TRIGGER
+};
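+
+/*
+ * Any LED class device can be bound to these triggers from userspace
+ * through sysfs, e.g. (the LED name here is only an example):
+ *
+ * echo kbd-capslock > /sys/class/leds/input3::capslock/trigger
+ */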
+
/*
* The leds display either (i) the status of NumLock, CapsLock, ScrollLock,
* or (ii) whatever pattern of lights people want to show using KDSETLED,
@@ -995,18 +1032,25 @@ static inline unsigned char getleds(void)
return kbd->ledflagstate;
}
-static int kbd_update_leds_helper(struct input_handle *handle, void *data)
+/* Called on trigger connection, to set initial state */
+static void kbd_ledstate_trigger_activate(struct led_classdev *cdev)
{
- unsigned char leds = *(unsigned char *)data;
+ struct led_trigger *trigger = cdev->trigger;
+ int led = trigger - ledtrig_ledstate;
- if (test_bit(EV_LED, handle->dev->evbit)) {
- input_inject_event(handle, EV_LED, LED_SCROLLL, !!(leds & 0x01));
- input_inject_event(handle, EV_LED, LED_NUML, !!(leds & 0x02));
- input_inject_event(handle, EV_LED, LED_CAPSL, !!(leds & 0x04));
- input_inject_event(handle, EV_SYN, SYN_REPORT, 0);
- }
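+ /* hold off kbd_bh() so ledstate cannot change while we mirror it */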
+ tasklet_disable(&keyboard_tasklet);
+ led_trigger_event(trigger, ledstate & (1 << led) ? LED_FULL : LED_OFF);
+ tasklet_enable(&keyboard_tasklet);
+}
- return 0;
+static void kbd_lockstate_trigger_activate(struct led_classdev *cdev)
+{
+ struct led_trigger *trigger = cdev->trigger;
+ int led = trigger - ledtrig_lockstate;
+
+ tasklet_disable(&keyboard_tasklet);
+ led_trigger_event(trigger, lockstate & (1 << led) ? LED_FULL : LED_OFF);
+ tasklet_enable(&keyboard_tasklet);
}
/**
@@ -1095,16 +1139,29 @@ static void kbd_bh(unsigned long dummy)
{
unsigned char leds;
unsigned long flags;
-
+ int i;
+
spin_lock_irqsave(&led_lock, flags);
leds = getleds();
spin_unlock_irqrestore(&led_lock, flags);
if (leds != ledstate) {
- input_handler_for_each_handle(&kbd_handler, &leds,
- kbd_update_leds_helper);
+ for (i = 0; i < ARRAY_SIZE(ledtrig_ledstate); i++)
+ if ((leds ^ ledstate) & (1 << i))
+ led_trigger_event(&ledtrig_ledstate[i],
+ leds & (1 << i)
+ ? LED_FULL : LED_OFF);
ledstate = leds;
}
+
+ if (kbd->lockstate != lockstate) {
+ for (i = 0; i < ARRAY_SIZE(ledtrig_lockstate); i++)
+ if ((kbd->lockstate ^ lockstate) & (1 << i))
+ led_trigger_event(&ledtrig_lockstate[i],
+ kbd->lockstate & (1 << i)
+ ? LED_FULL : LED_OFF);
+ lockstate = kbd->lockstate;
+ }
}
DECLARE_TASKLET_DISABLED(keyboard_tasklet, kbd_bh, 0);
@@ -1442,20 +1499,6 @@ static void kbd_disconnect(struct input_handle *handle)
kfree(handle);
}
-/*
- * Start keyboard handler on the new keyboard by refreshing LED state to
- * match the rest of the system.
- */
-static void kbd_start(struct input_handle *handle)
-{
- tasklet_disable(&keyboard_tasklet);
-
- if (ledstate != 0xff)
- kbd_update_leds_helper(handle, &ledstate);
-
- tasklet_enable(&keyboard_tasklet);
-}
-
static const struct input_device_id kbd_ids[] = {
{
.flags = INPUT_DEVICE_ID_MATCH_EVBIT,
@@ -1477,7 +1520,6 @@ static struct input_handler kbd_handler = {
.match = kbd_match,
.connect = kbd_connect,
.disconnect = kbd_disconnect,
- .start = kbd_start,
.name = "kbd",
.id_table = kbd_ids,
};
@@ -1501,6 +1543,20 @@ int __init kbd_init(void)
if (error)
return error;
+ for (i = 0; i < ARRAY_SIZE(ledtrig_ledstate); i++) {
+ error = led_trigger_register(&ledtrig_ledstate[i]);
+ if (error)
+ pr_err("error %d while registering trigger %s\n",
+ error, ledtrig_ledstate[i].name);
+ }
+
+ for (i = 0; i < ARRAY_SIZE(ledtrig_lockstate); i++) {
+ error = led_trigger_register(&ledtrig_lockstate[i]);
+ if (error)
+ pr_err("error %d while registering trigger %s\n",
+ error, ledtrig_lockstate[i].name);
+ }
+
tasklet_enable(&keyboard_tasklet);
tasklet_schedule(&keyboard_tasklet);
diff --git a/drivers/video/backlight/backlight.c b/drivers/video/backlight/backlight.c
index bd2172c2d650..31672740fe28 100644
--- a/drivers/video/backlight/backlight.c
+++ b/drivers/video/backlight/backlight.c
@@ -189,8 +189,6 @@ static ssize_t brightness_store(struct device *dev,
}
mutex_unlock(&bd->ops_lock);
- backlight_generate_event(bd, BACKLIGHT_UPDATE_SYSFS);
-
return rc;
}
static DEVICE_ATTR_RW(brightness);
diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c
index 14da82564f4e..6894b085f0ee 100644
--- a/fs/9p/v9fs.c
+++ b/fs/9p/v9fs.c
@@ -537,7 +537,7 @@ static struct attribute_group v9fs_attr_group = {
*
*/
-static int v9fs_sysfs_init(void)
+static int __init v9fs_sysfs_init(void)
{
v9fs_kobj = kobject_create_and_add("9p", fs_kobj);
if (!v9fs_kobj)
diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c
index 4d0c2e0be7e5..0b3bfa303dda 100644
--- a/fs/9p/vfs_dir.c
+++ b/fs/9p/vfs_dir.c
@@ -42,7 +42,6 @@
/**
* struct p9_rdir - readdir accounting
- * @mutex: mutex protecting readdir
* @head: start offset of current dirread buffer
* @tail: end offset of current dirread buffer
* @buf: dirread buffer
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index 0b5568f3fb75..520c11c2dcca 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -681,7 +681,7 @@ v9fs_direct_read(struct file *filp, char __user *udata, size_t count,
/**
* v9fs_cached_file_read - read from a file
* @filp: file pointer to read
- * @udata: user data buffer to read data into
+ * @data: user data buffer to read data into
* @count: size of buffer
* @offset: offset at which to read data
*
@@ -698,7 +698,7 @@ v9fs_cached_file_read(struct file *filp, char __user *data, size_t count,
/**
* v9fs_mmap_file_read - read from a file
* @filp: file pointer to read
- * @udata: user data buffer to read data into
+ * @data: user data buffer to read data into
* @count: size of buffer
* @offset: offset at which to read data
*
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 53161ec058a7..00d140fb2263 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -580,7 +580,7 @@ static int v9fs_at_to_dotl_flags(int flags)
* v9fs_remove - helper function to remove files and directories
* @dir: directory inode that is being deleted
* @dentry: dentry that is being deleted
- * @rmdir: removing a directory
+ * @flags: removing a directory
*
*/
@@ -778,7 +778,7 @@ static int v9fs_vfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode
* v9fs_vfs_lookup - VFS lookup hook to "walk" to a new inode
* @dir: inode that is being walked from
* @dentry: dentry that is being walked to?
- * @nameidata: path data
+ * @flags: lookup flags (unused)
*
*/
@@ -1324,7 +1324,7 @@ v9fs_vfs_put_link(struct dentry *dentry, struct nameidata *nd, void *p)
* v9fs_vfs_mkspecial - create a special file
* @dir: inode to create special file in
* @dentry: dentry to create
- * @mode: mode to create special file
+ * @perm: mode to create special file
* @extension: 9p2000.u format extension string representing special file
*
*/
diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c
index 59dc8e87647f..1fa85aae24df 100644
--- a/fs/9p/vfs_inode_dotl.c
+++ b/fs/9p/vfs_inode_dotl.c
@@ -226,7 +226,7 @@ int v9fs_open_to_dotl_flags(int flags)
* v9fs_vfs_create_dotl - VFS hook to create files for 9P2000.L protocol.
* @dir: directory inode that is being created
* @dentry: dentry that is being deleted
- * @mode: create permissions
+ * @omode: create permissions
*
*/
@@ -375,7 +375,7 @@ err_clunk_old_fid:
* v9fs_vfs_mkdir_dotl - VFS mkdir hook to create a directory
* @dir: inode that is being unlinked
* @dentry: dentry that is being unlinked
- * @mode: mode for new directory
+ * @omode: mode for new directory
*
*/
@@ -607,7 +607,6 @@ int v9fs_vfs_setattr_dotl(struct dentry *dentry, struct iattr *iattr)
* v9fs_stat2inode_dotl - populate an inode structure with stat info
* @stat: stat structure
* @inode: inode to populate
- * @sb: superblock of filesystem
*
*/
@@ -808,7 +807,7 @@ v9fs_vfs_link_dotl(struct dentry *old_dentry, struct inode *dir,
* v9fs_vfs_mknod_dotl - create a special file
* @dir: inode destination for new link
* @dentry: dentry for file
- * @mode: mode for creation
+ * @omode: mode for creation
* @rdev: device associated with special file
*
*/
diff --git a/fs/affs/affs.h b/fs/affs/affs.h
index 25b23b1e7f22..9bca88159725 100644
--- a/fs/affs/affs.h
+++ b/fs/affs/affs.h
@@ -1,3 +1,9 @@
+#ifdef pr_fmt
+#undef pr_fmt
+#endif
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
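+/* with this, pr_warn("Bitmap invalid") prints as "affs: Bitmap invalid" */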
+
#include <linux/types.h>
#include <linux/fs.h>
#include <linux/buffer_head.h>
@@ -206,7 +212,7 @@ affs_set_blocksize(struct super_block *sb, int size)
static inline struct buffer_head *
affs_bread(struct super_block *sb, int block)
{
- pr_debug("affs_bread: %d\n", block);
+ pr_debug("%s: %d\n", __func__, block);
if (block >= AFFS_SB(sb)->s_reserved && block < AFFS_SB(sb)->s_partition_size)
return sb_bread(sb, block);
return NULL;
@@ -214,7 +220,7 @@ affs_bread(struct super_block *sb, int block)
static inline struct buffer_head *
affs_getblk(struct super_block *sb, int block)
{
- pr_debug("affs_getblk: %d\n", block);
+ pr_debug("%s: %d\n", __func__, block);
if (block >= AFFS_SB(sb)->s_reserved && block < AFFS_SB(sb)->s_partition_size)
return sb_getblk(sb, block);
return NULL;
@@ -223,7 +229,7 @@ static inline struct buffer_head *
affs_getzeroblk(struct super_block *sb, int block)
{
struct buffer_head *bh;
- pr_debug("affs_getzeroblk: %d\n", block);
+ pr_debug("%s: %d\n", __func__, block);
if (block >= AFFS_SB(sb)->s_reserved && block < AFFS_SB(sb)->s_partition_size) {
bh = sb_getblk(sb, block);
lock_buffer(bh);
@@ -238,7 +244,7 @@ static inline struct buffer_head *
affs_getemptyblk(struct super_block *sb, int block)
{
struct buffer_head *bh;
- pr_debug("affs_getemptyblk: %d\n", block);
+ pr_debug("%s: %d\n", __func__, block);
if (block >= AFFS_SB(sb)->s_reserved && block < AFFS_SB(sb)->s_partition_size) {
bh = sb_getblk(sb, block);
wait_on_buffer(bh);
@@ -251,7 +257,7 @@ static inline void
affs_brelse(struct buffer_head *bh)
{
if (bh)
- pr_debug("affs_brelse: %lld\n", (long long) bh->b_blocknr);
+ pr_debug("%s: %lld\n", __func__, (long long) bh->b_blocknr);
brelse(bh);
}
diff --git a/fs/affs/amigaffs.c b/fs/affs/amigaffs.c
index 533a322c41c0..406b29836b19 100644
--- a/fs/affs/amigaffs.c
+++ b/fs/affs/amigaffs.c
@@ -34,7 +34,7 @@ affs_insert_hash(struct inode *dir, struct buffer_head *bh)
ino = bh->b_blocknr;
offset = affs_hash_name(sb, AFFS_TAIL(sb, bh)->name + 1, AFFS_TAIL(sb, bh)->name[0]);
- pr_debug("AFFS: insert_hash(dir=%u, ino=%d)\n", (u32)dir->i_ino, ino);
+ pr_debug("%s(dir=%u, ino=%d)\n", __func__, (u32)dir->i_ino, ino);
dir_bh = affs_bread(sb, dir->i_ino);
if (!dir_bh)
@@ -84,7 +84,8 @@ affs_remove_hash(struct inode *dir, struct buffer_head *rem_bh)
sb = dir->i_sb;
rem_ino = rem_bh->b_blocknr;
offset = affs_hash_name(sb, AFFS_TAIL(sb, rem_bh)->name+1, AFFS_TAIL(sb, rem_bh)->name[0]);
- pr_debug("AFFS: remove_hash(dir=%d, ino=%d, hashval=%d)\n", (u32)dir->i_ino, rem_ino, offset);
+ pr_debug("%s(dir=%d, ino=%d, hashval=%d)\n",
+ __func__, (u32)dir->i_ino, rem_ino, offset);
bh = affs_bread(sb, dir->i_ino);
if (!bh)
@@ -147,7 +148,7 @@ affs_remove_link(struct dentry *dentry)
u32 link_ino, ino;
int retval;
- pr_debug("AFFS: remove_link(key=%ld)\n", inode->i_ino);
+ pr_debug("%s(key=%ld)\n", __func__, inode->i_ino);
retval = -EIO;
bh = affs_bread(sb, inode->i_ino);
if (!bh)
@@ -279,7 +280,7 @@ affs_remove_header(struct dentry *dentry)
if (!inode)
goto done;
- pr_debug("AFFS: remove_header(key=%ld)\n", inode->i_ino);
+ pr_debug("%s(key=%ld)\n", __func__, inode->i_ino);
retval = -EIO;
bh = affs_bread(sb, (u32)(long)dentry->d_fsdata);
if (!bh)
@@ -451,10 +452,10 @@ affs_error(struct super_block *sb, const char *function, const char *fmt, ...)
vsnprintf(ErrorBuffer,sizeof(ErrorBuffer),fmt,args);
va_end(args);
- printk(KERN_CRIT "AFFS error (device %s): %s(): %s\n", sb->s_id,
+ pr_crit("error (device %s): %s(): %s\n", sb->s_id,
function,ErrorBuffer);
if (!(sb->s_flags & MS_RDONLY))
- printk(KERN_WARNING "AFFS: Remounting filesystem read-only\n");
+ pr_warn("Remounting filesystem read-only\n");
sb->s_flags |= MS_RDONLY;
}
@@ -467,7 +468,7 @@ affs_warning(struct super_block *sb, const char *function, const char *fmt, ...)
vsnprintf(ErrorBuffer,sizeof(ErrorBuffer),fmt,args);
va_end(args);
- printk(KERN_WARNING "AFFS warning (device %s): %s(): %s\n", sb->s_id,
+ pr_warn("(device %s): %s(): %s\n", sb->s_id,
function,ErrorBuffer);
}
diff --git a/fs/affs/bitmap.c b/fs/affs/bitmap.c
index a32246b8359e..c8de51185c23 100644
--- a/fs/affs/bitmap.c
+++ b/fs/affs/bitmap.c
@@ -17,7 +17,7 @@ affs_count_free_blocks(struct super_block *sb)
u32 free;
int i;
- pr_debug("AFFS: count_free_blocks()\n");
+ pr_debug("%s()\n", __func__);
if (sb->s_flags & MS_RDONLY)
return 0;
@@ -43,7 +43,7 @@ affs_free_block(struct super_block *sb, u32 block)
u32 blk, bmap, bit, mask, tmp;
__be32 *data;
- pr_debug("AFFS: free_block(%u)\n", block);
+ pr_debug("%s(%u)\n", __func__, block);
if (block > sbi->s_partition_size)
goto err_range;
@@ -125,7 +125,7 @@ affs_alloc_block(struct inode *inode, u32 goal)
sb = inode->i_sb;
sbi = AFFS_SB(sb);
- pr_debug("AFFS: balloc(inode=%lu,goal=%u): ", inode->i_ino, goal);
+ pr_debug("balloc(inode=%lu,goal=%u): ", inode->i_ino, goal);
if (AFFS_I(inode)->i_pa_cnt) {
pr_debug("%d\n", AFFS_I(inode)->i_lastalloc+1);
@@ -254,8 +254,7 @@ int affs_init_bitmap(struct super_block *sb, int *flags)
return 0;
if (!AFFS_ROOT_TAIL(sb, sbi->s_root_bh)->bm_flag) {
- printk(KERN_NOTICE "AFFS: Bitmap invalid - mounting %s read only\n",
- sb->s_id);
+ pr_notice("Bitmap invalid - mounting %s read only\n", sb->s_id);
*flags |= MS_RDONLY;
return 0;
}
@@ -268,7 +267,7 @@ int affs_init_bitmap(struct super_block *sb, int *flags)
size = sbi->s_bmap_count * sizeof(*bm);
bm = sbi->s_bitmap = kzalloc(size, GFP_KERNEL);
if (!sbi->s_bitmap) {
- printk(KERN_ERR "AFFS: Bitmap allocation failed\n");
+ pr_err("Bitmap allocation failed\n");
return -ENOMEM;
}
@@ -282,17 +281,17 @@ int affs_init_bitmap(struct super_block *sb, int *flags)
bm->bm_key = be32_to_cpu(bmap_blk[blk]);
bh = affs_bread(sb, bm->bm_key);
if (!bh) {
- printk(KERN_ERR "AFFS: Cannot read bitmap\n");
+ pr_err("Cannot read bitmap\n");
res = -EIO;
goto out;
}
if (affs_checksum_block(sb, bh)) {
- printk(KERN_WARNING "AFFS: Bitmap %u invalid - mounting %s read only.\n",
- bm->bm_key, sb->s_id);
+ pr_warn("Bitmap %u invalid - mounting %s read only.\n",
+ bm->bm_key, sb->s_id);
*flags |= MS_RDONLY;
goto out;
}
- pr_debug("AFFS: read bitmap block %d: %d\n", blk, bm->bm_key);
+ pr_debug("read bitmap block %d: %d\n", blk, bm->bm_key);
bm->bm_free = memweight(bh->b_data + 4, sb->s_blocksize - 4);
/* Don't try read the extension if this is the last block,
@@ -304,7 +303,7 @@ int affs_init_bitmap(struct super_block *sb, int *flags)
affs_brelse(bmap_bh);
bmap_bh = affs_bread(sb, be32_to_cpu(bmap_blk[blk]));
if (!bmap_bh) {
- printk(KERN_ERR "AFFS: Cannot read bitmap extension\n");
+ pr_err("Cannot read bitmap extension\n");
res = -EIO;
goto out;
}
diff --git a/fs/affs/dir.c b/fs/affs/dir.c
index cbbda476a805..59f07bec92a6 100644
--- a/fs/affs/dir.c
+++ b/fs/affs/dir.c
@@ -54,8 +54,8 @@ affs_readdir(struct file *file, struct dir_context *ctx)
u32 ino;
int error = 0;
- pr_debug("AFFS: readdir(ino=%lu,f_pos=%lx)\n",
- inode->i_ino, (unsigned long)ctx->pos);
+ pr_debug("%s(ino=%lu,f_pos=%lx)\n",
+ __func__, inode->i_ino, (unsigned long)ctx->pos);
if (ctx->pos < 2) {
file->private_data = (void *)0;
@@ -81,7 +81,7 @@ affs_readdir(struct file *file, struct dir_context *ctx)
*/
ino = (u32)(long)file->private_data;
if (ino && file->f_version == inode->i_version) {
- pr_debug("AFFS: readdir() left off=%d\n", ino);
+ pr_debug("readdir() left off=%d\n", ino);
goto inside;
}
@@ -117,7 +117,7 @@ inside:
namelen = min(AFFS_TAIL(sb, fh_bh)->name[0], (u8)30);
name = AFFS_TAIL(sb, fh_bh)->name + 1;
- pr_debug("AFFS: readdir(): dir_emit(\"%.*s\", "
+ pr_debug("readdir(): dir_emit(\"%.*s\", "
"ino=%u), hash=%d, f_pos=%x\n",
namelen, name, ino, hash_pos, (u32)ctx->pos);
diff --git a/fs/affs/file.c b/fs/affs/file.c
index 9df23175e28b..a7fe57d2cd9a 100644
--- a/fs/affs/file.c
+++ b/fs/affs/file.c
@@ -45,7 +45,7 @@ const struct inode_operations affs_file_inode_operations = {
static int
affs_file_open(struct inode *inode, struct file *filp)
{
- pr_debug("AFFS: open(%lu,%d)\n",
+ pr_debug("open(%lu,%d)\n",
inode->i_ino, atomic_read(&AFFS_I(inode)->i_opencnt));
atomic_inc(&AFFS_I(inode)->i_opencnt);
return 0;
@@ -54,7 +54,7 @@ affs_file_open(struct inode *inode, struct file *filp)
static int
affs_file_release(struct inode *inode, struct file *filp)
{
- pr_debug("AFFS: release(%lu, %d)\n",
+ pr_debug("release(%lu, %d)\n",
inode->i_ino, atomic_read(&AFFS_I(inode)->i_opencnt));
if (atomic_dec_and_test(&AFFS_I(inode)->i_opencnt)) {
@@ -324,7 +324,8 @@ affs_get_block(struct inode *inode, sector_t block, struct buffer_head *bh_resul
struct buffer_head *ext_bh;
u32 ext;
- pr_debug("AFFS: get_block(%u, %lu)\n", (u32)inode->i_ino, (unsigned long)block);
+ pr_debug("%s(%u, %lu)\n",
+ __func__, (u32)inode->i_ino, (unsigned long)block);
BUG_ON(block > (sector_t)0x7fffffffUL);
@@ -498,34 +499,36 @@ affs_getemptyblk_ino(struct inode *inode, int block)
}
static int
-affs_do_readpage_ofs(struct file *file, struct page *page, unsigned from, unsigned to)
+affs_do_readpage_ofs(struct page *page, unsigned to)
{
struct inode *inode = page->mapping->host;
struct super_block *sb = inode->i_sb;
struct buffer_head *bh;
char *data;
+ unsigned pos = 0;
u32 bidx, boff, bsize;
u32 tmp;
- pr_debug("AFFS: read_page(%u, %ld, %d, %d)\n", (u32)inode->i_ino, page->index, from, to);
- BUG_ON(from > to || to > PAGE_CACHE_SIZE);
+ pr_debug("%s(%u, %ld, 0, %d)\n", __func__, (u32)inode->i_ino,
+ page->index, to);
+ BUG_ON(to > PAGE_CACHE_SIZE);
kmap(page);
data = page_address(page);
bsize = AFFS_SB(sb)->s_data_blksize;
- tmp = (page->index << PAGE_CACHE_SHIFT) + from;
+ tmp = page->index << PAGE_CACHE_SHIFT;
bidx = tmp / bsize;
boff = tmp % bsize;
- while (from < to) {
+ while (pos < to) {
bh = affs_bread_ino(inode, bidx, 0);
if (IS_ERR(bh))
return PTR_ERR(bh);
- tmp = min(bsize - boff, to - from);
- BUG_ON(from + tmp > to || tmp > bsize);
- memcpy(data + from, AFFS_DATA(bh) + boff, tmp);
+ tmp = min(bsize - boff, to - pos);
+ BUG_ON(pos + tmp > to || tmp > bsize);
+ memcpy(data + pos, AFFS_DATA(bh) + boff, tmp);
affs_brelse(bh);
bidx++;
- from += tmp;
+ pos += tmp;
boff = 0;
}
flush_dcache_page(page);
@@ -542,7 +545,7 @@ affs_extent_file_ofs(struct inode *inode, u32 newsize)
u32 size, bsize;
u32 tmp;
- pr_debug("AFFS: extent_file(%u, %d)\n", (u32)inode->i_ino, newsize);
+ pr_debug("%s(%u, %d)\n", __func__, (u32)inode->i_ino, newsize);
bsize = AFFS_SB(sb)->s_data_blksize;
bh = NULL;
size = AFFS_I(inode)->mmu_private;
@@ -608,14 +611,14 @@ affs_readpage_ofs(struct file *file, struct page *page)
u32 to;
int err;
- pr_debug("AFFS: read_page(%u, %ld)\n", (u32)inode->i_ino, page->index);
+ pr_debug("%s(%u, %ld)\n", __func__, (u32)inode->i_ino, page->index);
to = PAGE_CACHE_SIZE;
if (((page->index + 1) << PAGE_CACHE_SHIFT) > inode->i_size) {
to = inode->i_size & ~PAGE_CACHE_MASK;
memset(page_address(page) + to, 0, PAGE_CACHE_SIZE - to);
}
- err = affs_do_readpage_ofs(file, page, 0, to);
+ err = affs_do_readpage_ofs(page, to);
if (!err)
SetPageUptodate(page);
unlock_page(page);
@@ -631,7 +634,8 @@ static int affs_write_begin_ofs(struct file *file, struct address_space *mapping
pgoff_t index;
int err = 0;
- pr_debug("AFFS: write_begin(%u, %llu, %llu)\n", (u32)inode->i_ino, (unsigned long long)pos, (unsigned long long)pos + len);
+ pr_debug("%s(%u, %llu, %llu)\n", __func__, (u32)inode->i_ino,
+ (unsigned long long)pos, (unsigned long long)pos + len);
if (pos > AFFS_I(inode)->mmu_private) {
/* XXX: this probably leaves a too-big i_size in case of
* failure. Should really be updating i_size at write_end time
@@ -651,7 +655,7 @@ static int affs_write_begin_ofs(struct file *file, struct address_space *mapping
return 0;
/* XXX: inefficient but safe in the face of short writes */
- err = affs_do_readpage_ofs(file, page, 0, PAGE_CACHE_SIZE);
+ err = affs_do_readpage_ofs(page, PAGE_CACHE_SIZE);
if (err) {
unlock_page(page);
page_cache_release(page);
@@ -680,7 +684,9 @@ static int affs_write_end_ofs(struct file *file, struct address_space *mapping,
* due to write_begin.
*/
- pr_debug("AFFS: write_begin(%u, %llu, %llu)\n", (u32)inode->i_ino, (unsigned long long)pos, (unsigned long long)pos + len);
+ pr_debug("%s(%u, %llu, %llu)\n",
+ __func__, (u32)inode->i_ino, (unsigned long long)pos,
+ (unsigned long long)pos + len);
bsize = AFFS_SB(sb)->s_data_blksize;
data = page_address(page);
@@ -802,7 +808,7 @@ affs_free_prealloc(struct inode *inode)
{
struct super_block *sb = inode->i_sb;
- pr_debug("AFFS: free_prealloc(ino=%lu)\n", inode->i_ino);
+ pr_debug("free_prealloc(ino=%lu)\n", inode->i_ino);
while (AFFS_I(inode)->i_pa_cnt) {
AFFS_I(inode)->i_pa_cnt--;
@@ -822,7 +828,7 @@ affs_truncate(struct inode *inode)
struct buffer_head *ext_bh;
int i;
- pr_debug("AFFS: truncate(inode=%d, oldsize=%u, newsize=%u)\n",
+ pr_debug("truncate(inode=%d, oldsize=%u, newsize=%u)\n",
(u32)inode->i_ino, (u32)AFFS_I(inode)->mmu_private, (u32)inode->i_size);
last_blk = 0;
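
The affs hunks above all follow one pattern: a hand-typed "AFFS: " tag
plus a hand-typed function name give way to __func__, which the
compiler keeps in sync with the real function name (the old strings had
already drifted; "read_page" was logged from two different functions).
A minimal sketch of the idiom, assuming the series also adds a pr_fmt
define to fs/affs/affs.h (that hunk falls outside this excerpt) to
restore the module prefix:

	#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
	#include <linux/printk.h>

	static void demo(unsigned long ino)
	{
		/* emits e.g. "affs: demo(42)"; renaming demo() updates
		 * the message automatically through __func__ */
		pr_debug("%s(%lu)\n", __func__, ino);
	}
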
diff --git a/fs/affs/inode.c b/fs/affs/inode.c
index 96df91e8c334..bec2d1a0c91c 100644
--- a/fs/affs/inode.c
+++ b/fs/affs/inode.c
@@ -34,7 +34,7 @@ struct inode *affs_iget(struct super_block *sb, unsigned long ino)
if (!(inode->i_state & I_NEW))
return inode;
- pr_debug("AFFS: affs_iget(%lu)\n", inode->i_ino);
+ pr_debug("affs_iget(%lu)\n", inode->i_ino);
block = inode->i_ino;
bh = affs_bread(sb, block);
@@ -175,7 +175,7 @@ affs_write_inode(struct inode *inode, struct writeback_control *wbc)
uid_t uid;
gid_t gid;
- pr_debug("AFFS: write_inode(%lu)\n",inode->i_ino);
+ pr_debug("write_inode(%lu)\n", inode->i_ino);
if (!inode->i_nlink)
// possibly free block
@@ -220,7 +220,7 @@ affs_notify_change(struct dentry *dentry, struct iattr *attr)
struct inode *inode = dentry->d_inode;
int error;
- pr_debug("AFFS: notify_change(%lu,0x%x)\n",inode->i_ino,attr->ia_valid);
+ pr_debug("notify_change(%lu,0x%x)\n", inode->i_ino, attr->ia_valid);
error = inode_change_ok(inode,attr);
if (error)
@@ -258,7 +258,8 @@ void
affs_evict_inode(struct inode *inode)
{
unsigned long cache_page;
- pr_debug("AFFS: evict_inode(ino=%lu, nlink=%u)\n", inode->i_ino, inode->i_nlink);
+ pr_debug("evict_inode(ino=%lu, nlink=%u)\n",
+ inode->i_ino, inode->i_nlink);
truncate_inode_pages_final(&inode->i_data);
if (!inode->i_nlink) {
@@ -271,7 +272,7 @@ affs_evict_inode(struct inode *inode)
affs_free_prealloc(inode);
cache_page = (unsigned long)AFFS_I(inode)->i_lc;
if (cache_page) {
- pr_debug("AFFS: freeing ext cache\n");
+ pr_debug("freeing ext cache\n");
AFFS_I(inode)->i_lc = NULL;
AFFS_I(inode)->i_ac = NULL;
free_page(cache_page);
@@ -350,7 +351,8 @@ affs_add_entry(struct inode *dir, struct inode *inode, struct dentry *dentry, s3
u32 block = 0;
int retval;
- pr_debug("AFFS: add_entry(dir=%u, inode=%u, \"%*s\", type=%d)\n", (u32)dir->i_ino,
+ pr_debug("%s(dir=%u, inode=%u, \"%*s\", type=%d)\n",
+ __func__, (u32)dir->i_ino,
(u32)inode->i_ino, (int)dentry->d_name.len, dentry->d_name.name, type);
retval = -EIO;
diff --git a/fs/affs/namei.c b/fs/affs/namei.c
index 6dae1ccd176d..035bd31556fc 100644
--- a/fs/affs/namei.c
+++ b/fs/affs/namei.c
@@ -190,7 +190,8 @@ affs_find_entry(struct inode *dir, struct dentry *dentry)
toupper_t toupper = affs_get_toupper(sb);
u32 key;
- pr_debug("AFFS: find_entry(\"%.*s\")\n", (int)dentry->d_name.len, dentry->d_name.name);
+ pr_debug("%s(\"%.*s\")\n",
+ __func__, (int)dentry->d_name.len, dentry->d_name.name);
bh = affs_bread(sb, dir->i_ino);
if (!bh)
@@ -218,7 +219,8 @@ affs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
struct buffer_head *bh;
struct inode *inode = NULL;
- pr_debug("AFFS: lookup(\"%.*s\")\n",(int)dentry->d_name.len,dentry->d_name.name);
+ pr_debug("%s(\"%.*s\")\n",
+ __func__, (int)dentry->d_name.len, dentry->d_name.name);
affs_lock_dir(dir);
bh = affs_find_entry(dir, dentry);
@@ -248,9 +250,9 @@ affs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
int
affs_unlink(struct inode *dir, struct dentry *dentry)
{
- pr_debug("AFFS: unlink(dir=%d, %lu \"%.*s\")\n", (u32)dir->i_ino,
- dentry->d_inode->i_ino,
- (int)dentry->d_name.len, dentry->d_name.name);
+ pr_debug("%s(dir=%d, %lu \"%.*s\")\n",
+ __func__, (u32)dir->i_ino, dentry->d_inode->i_ino,
+ (int)dentry->d_name.len, dentry->d_name.name);
return affs_remove_header(dentry);
}
@@ -262,7 +264,8 @@ affs_create(struct inode *dir, struct dentry *dentry, umode_t mode, bool excl)
struct inode *inode;
int error;
- pr_debug("AFFS: create(%lu,\"%.*s\",0%ho)\n",dir->i_ino,(int)dentry->d_name.len,
+ pr_debug("%s(%lu,\"%.*s\",0%ho)\n",
+ __func__, dir->i_ino, (int)dentry->d_name.len,
dentry->d_name.name,mode);
inode = affs_new_inode(dir);
@@ -291,8 +294,9 @@ affs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
struct inode *inode;
int error;
- pr_debug("AFFS: mkdir(%lu,\"%.*s\",0%ho)\n",dir->i_ino,
- (int)dentry->d_name.len,dentry->d_name.name,mode);
+ pr_debug("%s(%lu,\"%.*s\",0%ho)\n",
+ __func__, dir->i_ino, (int)dentry->d_name.len,
+ dentry->d_name.name, mode);
inode = affs_new_inode(dir);
if (!inode)
@@ -317,8 +321,8 @@ affs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
int
affs_rmdir(struct inode *dir, struct dentry *dentry)
{
- pr_debug("AFFS: rmdir(dir=%u, %lu \"%.*s\")\n", (u32)dir->i_ino,
- dentry->d_inode->i_ino,
+ pr_debug("%s(dir=%u, %lu \"%.*s\")\n",
+ __func__, (u32)dir->i_ino, dentry->d_inode->i_ino,
(int)dentry->d_name.len, dentry->d_name.name);
return affs_remove_header(dentry);
@@ -334,8 +338,9 @@ affs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
int i, maxlen, error;
char c, lc;
- pr_debug("AFFS: symlink(%lu,\"%.*s\" -> \"%s\")\n",dir->i_ino,
- (int)dentry->d_name.len,dentry->d_name.name,symname);
+ pr_debug("%s(%lu,\"%.*s\" -> \"%s\")\n",
+ __func__, dir->i_ino, (int)dentry->d_name.len,
+ dentry->d_name.name, symname);
maxlen = AFFS_SB(sb)->s_hashsize * sizeof(u32) - 1;
inode = affs_new_inode(dir);
@@ -404,7 +409,8 @@ affs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
{
struct inode *inode = old_dentry->d_inode;
- pr_debug("AFFS: link(%u, %u, \"%.*s\")\n", (u32)inode->i_ino, (u32)dir->i_ino,
+ pr_debug("%s(%u, %u, \"%.*s\")\n",
+ __func__, (u32)inode->i_ino, (u32)dir->i_ino,
(int)dentry->d_name.len,dentry->d_name.name);
return affs_add_entry(dir, inode, dentry, ST_LINKFILE);
@@ -418,9 +424,10 @@ affs_rename(struct inode *old_dir, struct dentry *old_dentry,
struct buffer_head *bh = NULL;
int retval;
- pr_debug("AFFS: rename(old=%u,\"%*s\" to new=%u,\"%*s\")\n",
- (u32)old_dir->i_ino, (int)old_dentry->d_name.len, old_dentry->d_name.name,
- (u32)new_dir->i_ino, (int)new_dentry->d_name.len, new_dentry->d_name.name);
+ pr_debug("%s(old=%u,\"%*s\" to new=%u,\"%*s\")\n",
+ __func__, (u32)old_dir->i_ino, (int)old_dentry->d_name.len,
+ old_dentry->d_name.name, (u32)new_dir->i_ino,
+ (int)new_dentry->d_name.len, new_dentry->d_name.name);
retval = affs_check_name(new_dentry->d_name.name,
new_dentry->d_name.len,
diff --git a/fs/affs/super.c b/fs/affs/super.c
index 895ac7dc9dbf..51f1a95bff73 100644
--- a/fs/affs/super.c
+++ b/fs/affs/super.c
@@ -46,7 +46,7 @@ static void
affs_put_super(struct super_block *sb)
{
struct affs_sb_info *sbi = AFFS_SB(sb);
- pr_debug("AFFS: put_super()\n");
+ pr_debug("%s()\n", __func__);
cancel_delayed_work_sync(&sbi->sb_work);
}
@@ -220,7 +220,7 @@ parse_options(char *options, kuid_t *uid, kgid_t *gid, int *mode, int *reserved,
return 0;
if (n != 512 && n != 1024 && n != 2048
&& n != 4096) {
- printk ("AFFS: Invalid blocksize (512, 1024, 2048, 4096 allowed)\n");
+ pr_warn("Invalid blocksize (512, 1024, 2048, 4096 allowed)\n");
return 0;
}
*blocksize = n;
@@ -285,8 +285,8 @@ parse_options(char *options, kuid_t *uid, kgid_t *gid, int *mode, int *reserved,
/* Silently ignore the quota options */
break;
default:
- printk("AFFS: Unrecognized mount option \"%s\" "
- "or missing value\n", p);
+ pr_warn("Unrecognized mount option \"%s\" or missing value\n",
+ p);
return 0;
}
}
@@ -319,7 +319,7 @@ static int affs_fill_super(struct super_block *sb, void *data, int silent)
save_mount_options(sb, data);
- pr_debug("AFFS: read_super(%s)\n",data ? (const char *)data : "no options");
+ pr_debug("read_super(%s)\n", data ? (const char *)data : "no options");
sb->s_magic = AFFS_SUPER_MAGIC;
sb->s_op = &affs_sops;
@@ -339,7 +339,7 @@ static int affs_fill_super(struct super_block *sb, void *data, int silent)
if (!parse_options(data,&uid,&gid,&i,&reserved,&root_block,
&blocksize,&sbi->s_prefix,
sbi->s_volume, &mount_flags)) {
- printk(KERN_ERR "AFFS: Error parsing options\n");
+ pr_err("Error parsing options\n");
return -EINVAL;
}
/* N.B. after this point s_prefix must be released */
@@ -356,7 +356,7 @@ static int affs_fill_super(struct super_block *sb, void *data, int silent)
*/
size = sb->s_bdev->bd_inode->i_size >> 9;
- pr_debug("AFFS: initial blocksize=%d, #blocks=%d\n", 512, size);
+ pr_debug("initial blocksize=%d, #blocks=%d\n", 512, size);
affs_set_blocksize(sb, PAGE_SIZE);
/* Try to find root block. Its location depends on the block size. */
@@ -371,7 +371,7 @@ static int affs_fill_super(struct super_block *sb, void *data, int silent)
sbi->s_root_block = root_block;
if (root_block < 0)
sbi->s_root_block = (reserved + size - 1) / 2;
- pr_debug("AFFS: setting blocksize to %d\n", blocksize);
+ pr_debug("setting blocksize to %d\n", blocksize);
affs_set_blocksize(sb, blocksize);
sbi->s_partition_size = size;
@@ -386,7 +386,7 @@ static int affs_fill_super(struct super_block *sb, void *data, int silent)
* block behind the calculated one. So we check this one, too.
*/
for (num_bm = 0; num_bm < 2; num_bm++) {
- pr_debug("AFFS: Dev %s, trying root=%u, bs=%d, "
+ pr_debug("Dev %s, trying root=%u, bs=%d, "
"size=%d, reserved=%d\n",
sb->s_id,
sbi->s_root_block + num_bm,
@@ -407,8 +407,7 @@ static int affs_fill_super(struct super_block *sb, void *data, int silent)
}
}
if (!silent)
- printk(KERN_ERR "AFFS: No valid root block on device %s\n",
- sb->s_id);
+ pr_err("No valid root block on device %s\n", sb->s_id);
return -EINVAL;
/* N.B. after this point bh must be released */
@@ -420,7 +419,7 @@ got_root:
/* Find out which kind of FS we have */
boot_bh = sb_bread(sb, 0);
if (!boot_bh) {
- printk(KERN_ERR "AFFS: Cannot read boot block\n");
+ pr_err("Cannot read boot block\n");
return -EINVAL;
}
memcpy(sig, boot_bh->b_data, 4);
@@ -433,8 +432,7 @@ got_root:
*/
if ((chksum == FS_DCFFS || chksum == MUFS_DCFFS || chksum == FS_DCOFS
|| chksum == MUFS_DCOFS) && !(sb->s_flags & MS_RDONLY)) {
- printk(KERN_NOTICE "AFFS: Dircache FS - mounting %s read only\n",
- sb->s_id);
+ pr_notice("Dircache FS - mounting %s read only\n", sb->s_id);
sb->s_flags |= MS_RDONLY;
}
switch (chksum) {
@@ -468,14 +466,14 @@ got_root:
sb->s_flags |= MS_NOEXEC;
break;
default:
- printk(KERN_ERR "AFFS: Unknown filesystem on device %s: %08X\n",
- sb->s_id, chksum);
+ pr_err("Unknown filesystem on device %s: %08X\n",
+ sb->s_id, chksum);
return -EINVAL;
}
if (mount_flags & SF_VERBOSE) {
u8 len = AFFS_ROOT_TAIL(sb, root_bh)->disk_name[0];
- printk(KERN_NOTICE "AFFS: Mounting volume \"%.*s\": Type=%.3s\\%c, Blocksize=%d\n",
+ pr_notice("Mounting volume \"%.*s\": Type=%.3s\\%c, Blocksize=%d\n",
len > 31 ? 31 : len,
AFFS_ROOT_TAIL(sb, root_bh)->disk_name + 1,
sig, sig[3] + '0', blocksize);
@@ -506,11 +504,11 @@ got_root:
sb->s_root = d_make_root(root_inode);
if (!sb->s_root) {
- printk(KERN_ERR "AFFS: Get root inode failed\n");
+ pr_err("AFFS: Get root inode failed\n");
return -ENOMEM;
}
- pr_debug("AFFS: s_flags=%lX\n",sb->s_flags);
+ pr_debug("s_flags=%lX\n", sb->s_flags);
return 0;
}
@@ -530,7 +528,7 @@ affs_remount(struct super_block *sb, int *flags, char *data)
char volume[32];
char *prefix = NULL;
- pr_debug("AFFS: remount(flags=0x%x,opts=\"%s\")\n",*flags,data);
+ pr_debug("%s(flags=0x%x,opts=\"%s\")\n", __func__, *flags, data);
sync_filesystem(sb);
*flags |= MS_NODIRATIME;
@@ -578,8 +576,9 @@ affs_statfs(struct dentry *dentry, struct kstatfs *buf)
int free;
u64 id = huge_encode_dev(sb->s_bdev->bd_dev);
- pr_debug("AFFS: statfs() partsize=%d, reserved=%d\n",AFFS_SB(sb)->s_partition_size,
- AFFS_SB(sb)->s_reserved);
+ pr_debug("%s() partsize=%d, reserved=%d\n",
+ __func__, AFFS_SB(sb)->s_partition_size,
+ AFFS_SB(sb)->s_reserved);
free = affs_count_free_blocks(sb);
buf->f_type = AFFS_SUPER_MAGIC;
diff --git a/fs/affs/symlink.c b/fs/affs/symlink.c
index ee00f08c4f53..f39b71c3981e 100644
--- a/fs/affs/symlink.c
+++ b/fs/affs/symlink.c
@@ -21,7 +21,7 @@ static int affs_symlink_readpage(struct file *file, struct page *page)
char c;
char lc;
- pr_debug("AFFS: follow_link(ino=%lu)\n",inode->i_ino);
+ pr_debug("follow_link(ino=%lu)\n", inode->i_ino);
err = -EIO;
bh = affs_bread(inode->i_sb, inode->i_ino);
diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c
index 232e03d4780d..5b570b6efa28 100644
--- a/fs/autofs4/dev-ioctl.c
+++ b/fs/autofs4/dev-ioctl.c
@@ -737,7 +737,7 @@ MODULE_ALIAS_MISCDEV(AUTOFS_MINOR);
MODULE_ALIAS("devname:autofs");
/* Register/deregister misc character device */
-int autofs_dev_ioctl_init(void)
+int __init autofs_dev_ioctl_init(void)
{
int r;
diff --git a/fs/befs/btree.c b/fs/befs/btree.c
index a2cd305a993a..9c7faa8a9288 100644
--- a/fs/befs/btree.c
+++ b/fs/befs/btree.c
@@ -318,7 +318,7 @@ befs_btree_find(struct super_block *sb, befs_data_stream * ds,
* befs_find_key - Search for a key within a node
* @sb: Filesystem superblock
* @node: Node to find the key within
- * @key: Keystring to search for
+ * @findkey: Keystring to search for
* @value: If key is found, the value stored with the key is put here
*
* finds exact match if one exists, and returns BEFS_BT_MATCH
@@ -405,7 +405,7 @@ befs_find_key(struct super_block *sb, befs_btree_node * node,
* Here's how it works: Key_no is the index of the key/value pair to
* return in keybuf/value.
* Bufsize is the size of keybuf (BEFS_NAME_LEN+1 is a good size). Keysize is
- * the number of charecters in the key (just a convenience).
+ * the number of characters in the key (just a convenience).
*
* Algorithm:
* Get the first leafnode of the tree. See if the requested key is in that
@@ -502,12 +502,11 @@ befs_btree_read(struct super_block *sb, befs_data_stream * ds,
"for key of size %d", __func__, bufsize, keylen);
brelse(this_node->bh);
goto error_alloc;
- };
+ }
- strncpy(keybuf, keystart, keylen);
+ strlcpy(keybuf, keystart, keylen + 1);
*value = fs64_to_cpu(sb, valarray[cur_key]);
*keysize = keylen;
- keybuf[keylen] = '\0';
befs_debug(sb, "Read [%llu,%d]: Key \"%.*s\", Value %llu", node_off,
cur_key, keylen, keybuf, *value);
@@ -707,7 +706,7 @@ befs_bt_get_key(struct super_block *sb, befs_btree_node * node,
* @key1: pointer to the first key to be compared
* @keylen1: length in bytes of key1
* @key2: pointer to the second key to be compared
- * @kelen2: length in bytes of key2
+ * @keylen2: length in bytes of key2
*
* Returns 0 if @key1 and @key2 are equal.
* Returns >0 if @key1 is greater.
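
The strncpy() change above is the usual fix for a copy-then-terminate
pair: strlcpy() always NUL-terminates within the size it is given, so
the manual keybuf[keylen] = '\0' can go. The difference in isolation
(hypothetical dst/src buffers):

	char dst[BEFS_NAME_LEN + 1];

	/* old: two steps, because strncpy() does not terminate when
	 * the source fills the whole length */
	strncpy(dst, src, keylen);
	dst[keylen] = '\0';

	/* new: one call that copies at most keylen bytes and always
	 * writes the terminator */
	strlcpy(dst, src, keylen + 1);
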
diff --git a/fs/befs/datastream.c b/fs/befs/datastream.c
index c467bebd50af..1e8e0b8d8836 100644
--- a/fs/befs/datastream.c
+++ b/fs/befs/datastream.c
@@ -116,7 +116,7 @@ befs_fblock2brun(struct super_block *sb, befs_data_stream * data,
* befs_read_lsymlink - read long symlink from datastream.
* @sb: Filesystem superblock
* @ds: Datastream to read from
- * @buf: Buffer in which to place long symlink data
+ * @buff: Buffer in which to place long symlink data
* @len: Length of the long symlink in bytes
*
* Returns the number of bytes read
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index d626756ff721..a16fbd4e8241 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -133,14 +133,6 @@ befs_get_block(struct inode *inode, sector_t block,
befs_debug(sb, "---> befs_get_block() for inode %lu, block %ld",
(unsigned long)inode->i_ino, (long)block);
-
- if (block < 0) {
- befs_error(sb, "befs_get_block() was asked for a block "
- "number less than zero: block %ld in inode %lu",
- (long)block, (unsigned long)inode->i_ino);
- return -EIO;
- }
-
if (create) {
befs_error(sb, "befs_get_block() was asked to write to "
"block %ld in inode %lu", (long)block,
@@ -396,9 +388,8 @@ static struct inode *befs_iget(struct super_block *sb, unsigned long ino)
if (S_ISLNK(inode->i_mode) && !(befs_ino->i_flags & BEFS_LONG_SYMLINK)){
inode->i_size = 0;
inode->i_blocks = befs_sb->block_size / VFS_BLOCK_SIZE;
- strncpy(befs_ino->i_data.symlink, raw_inode->data.symlink,
- BEFS_SYMLINK_LEN - 1);
- befs_ino->i_data.symlink[BEFS_SYMLINK_LEN - 1] = '\0';
+ strlcpy(befs_ino->i_data.symlink, raw_inode->data.symlink,
+ BEFS_SYMLINK_LEN);
} else {
int num_blks;
@@ -591,21 +582,21 @@ befs_utf2nls(struct super_block *sb, const char *in,
/**
* befs_nls2utf - Convert NLS string to utf8 encoding
* @sb: Superblock
- * @src: Input string buffer in NLS format
- * @srclen: Length of input string in bytes
- * @dest: The output string in UTF-8 format
- * @destlen: Length of the output buffer
+ * @in: Input string buffer in NLS format
+ * @in_len: Length of input string in bytes
+ * @out: The output string in UTF-8 format
+ * @out_len: Length of the output buffer
*
- * Converts input string @src, which is in the format of the loaded NLS map,
+ * Converts input string @in, which is in the format of the loaded NLS map,
* into a utf8 string.
*
- * The destination string @dest is allocated by this function and the caller is
+ * The destination string @out is allocated by this function and the caller is
* responsible for freeing it with kfree()
*
- * On return, *@destlen is the length of @dest in bytes.
+ * On return, *@out_len is the length of @out in bytes.
*
* On success, the return value is the number of utf8 characters written to
- * the output buffer @dest.
+ * the output buffer @out.
*
* On failure, a negative number corresponding to the error code is returned.
*/
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index aa3cb626671e..ef242033a120 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -145,6 +145,25 @@ static int padzero(unsigned long elf_bss)
#define ELF_BASE_PLATFORM NULL
#endif
+/*
+ * Use get_random_int() to implement AT_RANDOM while avoiding depletion
+ * of the entropy pool.
+ */
+static void get_atrandom_bytes(unsigned char *buf, size_t nbytes)
+{
+ unsigned char *p = buf;
+
+ while (nbytes) {
+ unsigned int random_variable;
+ size_t chunk = min(nbytes, sizeof(random_variable));
+
+ random_variable = get_random_int();
+ memcpy(p, &random_variable, chunk);
+ p += chunk;
+ nbytes -= chunk;
+ }
+}
+
static int
create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
unsigned long load_addr, unsigned long interp_load_addr)
@@ -206,7 +225,7 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
/*
* Generate 16 random bytes for userspace PRNG seeding.
*/
- get_random_bytes(k_rand_bytes, sizeof(k_rand_bytes));
+ get_atrandom_bytes(k_rand_bytes, sizeof(k_rand_bytes));
u_rand_bytes = (elf_addr_t __user *)
STACK_ALLOC(p, sizeof(k_rand_bytes));
if (__copy_to_user(u_rand_bytes, k_rand_bytes, sizeof(k_rand_bytes)))
@@ -1686,7 +1705,7 @@ static size_t get_note_info_size(struct elf_note_info *info)
static int write_note_info(struct elf_note_info *info,
struct coredump_params *cprm)
{
- bool first = 1;
+ bool first = true;
struct elf_thread_core_info *t = info->thread;
do {
@@ -1710,7 +1729,7 @@ static int write_note_info(struct elf_note_info *info,
!writenote(&t->notes[i], cprm))
return 0;
- first = 0;
+ first = false;
t = t->next;
} while (t);
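
get_atrandom_bytes() above fills the buffer from get_random_int() in
sizeof(unsigned int) chunks, so the 16-byte AT_RANDOM seed costs four
cheap calls (on a 4-byte unsigned int) rather than a draw on the
entropy pool. Unrolled equivalent for the 16-byte case, illustration
only:

	unsigned char seed[16];
	unsigned int r[4] = {
		get_random_int(), get_random_int(),
		get_random_int(), get_random_int(),
	};

	memcpy(seed, r, sizeof(seed));

The min() in the real loop additionally copes with buffer sizes that
are not a multiple of sizeof(unsigned int).
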
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
index d50bbe59da1e..f723cd3a455c 100644
--- a/fs/binfmt_flat.c
+++ b/fs/binfmt_flat.c
@@ -380,7 +380,7 @@ failed:
/****************************************************************************/
-void old_reloc(unsigned long rl)
+static void old_reloc(unsigned long rl)
{
#ifdef DEBUG
char *segment[] = { "TEXT", "DATA", "BSS", "*UNKNOWN*" };
diff --git a/fs/bio.c b/fs/bio.c
index 1ba33657160f..0443694ccbb4 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -752,29 +752,31 @@ static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page
return 0;
/*
- * we might lose a segment or two here, but rather that than
- * make this too complex.
+ * setup the new entry, we might clear it again later if we
+ * cannot add the page
+ */
+ bvec = &bio->bi_io_vec[bio->bi_vcnt];
+ bvec->bv_page = page;
+ bvec->bv_len = len;
+ bvec->bv_offset = offset;
+ bio->bi_vcnt++;
+ bio->bi_phys_segments++;
+
+ /*
+ * Perform a recount if the number of segments is greater
+ * than queue_max_segments(q).
*/
- while (bio->bi_phys_segments >= queue_max_segments(q)) {
+ while (bio->bi_phys_segments > queue_max_segments(q)) {
if (retried_segments)
- return 0;
+ goto failed;
retried_segments = 1;
blk_recount_segments(q, bio);
}
/*
- * setup the new entry, we might clear it again later if we
- * cannot add the page
- */
- bvec = &bio->bi_io_vec[bio->bi_vcnt];
- bvec->bv_page = page;
- bvec->bv_len = len;
- bvec->bv_offset = offset;
-
- /*
* if queue has other restrictions (eg varying max sector size
* depending on offset), it can specify a merge_bvec_fn in the
* queue to get further control
@@ -791,23 +793,25 @@ static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page
* merge_bvec_fn() returns number of bytes it can accept
* at this offset
*/
- if (q->merge_bvec_fn(q, &bvm, bvec) < bvec->bv_len) {
- bvec->bv_page = NULL;
- bvec->bv_len = 0;
- bvec->bv_offset = 0;
- return 0;
- }
+ if (q->merge_bvec_fn(q, &bvm, bvec) < bvec->bv_len)
+ goto failed;
}
/* If we may be able to merge these biovecs, force a recount */
- if (bio->bi_vcnt && (BIOVEC_PHYS_MERGEABLE(bvec-1, bvec)))
+ if (bio->bi_vcnt > 1 && (BIOVEC_PHYS_MERGEABLE(bvec-1, bvec)))
bio->bi_flags &= ~(1 << BIO_SEG_VALID);
- bio->bi_vcnt++;
- bio->bi_phys_segments++;
done:
bio->bi_iter.bi_size += len;
return len;
+
+ failed:
+ bvec->bv_page = NULL;
+ bvec->bv_len = 0;
+ bvec->bv_offset = 0;
+ bio->bi_vcnt--;
+ blk_recount_segments(q, bio);
+ return 0;
}
/**
diff --git a/fs/block_dev.c b/fs/block_dev.c
index e68e150b1b16..6d7274619bf9 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -364,6 +364,69 @@ int blkdev_fsync(struct file *filp, loff_t start, loff_t end, int datasync)
}
EXPORT_SYMBOL(blkdev_fsync);
+/**
+ * bdev_read_page() - Start reading a page from a block device
+ * @bdev: The device to read the page from
+ * @sector: The offset on the device to read the page from (need not be aligned)
+ * @page: The page to read
+ *
+ * On entry, the page should be locked. It will be unlocked when the page
+ * has been read. If the block driver implements rw_page synchronously,
+ * that will be true on exit from this function, but it need not be.
+ *
+ * Errors returned by this function are usually "soft", eg out of memory, or
+ * queue full; callers should try a different route to read this page rather
+ * than propagate an error back up the stack.
+ *
+ * Return: negative errno if an error occurs, 0 if submission was successful.
+ */
+int bdev_read_page(struct block_device *bdev, sector_t sector,
+ struct page *page)
+{
+ const struct block_device_operations *ops = bdev->bd_disk->fops;
+ if (!ops->rw_page)
+ return -EOPNOTSUPP;
+ return ops->rw_page(bdev, sector + get_start_sect(bdev), page, READ);
+}
+EXPORT_SYMBOL_GPL(bdev_read_page);
+
+/**
+ * bdev_write_page() - Start writing a page to a block device
+ * @bdev: The device to write the page to
+ * @sector: The offset on the device to write the page to (need not be aligned)
+ * @page: The page to write
+ * @wbc: The writeback_control for the write
+ *
+ * On entry, the page should be locked and not currently under writeback.
+ * On exit, if the write started successfully, the page will be unlocked and
+ * under writeback. If the write failed already (eg the driver failed to
+ * queue the page to the device), the page will still be locked. If the
+ * caller is a ->writepage implementation, it will need to unlock the page.
+ *
+ * Errors returned by this function are usually "soft", eg out of memory, or
+ * queue full; callers should try a different route to write this page rather
+ * than propagate an error back up the stack.
+ *
+ * Return: negative errno if an error occurs, 0 if submission was successful.
+ */
+int bdev_write_page(struct block_device *bdev, sector_t sector,
+ struct page *page, struct writeback_control *wbc)
+{
+ int result;
+ int rw = (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : WRITE;
+ const struct block_device_operations *ops = bdev->bd_disk->fops;
+ if (!ops->rw_page)
+ return -EOPNOTSUPP;
+ set_page_writeback(page);
+ result = ops->rw_page(bdev, sector + get_start_sect(bdev), page, rw);
+ if (result)
+ end_page_writeback(page);
+ else
+ unlock_page(page);
+ return result;
+}
+EXPORT_SYMBOL_GPL(bdev_write_page);
+
/*
* pseudo-fs
*/
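
bdev_read_page()/bdev_write_page() above are thin wrappers around a
driver's optional ->rw_page method. A hedged sketch of the read side
for an address space whose pages map 1:1 onto the device, as the block
device's own mapping does (myfs_readpage and myfs_get_block are
hypothetical names; the sector is in 512-byte units, matching
get_start_sect()):

	static int myfs_readpage(struct file *file, struct page *page)
	{
		struct inode *inode = page->mapping->host;
		sector_t sector =
			(sector_t)page->index << (PAGE_CACHE_SHIFT - 9);

		/* "soft" errors (-EOPNOTSUPP, -ENOMEM, queue full) just
		 * mean: fall back to the ordinary buffer-head path */
		if (bdev_read_page(inode->i_sb->s_bdev, sector, page) == 0)
			return 0; /* page unlocked when the read completes */

		return block_read_full_page(page, myfs_get_block);
	}

A general-purpose filesystem would first resolve the page's on-disk
location (via its get_block) and verify the page is one contiguous
extent before trying the rw_page route.
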
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index f29a54e454d4..4cd0ac983f91 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -4510,7 +4510,8 @@ static void check_buffer_tree_ref(struct extent_buffer *eb)
spin_unlock(&eb->refs_lock);
}
-static void mark_extent_buffer_accessed(struct extent_buffer *eb)
+static void mark_extent_buffer_accessed(struct extent_buffer *eb,
+ struct page *accessed)
{
unsigned long num_pages, i;
@@ -4519,7 +4520,8 @@ static void mark_extent_buffer_accessed(struct extent_buffer *eb)
num_pages = num_extent_pages(eb->start, eb->len);
for (i = 0; i < num_pages; i++) {
struct page *p = extent_buffer_page(eb, i);
- mark_page_accessed(p);
+ if (p != accessed)
+ mark_page_accessed(p);
}
}
@@ -4533,7 +4535,7 @@ struct extent_buffer *find_extent_buffer(struct btrfs_fs_info *fs_info,
start >> PAGE_CACHE_SHIFT);
if (eb && atomic_inc_not_zero(&eb->refs)) {
rcu_read_unlock();
- mark_extent_buffer_accessed(eb);
+ mark_extent_buffer_accessed(eb, NULL);
return eb;
}
rcu_read_unlock();
@@ -4581,7 +4583,7 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
spin_unlock(&mapping->private_lock);
unlock_page(p);
page_cache_release(p);
- mark_extent_buffer_accessed(exists);
+ mark_extent_buffer_accessed(exists, p);
goto free_eb;
}
@@ -4596,7 +4598,6 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
attach_extent_buffer_page(eb, p);
spin_unlock(&mapping->private_lock);
WARN_ON(PageDirty(p));
- mark_page_accessed(p);
eb->pages[i] = p;
if (!PageUptodate(p))
uptodate = 0;
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 17e7393c50f0..a58df835b7fb 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -470,11 +470,12 @@ static void btrfs_drop_pages(struct page **pages, size_t num_pages)
for (i = 0; i < num_pages; i++) {
/* page checked is some magic around finding pages that
* have been modified without going through btrfs_set_page_dirty
- * clear it here
+ * clear it here. There should be no need to mark the pages
+ * accessed here, as prepare_pages already marked them accessed
+ * via find_or_create_page()
*/
ClearPageChecked(pages[i]);
unlock_page(pages[i]);
- mark_page_accessed(pages[i]);
page_cache_release(pages[i]);
}
}
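
The btrfs hunks, and the buffer.c hunk that follows, belong to one
series: a page should be marked accessed once, by whoever looks it up,
not again by every later user. The lookup-side idiom, sketched with a
hypothetical wrapper:

	static struct page *lookup_for_read(struct address_space *mapping,
					    pgoff_t index)
	{
		/* before: find_get_page(mapping, index) followed by a
		 * separate mark_page_accessed(page);
		 * after: one lookup that also performs the LRU touch */
		return find_get_page_flags(mapping, index, FGP_ACCESSED);
	}
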
diff --git a/fs/buffer.c b/fs/buffer.c
index 6a8110c03a47..eba6e4f621ce 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -227,7 +227,7 @@ __find_get_block_slow(struct block_device *bdev, sector_t block)
int all_mapped = 1;
index = block >> (PAGE_CACHE_SHIFT - bd_inode->i_blkbits);
- page = find_get_page(bd_mapping, index);
+ page = find_get_page_flags(bd_mapping, index, FGP_ACCESSED);
if (!page)
goto out;
@@ -1366,12 +1366,13 @@ __find_get_block(struct block_device *bdev, sector_t block, unsigned size)
struct buffer_head *bh = lookup_bh_lru(bdev, block, size);
if (bh == NULL) {
+ /* __find_get_block_slow will mark the page accessed */
bh = __find_get_block_slow(bdev, block);
if (bh)
bh_lru_install(bh);
- }
- if (bh)
+ } else
touch_buffer(bh);
+
return bh;
}
EXPORT_SYMBOL(__find_get_block);
@@ -1483,16 +1484,27 @@ EXPORT_SYMBOL(set_bh_page);
/*
* Called when truncating a buffer on a page completely.
*/
+
+/* Bits that are cleared during an invalidate */
+#define BUFFER_FLAGS_DISCARD \
+ (1 << BH_Mapped | 1 << BH_New | 1 << BH_Req | \
+ 1 << BH_Delay | 1 << BH_Unwritten)
+
static void discard_buffer(struct buffer_head * bh)
{
+ unsigned long b_state, b_state_old;
+
lock_buffer(bh);
clear_buffer_dirty(bh);
bh->b_bdev = NULL;
- clear_buffer_mapped(bh);
- clear_buffer_req(bh);
- clear_buffer_new(bh);
- clear_buffer_delay(bh);
- clear_buffer_unwritten(bh);
+ b_state = bh->b_state;
+ for (;;) {
+ b_state_old = cmpxchg(&bh->b_state, b_state,
+ (b_state & ~BUFFER_FLAGS_DISCARD));
+ if (b_state_old == b_state)
+ break;
+ b_state = b_state_old;
+ }
unlock_buffer(bh);
}
@@ -2879,10 +2891,9 @@ EXPORT_SYMBOL(block_truncate_page);
/*
* The generic ->writepage function for buffer-backed address_spaces
- * this form passes in the end_io handler used to finish the IO.
*/
-int block_write_full_page_endio(struct page *page, get_block_t *get_block,
- struct writeback_control *wbc, bh_end_io_t *handler)
+int block_write_full_page(struct page *page, get_block_t *get_block,
+ struct writeback_control *wbc)
{
struct inode * const inode = page->mapping->host;
loff_t i_size = i_size_read(inode);
@@ -2892,7 +2903,7 @@ int block_write_full_page_endio(struct page *page, get_block_t *get_block,
/* Is the page fully inside i_size? */
if (page->index < end_index)
return __block_write_full_page(inode, page, get_block, wbc,
- handler);
+ end_buffer_async_write);
/* Is the page fully outside i_size? (truncate in progress) */
offset = i_size & (PAGE_CACHE_SIZE-1);
@@ -2915,18 +2926,8 @@ int block_write_full_page_endio(struct page *page, get_block_t *get_block,
* writes to that region are not written out to the file."
*/
zero_user_segment(page, offset, PAGE_CACHE_SIZE);
- return __block_write_full_page(inode, page, get_block, wbc, handler);
-}
-EXPORT_SYMBOL(block_write_full_page_endio);
-
-/*
- * The generic ->writepage function for buffer-backed address_spaces
- */
-int block_write_full_page(struct page *page, get_block_t *get_block,
- struct writeback_control *wbc)
-{
- return block_write_full_page_endio(page, get_block, wbc,
- end_buffer_async_write);
+ return __block_write_full_page(inode, page, get_block, wbc,
+ end_buffer_async_write);
}
EXPORT_SYMBOL(block_write_full_page);
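
discard_buffer() above folds five atomic clear_buffer_*() calls into a
single cmpxchg() loop that clears every BUFFER_FLAGS_DISCARD bit in one
step, retrying only if another CPU changed b_state in between. The
pattern in isolation (a sketch, not a kernel API):

	static void clear_bits_atomically(unsigned long *state,
					  unsigned long mask)
	{
		unsigned long old = *state, prev;

		for (;;) {
			prev = cmpxchg(state, old, old & ~mask);
			if (prev == old)
				break;	/* exchange took effect */
			old = prev;	/* raced; retry on the new value */
		}
	}
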
diff --git a/fs/cachefiles/bind.c b/fs/cachefiles/bind.c
index 5b99bafc31d1..d749731dc0ee 100644
--- a/fs/cachefiles/bind.c
+++ b/fs/cachefiles/bind.c
@@ -50,18 +50,18 @@ int cachefiles_daemon_bind(struct cachefiles_cache *cache, char *args)
cache->brun_percent < 100);
if (*args) {
- kerror("'bind' command doesn't take an argument");
+ pr_err("'bind' command doesn't take an argument");
return -EINVAL;
}
if (!cache->rootdirname) {
- kerror("No cache directory specified");
+ pr_err("No cache directory specified");
return -EINVAL;
}
/* don't permit already bound caches to be re-bound */
if (test_bit(CACHEFILES_READY, &cache->flags)) {
- kerror("Cache already bound");
+ pr_err("Cache already bound");
return -EBUSY;
}
@@ -228,9 +228,7 @@ static int cachefiles_daemon_add_cache(struct cachefiles_cache *cache)
set_bit(CACHEFILES_READY, &cache->flags);
dput(root);
- printk(KERN_INFO "CacheFiles:"
- " File cache on %s registered\n",
- cache->cache.identifier);
+ pr_info("File cache on %s registered\n", cache->cache.identifier);
/* check how much space the cache has */
cachefiles_has_space(cache, 0, 0);
@@ -250,7 +248,7 @@ error_open_root:
kmem_cache_free(cachefiles_object_jar, fsdef);
error_root_object:
cachefiles_end_secure(cache, saved_cred);
- kerror("Failed to register: %d", ret);
+ pr_err("Failed to register: %d", ret);
return ret;
}
@@ -262,9 +260,8 @@ void cachefiles_daemon_unbind(struct cachefiles_cache *cache)
_enter("");
if (test_bit(CACHEFILES_READY, &cache->flags)) {
- printk(KERN_INFO "CacheFiles:"
- " File cache on %s unregistering\n",
- cache->cache.identifier);
+ pr_info("File cache on %s unregistering\n",
+ cache->cache.identifier);
fscache_withdraw_cache(&cache->cache);
}
diff --git a/fs/cachefiles/daemon.c b/fs/cachefiles/daemon.c
index 0a1467b15516..b078d3081d6c 100644
--- a/fs/cachefiles/daemon.c
+++ b/fs/cachefiles/daemon.c
@@ -315,8 +315,7 @@ static unsigned int cachefiles_daemon_poll(struct file *file,
static int cachefiles_daemon_range_error(struct cachefiles_cache *cache,
char *args)
{
- kerror("Free space limits must be in range"
- " 0%%<=stop<cull<run<100%%");
+ pr_err("Free space limits must be in range 0%%<=stop<cull<run<100%%");
return -EINVAL;
}
@@ -476,12 +475,12 @@ static int cachefiles_daemon_dir(struct cachefiles_cache *cache, char *args)
_enter(",%s", args);
if (!*args) {
- kerror("Empty directory specified");
+ pr_err("Empty directory specified");
return -EINVAL;
}
if (cache->rootdirname) {
- kerror("Second cache directory specified");
+ pr_err("Second cache directory specified");
return -EEXIST;
}
@@ -504,12 +503,12 @@ static int cachefiles_daemon_secctx(struct cachefiles_cache *cache, char *args)
_enter(",%s", args);
if (!*args) {
- kerror("Empty security context specified");
+ pr_err("Empty security context specified");
return -EINVAL;
}
if (cache->secctx) {
- kerror("Second security context specified");
+ pr_err("Second security context specified");
return -EINVAL;
}
@@ -532,7 +531,7 @@ static int cachefiles_daemon_tag(struct cachefiles_cache *cache, char *args)
_enter(",%s", args);
if (!*args) {
- kerror("Empty tag specified");
+ pr_err("Empty tag specified");
return -EINVAL;
}
@@ -563,12 +562,12 @@ static int cachefiles_daemon_cull(struct cachefiles_cache *cache, char *args)
goto inval;
if (!test_bit(CACHEFILES_READY, &cache->flags)) {
- kerror("cull applied to unready cache");
+ pr_err("cull applied to unready cache");
return -EIO;
}
if (test_bit(CACHEFILES_DEAD, &cache->flags)) {
- kerror("cull applied to dead cache");
+ pr_err("cull applied to dead cache");
return -EIO;
}
@@ -588,11 +587,11 @@ static int cachefiles_daemon_cull(struct cachefiles_cache *cache, char *args)
notdir:
path_put(&path);
- kerror("cull command requires dirfd to be a directory");
+ pr_err("cull command requires dirfd to be a directory");
return -ENOTDIR;
inval:
- kerror("cull command requires dirfd and filename");
+ pr_err("cull command requires dirfd and filename");
return -EINVAL;
}
@@ -615,7 +614,7 @@ static int cachefiles_daemon_debug(struct cachefiles_cache *cache, char *args)
return 0;
inval:
- kerror("debug command requires mask");
+ pr_err("debug command requires mask");
return -EINVAL;
}
@@ -635,12 +634,12 @@ static int cachefiles_daemon_inuse(struct cachefiles_cache *cache, char *args)
goto inval;
if (!test_bit(CACHEFILES_READY, &cache->flags)) {
- kerror("inuse applied to unready cache");
+ pr_err("inuse applied to unready cache");
return -EIO;
}
if (test_bit(CACHEFILES_DEAD, &cache->flags)) {
- kerror("inuse applied to dead cache");
+ pr_err("inuse applied to dead cache");
return -EIO;
}
@@ -660,11 +659,11 @@ static int cachefiles_daemon_inuse(struct cachefiles_cache *cache, char *args)
notdir:
path_put(&path);
- kerror("inuse command requires dirfd to be a directory");
+ pr_err("inuse command requires dirfd to be a directory");
return -ENOTDIR;
inval:
- kerror("inuse command requires dirfd and filename");
+ pr_err("inuse command requires dirfd and filename");
return -EINVAL;
}
diff --git a/fs/cachefiles/interface.c b/fs/cachefiles/interface.c
index 57e17fe6121a..584743d456c3 100644
--- a/fs/cachefiles/interface.c
+++ b/fs/cachefiles/interface.c
@@ -146,8 +146,7 @@ static int cachefiles_lookup_object(struct fscache_object *_object)
if (ret < 0 && ret != -ETIMEDOUT) {
if (ret != -ENOBUFS)
- printk(KERN_WARNING
- "CacheFiles: Lookup failed error %d\n", ret);
+ pr_warn("Lookup failed error %d\n", ret);
fscache_object_lookup_error(&object->fscache);
}
diff --git a/fs/cachefiles/internal.h b/fs/cachefiles/internal.h
index 5349473df1b1..3d50998abf57 100644
--- a/fs/cachefiles/internal.h
+++ b/fs/cachefiles/internal.h
@@ -9,6 +9,13 @@
* 2 of the Licence, or (at your option) any later version.
*/
+#ifdef pr_fmt
+#undef pr_fmt
+#endif
+
+#define pr_fmt(fmt) "CacheFiles: " fmt
+
+
#include <linux/fscache-cache.h>
#include <linux/timer.h>
#include <linux/wait.h>
@@ -245,11 +252,10 @@ extern int cachefiles_remove_object_xattr(struct cachefiles_cache *cache,
/*
* error handling
*/
-#define kerror(FMT, ...) printk(KERN_ERR "CacheFiles: "FMT"\n", ##__VA_ARGS__)
#define cachefiles_io_error(___cache, FMT, ...) \
do { \
- kerror("I/O Error: " FMT, ##__VA_ARGS__); \
+ pr_err("I/O Error: " FMT, ##__VA_ARGS__); \
fscache_io_error(&(___cache)->cache); \
set_bit(CACHEFILES_DEAD, &(___cache)->flags); \
} while (0)
@@ -310,8 +316,8 @@ do { \
#define ASSERT(X) \
do { \
if (unlikely(!(X))) { \
- printk(KERN_ERR "\n"); \
- printk(KERN_ERR "CacheFiles: Assertion failed\n"); \
+ pr_err("\n"); \
+ pr_err("Assertion failed\n"); \
BUG(); \
} \
} while (0)
@@ -319,9 +325,9 @@ do { \
#define ASSERTCMP(X, OP, Y) \
do { \
if (unlikely(!((X) OP (Y)))) { \
- printk(KERN_ERR "\n"); \
- printk(KERN_ERR "CacheFiles: Assertion failed\n"); \
- printk(KERN_ERR "%lx " #OP " %lx is false\n", \
+ pr_err("\n"); \
+ pr_err("Assertion failed\n"); \
+ pr_err("%lx " #OP " %lx is false\n", \
(unsigned long)(X), (unsigned long)(Y)); \
BUG(); \
} \
@@ -330,8 +336,8 @@ do { \
#define ASSERTIF(C, X) \
do { \
if (unlikely((C) && !(X))) { \
- printk(KERN_ERR "\n"); \
- printk(KERN_ERR "CacheFiles: Assertion failed\n"); \
+ pr_err("\n"); \
+ pr_err("Assertion failed\n"); \
BUG(); \
} \
} while (0)
@@ -339,9 +345,9 @@ do { \
#define ASSERTIFCMP(C, X, OP, Y) \
do { \
if (unlikely((C) && !((X) OP (Y)))) { \
- printk(KERN_ERR "\n"); \
- printk(KERN_ERR "CacheFiles: Assertion failed\n"); \
- printk(KERN_ERR "%lx " #OP " %lx is false\n", \
+ pr_err("\n"); \
+ pr_err("Assertion failed\n"); \
+ pr_err("%lx " #OP " %lx is false\n", \
(unsigned long)(X), (unsigned long)(Y)); \
BUG(); \
} \
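
The pr_fmt block added to internal.h above is what lets every
"CacheFiles: " literal in these hunks be dropped; the #ifdef/#undef
clears any definition an earlier include may have established. The
mechanism in one step: with that define in effect,

	pr_err("Failed to register: %d\n", ret);

preprocesses to

	printk(KERN_ERR "CacheFiles: " "Failed to register: %d\n", ret);

so the prefix is applied once, centrally, instead of being retyped at
every call site.
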
diff --git a/fs/cachefiles/main.c b/fs/cachefiles/main.c
index 4bfa8cf43bf5..180edfb45f66 100644
--- a/fs/cachefiles/main.c
+++ b/fs/cachefiles/main.c
@@ -68,8 +68,7 @@ static int __init cachefiles_init(void)
SLAB_HWCACHE_ALIGN,
cachefiles_object_init_once);
if (!cachefiles_object_jar) {
- printk(KERN_NOTICE
- "CacheFiles: Failed to allocate an object jar\n");
+ pr_notice("Failed to allocate an object jar\n");
goto error_object_jar;
}
@@ -77,7 +76,7 @@ static int __init cachefiles_init(void)
if (ret < 0)
goto error_proc;
- printk(KERN_INFO "CacheFiles: Loaded\n");
+ pr_info("Loaded\n");
return 0;
error_proc:
@@ -85,7 +84,7 @@ error_proc:
error_object_jar:
misc_deregister(&cachefiles_dev);
error_dev:
- kerror("failed to register: %d", ret);
+ pr_err("failed to register: %d", ret);
return ret;
}
@@ -96,7 +95,7 @@ fs_initcall(cachefiles_init);
*/
static void __exit cachefiles_exit(void)
{
- printk(KERN_INFO "CacheFiles: Unloading\n");
+ pr_info("Unloading\n");
cachefiles_proc_cleanup();
kmem_cache_destroy(cachefiles_object_jar);
diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c
index c0a681705104..5bf2b41e66d3 100644
--- a/fs/cachefiles/namei.c
+++ b/fs/cachefiles/namei.c
@@ -35,22 +35,21 @@ void __cachefiles_printk_object(struct cachefiles_object *object,
struct fscache_cookie *cookie;
unsigned keylen, loop;
- printk(KERN_ERR "%sobject: OBJ%x\n",
- prefix, object->fscache.debug_id);
- printk(KERN_ERR "%sobjstate=%s fl=%lx wbusy=%x ev=%lx[%lx]\n",
+ pr_err("%sobject: OBJ%x\n", prefix, object->fscache.debug_id);
+ pr_err("%sobjstate=%s fl=%lx wbusy=%x ev=%lx[%lx]\n",
prefix, object->fscache.state->name,
object->fscache.flags, work_busy(&object->fscache.work),
object->fscache.events, object->fscache.event_mask);
- printk(KERN_ERR "%sops=%u inp=%u exc=%u\n",
+ pr_err("%sops=%u inp=%u exc=%u\n",
prefix, object->fscache.n_ops, object->fscache.n_in_progress,
object->fscache.n_exclusive);
- printk(KERN_ERR "%sparent=%p\n",
+ pr_err("%sparent=%p\n",
prefix, object->fscache.parent);
spin_lock(&object->fscache.lock);
cookie = object->fscache.cookie;
if (cookie) {
- printk(KERN_ERR "%scookie=%p [pr=%p nd=%p fl=%lx]\n",
+ pr_err("%scookie=%p [pr=%p nd=%p fl=%lx]\n",
prefix,
object->fscache.cookie,
object->fscache.cookie->parent,
@@ -62,16 +61,16 @@ void __cachefiles_printk_object(struct cachefiles_object *object,
else
keylen = 0;
} else {
- printk(KERN_ERR "%scookie=NULL\n", prefix);
+ pr_err("%scookie=NULL\n", prefix);
keylen = 0;
}
spin_unlock(&object->fscache.lock);
if (keylen) {
- printk(KERN_ERR "%skey=[%u] '", prefix, keylen);
+ pr_err("%skey=[%u] '", prefix, keylen);
for (loop = 0; loop < keylen; loop++)
- printk("%02x", keybuf[loop]);
- printk("'\n");
+ pr_cont("%02x", keybuf[loop]);
+ pr_cont("'\n");
}
}
@@ -131,13 +130,11 @@ found_dentry:
dentry);
if (fscache_object_is_live(&object->fscache)) {
- printk(KERN_ERR "\n");
- printk(KERN_ERR "CacheFiles: Error:"
- " Can't preemptively bury live object\n");
+ pr_err("\n");
+ pr_err("Error: Can't preemptively bury live object\n");
cachefiles_printk_object(object, NULL);
} else if (test_and_set_bit(CACHEFILES_OBJECT_BURIED, &object->flags)) {
- printk(KERN_ERR "CacheFiles: Error:"
- " Object already preemptively buried\n");
+ pr_err("Error: Object already preemptively buried\n");
}
write_unlock(&cache->active_lock);
@@ -160,7 +157,7 @@ try_again:
write_lock(&cache->active_lock);
if (test_and_set_bit(CACHEFILES_OBJECT_ACTIVE, &object->flags)) {
- printk(KERN_ERR "CacheFiles: Error: Object already active\n");
+ pr_err("Error: Object already active\n");
cachefiles_printk_object(object, NULL);
BUG();
}
@@ -193,9 +190,8 @@ try_again:
* need to wait for it to be destroyed */
wait_for_old_object:
if (fscache_object_is_live(&object->fscache)) {
- printk(KERN_ERR "\n");
- printk(KERN_ERR "CacheFiles: Error:"
- " Unexpected object collision\n");
+ pr_err("\n");
+ pr_err("Error: Unexpected object collision\n");
cachefiles_printk_object(object, xobject);
BUG();
}
@@ -241,9 +237,8 @@ wait_for_old_object:
}
if (timeout <= 0) {
- printk(KERN_ERR "\n");
- printk(KERN_ERR "CacheFiles: Error: Overlong"
- " wait for old active object to go away\n");
+ pr_err("\n");
+ pr_err("Error: Overlong wait for old active object to go away\n");
cachefiles_printk_object(object, xobject);
goto requeue;
}
@@ -548,7 +543,7 @@ lookup_again:
next, next->d_inode, next->d_inode->i_ino);
} else if (!S_ISDIR(next->d_inode->i_mode)) {
- kerror("inode %lu is not a directory",
+ pr_err("inode %lu is not a directory",
next->d_inode->i_ino);
ret = -ENOBUFS;
goto error;
@@ -579,7 +574,7 @@ lookup_again:
} else if (!S_ISDIR(next->d_inode->i_mode) &&
!S_ISREG(next->d_inode->i_mode)
) {
- kerror("inode %lu is not a file or directory",
+ pr_err("inode %lu is not a file or directory",
next->d_inode->i_ino);
ret = -ENOBUFS;
goto error;
@@ -773,7 +768,7 @@ struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache,
ASSERT(subdir->d_inode);
if (!S_ISDIR(subdir->d_inode->i_mode)) {
- kerror("%s is not a directory", dirname);
+ pr_err("%s is not a directory", dirname);
ret = -EIO;
goto check_error;
}
@@ -800,13 +795,13 @@ check_error:
mkdir_error:
mutex_unlock(&dir->d_inode->i_mutex);
dput(subdir);
- kerror("mkdir %s failed with error %d", dirname, ret);
+ pr_err("mkdir %s failed with error %d", dirname, ret);
return ERR_PTR(ret);
lookup_error:
mutex_unlock(&dir->d_inode->i_mutex);
ret = PTR_ERR(subdir);
- kerror("Lookup %s failed with error %d", dirname, ret);
+ pr_err("Lookup %s failed with error %d", dirname, ret);
return ERR_PTR(ret);
nomem_d_alloc:
@@ -896,7 +891,7 @@ lookup_error:
if (ret == -EIO) {
cachefiles_io_error(cache, "Lookup failed");
} else if (ret != -ENOMEM) {
- kerror("Internal error: %d", ret);
+ pr_err("Internal error: %d", ret);
ret = -EIO;
}
@@ -955,7 +950,7 @@ error:
}
if (ret != -ENOMEM) {
- kerror("Internal error: %d", ret);
+ pr_err("Internal error: %d", ret);
ret = -EIO;
}
diff --git a/fs/cachefiles/security.c b/fs/cachefiles/security.c
index 039b5011d83b..396c18ea2764 100644
--- a/fs/cachefiles/security.c
+++ b/fs/cachefiles/security.c
@@ -34,9 +34,7 @@ int cachefiles_get_security_ID(struct cachefiles_cache *cache)
ret = set_security_override_from_ctx(new, cache->secctx);
if (ret < 0) {
put_cred(new);
- printk(KERN_ERR "CacheFiles:"
- " Security denies permission to nominate"
- " security context: error %d\n",
+ pr_err("Security denies permission to nominate security context: error %d\n",
ret);
goto error;
}
@@ -59,16 +57,14 @@ static int cachefiles_check_cache_dir(struct cachefiles_cache *cache,
ret = security_inode_mkdir(root->d_inode, root, 0);
if (ret < 0) {
- printk(KERN_ERR "CacheFiles:"
- " Security denies permission to make dirs: error %d",
+ pr_err("Security denies permission to make dirs: error %d",
ret);
return ret;
}
ret = security_inode_create(root->d_inode, root, 0);
if (ret < 0)
- printk(KERN_ERR "CacheFiles:"
- " Security denies permission to create files: error %d",
+ pr_err("Security denies permission to create files: error %d",
ret);
return ret;
diff --git a/fs/cachefiles/xattr.c b/fs/cachefiles/xattr.c
index 12b0eef84183..1ad51ffbb275 100644
--- a/fs/cachefiles/xattr.c
+++ b/fs/cachefiles/xattr.c
@@ -51,7 +51,7 @@ int cachefiles_check_object_type(struct cachefiles_object *object)
}
if (ret != -EEXIST) {
- kerror("Can't set xattr on %*.*s [%lu] (err %d)",
+ pr_err("Can't set xattr on %*.*s [%lu] (err %d)",
dentry->d_name.len, dentry->d_name.len,
dentry->d_name.name, dentry->d_inode->i_ino,
-ret);
@@ -64,7 +64,7 @@ int cachefiles_check_object_type(struct cachefiles_object *object)
if (ret == -ERANGE)
goto bad_type_length;
- kerror("Can't read xattr on %*.*s [%lu] (err %d)",
+ pr_err("Can't read xattr on %*.*s [%lu] (err %d)",
dentry->d_name.len, dentry->d_name.len,
dentry->d_name.name, dentry->d_inode->i_ino,
-ret);
@@ -85,14 +85,14 @@ error:
return ret;
bad_type_length:
- kerror("Cache object %lu type xattr length incorrect",
+ pr_err("Cache object %lu type xattr length incorrect",
dentry->d_inode->i_ino);
ret = -EIO;
goto error;
bad_type:
xtype[2] = 0;
- kerror("Cache object %*.*s [%lu] type %s not %s",
+ pr_err("Cache object %*.*s [%lu] type %s not %s",
dentry->d_name.len, dentry->d_name.len,
dentry->d_name.name, dentry->d_inode->i_ino,
xtype, type);
@@ -293,7 +293,7 @@ error:
return ret;
bad_type_length:
- kerror("Cache object %lu xattr length incorrect",
+ pr_err("Cache object %lu xattr length incorrect",
dentry->d_inode->i_ino);
ret = -EIO;
goto error;
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 342ca5e423f9..4f3f69079f36 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -694,7 +694,7 @@ static int ceph_writepages_start(struct address_space *mapping,
(wbc->sync_mode == WB_SYNC_ALL ? "ALL" : "HOLD"));
if (fsc->mount_state == CEPH_MOUNT_SHUTDOWN) {
- pr_warning("writepage_start %p on forced umount\n", inode);
+ pr_warn("writepage_start %p on forced umount\n", inode);
return -EIO; /* we're in a forced umount, don't write! */
}
if (fsc->mount_options->wsize && fsc->mount_options->wsize < wsize)
diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c
index 16b54aa31f08..5a743ac141ab 100644
--- a/fs/ceph/debugfs.c
+++ b/fs/ceph/debugfs.c
@@ -71,9 +71,9 @@ static int mdsc_show(struct seq_file *s, void *p)
seq_printf(s, "%s", ceph_mds_op_name(req->r_op));
if (req->r_got_unsafe)
- seq_printf(s, "\t(unsafe)");
+ seq_puts(s, "\t(unsafe)");
else
- seq_printf(s, "\t");
+ seq_puts(s, "\t");
if (req->r_inode) {
seq_printf(s, " #%llx", ceph_ino(req->r_inode));
@@ -119,7 +119,7 @@ static int mdsc_show(struct seq_file *s, void *p)
seq_printf(s, " %s", req->r_path2);
}
- seq_printf(s, "\n");
+ seq_puts(s, "\n");
}
mutex_unlock(&mdsc->mutex);
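
seq_puts() is the right call when the string contains no conversions:
it skips format parsing entirely and cannot be tripped up by a stray
'%' in the text. The two lines below produce identical output:

	seq_printf(s, "\t(unsafe)");	/* parses the string as a format */
	seq_puts(s, "\t(unsafe)");	/* copies it verbatim */
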
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 233c6f96910a..e4fff9ff1c27 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -821,7 +821,7 @@ no_change:
spin_unlock(&ci->i_ceph_lock);
}
} else if (cap_fmode >= 0) {
- pr_warning("mds issued no caps on %llx.%llx\n",
+ pr_warn("mds issued no caps on %llx.%llx\n",
ceph_vinop(inode));
__ceph_get_fmode(ci, cap_fmode);
}
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 2b4d093d0563..9a33b98cb000 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -2218,13 +2218,13 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
/* dup? */
if ((req->r_got_unsafe && !head->safe) ||
(req->r_got_safe && head->safe)) {
- pr_warning("got a dup %s reply on %llu from mds%d\n",
+ pr_warn("got a dup %s reply on %llu from mds%d\n",
head->safe ? "safe" : "unsafe", tid, mds);
mutex_unlock(&mdsc->mutex);
goto out;
}
if (req->r_got_safe && !head->safe) {
- pr_warning("got unsafe after safe on %llu from mds%d\n",
+ pr_warn("got unsafe after safe on %llu from mds%d\n",
tid, mds);
mutex_unlock(&mdsc->mutex);
goto out;
@@ -3525,7 +3525,7 @@ static void peer_reset(struct ceph_connection *con)
struct ceph_mds_session *s = con->private;
struct ceph_mds_client *mdsc = s->s_mdsc;
- pr_warning("mds%d closed our session\n", s->s_mds);
+ pr_warn("mds%d closed our session\n", s->s_mds);
send_mds_reconnect(mdsc, s);
}
diff --git a/fs/ceph/mdsmap.c b/fs/ceph/mdsmap.c
index 132b64eeecd4..261531e55e9d 100644
--- a/fs/ceph/mdsmap.c
+++ b/fs/ceph/mdsmap.c
@@ -62,7 +62,7 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end)
ceph_decode_16_safe(p, end, version, bad);
if (version > 3) {
- pr_warning("got mdsmap version %d > 3, failing", version);
+ pr_warn("got mdsmap version %d > 3, failing", version);
goto bad;
}
diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c
index 7ff866dbb89e..54ac0e8ad96c 100644
--- a/fs/cifs/cifsacl.c
+++ b/fs/cifs/cifsacl.c
@@ -38,7 +38,7 @@ static const struct cifs_sid sid_everyone = {
1, 1, {0, 0, 0, 0, 0, 1}, {0} };
/* security id for Authenticated Users system group */
static const struct cifs_sid sid_authusers = {
- 1, 1, {0, 0, 0, 0, 0, 5}, {__constant_cpu_to_le32(11)} };
+ 1, 1, {0, 0, 0, 0, 0, 5}, {cpu_to_le32(11)} };
/* group users */
static const struct cifs_sid sid_user = {1, 2 , {0, 0, 0, 0, 0, 5}, {} };
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 6ce4e0954b98..c3dc52e8b40e 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -2430,14 +2430,14 @@ CIFSSMBPosixLock(const unsigned int xid, struct cifs_tcon *tcon,
}
parm_data = (struct cifs_posix_lock *)
((char *)&pSMBr->hdr.Protocol + data_offset);
- if (parm_data->lock_type == __constant_cpu_to_le16(CIFS_UNLCK))
+ if (parm_data->lock_type == cpu_to_le16(CIFS_UNLCK))
pLockData->fl_type = F_UNLCK;
else {
if (parm_data->lock_type ==
- __constant_cpu_to_le16(CIFS_RDLCK))
+ cpu_to_le16(CIFS_RDLCK))
pLockData->fl_type = F_RDLCK;
else if (parm_data->lock_type ==
- __constant_cpu_to_le16(CIFS_WRLCK))
+ cpu_to_le16(CIFS_WRLCK))
pLockData->fl_type = F_WRLCK;
pLockData->fl_start = le64_to_cpu(parm_data->start);
@@ -3232,25 +3232,25 @@ CIFSSMB_set_compression(const unsigned int xid, struct cifs_tcon *tcon,
pSMB->compression_state = cpu_to_le16(COMPRESSION_FORMAT_DEFAULT);
pSMB->TotalParameterCount = 0;
- pSMB->TotalDataCount = __constant_cpu_to_le32(2);
+ pSMB->TotalDataCount = cpu_to_le32(2);
pSMB->MaxParameterCount = 0;
pSMB->MaxDataCount = 0;
pSMB->MaxSetupCount = 4;
pSMB->Reserved = 0;
pSMB->ParameterOffset = 0;
- pSMB->DataCount = __constant_cpu_to_le32(2);
+ pSMB->DataCount = cpu_to_le32(2);
pSMB->DataOffset =
cpu_to_le32(offsetof(struct smb_com_transaction_compr_ioctl_req,
compression_state) - 4); /* 84 */
pSMB->SetupCount = 4;
- pSMB->SubCommand = __constant_cpu_to_le16(NT_TRANSACT_IOCTL);
+ pSMB->SubCommand = cpu_to_le16(NT_TRANSACT_IOCTL);
pSMB->ParameterCount = 0;
- pSMB->FunctionCode = __constant_cpu_to_le32(FSCTL_SET_COMPRESSION);
+ pSMB->FunctionCode = cpu_to_le32(FSCTL_SET_COMPRESSION);
pSMB->IsFsctl = 1; /* FSCTL */
pSMB->IsRootFlag = 0;
pSMB->Fid = fid; /* file handle always le */
/* 3 byte pad, followed by 2 byte compress state */
- pSMB->ByteCount = __constant_cpu_to_le16(5);
+ pSMB->ByteCount = cpu_to_le16(5);
inc_rfc1001_len(pSMB, 5);
rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -3386,10 +3386,10 @@ static __u16 ACL_to_cifs_posix(char *parm_data, const char *pACL,
cifs_acl->version = cpu_to_le16(1);
if (acl_type == ACL_TYPE_ACCESS) {
cifs_acl->access_entry_count = cpu_to_le16(count);
- cifs_acl->default_entry_count = __constant_cpu_to_le16(0xFFFF);
+ cifs_acl->default_entry_count = cpu_to_le16(0xFFFF);
} else if (acl_type == ACL_TYPE_DEFAULT) {
cifs_acl->default_entry_count = cpu_to_le16(count);
- cifs_acl->access_entry_count = __constant_cpu_to_le16(0xFFFF);
+ cifs_acl->access_entry_count = cpu_to_le16(0xFFFF);
} else {
cifs_dbg(FYI, "unknown ACL type %d\n", acl_type);
return 0;
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index e87387dbf39f..27e6175a7f7e 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -46,7 +46,7 @@ static __u32 cifs_ssetup_hdr(struct cifs_ses *ses, SESSION_SETUP_ANDX *pSMB)
CIFSMaxBufSize + MAX_CIFS_HDR_SIZE - 4,
USHRT_MAX));
pSMB->req.MaxMpxCount = cpu_to_le16(ses->server->maxReq);
- pSMB->req.VcNumber = __constant_cpu_to_le16(1);
+ pSMB->req.VcNumber = cpu_to_le16(1);
/* Now no need to set SMBFLG_CASELESS or obsolete CANONICAL PATH */
diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c
index b8021fde987d..36867bd21d04 100644
--- a/fs/cifs/smb2misc.c
+++ b/fs/cifs/smb2misc.c
@@ -67,27 +67,27 @@ check_smb2_hdr(struct smb2_hdr *hdr, __u64 mid)
* indexed by command in host byte order
*/
static const __le16 smb2_rsp_struct_sizes[NUMBER_OF_SMB2_COMMANDS] = {
- /* SMB2_NEGOTIATE */ __constant_cpu_to_le16(65),
- /* SMB2_SESSION_SETUP */ __constant_cpu_to_le16(9),
- /* SMB2_LOGOFF */ __constant_cpu_to_le16(4),
- /* SMB2_TREE_CONNECT */ __constant_cpu_to_le16(16),
- /* SMB2_TREE_DISCONNECT */ __constant_cpu_to_le16(4),
- /* SMB2_CREATE */ __constant_cpu_to_le16(89),
- /* SMB2_CLOSE */ __constant_cpu_to_le16(60),
- /* SMB2_FLUSH */ __constant_cpu_to_le16(4),
- /* SMB2_READ */ __constant_cpu_to_le16(17),
- /* SMB2_WRITE */ __constant_cpu_to_le16(17),
- /* SMB2_LOCK */ __constant_cpu_to_le16(4),
- /* SMB2_IOCTL */ __constant_cpu_to_le16(49),
+ /* SMB2_NEGOTIATE */ cpu_to_le16(65),
+ /* SMB2_SESSION_SETUP */ cpu_to_le16(9),
+ /* SMB2_LOGOFF */ cpu_to_le16(4),
+ /* SMB2_TREE_CONNECT */ cpu_to_le16(16),
+ /* SMB2_TREE_DISCONNECT */ cpu_to_le16(4),
+ /* SMB2_CREATE */ cpu_to_le16(89),
+ /* SMB2_CLOSE */ cpu_to_le16(60),
+ /* SMB2_FLUSH */ cpu_to_le16(4),
+ /* SMB2_READ */ cpu_to_le16(17),
+ /* SMB2_WRITE */ cpu_to_le16(17),
+ /* SMB2_LOCK */ cpu_to_le16(4),
+ /* SMB2_IOCTL */ cpu_to_le16(49),
/* BB CHECK this ... not listed in documentation */
- /* SMB2_CANCEL */ __constant_cpu_to_le16(0),
- /* SMB2_ECHO */ __constant_cpu_to_le16(4),
- /* SMB2_QUERY_DIRECTORY */ __constant_cpu_to_le16(9),
- /* SMB2_CHANGE_NOTIFY */ __constant_cpu_to_le16(9),
- /* SMB2_QUERY_INFO */ __constant_cpu_to_le16(9),
- /* SMB2_SET_INFO */ __constant_cpu_to_le16(2),
+ /* SMB2_CANCEL */ cpu_to_le16(0),
+ /* SMB2_ECHO */ cpu_to_le16(4),
+ /* SMB2_QUERY_DIRECTORY */ cpu_to_le16(9),
+ /* SMB2_CHANGE_NOTIFY */ cpu_to_le16(9),
+ /* SMB2_QUERY_INFO */ cpu_to_le16(9),
+ /* SMB2_SET_INFO */ cpu_to_le16(2),
/* BB FIXME can also be 44 for lease break */
- /* SMB2_OPLOCK_BREAK */ __constant_cpu_to_le16(24)
+ /* SMB2_OPLOCK_BREAK */ cpu_to_le16(24)
};
int
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index 787844bde384..7f99a0f956e4 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -547,7 +547,7 @@ smb2_clone_range(const unsigned int xid,
goto cchunk_out;
/* For now array only one chunk long, will make more flexible later */
- pcchunk->ChunkCount = __constant_cpu_to_le32(1);
+ pcchunk->ChunkCount = cpu_to_le32(1);
pcchunk->Reserved = 0;
pcchunk->Reserved2 = 0;
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index b0037b609c54..78b568467bb6 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -1364,7 +1364,7 @@ SMB2_set_compression(const unsigned int xid, struct cifs_tcon *tcon,
char *ret_data = NULL;
fsctl_input.CompressionState =
- __constant_cpu_to_le16(COMPRESSION_FORMAT_DEFAULT);
+ cpu_to_le16(COMPRESSION_FORMAT_DEFAULT);
rc = SMB2_ioctl(xid, tcon, persistent_fid, volatile_fid,
FSCTL_SET_COMPRESSION, true /* is_fsctl */,
diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h
index 69f3595d3952..d03adad014db 100644
--- a/fs/cifs/smb2pdu.h
+++ b/fs/cifs/smb2pdu.h
@@ -85,7 +85,7 @@
/* BB FIXME - analyze following length BB */
#define MAX_SMB2_HDR_SIZE 0x78 /* 4 len + 64 hdr + (2*24 wct) + 2 bct + 2 pad */
-#define SMB2_PROTO_NUMBER __constant_cpu_to_le32(0x424d53fe)
+#define SMB2_PROTO_NUMBER cpu_to_le32(0x424d53fe)
/*
* SMB2 Header Definition
@@ -96,7 +96,7 @@
*
*/
-#define SMB2_HEADER_STRUCTURE_SIZE __constant_cpu_to_le16(64)
+#define SMB2_HEADER_STRUCTURE_SIZE cpu_to_le16(64)
struct smb2_hdr {
__be32 smb2_buf_length; /* big endian on wire */
@@ -137,16 +137,16 @@ struct smb2_transform_hdr {
} __packed;
/* Encryption Algorithms */
-#define SMB2_ENCRYPTION_AES128_CCM __constant_cpu_to_le16(0x0001)
+#define SMB2_ENCRYPTION_AES128_CCM cpu_to_le16(0x0001)
/*
* SMB2 flag definitions
*/
-#define SMB2_FLAGS_SERVER_TO_REDIR __constant_cpu_to_le32(0x00000001)
-#define SMB2_FLAGS_ASYNC_COMMAND __constant_cpu_to_le32(0x00000002)
-#define SMB2_FLAGS_RELATED_OPERATIONS __constant_cpu_to_le32(0x00000004)
-#define SMB2_FLAGS_SIGNED __constant_cpu_to_le32(0x00000008)
-#define SMB2_FLAGS_DFS_OPERATIONS __constant_cpu_to_le32(0x10000000)
+#define SMB2_FLAGS_SERVER_TO_REDIR cpu_to_le32(0x00000001)
+#define SMB2_FLAGS_ASYNC_COMMAND cpu_to_le32(0x00000002)
+#define SMB2_FLAGS_RELATED_OPERATIONS cpu_to_le32(0x00000004)
+#define SMB2_FLAGS_SIGNED cpu_to_le32(0x00000008)
+#define SMB2_FLAGS_DFS_OPERATIONS cpu_to_le32(0x10000000)
/*
* Definitions for SMB2 Protocol Data Units (network frames)
@@ -157,7 +157,7 @@ struct smb2_transform_hdr {
*
*/
-#define SMB2_ERROR_STRUCTURE_SIZE2 __constant_cpu_to_le16(9)
+#define SMB2_ERROR_STRUCTURE_SIZE2 cpu_to_le16(9)
struct smb2_err_rsp {
struct smb2_hdr hdr;
@@ -500,12 +500,12 @@ struct create_context {
#define SMB2_LEASE_HANDLE_CACHING_HE 0x02
#define SMB2_LEASE_WRITE_CACHING_HE 0x04
-#define SMB2_LEASE_NONE __constant_cpu_to_le32(0x00)
-#define SMB2_LEASE_READ_CACHING __constant_cpu_to_le32(0x01)
-#define SMB2_LEASE_HANDLE_CACHING __constant_cpu_to_le32(0x02)
-#define SMB2_LEASE_WRITE_CACHING __constant_cpu_to_le32(0x04)
+#define SMB2_LEASE_NONE cpu_to_le32(0x00)
+#define SMB2_LEASE_READ_CACHING cpu_to_le32(0x01)
+#define SMB2_LEASE_HANDLE_CACHING cpu_to_le32(0x02)
+#define SMB2_LEASE_WRITE_CACHING cpu_to_le32(0x04)
-#define SMB2_LEASE_FLAG_BREAK_IN_PROGRESS __constant_cpu_to_le32(0x02)
+#define SMB2_LEASE_FLAG_BREAK_IN_PROGRESS cpu_to_le32(0x02)
#define SMB2_LEASE_KEY_SIZE 16
diff --git a/fs/coda/cnode.c b/fs/coda/cnode.c
index 911cf30d057d..7740b1c871c1 100644
--- a/fs/coda/cnode.c
+++ b/fs/coda/cnode.c
@@ -101,7 +101,7 @@ struct inode *coda_cnode_make(struct CodaFid *fid, struct super_block *sb)
inode = coda_iget(sb, fid, &attr);
if (IS_ERR(inode))
- printk("coda_cnode_make: coda_iget failed\n");
+ pr_warn("%s: coda_iget failed\n", __func__);
return inode;
}
@@ -137,7 +137,7 @@ struct inode *coda_fid_to_inode(struct CodaFid *fid, struct super_block *sb)
unsigned long hash = coda_f2i(fid);
if ( !sb ) {
- printk("coda_fid_to_inode: no sb!\n");
+ pr_warn("%s: no sb!\n", __func__);
return NULL;
}
diff --git a/fs/coda/coda_linux.h b/fs/coda/coda_linux.h
index e7550cb9fb74..d42b725b1d21 100644
--- a/fs/coda/coda_linux.h
+++ b/fs/coda/coda_linux.h
@@ -12,6 +12,12 @@
#ifndef _LINUX_CODA_FS
#define _LINUX_CODA_FS
+#ifdef pr_fmt
+#undef pr_fmt
+#endif
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/kernel.h>
#include <linux/param.h>
#include <linux/mm.h>
@@ -63,7 +69,7 @@ void coda_sysctl_clean(void);
else \
ptr = (cast)vzalloc((unsigned long) size); \
if (!ptr) \
- printk("kernel malloc returns 0 at %s:%d\n", __FILE__, __LINE__); \
+ pr_warn("kernel malloc returns 0 at %s:%d\n", __FILE__, __LINE__); \
} while (0)
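Defining pr_fmt() before the printk helpers are pulled in makes every pr_warn()/pr_err() in the unit carry the module prefix automatically, which is why the call sites below can drop their hand-written "coda" strings. A minimal sketch of the mechanism (illustrative, not from the patch; the function name is hypothetical):

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/printk.h>

static void pr_fmt_demo(void)
{
	/* expands to printk(KERN_WARNING "coda: " "%s: no sb!\n", ...) */
	pr_warn("%s: no sb!\n", __func__);
}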
diff --git a/fs/coda/dir.c b/fs/coda/dir.c
index 5efbb5ee0adc..cd8a63238b11 100644
--- a/fs/coda/dir.c
+++ b/fs/coda/dir.c
@@ -102,7 +102,7 @@ static struct dentry *coda_lookup(struct inode *dir, struct dentry *entry, unsig
int type = 0;
if (length > CODA_MAXNAMLEN) {
- printk(KERN_ERR "name too long: lookup, %s (%*s)\n",
+ pr_err("name too long: lookup, %s (%*s)\n",
coda_i2s(dir), (int)length, name);
return ERR_PTR(-ENAMETOOLONG);
}
@@ -453,23 +453,23 @@ static int coda_venus_readdir(struct file *coda_file, struct dir_context *ctx)
ret = kernel_read(host_file, ctx->pos - 2, (char *)vdir,
sizeof(*vdir));
if (ret < 0) {
- printk(KERN_ERR "coda readdir: read dir %s failed %d\n",
- coda_f2s(&cii->c_fid), ret);
+ pr_err("%s: read dir %s failed %d\n",
+ __func__, coda_f2s(&cii->c_fid), ret);
break;
}
if (ret == 0) break; /* end of directory file reached */
/* catch truncated reads */
if (ret < vdir_size || ret < vdir_size + vdir->d_namlen) {
- printk(KERN_ERR "coda readdir: short read on %s\n",
- coda_f2s(&cii->c_fid));
+ pr_err("%s: short read on %s\n",
+ __func__, coda_f2s(&cii->c_fid));
ret = -EBADF;
break;
}
/* validate whether the directory file actually makes sense */
if (vdir->d_reclen < vdir_size + vdir->d_namlen) {
- printk(KERN_ERR "coda readdir: invalid dir %s\n",
- coda_f2s(&cii->c_fid));
+ pr_err("%s: invalid dir %s\n",
+ __func__, coda_f2s(&cii->c_fid));
ret = -EBADF;
break;
}
@@ -589,8 +589,8 @@ int coda_revalidate_inode(struct inode *inode)
coda_vattr_to_iattr(inode, &attr);
if ((old_mode & S_IFMT) != (inode->i_mode & S_IFMT)) {
- printk("Coda: inode %ld, fid %s changed type!\n",
- inode->i_ino, coda_f2s(&(cii->c_fid)));
+ pr_warn("inode %ld, fid %s changed type!\n",
+ inode->i_ino, coda_f2s(&(cii->c_fid)));
}
/* the following can happen when a local fid is replaced
diff --git a/fs/coda/inode.c b/fs/coda/inode.c
index d9c7751f10ac..fe3afb2de880 100644
--- a/fs/coda/inode.c
+++ b/fs/coda/inode.c
@@ -119,12 +119,12 @@ static int get_device_index(struct coda_mount_data *data)
int idx;
if (data == NULL) {
- printk("coda_read_super: Bad mount data\n");
+ pr_warn("%s: Bad mount data\n", __func__);
return -1;
}
if (data->version != CODA_MOUNT_VERSION) {
- printk("coda_read_super: Bad mount version\n");
+ pr_warn("%s: Bad mount version\n", __func__);
return -1;
}
@@ -141,13 +141,13 @@ static int get_device_index(struct coda_mount_data *data)
fdput(f);
if (idx < 0 || idx >= MAX_CODADEVS) {
- printk("coda_read_super: Bad minor number\n");
+ pr_warn("%s: Bad minor number\n", __func__);
return -1;
}
return idx;
Ebadf:
- printk("coda_read_super: Bad file\n");
+ pr_warn("%s: Bad file\n", __func__);
return -1;
}
@@ -168,19 +168,19 @@ static int coda_fill_super(struct super_block *sb, void *data, int silent)
if(idx == -1)
idx = 0;
- printk(KERN_INFO "coda_read_super: device index: %i\n", idx);
+ pr_info("%s: device index: %i\n", __func__, idx);
vc = &coda_comms[idx];
mutex_lock(&vc->vc_mutex);
if (!vc->vc_inuse) {
- printk("coda_read_super: No pseudo device\n");
+ pr_warn("%s: No pseudo device\n", __func__);
error = -EINVAL;
goto unlock_out;
}
if (vc->vc_sb) {
- printk("coda_read_super: Device already mounted\n");
+ pr_warn("%s: Device already mounted\n", __func__);
error = -EBUSY;
goto unlock_out;
}
@@ -204,22 +204,23 @@ static int coda_fill_super(struct super_block *sb, void *data, int silent)
/* get root fid from Venus: this needs the root inode */
error = venus_rootfid(sb, &fid);
if ( error ) {
- printk("coda_read_super: coda_get_rootfid failed with %d\n",
- error);
+ pr_warn("%s: coda_get_rootfid failed with %d\n",
+ __func__, error);
goto error;
}
- printk("coda_read_super: rootfid is %s\n", coda_f2s(&fid));
+ pr_info("%s: rootfid is %s\n", __func__, coda_f2s(&fid));
/* make root inode */
root = coda_cnode_make(&fid, sb);
if (IS_ERR(root)) {
error = PTR_ERR(root);
- printk("Failure of coda_cnode_make for root: error %d\n", error);
+ pr_warn("Failure of coda_cnode_make for root: error %d\n",
+ error);
goto error;
}
- printk("coda_read_super: rootinode is %ld dev %s\n",
- root->i_ino, root->i_sb->s_id);
+ pr_info("%s: rootinode is %ld dev %s\n",
+ __func__, root->i_ino, root->i_sb->s_id);
sb->s_root = d_make_root(root);
if (!sb->s_root) {
error = -EINVAL;
@@ -246,7 +247,7 @@ static void coda_put_super(struct super_block *sb)
sb->s_fs_info = NULL;
mutex_unlock(&vcp->vc_mutex);
- printk("Coda: Bye bye.\n");
+ pr_info("Bye bye.\n");
}
static void coda_evict_inode(struct inode *inode)
diff --git a/fs/coda/psdev.c b/fs/coda/psdev.c
index ebc2bae6c289..5c1e4242368b 100644
--- a/fs/coda/psdev.c
+++ b/fs/coda/psdev.c
@@ -114,14 +114,14 @@ static ssize_t coda_psdev_write(struct file *file, const char __user *buf,
int size = sizeof(*dcbuf);
if ( nbytes < sizeof(struct coda_out_hdr) ) {
- printk("coda_downcall opc %d uniq %d, not enough!\n",
- hdr.opcode, hdr.unique);
+ pr_warn("coda_downcall opc %d uniq %d, not enough!\n",
+ hdr.opcode, hdr.unique);
count = nbytes;
goto out;
}
if ( nbytes > size ) {
- printk("Coda: downcall opc %d, uniq %d, too much!",
- hdr.opcode, hdr.unique);
+ pr_warn("downcall opc %d, uniq %d, too much!",
+ hdr.opcode, hdr.unique);
nbytes = size;
}
CODA_ALLOC(dcbuf, union outputArgs *, nbytes);
@@ -136,7 +136,8 @@ static ssize_t coda_psdev_write(struct file *file, const char __user *buf,
CODA_FREE(dcbuf, nbytes);
if (error) {
- printk("psdev_write: coda_downcall error: %d\n", error);
+ pr_warn("%s: coda_downcall error: %d\n",
+ __func__, error);
retval = error;
goto out;
}
@@ -157,16 +158,17 @@ static ssize_t coda_psdev_write(struct file *file, const char __user *buf,
mutex_unlock(&vcp->vc_mutex);
if (!req) {
- printk("psdev_write: msg (%d, %d) not found\n",
- hdr.opcode, hdr.unique);
+ pr_warn("%s: msg (%d, %d) not found\n",
+ __func__, hdr.opcode, hdr.unique);
retval = -ESRCH;
goto out;
}
/* move data into response buffer. */
if (req->uc_outSize < nbytes) {
- printk("psdev_write: too much cnt: %d, cnt: %ld, opc: %d, uniq: %d.\n",
- req->uc_outSize, (long)nbytes, hdr.opcode, hdr.unique);
+ pr_warn("%s: too much cnt: %d, cnt: %ld, opc: %d, uniq: %d.\n",
+ __func__, req->uc_outSize, (long)nbytes,
+ hdr.opcode, hdr.unique);
nbytes = req->uc_outSize; /* don't have more space! */
}
if (copy_from_user(req->uc_data, buf, nbytes)) {
@@ -240,8 +242,8 @@ static ssize_t coda_psdev_read(struct file * file, char __user * buf,
/* Move the input args into userspace */
count = req->uc_inSize;
if (nbytes < req->uc_inSize) {
- printk ("psdev_read: Venus read %ld bytes of %d in message\n",
- (long)nbytes, req->uc_inSize);
+ pr_warn("%s: Venus read %ld bytes of %d in message\n",
+ __func__, (long)nbytes, req->uc_inSize);
count = nbytes;
}
@@ -305,7 +307,7 @@ static int coda_psdev_release(struct inode * inode, struct file * file)
struct upc_req *req, *tmp;
if (!vcp || !vcp->vc_inuse ) {
- printk("psdev_release: Not open.\n");
+ pr_warn("%s: Not open.\n", __func__);
return -1;
}
@@ -354,8 +356,8 @@ static int init_coda_psdev(void)
{
int i, err = 0;
if (register_chrdev(CODA_PSDEV_MAJOR, "coda", &coda_psdev_fops)) {
- printk(KERN_ERR "coda_psdev: unable to get major %d\n",
- CODA_PSDEV_MAJOR);
+ pr_err("%s: unable to get major %d\n",
+ __func__, CODA_PSDEV_MAJOR);
return -EIO;
}
coda_psdev_class = class_create(THIS_MODULE, "coda");
@@ -393,13 +395,13 @@ static int __init init_coda(void)
goto out2;
status = init_coda_psdev();
if ( status ) {
- printk("Problem (%d) in init_coda_psdev\n", status);
+ pr_warn("Problem (%d) in init_coda_psdev\n", status);
goto out1;
}
status = register_filesystem(&coda_fs_type);
if (status) {
- printk("coda: failed to register filesystem!\n");
+ pr_warn("failed to register filesystem!\n");
goto out;
}
return 0;
@@ -420,9 +422,8 @@ static void __exit exit_coda(void)
int err, i;
err = unregister_filesystem(&coda_fs_type);
- if ( err != 0 ) {
- printk("coda: failed to unregister filesystem\n");
- }
+ if (err != 0)
+ pr_warn("failed to unregister filesystem\n");
for (i = 0; i < MAX_CODADEVS; i++)
device_destroy(coda_psdev_class, MKDEV(CODA_PSDEV_MAJOR, i));
class_destroy(coda_psdev_class);
diff --git a/fs/coda/upcall.c b/fs/coda/upcall.c
index 3a731976dc5e..21fcf8dcb9cd 100644
--- a/fs/coda/upcall.c
+++ b/fs/coda/upcall.c
@@ -508,8 +508,8 @@ int venus_pioctl(struct super_block *sb, struct CodaFid *fid,
inp->coda_ioctl.data = (char *)(INSIZE(ioctl));
/* get the data out of user space */
- if ( copy_from_user((char*)inp + (long)inp->coda_ioctl.data,
- data->vi.in, data->vi.in_size) ) {
+ if (copy_from_user((char *)inp + (long)inp->coda_ioctl.data,
+ data->vi.in, data->vi.in_size)) {
error = -EINVAL;
goto exit;
}
@@ -518,8 +518,8 @@ int venus_pioctl(struct super_block *sb, struct CodaFid *fid,
&outsize, inp);
if (error) {
- printk("coda_pioctl: Venus returns: %d for %s\n",
- error, coda_f2s(fid));
+ pr_warn("%s: Venus returns: %d for %s\n",
+ __func__, error, coda_f2s(fid));
goto exit;
}
@@ -675,7 +675,7 @@ static int coda_upcall(struct venus_comm *vcp,
mutex_lock(&vcp->vc_mutex);
if (!vcp->vc_inuse) {
- printk(KERN_NOTICE "coda: Venus dead, not sending upcall\n");
+ pr_notice("Venus dead, not sending upcall\n");
error = -ENXIO;
goto exit;
}
@@ -725,7 +725,7 @@ static int coda_upcall(struct venus_comm *vcp,
error = -EINTR;
if ((req->uc_flags & CODA_REQ_ABORT) || !signal_pending(current)) {
- printk(KERN_WARNING "coda: Unexpected interruption.\n");
+ pr_warn("Unexpected interruption.\n");
goto exit;
}
@@ -735,7 +735,7 @@ static int coda_upcall(struct venus_comm *vcp,
/* Venus saw the upcall, make sure we can send interrupt signal */
if (!vcp->vc_inuse) {
- printk(KERN_INFO "coda: Venus dead, not sending signal.\n");
+ pr_info("Venus dead, not sending signal.\n");
goto exit;
}
diff --git a/fs/configfs/configfs_internal.h b/fs/configfs/configfs_internal.h
index b5f0a3b91f18..bd4a3c167091 100644
--- a/fs/configfs/configfs_internal.h
+++ b/fs/configfs/configfs_internal.h
@@ -24,6 +24,12 @@
* configfs Copyright (C) 2005 Oracle. All rights reserved.
*/
+#ifdef pr_fmt
+#undef pr_fmt
+#endif
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/slab.h>
#include <linux/list.h>
#include <linux/spinlock.h>
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index e081acbac2e7..668dcabc5695 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -940,9 +940,9 @@ static void client_drop_item(struct config_item *parent_item,
#ifdef DEBUG
static void configfs_dump_one(struct configfs_dirent *sd, int level)
{
- printk(KERN_INFO "%*s\"%s\":\n", level, " ", configfs_get_name(sd));
+ pr_info("%*s\"%s\":\n", level, " ", configfs_get_name(sd));
-#define type_print(_type) if (sd->s_type & _type) printk(KERN_INFO "%*s %s\n", level, " ", #_type);
+#define type_print(_type) if (sd->s_type & _type) pr_info("%*s %s\n", level, " ", #_type);
type_print(CONFIGFS_ROOT);
type_print(CONFIGFS_DIR);
type_print(CONFIGFS_ITEM_ATTR);
@@ -1699,7 +1699,7 @@ void configfs_unregister_subsystem(struct configfs_subsystem *subsys)
struct dentry *root = dentry->d_sb->s_root;
if (dentry->d_parent != root) {
- printk(KERN_ERR "configfs: Tried to unregister non-subsystem!\n");
+ pr_err("Tried to unregister non-subsystem!\n");
return;
}
@@ -1709,7 +1709,7 @@ void configfs_unregister_subsystem(struct configfs_subsystem *subsys)
mutex_lock(&configfs_symlink_mutex);
spin_lock(&configfs_dirent_lock);
if (configfs_detach_prep(dentry, NULL)) {
- printk(KERN_ERR "configfs: Tried to unregister non-empty subsystem!\n");
+ pr_err("Tried to unregister non-empty subsystem!\n");
}
spin_unlock(&configfs_dirent_lock);
mutex_unlock(&configfs_symlink_mutex);
diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c
index a9d35b0e06cf..5946ad98053f 100644
--- a/fs/configfs/inode.c
+++ b/fs/configfs/inode.c
@@ -168,9 +168,8 @@ static void configfs_set_inode_lock_class(struct configfs_dirent *sd,
* In practice the maximum level of locking depth is
* already reached. Just inform about possible reasons.
*/
- printk(KERN_INFO "configfs: Too many levels of inodes"
- " for the locking correctness validator.\n");
- printk(KERN_INFO "Spurious warnings may appear.\n");
+ pr_info("Too many levels of inodes for the locking correctness validator.\n");
+ pr_info("Spurious warnings may appear.\n");
}
}
}
diff --git a/fs/configfs/item.c b/fs/configfs/item.c
index 50cee7f9110b..e65f9ffbb999 100644
--- a/fs/configfs/item.c
+++ b/fs/configfs/item.c
@@ -19,7 +19,7 @@
* Boston, MA 021110-1307, USA.
*
* Based on kobject:
- * kobject is Copyright (c) 2002-2003 Patrick Mochel
+ * kobject is Copyright (c) 2002-2003 Patrick Mochel
*
* configfs Copyright (C) 2005 Oracle. All rights reserved.
*
@@ -35,9 +35,9 @@
#include <linux/configfs.h>
-static inline struct config_item * to_item(struct list_head * entry)
+static inline struct config_item *to_item(struct list_head *entry)
{
- return container_of(entry,struct config_item,ci_entry);
+ return container_of(entry, struct config_item, ci_entry);
}
/* Evil kernel */
@@ -47,34 +47,35 @@ static void config_item_release(struct kref *kref);
* config_item_init - initialize item.
* @item: item in question.
*/
-void config_item_init(struct config_item * item)
+void config_item_init(struct config_item *item)
{
kref_init(&item->ci_kref);
INIT_LIST_HEAD(&item->ci_entry);
}
+EXPORT_SYMBOL(config_item_init);
/**
* config_item_set_name - Set the name of an item
* @item: item.
- * @name: name.
+ * @fmt: the vsnprintf() format string for the name.
*
* If strlen(name) >= CONFIGFS_ITEM_NAME_LEN, then use a
* dynamically allocated string that @item->ci_name points to.
* Otherwise, use the static @item->ci_namebuf array.
*/
-int config_item_set_name(struct config_item * item, const char * fmt, ...)
+int config_item_set_name(struct config_item *item, const char *fmt, ...)
{
int error = 0;
int limit = CONFIGFS_ITEM_NAME_LEN;
int need;
va_list args;
- char * name;
+ char *name;
/*
* First, try the static array
*/
- va_start(args,fmt);
- need = vsnprintf(item->ci_namebuf,limit,fmt,args);
+ va_start(args, fmt);
+ need = vsnprintf(item->ci_namebuf, limit, fmt, args);
va_end(args);
if (need < limit)
name = item->ci_namebuf;
@@ -83,13 +84,13 @@ int config_item_set_name(struct config_item * item, const char * fmt, ...)
* Need more space? Allocate it and try again
*/
limit = need + 1;
- name = kmalloc(limit,GFP_KERNEL);
+ name = kmalloc(limit, GFP_KERNEL);
if (!name) {
error = -ENOMEM;
goto Done;
}
- va_start(args,fmt);
- need = vsnprintf(name,limit,fmt,args);
+ va_start(args, fmt);
+ need = vsnprintf(name, limit, fmt, args);
va_end(args);
/* Still? Give up. */
@@ -109,7 +110,6 @@ int config_item_set_name(struct config_item * item, const char * fmt, ...)
Done:
return error;
}
-
EXPORT_SYMBOL(config_item_set_name);
void config_item_init_type_name(struct config_item *item,
@@ -131,20 +131,21 @@ void config_group_init_type_name(struct config_group *group, const char *name,
}
EXPORT_SYMBOL(config_group_init_type_name);
-struct config_item * config_item_get(struct config_item * item)
+struct config_item *config_item_get(struct config_item *item)
{
if (item)
kref_get(&item->ci_kref);
return item;
}
+EXPORT_SYMBOL(config_item_get);
-static void config_item_cleanup(struct config_item * item)
+static void config_item_cleanup(struct config_item *item)
{
- struct config_item_type * t = item->ci_type;
- struct config_group * s = item->ci_group;
- struct config_item * parent = item->ci_parent;
+ struct config_item_type *t = item->ci_type;
+ struct config_group *s = item->ci_group;
+ struct config_item *parent = item->ci_parent;
- pr_debug("config_item %s: cleaning up\n",config_item_name(item));
+ pr_debug("config_item %s: cleaning up\n", config_item_name(item));
if (item->ci_name != item->ci_namebuf)
kfree(item->ci_name);
item->ci_name = NULL;
@@ -167,21 +168,23 @@ static void config_item_release(struct kref *kref)
*
* Decrement the refcount, and if 0, call config_item_cleanup().
*/
-void config_item_put(struct config_item * item)
+void config_item_put(struct config_item *item)
{
if (item)
kref_put(&item->ci_kref, config_item_release);
}
+EXPORT_SYMBOL(config_item_put);
/**
* config_group_init - initialize a group for use
- * @k: group
+ * @group: config_group
*/
void config_group_init(struct config_group *group)
{
config_item_init(&group->cg_item);
INIT_LIST_HEAD(&group->cg_children);
}
+EXPORT_SYMBOL(config_group_init);
/**
* config_group_find_item - search for item in group.
@@ -195,11 +198,11 @@ void config_group_init(struct config_group *group)
struct config_item *config_group_find_item(struct config_group *group,
const char *name)
{
- struct list_head * entry;
- struct config_item * ret = NULL;
+ struct list_head *entry;
+ struct config_item *ret = NULL;
- list_for_each(entry,&group->cg_children) {
- struct config_item * item = to_item(entry);
+ list_for_each(entry, &group->cg_children) {
+ struct config_item *item = to_item(entry);
if (config_item_name(item) &&
!strcmp(config_item_name(item), name)) {
ret = config_item_get(item);
@@ -208,9 +211,4 @@ struct config_item *config_group_find_item(struct config_group *group,
}
return ret;
}
-
-EXPORT_SYMBOL(config_item_init);
-EXPORT_SYMBOL(config_group_init);
-EXPORT_SYMBOL(config_item_get);
-EXPORT_SYMBOL(config_item_put);
EXPORT_SYMBOL(config_group_find_item);
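config_item_set_name() first formats into the fixed ci_namebuf and only takes the kmalloc() path when vsnprintf() reports the name would not fit, so callers just hand it a format string. A hedged usage sketch (hypothetical caller, not from the patch):

#include <linux/configfs.h>

static int name_widget(struct config_item *item, int index)
{
	/* short names land in ci_namebuf; long ones are allocated */
	return config_item_set_name(item, "widget-%d", index);
}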
diff --git a/fs/configfs/mount.c b/fs/configfs/mount.c
index 7f26c3cf75ae..f6c285833390 100644
--- a/fs/configfs/mount.c
+++ b/fs/configfs/mount.c
@@ -85,7 +85,7 @@ static int configfs_fill_super(struct super_block *sb, void *data, int silent)
/* directory inodes start off with i_nlink == 2 (for "." entry) */
inc_nlink(inode);
} else {
- pr_debug("configfs: could not get root inode\n");
+ pr_debug("could not get root inode\n");
return -ENOMEM;
}
@@ -155,7 +155,7 @@ static int __init configfs_init(void)
return 0;
out4:
- printk(KERN_ERR "configfs: Unable to register filesystem!\n");
+ pr_err("Unable to register filesystem!\n");
configfs_inode_exit();
out3:
kobject_put(config_kobj);
diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index c71038079b47..db4422c6c1de 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -10,6 +10,8 @@
*
* ------------------------------------------------------------------------- */
+#define pr_fmt(fmt) "devpts: " fmt
+
#include <linux/module.h>
#include <linux/init.h>
#include <linux/fs.h>
@@ -148,10 +150,10 @@ static inline struct super_block *pts_sb_from_inode(struct inode *inode)
/*
* parse_mount_options():
- * Set @opts to mount options specified in @data. If an option is not
- * specified in @data, set it to its default value. The exception is
- * 'newinstance' option which can only be set/cleared on a mount (i.e.
- * cannot be changed during remount).
+ * Set @opts to mount options specified in @data. If an option is not
+ * specified in @data, set it to its default value. The exception is
+ * 'newinstance' option which can only be set/cleared on a mount (i.e.
+ * cannot be changed during remount).
*
* Note: @data may be NULL (in which case all options are set to default).
*/
@@ -225,7 +227,7 @@ static int parse_mount_options(char *data, int op, struct pts_mount_opts *opts)
break;
#endif
default:
- printk(KERN_ERR "devpts: called with bogus options\n");
+ pr_err("called with bogus options\n");
return -EINVAL;
}
}
@@ -261,7 +263,7 @@ static int mknod_ptmx(struct super_block *sb)
dentry = d_alloc_name(root, "ptmx");
if (!dentry) {
- printk(KERN_NOTICE "Unable to alloc dentry for ptmx node\n");
+ pr_err("Unable to alloc dentry for ptmx node\n");
goto out;
}
@@ -270,7 +272,7 @@ static int mknod_ptmx(struct super_block *sb)
*/
inode = new_inode(sb);
if (!inode) {
- printk(KERN_ERR "Unable to alloc inode for ptmx node\n");
+ pr_err("Unable to alloc inode for ptmx node\n");
dput(dentry);
goto out;
}
@@ -303,7 +305,7 @@ static void update_ptmx_mode(struct pts_fs_info *fsi)
#else
static inline void update_ptmx_mode(struct pts_fs_info *fsi)
{
- return;
+ return;
}
#endif
@@ -333,9 +335,11 @@ static int devpts_show_options(struct seq_file *seq, struct dentry *root)
struct pts_mount_opts *opts = &fsi->mount_opts;
if (opts->setuid)
- seq_printf(seq, ",uid=%u", from_kuid_munged(&init_user_ns, opts->uid));
+ seq_printf(seq, ",uid=%u",
+ from_kuid_munged(&init_user_ns, opts->uid));
if (opts->setgid)
- seq_printf(seq, ",gid=%u", from_kgid_munged(&init_user_ns, opts->gid));
+ seq_printf(seq, ",gid=%u",
+ from_kgid_munged(&init_user_ns, opts->gid));
seq_printf(seq, ",mode=%03o", opts->mode);
#ifdef CONFIG_DEVPTS_MULTIPLE_INSTANCES
seq_printf(seq, ",ptmxmode=%03o", opts->ptmxmode);
@@ -396,7 +400,7 @@ devpts_fill_super(struct super_block *s, void *data, int silent)
if (s->s_root)
return 0;
- printk(KERN_ERR "devpts: get root dentry failed\n");
+ pr_err("get root dentry failed\n");
fail:
return -ENOMEM;
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 6ca41e7a3fe3..e4c0fcfac102 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -1044,6 +1044,8 @@ int ext4_mb_init_group(struct super_block *sb, ext4_group_t group)
* allocating. If we are looking at the buddy cache we would
* have taken a reference using ext4_mb_load_buddy and that
* would have pinned buddy page to page cache.
+ * The call to ext4_mb_get_buddy_page_lock will mark the
+ * page accessed.
*/
ret = ext4_mb_get_buddy_page_lock(sb, group, &e4b);
if (ret || !EXT4_MB_GRP_NEED_INIT(this_grp)) {
@@ -1062,7 +1064,6 @@ int ext4_mb_init_group(struct super_block *sb, ext4_group_t group)
ret = -EIO;
goto err;
}
- mark_page_accessed(page);
if (e4b.bd_buddy_page == NULL) {
/*
@@ -1082,7 +1083,6 @@ int ext4_mb_init_group(struct super_block *sb, ext4_group_t group)
ret = -EIO;
goto err;
}
- mark_page_accessed(page);
err:
ext4_mb_put_buddy_page_lock(&e4b);
return ret;
@@ -1141,7 +1141,7 @@ ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
/* we could use find_or_create_page(), but it locks page
* what we'd like to avoid in fast path ... */
- page = find_get_page(inode->i_mapping, pnum);
+ page = find_get_page_flags(inode->i_mapping, pnum, FGP_ACCESSED);
if (page == NULL || !PageUptodate(page)) {
if (page)
/*
@@ -1176,15 +1176,16 @@ ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
ret = -EIO;
goto err;
}
+
+ /* Pages marked accessed already */
e4b->bd_bitmap_page = page;
e4b->bd_bitmap = page_address(page) + (poff * sb->s_blocksize);
- mark_page_accessed(page);
block++;
pnum = block / blocks_per_page;
poff = block % blocks_per_page;
- page = find_get_page(inode->i_mapping, pnum);
+ page = find_get_page_flags(inode->i_mapping, pnum, FGP_ACCESSED);
if (page == NULL || !PageUptodate(page)) {
if (page)
page_cache_release(page);
@@ -1209,9 +1210,10 @@ ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
ret = -EIO;
goto err;
}
+
+ /* Pages marked accessed already */
e4b->bd_buddy_page = page;
e4b->bd_buddy = page_address(page) + (poff * sb->s_blocksize);
- mark_page_accessed(page);
BUG_ON(e4b->bd_bitmap_page == NULL);
BUG_ON(e4b->bd_buddy_page == NULL);
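The mark_page_accessed() deletions above rely on find_get_page_flags() with FGP_ACCESSED, which folds the lookup and the LRU "accessed" hint into a single call. A sketch of the before/after pattern (illustrative, not from the patch; the function names are hypothetical):

#include <linux/pagemap.h>
#include <linux/swap.h>

static struct page *lookup_old_way(struct address_space *mapping, pgoff_t idx)
{
	struct page *page = find_get_page(mapping, idx);

	if (page)
		mark_page_accessed(page);	/* separate LRU hint */
	return page;
}

static struct page *lookup_new_way(struct address_space *mapping, pgoff_t idx)
{
	/* lookup and "accessed" hint combined */
	return find_get_page_flags(mapping, idx, FGP_ACCESSED);
}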
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index 4cb2743cb2e3..4a2dd079c4a6 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -433,7 +433,7 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
block_start = bh_offset(bh);
if (block_start >= len) {
/*
- * Comments copied from block_write_full_page_endio:
+ * Comments copied from block_write_full_page:
*
* The page straddles i_size. It must be zeroed out on
* each and every writepage invocation because it may
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index fe968c7bfc90..46bfce3879c2 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -69,7 +69,6 @@ repeat:
goto repeat;
}
out:
- mark_page_accessed(page);
return page;
}
@@ -137,13 +136,11 @@ int ra_meta_pages(struct f2fs_sb_info *sbi, int start, int nrpages, int type)
if (!page)
continue;
if (PageUptodate(page)) {
- mark_page_accessed(page);
f2fs_put_page(page, 1);
continue;
}
f2fs_submit_page_mbio(sbi, page, blk_addr, &fio);
- mark_page_accessed(page);
f2fs_put_page(page, 0);
}
out:
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 3d60d3d34ed2..83618fa43b7a 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -970,7 +970,6 @@ repeat:
goto repeat;
}
got_it:
- mark_page_accessed(page);
return page;
}
@@ -1025,7 +1024,6 @@ page_hit:
f2fs_put_page(page, 1);
return ERR_PTR(-EIO);
}
- mark_page_accessed(page);
return page;
}
diff --git a/fs/fat/cache.c b/fs/fat/cache.c
index 91ad9e1c9441..e26bc9a22ac9 100644
--- a/fs/fat/cache.c
+++ b/fs/fat/cache.c
@@ -303,6 +303,31 @@ static int fat_bmap_cluster(struct inode *inode, int cluster)
return dclus;
}
+static int fat_get_mapped_cluster(struct inode *inode, sector_t sector,
+ sector_t last_block,
+ unsigned long *mapped_blocks, sector_t *bmap)
+{
+ struct super_block *sb = inode->i_sb;
+ struct msdos_sb_info *sbi = MSDOS_SB(sb);
+ int cluster, offset;
+
+ cluster = sector >> (sbi->cluster_bits - sb->s_blocksize_bits);
+ offset = sector & (sbi->sec_per_clus - 1);
+ cluster = fat_bmap_cluster(inode, cluster);
+
+ if (cluster < 0)
+ return cluster;
+ else if (cluster) {
+ *bmap = fat_clus_to_blknr(sbi, cluster) + offset;
+ *mapped_blocks = sbi->sec_per_clus - offset;
+ if (*mapped_blocks > last_block - sector)
+ *mapped_blocks = last_block - sector;
+ }
+
+ return 0;
+}
+
int fat_bmap(struct inode *inode, sector_t sector, sector_t *phys,
unsigned long *mapped_blocks, int create)
{
@@ -311,7 +336,6 @@ int fat_bmap(struct inode *inode, sector_t sector, sector_t *phys,
const unsigned long blocksize = sb->s_blocksize;
const unsigned char blocksize_bits = sb->s_blocksize_bits;
sector_t last_block;
- int cluster, offset;
*phys = 0;
*mapped_blocks = 0;
@@ -329,25 +353,39 @@ int fat_bmap(struct inode *inode, sector_t sector, sector_t *phys,
return 0;
/*
- * ->mmu_private can access on only allocation path.
- * (caller must hold ->i_mutex)
+ * Both ->mmu_private and ->i_disksize can be accessed
+ * only on the allocation path (caller must hold ->i_mutex).
*/
- last_block = (MSDOS_I(inode)->mmu_private + (blocksize - 1))
+ last_block = (MSDOS_I(inode)->i_disksize + (blocksize - 1))
>> blocksize_bits;
if (sector >= last_block)
return 0;
}
- cluster = sector >> (sbi->cluster_bits - sb->s_blocksize_bits);
- offset = sector & (sbi->sec_per_clus - 1);
- cluster = fat_bmap_cluster(inode, cluster);
- if (cluster < 0)
- return cluster;
- else if (cluster) {
- *phys = fat_clus_to_blknr(sbi, cluster) + offset;
- *mapped_blocks = sbi->sec_per_clus - offset;
- if (*mapped_blocks > last_block - sector)
- *mapped_blocks = last_block - sector;
- }
- return 0;
+ return fat_get_mapped_cluster(inode, sector, last_block, mapped_blocks,
+ phys);
+}
+
+int fat_bmap2(struct inode *inode, sector_t sector,
+ unsigned long *mapped_blocks, struct buffer_head *bh_result,
+ int create, sector_t *bmap)
+{
+ struct super_block *sb = inode->i_sb;
+ sector_t last_block;
+ const unsigned long blocksize = sb->s_blocksize;
+ const unsigned char blocksize_bits = sb->s_blocksize_bits;
+
+ BUG_ON(create != 0);
+
+ *bmap = 0;
+ *mapped_blocks = 0;
+
+ last_block = (MSDOS_I(inode)->i_disksize + (blocksize - 1))
+ >> blocksize_bits;
+
+ if (sector >= last_block)
+ return 0;
+
+ return fat_get_mapped_cluster(inode, sector, last_block, mapped_blocks,
+ bmap);
}
diff --git a/fs/fat/fat.h b/fs/fat/fat.h
index 7c31f4bc74a9..13b7202bd651 100644
--- a/fs/fat/fat.h
+++ b/fs/fat/fat.h
@@ -52,7 +52,8 @@ struct fat_mount_options {
usefree:1, /* Use free_clusters for FAT32 */
tz_set:1, /* Filesystem timestamps' offset set */
rodir:1, /* allow ATTR_RO for directory */
- discard:1; /* Issue discard requests on deletions */
+ discard:1, /* Issue discard requests on deletions */
+ dos1xfloppy:1; /* Assume default BPB for DOS 1.x floppies */
};
#define FAT_HASH_BITS 8
@@ -118,7 +119,8 @@ struct msdos_inode_info {
unsigned int cache_valid_id;
/* NOTE: mmu_private is 64bits, so must hold ->i_mutex to access */
- loff_t mmu_private; /* physically allocated size */
+ loff_t mmu_private; /* physically allocated size (initialized) */
+ loff_t i_disksize; /* physically allocated size (uninitialized) */
int i_start; /* first cluster or 0 */
int i_logstart; /* logical first cluster */
@@ -289,6 +291,9 @@ extern int fat_get_cluster(struct inode *inode, int cluster,
int *fclus, int *dclus);
extern int fat_bmap(struct inode *inode, sector_t sector, sector_t *phys,
unsigned long *mapped_blocks, int create);
+extern int fat_bmap2(struct inode *inode, sector_t sector,
+ unsigned long *mapped_blocks,
+ struct buffer_head *bh_result, int create, sector_t *bmap);
/* fat/dir.c */
extern const struct file_operations fat_dir_operations;
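With fallocate support, mmu_private keeps tracking the initialized size while the new i_disksize tracks everything physically allocated, so mmu_private <= i_disksize and any gap is the fallocated-but-unwritten tail that fat_evict_inode() later releases. A hedged sketch of that invariant (hypothetical helper, not from the patch; assumes the fat.h context above):

static inline bool fat_has_fallocated_tail(struct inode *inode)
{
	/* true if clusters exist beyond the block-rounded initialized size */
	return MSDOS_I(inode)->i_disksize >
	       round_up(MSDOS_I(inode)->mmu_private,
			inode->i_sb->s_blocksize);
}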
diff --git a/fs/fat/file.c b/fs/fat/file.c
index 85f79a89e747..92e9e753b554 100644
--- a/fs/fat/file.c
+++ b/fs/fat/file.c
@@ -17,8 +17,12 @@
#include <linux/blkdev.h>
#include <linux/fsnotify.h>
#include <linux/security.h>
+#include <linux/falloc.h>
#include "fat.h"
+static long fat_fallocate(struct file *file, int mode,
+ loff_t offset, loff_t len);
+
static int fat_ioctl_get_attributes(struct inode *inode, u32 __user *user_attr)
{
u32 attr;
@@ -182,6 +186,7 @@ const struct file_operations fat_file_operations = {
#endif
.fsync = fat_file_fsync,
.splice_read = generic_file_splice_read,
+ .fallocate = fat_fallocate,
};
static int fat_cont_expand(struct inode *inode, loff_t size)
@@ -220,6 +225,75 @@ out:
return err;
}
+/*
+ * Preallocate space for a file. This implements FAT's fallocate file
+ * operation, which is called from the sys_fallocate system call. User
+ * space requests len bytes at offset. If FALLOC_FL_KEEP_SIZE is set,
+ * we just allocate clusters without zeroing them out. Otherwise we
+ * allocate and zero out clusters via an expanding truncate.
+ */
+static long fat_fallocate(struct file *file, int mode,
+ loff_t offset, loff_t len)
+{
+ int cluster;
+ int nr_cluster; /* Number of clusters to be allocated */
+ loff_t mm_bytes; /* Number of bytes to be allocated for file */
+ struct inode *inode = file->f_mapping->host;
+ struct super_block *sb = inode->i_sb;
+ struct msdos_sb_info *sbi = MSDOS_SB(sb);
+ int err = 0;
+
+ /* No support for hole punch or other fallocate flags. */
+ if (mode & ~FALLOC_FL_KEEP_SIZE)
+ return -EOPNOTSUPP;
+
+ /* No support for dir */
+ if (!S_ISREG(inode->i_mode))
+ return -EOPNOTSUPP;
+
+ mutex_lock(&inode->i_mutex);
+ if ((offset + len) <= MSDOS_I(inode)->i_disksize)
+ goto error;
+
+ err = inode_newsize_ok(inode, (len + offset));
+ if (err)
+ goto error;
+
+ if (mode & FALLOC_FL_KEEP_SIZE) {
+ /* First compute the number of clusters to be allocated */
+ mm_bytes = offset + len - round_up(MSDOS_I(inode)->i_disksize,
+ sbi->cluster_size);
+ nr_cluster = (mm_bytes + (sbi->cluster_size - 1)) >>
+ sbi->cluster_bits;
+
+ /* Start the allocation. We are not zeroing out the clusters */
+ while (nr_cluster-- > 0) {
+ err = fat_alloc_clusters(inode, &cluster, 1);
+ if (err) {
+ fat_msg(sb, KERN_ERR,
+ "fat_fallocate(): fat_alloc_clusters() error");
+ goto error;
+ }
+ err = fat_chain_add(inode, cluster, 1);
+ if (err) {
+ fat_free_clusters(inode, cluster);
+ goto error;
+ }
+ MSDOS_I(inode)->i_disksize += sbi->cluster_size;
+ }
+ } else {
+ /* This is just an expanding truncate */
+ err = fat_cont_expand(inode, (offset + len));
+ if (err)
+ fat_msg(sb, KERN_ERR,
+ "fat_fallocate(): fat_cont_expand() error");
+ }
+
+error:
+ mutex_unlock(&inode->i_mutex);
+ return err;
+}
+
/* Free all clusters after the skip'th cluster. */
static int fat_free(struct inode *inode, int skip)
{
@@ -300,8 +374,10 @@ void fat_truncate_blocks(struct inode *inode, loff_t offset)
* This protects against truncating a file bigger than it was then
* trying to write into the hole.
*/
- if (MSDOS_I(inode)->mmu_private > offset)
+ if (MSDOS_I(inode)->i_disksize > offset) {
MSDOS_I(inode)->mmu_private = offset;
+ MSDOS_I(inode)->i_disksize = offset;
+ }
nr_clusters = (offset + (cluster_size - 1)) >> sbi->cluster_bits;
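From userspace the two branches map directly onto the mode argument of fallocate(2): FALLOC_FL_KEEP_SIZE allocates clusters without growing i_size, while mode 0 goes through the zeroing, expanding truncate. A minimal userspace sketch (illustrative only; the path and size are placeholders):

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	int fd = open("/mnt/vfat/testfile", O_RDWR | O_CREAT, 0644);

	if (fd < 0)
		return 1;
	/* allocate 1 MiB of clusters without changing the file size */
	if (fallocate(fd, FALLOC_FL_KEEP_SIZE, 0, 1 << 20))
		perror("fallocate");
	close(fd);
	return 0;
}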
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 385cce464e82..e72afce295f3 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -35,9 +35,71 @@
#define CONFIG_FAT_DEFAULT_IOCHARSET ""
#endif
+#define KB_IN_SECTORS 2
+
+/*
+ * A deserialized copy of the on-disk structure laid out in struct
+ * fat_boot_sector.
+ */
+struct fat_bios_param_block {
+ u16 fat_sector_size;
+ u8 fat_sec_per_clus;
+ u16 fat_reserved;
+ u8 fat_fats;
+ u16 fat_dir_entries;
+ u16 fat_sectors;
+ u16 fat_fat_length;
+ u32 fat_total_sect;
+
+ u8 fat16_state;
+ u32 fat16_vol_id;
+
+ u32 fat32_length;
+ u32 fat32_root_cluster;
+ u16 fat32_info_sector;
+ u8 fat32_state;
+ u32 fat32_vol_id;
+};
+
static int fat_default_codepage = CONFIG_FAT_DEFAULT_CODEPAGE;
static char fat_default_iocharset[] = CONFIG_FAT_DEFAULT_IOCHARSET;
+static struct fat_floppy_defaults {
+ unsigned nr_sectors;
+ unsigned sec_per_clus;
+ unsigned dir_entries;
+ unsigned media;
+ unsigned fat_length;
+} floppy_defaults[] = {
+{
+ .nr_sectors = 160 * KB_IN_SECTORS,
+ .sec_per_clus = 1,
+ .dir_entries = 64,
+ .media = 0xFE,
+ .fat_length = 1,
+},
+{
+ .nr_sectors = 180 * KB_IN_SECTORS,
+ .sec_per_clus = 1,
+ .dir_entries = 64,
+ .media = 0xFC,
+ .fat_length = 2,
+},
+{
+ .nr_sectors = 320 * KB_IN_SECTORS,
+ .sec_per_clus = 2,
+ .dir_entries = 112,
+ .media = 0xFF,
+ .fat_length = 1,
+},
+{
+ .nr_sectors = 360 * KB_IN_SECTORS,
+ .sec_per_clus = 2,
+ .dir_entries = 112,
+ .media = 0xFD,
+ .fat_length = 2,
+},
+};
static int fat_add_cluster(struct inode *inode)
{
@@ -54,6 +116,25 @@ static int fat_add_cluster(struct inode *inode)
return err;
}
+static void check_fallocated_region(struct inode *inode, sector_t iblock,
+ unsigned long *max_blocks, struct buffer_head *bh_result)
+{
+ struct super_block *sb = inode->i_sb;
+ sector_t last_block, disk_block;
+ const unsigned long blocksize = sb->s_blocksize;
+ const unsigned char blocksize_bits = sb->s_blocksize_bits;
+
+ last_block = (MSDOS_I(inode)->mmu_private + (blocksize - 1))
+ >> blocksize_bits;
+ disk_block = (MSDOS_I(inode)->i_disksize + (blocksize - 1))
+ >> blocksize_bits;
+ if (iblock >= last_block && iblock <= disk_block) {
+ MSDOS_I(inode)->mmu_private += *max_blocks << blocksize_bits;
+ set_buffer_new(bh_result);
+ }
+}
+
static inline int __fat_get_block(struct inode *inode, sector_t iblock,
unsigned long *max_blocks,
struct buffer_head *bh_result, int create)
@@ -68,8 +149,11 @@ static inline int __fat_get_block(struct inode *inode, sector_t iblock,
if (err)
return err;
if (phys) {
- map_bh(bh_result, sb, phys);
*max_blocks = min(mapped_blocks, *max_blocks);
+ if (create)
+ check_fallocated_region(inode, iblock, max_blocks,
+ bh_result);
+ map_bh(bh_result, sb, phys);
return 0;
}
if (!create)
@@ -93,6 +177,7 @@ static inline int __fat_get_block(struct inode *inode, sector_t iblock,
*max_blocks = min(mapped_blocks, *max_blocks);
MSDOS_I(inode)->mmu_private += *max_blocks << sb->s_blocksize_bits;
+ MSDOS_I(inode)->i_disksize = MSDOS_I(inode)->mmu_private;
err = fat_bmap(inode, iblock, &phys, &mapped_blocks, create);
if (err)
@@ -207,6 +292,13 @@ static ssize_t fat_direct_IO(int rw, struct kiocb *iocb,
loff_t size = offset + count;
if (MSDOS_I(inode)->mmu_private < size)
return 0;
+
+ /*
+ * In case of a write into the fallocated region, return 0 and
+ * fall back to buffered write.
+ */
+ if (MSDOS_I(inode)->i_disksize > MSDOS_I(inode)->mmu_private)
+ return 0;
}
/*
@@ -220,13 +312,36 @@ static ssize_t fat_direct_IO(int rw, struct kiocb *iocb,
return ret;
}
+static int fat_get_block_bmap(struct inode *inode, sector_t iblock,
+ struct buffer_head *bh_result, int create)
+{
+ struct super_block *sb = inode->i_sb;
+ unsigned long max_blocks = bh_result->b_size >> inode->i_blkbits;
+ int err;
+ sector_t bmap;
+ unsigned long mapped_blocks;
+
+ err = fat_bmap2(inode, iblock, &mapped_blocks, bh_result, create,
+ &bmap);
+ if (err)
+ return err;
+
+ if (bmap) {
+ map_bh(bh_result, sb, bmap);
+ max_blocks = min(mapped_blocks, max_blocks);
+ }
+
+ bh_result->b_size = max_blocks << sb->s_blocksize_bits;
+ return 0;
+}
+
static sector_t _fat_bmap(struct address_space *mapping, sector_t block)
{
sector_t blocknr;
/* fat_get_cluster() assumes the requested blocknr isn't truncated. */
down_read(&MSDOS_I(mapping->host)->truncate_lock);
- blocknr = generic_block_bmap(mapping, block, fat_get_block);
+ blocknr = generic_block_bmap(mapping, block, fat_get_block_bmap);
up_read(&MSDOS_I(mapping->host)->truncate_lock);
return blocknr;
@@ -407,7 +522,6 @@ int fat_fill_inode(struct inode *inode, struct msdos_dir_entry *de)
error = fat_calc_dir_size(inode);
if (error < 0)
return error;
- MSDOS_I(inode)->mmu_private = inode->i_size;
set_nlink(inode, fat_subdirs(inode));
} else { /* not a directory */
@@ -422,8 +536,12 @@ int fat_fill_inode(struct inode *inode, struct msdos_dir_entry *de)
inode->i_op = &fat_file_inode_operations;
inode->i_fop = &fat_file_operations;
inode->i_mapping->a_ops = &fat_aops;
- MSDOS_I(inode)->mmu_private = inode->i_size;
}
+
+ MSDOS_I(inode)->mmu_private = inode->i_size;
+ MSDOS_I(inode)->i_disksize = round_up(inode->i_size,
+ inode->i_sb->s_blocksize);
+
if (de->attr & ATTR_SYS) {
if (sbi->options.sys_immutable)
inode->i_flags |= S_IMMUTABLE;
@@ -488,12 +606,34 @@ out:
EXPORT_SYMBOL_GPL(fat_build_inode);
+static int __fat_write_inode(struct inode *inode, int wait);
static void fat_evict_inode(struct inode *inode)
{
truncate_inode_pages_final(&inode->i_data);
if (!inode->i_nlink) {
inode->i_size = 0;
fat_truncate_blocks(inode, 0);
+ } else {
+ /* Release unwritten fallocated blocks on inode eviction. */
+ if (MSDOS_I(inode)->i_disksize >
+ round_up(MSDOS_I(inode)->mmu_private,
+ inode->i_sb->s_blocksize)) {
+ int err;
+ fat_truncate_blocks(inode, MSDOS_I(inode)->mmu_private);
+ /* Fallocate results in updating the i_start/i_logstart
+ * for the zero byte file. So, make it return to
+ * original state during evict and commit it to avoid
+ * any corruption on the next access to the cluster
+ * chain for the file.
+ */
+ err = __fat_write_inode(inode, inode_needs_sync(inode));
+ if (err) {
+ fat_msg(inode->i_sb, KERN_WARNING, "Failed to "
+ "update on disk inode for unused fallocated "
+ "blocks, inode could be corrupted. Please run "
+ "fsck");
+ }
+ }
}
invalidate_inode_buffers(inode);
clear_inode(inode);
@@ -853,6 +993,8 @@ static int fat_show_options(struct seq_file *m, struct dentry *root)
seq_puts(m, ",nfs=stale_rw");
if (opts->discard)
seq_puts(m, ",discard");
+ if (opts->dos1xfloppy)
+ seq_puts(m, ",dos1xfloppy");
return 0;
}
@@ -867,7 +1009,7 @@ enum {
Opt_uni_xl_no, Opt_uni_xl_yes, Opt_nonumtail_no, Opt_nonumtail_yes,
Opt_obsolete, Opt_flush, Opt_tz_utc, Opt_rodir, Opt_err_cont,
Opt_err_panic, Opt_err_ro, Opt_discard, Opt_nfs, Opt_time_offset,
- Opt_nfs_stale_rw, Opt_nfs_nostale_ro, Opt_err,
+ Opt_nfs_stale_rw, Opt_nfs_nostale_ro, Opt_err, Opt_dos1xfloppy,
};
static const match_table_t fat_tokens = {
@@ -900,6 +1042,7 @@ static const match_table_t fat_tokens = {
{Opt_nfs_stale_rw, "nfs"},
{Opt_nfs_stale_rw, "nfs=stale_rw"},
{Opt_nfs_nostale_ro, "nfs=nostale_ro"},
+ {Opt_dos1xfloppy, "dos1xfloppy"},
{Opt_obsolete, "conv=binary"},
{Opt_obsolete, "conv=text"},
{Opt_obsolete, "conv=auto"},
@@ -1102,6 +1245,9 @@ static int parse_options(struct super_block *sb, char *options, int is_vfat,
case Opt_nfs_nostale_ro:
opts->nfs = FAT_NFS_NOSTALE_RO;
break;
+ case Opt_dos1xfloppy:
+ opts->dos1xfloppy = 1;
+ break;
/* msdos specific */
case Opt_dots:
@@ -1225,6 +1371,7 @@ static int fat_read_root(struct inode *inode)
& ~((loff_t)sbi->cluster_size - 1)) >> 9;
MSDOS_I(inode)->i_logstart = 0;
MSDOS_I(inode)->mmu_private = inode->i_size;
+ MSDOS_I(inode)->i_disksize = inode->i_size;
fat_save_attrs(inode, ATTR_DIR);
inode->i_mtime.tv_sec = inode->i_atime.tv_sec = inode->i_ctime.tv_sec = 0;
@@ -1247,6 +1394,169 @@ static unsigned long calc_fat_clusters(struct super_block *sb)
return sbi->fat_length * sb->s_blocksize * 8 / sbi->fat_bits;
}
+static bool fat_bpb_is_zero(struct fat_boot_sector *b)
+{
+ if (get_unaligned_le16(&b->sector_size))
+ return false;
+ if (b->sec_per_clus)
+ return false;
+ if (b->reserved)
+ return false;
+ if (b->fats)
+ return false;
+ if (get_unaligned_le16(&b->dir_entries))
+ return false;
+ if (get_unaligned_le16(&b->sectors))
+ return false;
+ if (b->media)
+ return false;
+ if (b->fat_length)
+ return false;
+ if (b->secs_track)
+ return false;
+ if (b->heads)
+ return false;
+ return true;
+}
+
+static int fat_read_bpb(struct super_block *sb, struct fat_boot_sector *b,
+ int silent, struct fat_bios_param_block *bpb)
+{
+ int error = -EINVAL;
+
+ /* Read in BPB ... */
+ memset(bpb, 0, sizeof(*bpb));
+ bpb->fat_sector_size = get_unaligned_le16(&b->sector_size);
+ bpb->fat_sec_per_clus = b->sec_per_clus;
+ bpb->fat_reserved = le16_to_cpu(b->reserved);
+ bpb->fat_fats = b->fats;
+ bpb->fat_dir_entries = get_unaligned_le16(&b->dir_entries);
+ bpb->fat_sectors = get_unaligned_le16(&b->sectors);
+ bpb->fat_fat_length = le16_to_cpu(b->fat_length);
+ bpb->fat_total_sect = le32_to_cpu(b->total_sect);
+
+ bpb->fat16_state = b->fat16.state;
+ bpb->fat16_vol_id = get_unaligned_le32(b->fat16.vol_id);
+
+ bpb->fat32_length = le32_to_cpu(b->fat32.length);
+ bpb->fat32_root_cluster = le32_to_cpu(b->fat32.root_cluster);
+ bpb->fat32_info_sector = le16_to_cpu(b->fat32.info_sector);
+ bpb->fat32_state = b->fat32.state;
+ bpb->fat32_vol_id = get_unaligned_le32(b->fat32.vol_id);
+
+ /* Validate this looks like a FAT filesystem BPB */
+ if (!bpb->fat_reserved) {
+ if (!silent)
+ fat_msg(sb, KERN_ERR,
+ "bogus number of reserved sectors");
+ goto out;
+ }
+ if (!bpb->fat_fats) {
+ if (!silent)
+ fat_msg(sb, KERN_ERR, "bogus number of FAT structure");
+ goto out;
+ }
+
+ /*
+ * Earlier we checked here that b->secs_track and b->head are nonzero,
+ * but it turns out valid FAT filesystems can have zero there.
+ */
+
+ if (!fat_valid_media(b->media)) {
+ if (!silent)
+ fat_msg(sb, KERN_ERR, "invalid media value (0x%02x)",
+ (unsigned)b->media);
+ goto out;
+ }
+
+ if (!is_power_of_2(bpb->fat_sector_size)
+ || (bpb->fat_sector_size < 512)
+ || (bpb->fat_sector_size > 4096)) {
+ if (!silent)
+ fat_msg(sb, KERN_ERR, "bogus logical sector size %u",
+ (unsigned)bpb->fat_sector_size);
+ goto out;
+ }
+
+ if (!is_power_of_2(bpb->fat_sec_per_clus)) {
+ if (!silent)
+ fat_msg(sb, KERN_ERR, "bogus sectors per cluster %u",
+ (unsigned)bpb->fat_sec_per_clus);
+ goto out;
+ }
+
+ error = 0;
+
+out:
+ return error;
+}
+
+static int fat_read_static_bpb(struct super_block *sb,
+ struct fat_boot_sector *b, int silent,
+ struct fat_bios_param_block *bpb)
+{
+ static const char *notdos1x = "This doesn't look like a DOS 1.x volume";
+
+ struct fat_floppy_defaults *fdefaults = NULL;
+ int error = -EINVAL;
+ sector_t bd_sects;
+ unsigned i;
+
+ bd_sects = i_size_read(sb->s_bdev->bd_inode) / SECTOR_SIZE;
+
+ /* 16-bit DOS 1.x reliably wrote bootstrap short-jmp code */
+ if (b->ignored[0] != 0xeb || b->ignored[2] != 0x90) {
+ if (!silent)
+ fat_msg(sb, KERN_ERR,
+ "%s; no bootstrapping code", notdos1x);
+ goto out;
+ }
+
+ /*
+ * If any value in this region is non-zero, it isn't archaic
+ * DOS.
+ */
+ if (!fat_bpb_is_zero(b)) {
+ if (!silent)
+ fat_msg(sb, KERN_ERR,
+ "%s; DOS 2.x BPB is non-zero", notdos1x);
+ goto out;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(floppy_defaults); i++) {
+ if (floppy_defaults[i].nr_sectors == bd_sects) {
+ fdefaults = &floppy_defaults[i];
+ break;
+ }
+ }
+
+ if (fdefaults == NULL) {
+ if (!silent)
+ fat_msg(sb, KERN_WARNING,
+ "This looks like a DOS 1.x volume, but isn't a recognized floppy size (%llu sectors)",
+ (u64)bd_sects);
+ goto out;
+ }
+
+ if (!silent)
+ fat_msg(sb, KERN_INFO,
+ "This looks like a DOS 1.x volume; assuming default BPB values");
+
+ memset(bpb, 0, sizeof(*bpb));
+ bpb->fat_sector_size = SECTOR_SIZE;
+ bpb->fat_sec_per_clus = fdefaults->sec_per_clus;
+ bpb->fat_reserved = 1;
+ bpb->fat_fats = 2;
+ bpb->fat_dir_entries = fdefaults->dir_entries;
+ bpb->fat_sectors = fdefaults->nr_sectors;
+ bpb->fat_fat_length = fdefaults->fat_length;
+
+ error = 0;
+
+out:
+ return error;
+}
+
/*
* Read the super block of an MS-DOS FS.
*/
@@ -1256,12 +1566,11 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat,
struct inode *root_inode = NULL, *fat_inode = NULL;
struct inode *fsinfo_inode = NULL;
struct buffer_head *bh;
- struct fat_boot_sector *b;
+ struct fat_bios_param_block bpb;
struct msdos_sb_info *sbi;
u16 logical_sector_size;
u32 total_sectors, total_clusters, fat_clusters, rootdir_sectors;
int debug;
- unsigned int media;
long error;
char buf[50];
@@ -1298,100 +1607,71 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat,
goto out_fail;
}
- b = (struct fat_boot_sector *) bh->b_data;
- if (!b->reserved) {
- if (!silent)
- fat_msg(sb, KERN_ERR, "bogus number of reserved sectors");
- brelse(bh);
- goto out_invalid;
- }
- if (!b->fats) {
- if (!silent)
- fat_msg(sb, KERN_ERR, "bogus number of FAT structure");
- brelse(bh);
- goto out_invalid;
- }
-
- /*
- * Earlier we checked here that b->secs_track and b->head are nonzero,
- * but it turns out valid FAT filesystems can have zero there.
- */
+ error = fat_read_bpb(sb, (struct fat_boot_sector *)bh->b_data, silent,
+ &bpb);
+ if (error == -EINVAL && sbi->options.dos1xfloppy)
+ error = fat_read_static_bpb(sb,
+ (struct fat_boot_sector *)bh->b_data, silent, &bpb);
+ brelse(bh);
- media = b->media;
- if (!fat_valid_media(media)) {
- if (!silent)
- fat_msg(sb, KERN_ERR, "invalid media value (0x%02x)",
- media);
- brelse(bh);
- goto out_invalid;
- }
- logical_sector_size = get_unaligned_le16(&b->sector_size);
- if (!is_power_of_2(logical_sector_size)
- || (logical_sector_size < 512)
- || (logical_sector_size > 4096)) {
- if (!silent)
- fat_msg(sb, KERN_ERR, "bogus logical sector size %u",
- logical_sector_size);
- brelse(bh);
- goto out_invalid;
- }
- sbi->sec_per_clus = b->sec_per_clus;
- if (!is_power_of_2(sbi->sec_per_clus)) {
- if (!silent)
- fat_msg(sb, KERN_ERR, "bogus sectors per cluster %u",
- sbi->sec_per_clus);
- brelse(bh);
+ if (error == -EINVAL)
goto out_invalid;
- }
+ else if (error)
+ goto out_fail;
+
+ logical_sector_size = bpb.fat_sector_size;
+ sbi->sec_per_clus = bpb.fat_sec_per_clus;
if (logical_sector_size < sb->s_blocksize) {
fat_msg(sb, KERN_ERR, "logical sector size too small for device"
" (logical sector size = %u)", logical_sector_size);
- brelse(bh);
goto out_fail;
}
+
if (logical_sector_size > sb->s_blocksize) {
- brelse(bh);
+ struct buffer_head *bh_resize;
if (!sb_set_blocksize(sb, logical_sector_size)) {
fat_msg(sb, KERN_ERR, "unable to set blocksize %u",
logical_sector_size);
goto out_fail;
}
- bh = sb_bread(sb, 0);
- if (bh == NULL) {
+
+ /* Verify that the larger boot sector is fully readable */
+ bh_resize = sb_bread(sb, 0);
+ if (bh_resize == NULL) {
fat_msg(sb, KERN_ERR, "unable to read boot sector"
" (logical sector size = %lu)",
sb->s_blocksize);
goto out_fail;
}
- b = (struct fat_boot_sector *) bh->b_data;
+ brelse(bh_resize);
}
mutex_init(&sbi->s_lock);
sbi->cluster_size = sb->s_blocksize * sbi->sec_per_clus;
sbi->cluster_bits = ffs(sbi->cluster_size) - 1;
- sbi->fats = b->fats;
+ sbi->fats = bpb.fat_fats;
sbi->fat_bits = 0; /* Don't know yet */
- sbi->fat_start = le16_to_cpu(b->reserved);
- sbi->fat_length = le16_to_cpu(b->fat_length);
+ sbi->fat_start = bpb.fat_reserved;
+ sbi->fat_length = bpb.fat_fat_length;
sbi->root_cluster = 0;
sbi->free_clusters = -1; /* Don't know yet */
sbi->free_clus_valid = 0;
sbi->prev_free = FAT_START_ENT;
sb->s_maxbytes = 0xffffffff;
- if (!sbi->fat_length && b->fat32.length) {
+ if (!sbi->fat_length && bpb.fat32_length) {
struct fat_boot_fsinfo *fsinfo;
struct buffer_head *fsinfo_bh;
/* Must be FAT32 */
sbi->fat_bits = 32;
- sbi->fat_length = le32_to_cpu(b->fat32.length);
- sbi->root_cluster = le32_to_cpu(b->fat32.root_cluster);
+ sbi->fat_length = bpb.fat32_length;
+ sbi->root_cluster = bpb.fat32_root_cluster;
/* MC - if info_sector is 0, don't multiply by 0 */
- sbi->fsinfo_sector = le16_to_cpu(b->fat32.info_sector);
+ sbi->fsinfo_sector = bpb.fat32_info_sector;
if (sbi->fsinfo_sector == 0)
sbi->fsinfo_sector = 1;
@@ -1399,7 +1679,6 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat,
if (fsinfo_bh == NULL) {
fat_msg(sb, KERN_ERR, "bread failed, FSINFO block"
" (sector = %lu)", sbi->fsinfo_sector);
- brelse(bh);
goto out_fail;
}
@@ -1422,35 +1701,28 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat,
/* interpret volume ID as a little endian 32 bit integer */
if (sbi->fat_bits == 32)
- sbi->vol_id = (((u32)b->fat32.vol_id[0]) |
- ((u32)b->fat32.vol_id[1] << 8) |
- ((u32)b->fat32.vol_id[2] << 16) |
- ((u32)b->fat32.vol_id[3] << 24));
+ sbi->vol_id = bpb.fat32_vol_id;
else /* fat 16 or 12 */
- sbi->vol_id = (((u32)b->fat16.vol_id[0]) |
- ((u32)b->fat16.vol_id[1] << 8) |
- ((u32)b->fat16.vol_id[2] << 16) |
- ((u32)b->fat16.vol_id[3] << 24));
+ sbi->vol_id = bpb.fat16_vol_id;
sbi->dir_per_block = sb->s_blocksize / sizeof(struct msdos_dir_entry);
sbi->dir_per_block_bits = ffs(sbi->dir_per_block) - 1;
sbi->dir_start = sbi->fat_start + sbi->fats * sbi->fat_length;
- sbi->dir_entries = get_unaligned_le16(&b->dir_entries);
+ sbi->dir_entries = bpb.fat_dir_entries;
if (sbi->dir_entries & (sbi->dir_per_block - 1)) {
if (!silent)
fat_msg(sb, KERN_ERR, "bogus directory-entries per block"
" (%u)", sbi->dir_entries);
- brelse(bh);
goto out_invalid;
}
rootdir_sectors = sbi->dir_entries
* sizeof(struct msdos_dir_entry) / sb->s_blocksize;
sbi->data_start = sbi->dir_start + rootdir_sectors;
- total_sectors = get_unaligned_le16(&b->sectors);
+ total_sectors = bpb.fat_sectors;
if (total_sectors == 0)
- total_sectors = le32_to_cpu(b->total_sect);
+ total_sectors = bpb.fat_total_sect;
total_clusters = (total_sectors - sbi->data_start) / sbi->sec_per_clus;
@@ -1459,9 +1731,9 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat,
/* some OSes set FAT_STATE_DIRTY and clean it on unmount. */
if (sbi->fat_bits == 32)
- sbi->dirty = b->fat32.state & FAT_STATE_DIRTY;
+ sbi->dirty = bpb.fat32_state & FAT_STATE_DIRTY;
else /* fat 16 or 12 */
- sbi->dirty = b->fat16.state & FAT_STATE_DIRTY;
+ sbi->dirty = bpb.fat16_state & FAT_STATE_DIRTY;
/* check that FAT table does not overflow */
fat_clusters = calc_fat_clusters(sb);
@@ -1470,7 +1742,6 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat,
if (!silent)
fat_msg(sb, KERN_ERR, "count of clusters too big (%u)",
total_clusters);
- brelse(bh);
goto out_invalid;
}
@@ -1483,8 +1754,6 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat,
if (sbi->prev_free < FAT_START_ENT)
sbi->prev_free = FAT_START_ENT;
- brelse(bh);
-
/* set up enough so that it can read an inode */
fat_hash_init(sb);
dir_hash_init(sb);
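The DOS 1.x fallback is opt-in: fat_read_static_bpb() only runs when normal BPB probing fails and the new dos1xfloppy option was given. A minimal sketch of opting in via mount(2) (illustrative; the device and mount point are placeholders):

#include <sys/mount.h>

static int mount_dos1x_floppy(void)
{
	return mount("/dev/fd0", "/mnt/floppy", "vfat", 0, "dos1xfloppy");
}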
diff --git a/fs/fscache/cache.c b/fs/fscache/cache.c
index f7cff367db7f..56cce7fdd39e 100644
--- a/fs/fscache/cache.c
+++ b/fs/fscache/cache.c
@@ -280,15 +280,15 @@ int fscache_add_cache(struct fscache_cache *cache,
spin_unlock(&fscache_fsdef_index.lock);
up_write(&fscache_addremove_sem);
- printk(KERN_NOTICE "FS-Cache: Cache \"%s\" added (type %s)\n",
- cache->tag->name, cache->ops->name);
+ pr_notice("Cache \"%s\" added (type %s)\n",
+ cache->tag->name, cache->ops->name);
kobject_uevent(cache->kobj, KOBJ_ADD);
_leave(" = 0 [%s]", cache->identifier);
return 0;
tag_in_use:
- printk(KERN_ERR "FS-Cache: Cache tag '%s' already in use\n", tagname);
+ pr_err("Cache tag '%s' already in use\n", tagname);
__fscache_release_cache_tag(tag);
_leave(" = -EXIST");
return -EEXIST;
@@ -317,8 +317,7 @@ EXPORT_SYMBOL(fscache_add_cache);
void fscache_io_error(struct fscache_cache *cache)
{
if (!test_and_set_bit(FSCACHE_IOERROR, &cache->flags))
- printk(KERN_ERR "FS-Cache:"
- " Cache '%s' stopped due to I/O error\n",
+ pr_err("Cache '%s' stopped due to I/O error\n",
cache->ops->name);
}
EXPORT_SYMBOL(fscache_io_error);
@@ -369,8 +368,8 @@ void fscache_withdraw_cache(struct fscache_cache *cache)
_enter("");
- printk(KERN_NOTICE "FS-Cache: Withdrawing cache \"%s\"\n",
- cache->tag->name);
+ pr_notice("Withdrawing cache \"%s\"\n",
+ cache->tag->name);
/* make the cache unavailable for cookie acquisition */
if (test_and_set_bit(FSCACHE_CACHE_WITHDRAWN, &cache->flags))
diff --git a/fs/fscache/cookie.c b/fs/fscache/cookie.c
index 29d7feb62cf7..aec01be91b0a 100644
--- a/fs/fscache/cookie.c
+++ b/fs/fscache/cookie.c
@@ -519,7 +519,7 @@ void __fscache_disable_cookie(struct fscache_cookie *cookie, bool invalidate)
ASSERTCMP(atomic_read(&cookie->n_active), >, 0);
if (atomic_read(&cookie->n_children) != 0) {
- printk(KERN_ERR "FS-Cache: Cookie '%s' still has children\n",
+ pr_err("Cookie '%s' still has children\n",
cookie->def->name);
BUG();
}
diff --git a/fs/fscache/histogram.c b/fs/fscache/histogram.c
index bad496748a59..7d637e2335fd 100644
--- a/fs/fscache/histogram.c
+++ b/fs/fscache/histogram.c
@@ -31,12 +31,10 @@ static int fscache_histogram_show(struct seq_file *m, void *v)
switch ((unsigned long) v) {
case 1:
- seq_puts(m, "JIFS SECS OBJ INST OP RUNS OBJ RUNS "
- " RETRV DLY RETRIEVLS\n");
+ seq_puts(m, "JIFS SECS OBJ INST OP RUNS OBJ RUNS RETRV DLY RETRIEVLS\n");
return 0;
case 2:
- seq_puts(m, "===== ===== ========= ========= ========="
- " ========= =========\n");
+ seq_puts(m, "===== ===== ========= ========= ========= ========= =========\n");
return 0;
default:
index = (unsigned long) v - 3;
diff --git a/fs/fscache/internal.h b/fs/fscache/internal.h
index 4226f6680b06..bc6c08fcfddd 100644
--- a/fs/fscache/internal.h
+++ b/fs/fscache/internal.h
@@ -22,6 +22,12 @@
*
*/
+#ifdef pr_fmt
+#undef pr_fmt
+#endif
+
+#define pr_fmt(fmt) "FS-Cache: " fmt
+
#include <linux/fscache-cache.h>
#include <linux/sched.h>
@@ -413,8 +419,8 @@ do { \
#define ASSERT(X) \
do { \
if (unlikely(!(X))) { \
- printk(KERN_ERR "\n"); \
- printk(KERN_ERR "FS-Cache: Assertion failed\n"); \
+ pr_err("\n"); \
+ pr_err("Assertion failed\n"); \
BUG(); \
} \
} while (0)
@@ -422,9 +428,9 @@ do { \
#define ASSERTCMP(X, OP, Y) \
do { \
if (unlikely(!((X) OP (Y)))) { \
- printk(KERN_ERR "\n"); \
- printk(KERN_ERR "FS-Cache: Assertion failed\n"); \
- printk(KERN_ERR "%lx " #OP " %lx is false\n", \
+ pr_err("\n"); \
+ pr_err("Assertion failed\n"); \
+ pr_err("%lx " #OP " %lx is false\n", \
(unsigned long)(X), (unsigned long)(Y)); \
BUG(); \
} \
@@ -433,8 +439,8 @@ do { \
#define ASSERTIF(C, X) \
do { \
if (unlikely((C) && !(X))) { \
- printk(KERN_ERR "\n"); \
- printk(KERN_ERR "FS-Cache: Assertion failed\n"); \
+ pr_err("\n"); \
+ pr_err("Assertion failed\n"); \
BUG(); \
} \
} while (0)
@@ -442,9 +448,9 @@ do { \
#define ASSERTIFCMP(C, X, OP, Y) \
do { \
if (unlikely((C) && !((X) OP (Y)))) { \
- printk(KERN_ERR "\n"); \
- printk(KERN_ERR "FS-Cache: Assertion failed\n"); \
- printk(KERN_ERR "%lx " #OP " %lx is false\n", \
+ pr_err("\n"); \
+ pr_err("Assertion failed\n"); \
+ pr_err("%lx " #OP " %lx is false\n", \
(unsigned long)(X), (unsigned long)(Y)); \
BUG(); \
} \
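The fs/fscache conversions in this series all lean on the pr_fmt() hook defined in internal.h above: every pr_*() helper expands to printk(pr_fmt(fmt), ...), so one per-file definition replaces the hand-written "FS-Cache: " prefixes. A simplified sketch of the mechanism (the real macros live in include/linux/printk.h):

/* Simplified; see include/linux/printk.h for the real definitions. */
#ifndef pr_fmt
#define pr_fmt(fmt) fmt				/* default: no prefix */
#endif

#define pr_err(fmt, ...) \
	printk(KERN_ERR pr_fmt(fmt), ##__VA_ARGS__)
#define pr_notice(fmt, ...) \
	printk(KERN_NOTICE pr_fmt(fmt), ##__VA_ARGS__)

/* With internal.h's definition in effect,
 *	pr_notice("Loaded\n");
 * compiles to
 *	printk(KERN_NOTICE "FS-Cache: " "Loaded\n");
 */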
diff --git a/fs/fscache/main.c b/fs/fscache/main.c
index 7c27907e650c..acd4bf1fc277 100644
--- a/fs/fscache/main.c
+++ b/fs/fscache/main.c
@@ -146,8 +146,7 @@ static int __init fscache_init(void)
0,
fscache_cookie_init_once);
if (!fscache_cookie_jar) {
- printk(KERN_NOTICE
- "FS-Cache: Failed to allocate a cookie jar\n");
+ pr_notice("Failed to allocate a cookie jar\n");
ret = -ENOMEM;
goto error_cookie_jar;
}
@@ -156,7 +155,7 @@ static int __init fscache_init(void)
if (!fscache_root)
goto error_kobj;
- printk(KERN_NOTICE "FS-Cache: Loaded\n");
+ pr_notice("Loaded\n");
return 0;
error_kobj:
@@ -192,7 +191,7 @@ static void __exit fscache_exit(void)
fscache_proc_cleanup();
destroy_workqueue(fscache_op_wq);
destroy_workqueue(fscache_object_wq);
- printk(KERN_NOTICE "FS-Cache: Unloaded\n");
+ pr_notice("Unloaded\n");
}
module_exit(fscache_exit);
diff --git a/fs/fscache/netfs.c b/fs/fscache/netfs.c
index 989f39401547..6d941f56faf4 100644
--- a/fs/fscache/netfs.c
+++ b/fs/fscache/netfs.c
@@ -65,8 +65,7 @@ int __fscache_register_netfs(struct fscache_netfs *netfs)
list_add(&netfs->link, &fscache_netfs_list);
ret = 0;
- printk(KERN_NOTICE "FS-Cache: Netfs '%s' registered for caching\n",
- netfs->name);
+ pr_notice("Netfs '%s' registered for caching\n", netfs->name);
already_registered:
up_write(&fscache_addremove_sem);
@@ -97,8 +96,8 @@ void __fscache_unregister_netfs(struct fscache_netfs *netfs)
up_write(&fscache_addremove_sem);
- printk(KERN_NOTICE "FS-Cache: Netfs '%s' unregistered from caching\n",
- netfs->name);
+ pr_notice("Netfs '%s' unregistered from caching\n",
+ netfs->name);
_leave("");
}
diff --git a/fs/fscache/object-list.c b/fs/fscache/object-list.c
index b5ebc2d7d80d..b8179ca6bf9d 100644
--- a/fs/fscache/object-list.c
+++ b/fs/fscache/object-list.c
@@ -285,20 +285,20 @@ static int fscache_objlist_show(struct seq_file *m, void *v)
fscache_unuse_cookie(obj);
if (keylen > 0 || auxlen > 0) {
- seq_printf(m, " ");
+ seq_puts(m, " ");
for (p = buf; keylen > 0; keylen--)
seq_printf(m, "%02x", *p++);
if (auxlen > 0) {
if (config & FSCACHE_OBJLIST_CONFIG_KEY)
- seq_printf(m, ", ");
+ seq_puts(m, ", ");
for (; auxlen > 0; auxlen--)
seq_printf(m, "%02x", *p++);
}
}
- seq_printf(m, "\n");
+ seq_puts(m, "\n");
} else {
- seq_printf(m, "<no_netfs>\n");
+ seq_puts(m, "<no_netfs>\n");
}
return 0;
}
diff --git a/fs/fscache/operation.c b/fs/fscache/operation.c
index 318071aca217..e7b87a0e5185 100644
--- a/fs/fscache/operation.c
+++ b/fs/fscache/operation.c
@@ -51,8 +51,7 @@ void fscache_enqueue_operation(struct fscache_operation *op)
_debug("queue for caller's attention");
break;
default:
- printk(KERN_ERR "FS-Cache: Unexpected op type %lx",
- op->flags);
+ pr_err("Unexpected op type %lx", op->flags);
BUG();
break;
}
diff --git a/fs/fscache/page.c b/fs/fscache/page.c
index 7f5c658af755..ed70714503fa 100644
--- a/fs/fscache/page.c
+++ b/fs/fscache/page.c
@@ -1108,10 +1108,8 @@ void fscache_mark_page_cached(struct fscache_retrieval *op, struct page *page)
static bool once_only;
if (!once_only) {
once_only = true;
- printk(KERN_WARNING "FS-Cache:"
- " Cookie type %s marked page %lx"
- " multiple times\n",
- cookie->def->name, page->index);
+ pr_warn("Cookie type %s marked page %lx multiple times\n",
+ cookie->def->name, page->index);
}
}
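The once_only flag in fscache_mark_page_cached() above open-codes what the printk_once() family already provides; for comparison, a simplified sketch of pr_warn_once() as defined via printk_once() in include/linux/printk.h:

#define pr_warn_once(fmt, ...)			\
({						\
	static bool __print_once;		\
						\
	if (!__print_once) {			\
		__print_once = true;		\
		pr_warn(fmt, ##__VA_ARGS__);	\
	}					\
})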
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index aac71ce373e4..098f97bdcf1b 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -1614,7 +1614,7 @@ out_finish:
static void fuse_retrieve_end(struct fuse_conn *fc, struct fuse_req *req)
{
- release_pages(req->pages, req->num_pages, 0);
+ release_pages(req->pages, req->num_pages, false);
}
static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode,
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 9828cf227f8a..6e16dad13e9b 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -1088,8 +1088,6 @@ static ssize_t fuse_fill_write_pages(struct fuse_req *req,
tmp = iov_iter_copy_from_user_atomic(page, ii, offset, bytes);
flush_dcache_page(page);
- mark_page_accessed(page);
-
if (!tmp) {
unlock_page(page);
page_cache_release(page);
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index ac3295dfa409..805b37fed638 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -577,7 +577,6 @@ int gfs2_internal_read(struct gfs2_inode *ip, char *buf, loff_t *pos,
p = kmap_atomic(page);
memcpy(buf + copied, p + offset, amt);
kunmap_atomic(p);
- mark_page_accessed(page);
page_cache_release(page);
copied += amt;
index++;
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
index 2cf09b63a6b4..b984a6e190bc 100644
--- a/fs/gfs2/meta_io.c
+++ b/fs/gfs2/meta_io.c
@@ -136,7 +136,8 @@ struct buffer_head *gfs2_getbuf(struct gfs2_glock *gl, u64 blkno, int create)
yield();
}
} else {
- page = find_lock_page(mapping, index);
+ page = find_get_page_flags(mapping, index,
+ FGP_LOCK|FGP_ACCESSED);
if (!page)
return NULL;
}
@@ -153,7 +154,6 @@ struct buffer_head *gfs2_getbuf(struct gfs2_glock *gl, u64 blkno, int create)
map_bh(bh, sdp->sd_vfs, blkno);
unlock_page(page);
- mark_page_accessed(page);
page_cache_release(page);
return bh;
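The gfs2_getbuf() hunk above, like the mark_page_accessed() deletions in fs/fuse/file.c and fs/gfs2/aops.c, appears to come from folding the LRU "accessed" update into page-cache lookup and allocation, so callers no longer touch the page separately. A hedged before/after sketch (function names here are illustrative):

/* Before: lookup, then a separate LRU touch. */
static struct page *lookup_old(struct address_space *mapping, pgoff_t index)
{
	struct page *page = find_lock_page(mapping, index);

	if (page)
		mark_page_accessed(page);
	return page;
}

/* After: one call; FGP_LOCK returns the page locked and
 * FGP_ACCESSED performs the LRU touch inside the lookup itself. */
static struct page *lookup_new(struct address_space *mapping, pgoff_t index)
{
	return find_get_page_flags(mapping, index, FGP_LOCK | FGP_ACCESSED);
}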
diff --git a/fs/hfsplus/attributes.c b/fs/hfsplus/attributes.c
index caf89a7be0a1..e5b221de7de6 100644
--- a/fs/hfsplus/attributes.c
+++ b/fs/hfsplus/attributes.c
@@ -54,14 +54,11 @@ int hfsplus_attr_build_key(struct super_block *sb, hfsplus_btree_key *key,
memset(key, 0, sizeof(struct hfsplus_attr_key));
key->attr.cnid = cpu_to_be32(cnid);
if (name) {
- len = strlen(name);
- if (len > HFSPLUS_ATTR_MAX_STRLEN) {
- pr_err("invalid xattr name's length\n");
- return -EINVAL;
- }
- hfsplus_asc2uni(sb,
+ int res = hfsplus_asc2uni(sb,
(struct hfsplus_unistr *)&key->attr.key_name,
- HFSPLUS_ATTR_MAX_STRLEN, name, len);
+ HFSPLUS_ATTR_MAX_STRLEN, name, strlen(name));
+ if (res)
+ return res;
len = be16_to_cpu(key->attr.key_name.length);
} else {
key->attr.key_name.length = 0;
@@ -82,31 +79,6 @@ int hfsplus_attr_build_key(struct super_block *sb, hfsplus_btree_key *key,
return 0;
}
-void hfsplus_attr_build_key_uni(hfsplus_btree_key *key,
- u32 cnid,
- struct hfsplus_attr_unistr *name)
-{
- int ustrlen;
-
- memset(key, 0, sizeof(struct hfsplus_attr_key));
- ustrlen = be16_to_cpu(name->length);
- key->attr.cnid = cpu_to_be32(cnid);
- key->attr.key_name.length = cpu_to_be16(ustrlen);
- ustrlen *= 2;
- memcpy(key->attr.key_name.unicode, name->unicode, ustrlen);
-
- /* The length of the key, as stored in key_len field, does not include
- * the size of the key_len field itself.
- * So, offsetof(hfsplus_attr_key, key_name) is a trick because
- * it takes into consideration key_len field (__be16) of
- * hfsplus_attr_key structure instead of length field (__be16) of
- * hfsplus_attr_unistr structure.
- */
- key->key_len =
- cpu_to_be16(offsetof(struct hfsplus_attr_key, key_name) +
- ustrlen);
-}
-
hfsplus_attr_entry *hfsplus_alloc_attr_entry(void)
{
return kmem_cache_alloc(hfsplus_attr_tree_cachep, GFP_KERNEL);
diff --git a/fs/hfsplus/catalog.c b/fs/hfsplus/catalog.c
index 32602c667b4a..7892e6fddb66 100644
--- a/fs/hfsplus/catalog.c
+++ b/fs/hfsplus/catalog.c
@@ -38,21 +38,30 @@ int hfsplus_cat_bin_cmp_key(const hfsplus_btree_key *k1,
return hfsplus_strcmp(&k1->cat.name, &k2->cat.name);
}
-void hfsplus_cat_build_key(struct super_block *sb, hfsplus_btree_key *key,
- u32 parent, struct qstr *str)
+/* Generates key for catalog file/folder records. */
+int hfsplus_cat_build_key(struct super_block *sb,
+ hfsplus_btree_key *key, u32 parent, struct qstr *str)
{
- int len;
+ int len, err;
key->cat.parent = cpu_to_be32(parent);
- if (str) {
- hfsplus_asc2uni(sb, &key->cat.name, HFSPLUS_MAX_STRLEN,
- str->name, str->len);
- len = be16_to_cpu(key->cat.name.length);
- } else {
- key->cat.name.length = 0;
- len = 0;
- }
+ err = hfsplus_asc2uni(sb, &key->cat.name, HFSPLUS_MAX_STRLEN,
+ str->name, str->len);
+ if (unlikely(err < 0))
+ return err;
+
+ len = be16_to_cpu(key->cat.name.length);
key->key_len = cpu_to_be16(6 + 2 * len);
+ return 0;
+}
+
+/* Generates key for catalog thread record. */
+void hfsplus_cat_build_key_with_cnid(struct super_block *sb,
+ hfsplus_btree_key *key, u32 parent)
+{
+ key->cat.parent = cpu_to_be32(parent);
+ key->cat.name.length = 0;
+ key->key_len = cpu_to_be16(6);
}
static void hfsplus_cat_build_key_uni(hfsplus_btree_key *key, u32 parent,
@@ -167,11 +176,16 @@ static int hfsplus_fill_cat_thread(struct super_block *sb,
hfsplus_cat_entry *entry, int type,
u32 parentid, struct qstr *str)
{
+ int err;
+
entry->type = cpu_to_be16(type);
entry->thread.reserved = 0;
entry->thread.parentID = cpu_to_be32(parentid);
- hfsplus_asc2uni(sb, &entry->thread.nodeName, HFSPLUS_MAX_STRLEN,
+ err = hfsplus_asc2uni(sb, &entry->thread.nodeName, HFSPLUS_MAX_STRLEN,
str->name, str->len);
+ if (unlikely(err < 0))
+ return err;
+
return 10 + be16_to_cpu(entry->thread.nodeName.length) * 2;
}
@@ -183,7 +197,7 @@ int hfsplus_find_cat(struct super_block *sb, u32 cnid,
int err;
u16 type;
- hfsplus_cat_build_key(sb, fd->search_key, cnid, NULL);
+ hfsplus_cat_build_key_with_cnid(sb, fd->search_key, cnid);
err = hfs_brec_read(fd, &tmp, sizeof(hfsplus_cat_entry));
if (err)
return err;
@@ -250,11 +264,16 @@ int hfsplus_create_cat(u32 cnid, struct inode *dir,
if (err)
return err;
- hfsplus_cat_build_key(sb, fd.search_key, cnid, NULL);
+ hfsplus_cat_build_key_with_cnid(sb, fd.search_key, cnid);
entry_size = hfsplus_fill_cat_thread(sb, &entry,
S_ISDIR(inode->i_mode) ?
HFSPLUS_FOLDER_THREAD : HFSPLUS_FILE_THREAD,
dir->i_ino, str);
+ if (unlikely(entry_size < 0)) {
+ err = entry_size;
+ goto err2;
+ }
+
err = hfs_brec_find(&fd, hfs_find_rec_by_key);
if (err != -ENOENT) {
if (!err)
@@ -265,7 +284,10 @@ int hfsplus_create_cat(u32 cnid, struct inode *dir,
if (err)
goto err2;
- hfsplus_cat_build_key(sb, fd.search_key, dir->i_ino, str);
+ err = hfsplus_cat_build_key(sb, fd.search_key, dir->i_ino, str);
+ if (unlikely(err))
+ goto err1;
+
entry_size = hfsplus_cat_build_record(&entry, cnid, inode);
err = hfs_brec_find(&fd, hfs_find_rec_by_key);
if (err != -ENOENT) {
@@ -288,7 +310,7 @@ int hfsplus_create_cat(u32 cnid, struct inode *dir,
return 0;
err1:
- hfsplus_cat_build_key(sb, fd.search_key, cnid, NULL);
+ hfsplus_cat_build_key_with_cnid(sb, fd.search_key, cnid);
if (!hfs_brec_find(&fd, hfs_find_rec_by_key))
hfs_brec_remove(&fd);
err2:
@@ -313,7 +335,7 @@ int hfsplus_delete_cat(u32 cnid, struct inode *dir, struct qstr *str)
if (!str) {
int len;
- hfsplus_cat_build_key(sb, fd.search_key, cnid, NULL);
+ hfsplus_cat_build_key_with_cnid(sb, fd.search_key, cnid);
err = hfs_brec_find(&fd, hfs_find_rec_by_key);
if (err)
goto out;
@@ -329,7 +351,9 @@ int hfsplus_delete_cat(u32 cnid, struct inode *dir, struct qstr *str)
off + 2, len);
fd.search_key->key_len = cpu_to_be16(6 + len);
-	} else
-		hfsplus_cat_build_key(sb, fd.search_key, dir->i_ino, str);
+	} else {
+		err = hfsplus_cat_build_key(sb, fd.search_key, dir->i_ino, str);
+		if (unlikely(err))
+			goto out;
+	}
err = hfs_brec_find(&fd, hfs_find_rec_by_key);
if (err)
@@ -360,7 +384,7 @@ int hfsplus_delete_cat(u32 cnid, struct inode *dir, struct qstr *str)
if (err)
goto out;
- hfsplus_cat_build_key(sb, fd.search_key, cnid, NULL);
+ hfsplus_cat_build_key_with_cnid(sb, fd.search_key, cnid);
err = hfs_brec_find(&fd, hfs_find_rec_by_key);
if (err)
goto out;
@@ -405,7 +429,11 @@ int hfsplus_rename_cat(u32 cnid,
dst_fd = src_fd;
/* find the old dir entry and read the data */
- hfsplus_cat_build_key(sb, src_fd.search_key, src_dir->i_ino, src_name);
+ err = hfsplus_cat_build_key(sb, src_fd.search_key,
+ src_dir->i_ino, src_name);
+ if (unlikely(err))
+ goto out;
+
err = hfs_brec_find(&src_fd, hfs_find_rec_by_key);
if (err)
goto out;
@@ -419,7 +447,11 @@ int hfsplus_rename_cat(u32 cnid,
type = be16_to_cpu(entry.type);
/* create new dir entry with the data from the old entry */
- hfsplus_cat_build_key(sb, dst_fd.search_key, dst_dir->i_ino, dst_name);
+ err = hfsplus_cat_build_key(sb, dst_fd.search_key,
+ dst_dir->i_ino, dst_name);
+ if (unlikely(err))
+ goto out;
+
err = hfs_brec_find(&dst_fd, hfs_find_rec_by_key);
if (err != -ENOENT) {
if (!err)
@@ -436,7 +468,11 @@ int hfsplus_rename_cat(u32 cnid,
dst_dir->i_mtime = dst_dir->i_ctime = CURRENT_TIME_SEC;
/* finally remove the old entry */
- hfsplus_cat_build_key(sb, src_fd.search_key, src_dir->i_ino, src_name);
+ err = hfsplus_cat_build_key(sb, src_fd.search_key,
+ src_dir->i_ino, src_name);
+ if (unlikely(err))
+ goto out;
+
err = hfs_brec_find(&src_fd, hfs_find_rec_by_key);
if (err)
goto out;
@@ -449,7 +485,7 @@ int hfsplus_rename_cat(u32 cnid,
src_dir->i_mtime = src_dir->i_ctime = CURRENT_TIME_SEC;
/* remove old thread entry */
- hfsplus_cat_build_key(sb, src_fd.search_key, cnid, NULL);
+ hfsplus_cat_build_key_with_cnid(sb, src_fd.search_key, cnid);
err = hfs_brec_find(&src_fd, hfs_find_rec_by_key);
if (err)
goto out;
@@ -459,9 +495,14 @@ int hfsplus_rename_cat(u32 cnid,
goto out;
/* create new thread entry */
- hfsplus_cat_build_key(sb, dst_fd.search_key, cnid, NULL);
+ hfsplus_cat_build_key_with_cnid(sb, dst_fd.search_key, cnid);
entry_size = hfsplus_fill_cat_thread(sb, &entry, type,
dst_dir->i_ino, dst_name);
+ if (unlikely(entry_size < 0)) {
+ err = entry_size;
+ goto out;
+ }
+
err = hfs_brec_find(&dst_fd, hfs_find_rec_by_key);
if (err != -ENOENT) {
if (!err)
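Both builders above share the same key-length arithmetic: a catalog key is a 4-byte parent CNID plus a 2-byte name-length field plus 2 bytes per UTF-16 name unit. Spelled out as a hypothetical helper (not part of the patch):

static inline __be16 hfsplus_cat_key_len(int name_len)
{
	/* 4 (parent cnid) + 2 (name length field) + 2 * name_len;
	 * an empty name (thread records) gives the constant 6. */
	return cpu_to_be16(6 + 2 * name_len);
}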
diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c
index bdec66522de3..435bea231cc6 100644
--- a/fs/hfsplus/dir.c
+++ b/fs/hfsplus/dir.c
@@ -12,6 +12,7 @@
#include <linux/fs.h>
#include <linux/slab.h>
#include <linux/random.h>
+#include <linux/nls.h>
#include "hfsplus_fs.h"
#include "hfsplus_raw.h"
@@ -43,7 +44,10 @@ static struct dentry *hfsplus_lookup(struct inode *dir, struct dentry *dentry,
err = hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd);
if (err)
return ERR_PTR(err);
- hfsplus_cat_build_key(sb, fd.search_key, dir->i_ino, &dentry->d_name);
+ err = hfsplus_cat_build_key(sb, fd.search_key, dir->i_ino,
+ &dentry->d_name);
+ if (unlikely(err < 0))
+ goto fail;
again:
err = hfs_brec_read(&fd, &entry, sizeof(entry));
if (err) {
@@ -96,9 +100,11 @@ again:
be32_to_cpu(entry.file.permissions.dev);
str.len = sprintf(name, "iNode%d", linkid);
str.name = name;
- hfsplus_cat_build_key(sb, fd.search_key,
+ err = hfsplus_cat_build_key(sb, fd.search_key,
HFSPLUS_SB(sb)->hidden_dir->i_ino,
&str);
+ if (unlikely(err < 0))
+ goto fail;
goto again;
}
} else if (!dentry->d_fsdata)
@@ -127,7 +133,7 @@ static int hfsplus_readdir(struct file *file, struct dir_context *ctx)
struct inode *inode = file_inode(file);
struct super_block *sb = inode->i_sb;
int len, err;
- char strbuf[HFSPLUS_MAX_STRLEN + 1];
+ char *strbuf;
hfsplus_cat_entry entry;
struct hfs_find_data fd;
struct hfsplus_readdir_data *rd;
@@ -139,7 +145,12 @@ static int hfsplus_readdir(struct file *file, struct dir_context *ctx)
err = hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd);
if (err)
return err;
- hfsplus_cat_build_key(sb, fd.search_key, inode->i_ino, NULL);
+ strbuf = kmalloc(NLS_MAX_CHARSET_SIZE * HFSPLUS_MAX_STRLEN + 1, GFP_KERNEL);
+ if (!strbuf) {
+ err = -ENOMEM;
+ goto out;
+ }
+ hfsplus_cat_build_key_with_cnid(sb, fd.search_key, inode->i_ino);
err = hfs_brec_find(&fd, hfs_find_rec_by_key);
if (err)
goto out;
@@ -193,7 +204,7 @@ static int hfsplus_readdir(struct file *file, struct dir_context *ctx)
hfs_bnode_read(fd.bnode, &entry, fd.entryoffset,
fd.entrylength);
type = be16_to_cpu(entry.type);
- len = HFSPLUS_MAX_STRLEN;
+ len = NLS_MAX_CHARSET_SIZE * HFSPLUS_MAX_STRLEN;
err = hfsplus_uni2asc(sb, &fd.key->cat.name, strbuf, &len);
if (err)
goto out;
@@ -212,13 +223,31 @@ static int hfsplus_readdir(struct file *file, struct dir_context *ctx)
be32_to_cpu(entry.folder.id), DT_DIR))
break;
} else if (type == HFSPLUS_FILE) {
+ u16 mode;
+ unsigned dt_type = DT_UNKNOWN;
+
if (fd.entrylength < sizeof(struct hfsplus_cat_file)) {
pr_err("small file entry\n");
err = -EIO;
goto out;
}
+
+ mode = be16_to_cpu(entry.file.permissions.mode);
+ if (S_ISREG(mode))
+ dt_type = DT_REG;
+ else if (S_ISLNK(mode))
+ dt_type = DT_LNK;
+ else if (S_ISFIFO(mode))
+ dt_type = DT_FIFO;
+ else if (S_ISCHR(mode))
+ dt_type = DT_CHR;
+ else if (S_ISBLK(mode))
+ dt_type = DT_BLK;
+ else if (S_ISSOCK(mode))
+ dt_type = DT_SOCK;
+
if (!dir_emit(ctx, strbuf, len,
- be32_to_cpu(entry.file.id), DT_REG))
+ be32_to_cpu(entry.file.id), dt_type))
break;
} else {
pr_err("bad catalog entry type\n");
@@ -246,6 +275,7 @@ next:
}
memcpy(&rd->key, fd.key, sizeof(struct hfsplus_cat_key));
out:
+ kfree(strbuf);
hfs_find_exit(&fd);
return err;
}
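The S_IS*() chain added to hfsplus_readdir() can also be written as a single shift, because the DT_* values were deliberately chosen to equal the S_IF* type bits of st_mode shifted down by 12. A hypothetical equivalent (not what the patch uses):

/* e.g. S_IFREG == 0100000, so (S_IFREG >> 12) == 8 == DT_REG;
 * S_IFDIR == 0040000 -> 4 == DT_DIR, and so on. */
static inline unsigned char mode_to_dtype(umode_t mode)
{
	return (mode & S_IFMT) >> 12;
}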
diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h
index 83dc29286b10..cf10da368ee6 100644
--- a/fs/hfsplus/hfsplus_fs.h
+++ b/fs/hfsplus/hfsplus_fs.h
@@ -375,9 +375,6 @@ int hfsplus_attr_bin_cmp_key(const hfsplus_btree_key *,
const hfsplus_btree_key *);
int hfsplus_attr_build_key(struct super_block *, hfsplus_btree_key *,
u32, const char *);
-void hfsplus_attr_build_key_uni(hfsplus_btree_key *key,
- u32 cnid,
- struct hfsplus_attr_unistr *name);
int hfsplus_find_attr(struct super_block *, u32,
const char *, struct hfs_find_data *);
int hfsplus_attr_exists(struct inode *inode, const char *name);
@@ -444,8 +441,10 @@ int hfsplus_cat_case_cmp_key(const hfsplus_btree_key *,
const hfsplus_btree_key *);
int hfsplus_cat_bin_cmp_key(const hfsplus_btree_key *,
const hfsplus_btree_key *);
-void hfsplus_cat_build_key(struct super_block *sb,
+int hfsplus_cat_build_key(struct super_block *sb,
hfsplus_btree_key *, u32, struct qstr *);
+void hfsplus_cat_build_key_with_cnid(struct super_block *sb,
+ hfsplus_btree_key *, u32);
int hfsplus_find_cat(struct super_block *, u32, struct hfs_find_data *);
int hfsplus_create_cat(u32, struct inode *, struct qstr *, struct inode *);
int hfsplus_delete_cat(u32, struct inode *, struct qstr *);
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index a513d2d36be9..dcb474129d5c 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -514,7 +514,9 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
err = hfs_find_init(sbi->cat_tree, &fd);
if (err)
goto out_put_root;
- hfsplus_cat_build_key(sb, fd.search_key, HFSPLUS_ROOT_CNID, &str);
+ err = hfsplus_cat_build_key(sb, fd.search_key, HFSPLUS_ROOT_CNID, &str);
+ if (unlikely(err < 0))
+ goto out_put_root;
if (!hfs_brec_read(&fd, &entry, sizeof(entry))) {
hfs_find_exit(&fd);
if (entry.type != cpu_to_be16(HFSPLUS_FOLDER))
diff --git a/fs/hfsplus/xattr.c b/fs/hfsplus/xattr.c
index 4e27edc082a4..c03c94611cce 100644
--- a/fs/hfsplus/xattr.c
+++ b/fs/hfsplus/xattr.c
@@ -8,6 +8,7 @@
#include "hfsplus_fs.h"
#include <linux/posix_acl_xattr.h>
+#include <linux/nls.h>
#include "xattr.h"
#include "acl.h"
@@ -645,8 +646,7 @@ ssize_t hfsplus_listxattr(struct dentry *dentry, char *buffer, size_t size)
struct hfs_find_data fd;
u16 key_len = 0;
struct hfsplus_attr_key attr_key;
- char strbuf[HFSPLUS_ATTR_MAX_STRLEN +
- XATTR_MAC_OSX_PREFIX_LEN + 1] = {0};
+ char *strbuf;
int xattr_name_len;
if ((!S_ISREG(inode->i_mode) &&
@@ -666,6 +666,13 @@ ssize_t hfsplus_listxattr(struct dentry *dentry, char *buffer, size_t size)
return err;
}
+ strbuf = kmalloc(NLS_MAX_CHARSET_SIZE * HFSPLUS_ATTR_MAX_STRLEN +
+ XATTR_MAC_OSX_PREFIX_LEN + 1, GFP_KERNEL);
+ if (!strbuf) {
+ res = -ENOMEM;
+ goto out;
+ }
+
err = hfsplus_find_attr(inode->i_sb, inode->i_ino, NULL, &fd);
if (err) {
if (err == -ENOENT) {
@@ -692,7 +699,7 @@ ssize_t hfsplus_listxattr(struct dentry *dentry, char *buffer, size_t size)
if (be32_to_cpu(attr_key.cnid) != inode->i_ino)
goto end_listxattr;
- xattr_name_len = HFSPLUS_ATTR_MAX_STRLEN;
+ xattr_name_len = NLS_MAX_CHARSET_SIZE * HFSPLUS_ATTR_MAX_STRLEN;
if (hfsplus_uni2asc(inode->i_sb,
(const struct hfsplus_unistr *)&fd.key->attr.key_name,
strbuf, &xattr_name_len)) {
@@ -718,6 +725,8 @@ ssize_t hfsplus_listxattr(struct dentry *dentry, char *buffer, size_t size)
}
end_listxattr:
+ kfree(strbuf);
+out:
hfs_find_exit(&fd);
return res;
}
@@ -797,47 +806,55 @@ end_removexattr:
static int hfsplus_osx_getxattr(struct dentry *dentry, const char *name,
void *buffer, size_t size, int type)
{
- char xattr_name[HFSPLUS_ATTR_MAX_STRLEN +
- XATTR_MAC_OSX_PREFIX_LEN + 1] = {0};
- size_t len = strlen(name);
+ char *xattr_name;
+ int res;
if (!strcmp(name, ""))
return -EINVAL;
- if (len > HFSPLUS_ATTR_MAX_STRLEN)
- return -EOPNOTSUPP;
-
/*
* Don't allow retrieving properly prefixed attributes
* by prepending them with "osx."
*/
if (is_known_namespace(name))
return -EOPNOTSUPP;
+ xattr_name = kmalloc(NLS_MAX_CHARSET_SIZE * HFSPLUS_ATTR_MAX_STRLEN
+ + XATTR_MAC_OSX_PREFIX_LEN + 1, GFP_KERNEL);
+ if (!xattr_name)
+ return -ENOMEM;
+ strcpy(xattr_name, XATTR_MAC_OSX_PREFIX);
+ strcpy(xattr_name + XATTR_MAC_OSX_PREFIX_LEN, name);
- return hfsplus_getxattr(dentry, xattr_name, buffer, size);
+ res = hfsplus_getxattr(dentry, xattr_name, buffer, size);
+ kfree(xattr_name);
+ return res;
}
static int hfsplus_osx_setxattr(struct dentry *dentry, const char *name,
const void *buffer, size_t size, int flags, int type)
{
- char xattr_name[HFSPLUS_ATTR_MAX_STRLEN +
- XATTR_MAC_OSX_PREFIX_LEN + 1] = {0};
- size_t len = strlen(name);
+ char *xattr_name;
+ int res;
if (!strcmp(name, ""))
return -EINVAL;
- if (len > HFSPLUS_ATTR_MAX_STRLEN)
- return -EOPNOTSUPP;
-
/*
* Don't allow setting properly prefixed attributes
* by prepending them with "osx."
*/
if (is_known_namespace(name))
return -EOPNOTSUPP;
+ xattr_name = kmalloc(NLS_MAX_CHARSET_SIZE * HFSPLUS_ATTR_MAX_STRLEN
+ + XATTR_MAC_OSX_PREFIX_LEN + 1, GFP_KERNEL);
+ if (!xattr_name)
+ return -ENOMEM;
+ strcpy(xattr_name, XATTR_MAC_OSX_PREFIX);
+ strcpy(xattr_name + XATTR_MAC_OSX_PREFIX_LEN, name);
- return hfsplus_setxattr(dentry, xattr_name, buffer, size, flags);
+ res = hfsplus_setxattr(dentry, xattr_name, buffer, size, flags);
+ kfree(xattr_name);
+ return res;
}
static size_t hfsplus_osx_listxattr(struct dentry *dentry, char *list,
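All the hfsplus xattr handlers above move from on-stack name buffers to kmalloc() for the same reason: hfsplus_uni2asc() may emit up to NLS_MAX_CHARSET_SIZE output bytes per Unicode character, so a worst-case decoded name is several times HFSPLUS_ATTR_MAX_STRLEN. The sizing, with the constants' usual values stated as an assumption:

/* Assuming NLS_MAX_CHARSET_SIZE == 6, HFSPLUS_ATTR_MAX_STRLEN == 127
 * and XATTR_MAC_OSX_PREFIX_LEN == 4 (the "osx." prefix), this is
 * 6 * 127 + 4 + 1 = 767 bytes - too big to keep on the kernel stack. */
size_t buf_len = NLS_MAX_CHARSET_SIZE * HFSPLUS_ATTR_MAX_STRLEN +
		 XATTR_MAC_OSX_PREFIX_LEN + 1;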
diff --git a/fs/hfsplus/xattr_security.c b/fs/hfsplus/xattr_security.c
index 00722765ea79..6ec5e107691f 100644
--- a/fs/hfsplus/xattr_security.c
+++ b/fs/hfsplus/xattr_security.c
@@ -7,6 +7,8 @@
*/
#include <linux/security.h>
+#include <linux/nls.h>
+
#include "hfsplus_fs.h"
#include "xattr.h"
#include "acl.h"
@@ -14,37 +16,43 @@
static int hfsplus_security_getxattr(struct dentry *dentry, const char *name,
void *buffer, size_t size, int type)
{
- char xattr_name[HFSPLUS_ATTR_MAX_STRLEN + 1] = {0};
- size_t len = strlen(name);
+ char *xattr_name;
+ int res;
if (!strcmp(name, ""))
return -EINVAL;
- if (len + XATTR_SECURITY_PREFIX_LEN > HFSPLUS_ATTR_MAX_STRLEN)
- return -EOPNOTSUPP;
-
+ xattr_name = kmalloc(NLS_MAX_CHARSET_SIZE * HFSPLUS_ATTR_MAX_STRLEN + 1,
+ GFP_KERNEL);
+ if (!xattr_name)
+ return -ENOMEM;
strcpy(xattr_name, XATTR_SECURITY_PREFIX);
strcpy(xattr_name + XATTR_SECURITY_PREFIX_LEN, name);
- return hfsplus_getxattr(dentry, xattr_name, buffer, size);
+ res = hfsplus_getxattr(dentry, xattr_name, buffer, size);
+ kfree(xattr_name);
+ return res;
}
static int hfsplus_security_setxattr(struct dentry *dentry, const char *name,
const void *buffer, size_t size, int flags, int type)
{
- char xattr_name[HFSPLUS_ATTR_MAX_STRLEN + 1] = {0};
- size_t len = strlen(name);
+ char *xattr_name;
+ int res;
if (!strcmp(name, ""))
return -EINVAL;
- if (len + XATTR_SECURITY_PREFIX_LEN > HFSPLUS_ATTR_MAX_STRLEN)
- return -EOPNOTSUPP;
-
+ xattr_name = kmalloc(NLS_MAX_CHARSET_SIZE * HFSPLUS_ATTR_MAX_STRLEN + 1,
+ GFP_KERNEL);
+ if (!xattr_name)
+ return -ENOMEM;
strcpy(xattr_name, XATTR_SECURITY_PREFIX);
strcpy(xattr_name + XATTR_SECURITY_PREFIX_LEN, name);
- return hfsplus_setxattr(dentry, xattr_name, buffer, size, flags);
+ res = hfsplus_setxattr(dentry, xattr_name, buffer, size, flags);
+ kfree(xattr_name);
+ return res;
}
static size_t hfsplus_security_listxattr(struct dentry *dentry, char *list,
@@ -62,31 +70,30 @@ static int hfsplus_initxattrs(struct inode *inode,
void *fs_info)
{
const struct xattr *xattr;
- char xattr_name[HFSPLUS_ATTR_MAX_STRLEN + 1] = {0};
- size_t xattr_name_len;
+ char *xattr_name;
int err = 0;
+ xattr_name = kmalloc(NLS_MAX_CHARSET_SIZE * HFSPLUS_ATTR_MAX_STRLEN + 1,
+ GFP_KERNEL);
+ if (!xattr_name)
+ return -ENOMEM;
for (xattr = xattr_array; xattr->name != NULL; xattr++) {
- xattr_name_len = strlen(xattr->name);
- if (xattr_name_len == 0)
+ if (!strcmp(xattr->name, ""))
continue;
- if (xattr_name_len + XATTR_SECURITY_PREFIX_LEN >
- HFSPLUS_ATTR_MAX_STRLEN)
- return -EOPNOTSUPP;
-
strcpy(xattr_name, XATTR_SECURITY_PREFIX);
strcpy(xattr_name +
XATTR_SECURITY_PREFIX_LEN, xattr->name);
memset(xattr_name +
- XATTR_SECURITY_PREFIX_LEN + xattr_name_len, 0, 1);
+ XATTR_SECURITY_PREFIX_LEN + strlen(xattr->name), 0, 1);
err = __hfsplus_setxattr(inode, xattr_name,
xattr->value, xattr->value_len, 0);
if (err)
break;
}
+ kfree(xattr_name);
return err;
}
diff --git a/fs/hfsplus/xattr_trusted.c b/fs/hfsplus/xattr_trusted.c
index 426cee277542..3c5f27e4746a 100644
--- a/fs/hfsplus/xattr_trusted.c
+++ b/fs/hfsplus/xattr_trusted.c
@@ -6,43 +6,51 @@
* Handler for trusted extended attributes.
*/
+#include <linux/nls.h>
+
#include "hfsplus_fs.h"
#include "xattr.h"
static int hfsplus_trusted_getxattr(struct dentry *dentry, const char *name,
void *buffer, size_t size, int type)
{
- char xattr_name[HFSPLUS_ATTR_MAX_STRLEN + 1] = {0};
- size_t len = strlen(name);
+ char *xattr_name;
+ int res;
if (!strcmp(name, ""))
return -EINVAL;
- if (len + XATTR_TRUSTED_PREFIX_LEN > HFSPLUS_ATTR_MAX_STRLEN)
- return -EOPNOTSUPP;
-
+ xattr_name = kmalloc(NLS_MAX_CHARSET_SIZE * HFSPLUS_ATTR_MAX_STRLEN + 1,
+ GFP_KERNEL);
+ if (!xattr_name)
+ return -ENOMEM;
strcpy(xattr_name, XATTR_TRUSTED_PREFIX);
strcpy(xattr_name + XATTR_TRUSTED_PREFIX_LEN, name);
- return hfsplus_getxattr(dentry, xattr_name, buffer, size);
+ res = hfsplus_getxattr(dentry, xattr_name, buffer, size);
+ kfree(xattr_name);
+ return res;
}
static int hfsplus_trusted_setxattr(struct dentry *dentry, const char *name,
const void *buffer, size_t size, int flags, int type)
{
- char xattr_name[HFSPLUS_ATTR_MAX_STRLEN + 1] = {0};
- size_t len = strlen(name);
+ char *xattr_name;
+ int res;
if (!strcmp(name, ""))
return -EINVAL;
- if (len + XATTR_TRUSTED_PREFIX_LEN > HFSPLUS_ATTR_MAX_STRLEN)
- return -EOPNOTSUPP;
-
+ xattr_name = kmalloc(NLS_MAX_CHARSET_SIZE * HFSPLUS_ATTR_MAX_STRLEN + 1,
+ GFP_KERNEL);
+ if (!xattr_name)
+ return -ENOMEM;
strcpy(xattr_name, XATTR_TRUSTED_PREFIX);
strcpy(xattr_name + XATTR_TRUSTED_PREFIX_LEN, name);
- return hfsplus_setxattr(dentry, xattr_name, buffer, size, flags);
+ res = hfsplus_setxattr(dentry, xattr_name, buffer, size, flags);
+ kfree(xattr_name);
+ return res;
}
static size_t hfsplus_trusted_listxattr(struct dentry *dentry, char *list,
diff --git a/fs/hfsplus/xattr_user.c b/fs/hfsplus/xattr_user.c
index e34016561ae0..2b625a538b64 100644
--- a/fs/hfsplus/xattr_user.c
+++ b/fs/hfsplus/xattr_user.c
@@ -6,43 +6,51 @@
* Handler for user extended attributes.
*/
+#include <linux/nls.h>
+
#include "hfsplus_fs.h"
#include "xattr.h"
static int hfsplus_user_getxattr(struct dentry *dentry, const char *name,
void *buffer, size_t size, int type)
{
- char xattr_name[HFSPLUS_ATTR_MAX_STRLEN + 1] = {0};
- size_t len = strlen(name);
+ char *xattr_name;
+ int res;
if (!strcmp(name, ""))
return -EINVAL;
- if (len + XATTR_USER_PREFIX_LEN > HFSPLUS_ATTR_MAX_STRLEN)
- return -EOPNOTSUPP;
-
+ xattr_name = kmalloc(NLS_MAX_CHARSET_SIZE * HFSPLUS_ATTR_MAX_STRLEN + 1,
+ GFP_KERNEL);
+ if (!xattr_name)
+ return -ENOMEM;
strcpy(xattr_name, XATTR_USER_PREFIX);
strcpy(xattr_name + XATTR_USER_PREFIX_LEN, name);
- return hfsplus_getxattr(dentry, xattr_name, buffer, size);
+ res = hfsplus_getxattr(dentry, xattr_name, buffer, size);
+ kfree(xattr_name);
+ return res;
}
static int hfsplus_user_setxattr(struct dentry *dentry, const char *name,
const void *buffer, size_t size, int flags, int type)
{
- char xattr_name[HFSPLUS_ATTR_MAX_STRLEN + 1] = {0};
- size_t len = strlen(name);
+ char *xattr_name;
+ int res;
if (!strcmp(name, ""))
return -EINVAL;
- if (len + XATTR_USER_PREFIX_LEN > HFSPLUS_ATTR_MAX_STRLEN)
- return -EOPNOTSUPP;
-
+ xattr_name = kmalloc(NLS_MAX_CHARSET_SIZE * HFSPLUS_ATTR_MAX_STRLEN + 1,
+ GFP_KERNEL);
+ if (!xattr_name)
+ return -ENOMEM;
strcpy(xattr_name, XATTR_USER_PREFIX);
strcpy(xattr_name + XATTR_USER_PREFIX_LEN, name);
- return hfsplus_setxattr(dentry, xattr_name, buffer, size, flags);
+ res = hfsplus_setxattr(dentry, xattr_name, buffer, size, flags);
+ kfree(xattr_name);
+ return res;
}
static size_t hfsplus_user_listxattr(struct dentry *dentry, char *list,
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index e19d4c0cacae..a20021fec74b 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -6,6 +6,8 @@
* Copyright (C) 2002 Linus Torvalds.
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/module.h>
#include <linux/thread_info.h>
#include <asm/current.h>
@@ -823,8 +825,7 @@ hugetlbfs_parse_options(char *options, struct hugetlbfs_config *pconfig)
ps = memparse(args[0].from, &rest);
pconfig->hstate = size_to_hstate(ps);
if (!pconfig->hstate) {
- printk(KERN_ERR
- "hugetlbfs: Unsupported page size %lu MB\n",
+ pr_err("Unsupported page size %lu MB\n",
ps >> 20);
return -EINVAL;
}
@@ -832,8 +833,7 @@ hugetlbfs_parse_options(char *options, struct hugetlbfs_config *pconfig)
}
default:
- printk(KERN_ERR "hugetlbfs: Bad mount option: \"%s\"\n",
- p);
+ pr_err("Bad mount option: \"%s\"\n", p);
return -EINVAL;
break;
}
@@ -853,8 +853,7 @@ hugetlbfs_parse_options(char *options, struct hugetlbfs_config *pconfig)
return 0;
bad_val:
- printk(KERN_ERR "hugetlbfs: Bad value '%s' for mount option '%s'\n",
- args[0].from, p);
+ pr_err("Bad value '%s' for mount option '%s'\n", args[0].from, p);
return -EINVAL;
}
@@ -970,8 +969,7 @@ struct file *hugetlb_file_setup(const char *name, size_t size,
*user = current_user();
if (user_shm_lock(size, *user)) {
task_lock(current);
- printk_once(KERN_WARNING
- "%s (%d): Using mlock ulimits for SHM_HUGETLB is deprecated\n",
+ pr_warn_once("%s (%d): Using mlock ulimits for SHM_HUGETLB is deprecated\n",
current->comm, current->pid);
task_unlock(current);
} else {
@@ -1031,7 +1029,7 @@ static int __init init_hugetlbfs_fs(void)
int i;
if (!hugepages_supported()) {
- pr_info("hugetlbfs: disabling because there are no supported hugepage sizes\n");
+ pr_info("disabling because there are no supported hugepage sizes\n");
return -ENOTSUPP;
}
@@ -1060,7 +1058,7 @@ static int __init init_hugetlbfs_fs(void)
buf);
if (IS_ERR(hugetlbfs_vfsmount[i])) {
- pr_err("hugetlb: Cannot mount internal hugetlbfs for "
+ pr_err("Cannot mount internal hugetlbfs for "
"page size %uK", ps_kb);
error = PTR_ERR(hugetlbfs_vfsmount[i]);
hugetlbfs_vfsmount[i] = NULL;
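hugetlbfs takes the generic route for its message prefix: KBUILD_MODNAME is supplied per object by Kbuild, so the pr_fmt() at the top of inode.c needs no hand-written string. Assuming the usual fs/hugetlbfs/Makefile object name, the expansion is effectively:

#define pr_fmt(fmt) "hugetlbfs" ": " fmt

pr_err("Bad mount option: \"%s\"\n", p);
/* -> printk(KERN_ERR "hugetlbfs: Bad mount option: \"%s\"\n", p); */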
diff --git a/fs/isofs/Makefile b/fs/isofs/Makefile
index bf162f0942d5..47a68e357512 100644
--- a/fs/isofs/Makefile
+++ b/fs/isofs/Makefile
@@ -8,3 +8,5 @@ isofs-objs-y := namei.o inode.o dir.o util.o rock.o export.o
isofs-objs-$(CONFIG_JOLIET) += joliet.o
isofs-objs-$(CONFIG_ZISOFS) += compress.o
isofs-objs := $(isofs-objs-y)
+
+# ccflags-y := -DDEBUG_FLAGS=1
diff --git a/fs/isofs/compress.c b/fs/isofs/compress.c
index 592e5115a561..8a331c9b1195 100644
--- a/fs/isofs/compress.c
+++ b/fs/isofs/compress.c
@@ -15,6 +15,8 @@
*
* Transparent decompression of files on an iso9660 filesystem
*/
+#define DEBUG
+#define pr_fmt(fmt) "zisofs: " fmt
#include <linux/module.h>
#include <linux/init.h>
@@ -110,7 +112,7 @@ static loff_t zisofs_uncompress_block(struct inode *inode, loff_t block_start,
*errp = -ENOMEM;
else
*errp = -EIO;
- printk(KERN_DEBUG "zisofs: zisofs_inflateInit returned %d\n",
+ pr_debug("zisofs_inflateInit returned %d\n",
zerr);
goto z_eio;
}
@@ -154,15 +156,10 @@ static loff_t zisofs_uncompress_block(struct inode *inode, loff_t block_start,
if (zerr == Z_MEM_ERROR)
*errp = -ENOMEM;
else {
- printk(KERN_DEBUG
- "zisofs: zisofs_inflate returned"
- " %d, inode = %lu,"
- " page idx = %d, bh idx = %d,"
- " avail_in = %d,"
- " avail_out = %d\n",
- zerr, inode->i_ino, curpage,
- curbh, stream.avail_in,
- stream.avail_out);
+ pr_debug("zisofs_inflate returned %d, inode = %lu, page idx = %d, bh idx = %d, avail_in = %d, avail_out = %d\n",
+ zerr, inode->i_ino, curpage,
+ curbh, stream.avail_in,
+ stream.avail_out);
*errp = -EIO;
}
goto inflate_out;
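The #define DEBUG added at the top of compress.c (and the other isofs files below) is what keeps the converted pr_debug() calls compiled in; without it, and without CONFIG_DYNAMIC_DEBUG, pr_debug() compiles to nothing. Simplified from include/linux/printk.h:

#ifdef DEBUG
#define pr_debug(fmt, ...) \
	printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__)
#else
#define pr_debug(fmt, ...) \
	no_printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__)
#endif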
diff --git a/fs/isofs/export.c b/fs/isofs/export.c
index 12088d8de3fa..44d1053dfad5 100644
--- a/fs/isofs/export.c
+++ b/fs/isofs/export.c
@@ -12,7 +12,7 @@
* Documentation/filesystems/nfs/Exporting
* fs/exportfs/expfs.c.
*/
-
+#define pr_fmt(fmt) "ISOFS: " fmt
#include "isofs.h"
static struct dentry *
@@ -52,8 +52,7 @@ static struct dentry *isofs_export_get_parent(struct dentry *child)
/* "child" must always be a directory. */
if (!S_ISDIR(child_inode->i_mode)) {
- printk(KERN_ERR "isofs: isofs_export_get_parent(): "
- "child is not a directory!\n");
+ pr_err("%s(): child is not a directory!\n", __func__);
rv = ERR_PTR(-EACCES);
goto out;
}
@@ -62,8 +61,7 @@ static struct dentry *isofs_export_get_parent(struct dentry *child)
* it is not zero, it means the directory failed to be
* normalized for some reason. */
if (e_child_inode->i_iget5_offset != 0) {
- printk(KERN_ERR "isofs: isofs_export_get_parent(): "
- "child directory not normalized!\n");
+ pr_err("isofs_export_get_parent(): child directory not normalized!\n");
rv = ERR_PTR(-EACCES);
goto out;
}
@@ -89,8 +87,7 @@ static struct dentry *isofs_export_get_parent(struct dentry *child)
/* Verify it is in fact the ".." entry. */
if ((isonum_711(de->name_len) != 1) || (de->name[0] != 1)) {
- printk(KERN_ERR "isofs: Unable to find the \"..\" "
- "directory for NFS.\n");
+ pr_err("Unable to find the \"..\" directory for NFS.\n");
rv = ERR_PTR(-EACCES);
goto out;
}
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index 4556ce1af5b0..cc23d86e174a 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -10,6 +10,8 @@
* 2004 Paul Serice - Inode Support pushed out from 4GB to 128GB
* 2004 Paul Serice - NFS Export Operations
*/
+#define DEBUG
+#define pr_fmt(fmt) "ISOFS: " fmt
#include <linux/init.h>
#include <linux/module.h>
@@ -528,23 +530,25 @@ static unsigned int isofs_get_last_session(struct super_block *sb, s32 session)
Te.cdte_format=CDROM_LBA;
i = ioctl_by_bdev(bdev, CDROMREADTOCENTRY, (unsigned long) &Te);
if (!i) {
- printk(KERN_DEBUG "ISOFS: Session %d start %d type %d\n",
+ pr_debug("Session %d start %d type %d\n",
session, Te.cdte_addr.lba,
Te.cdte_ctrl&CDROM_DATA_TRACK);
if ((Te.cdte_ctrl&CDROM_DATA_TRACK) == 4)
return Te.cdte_addr.lba;
}
- printk(KERN_ERR "ISOFS: Invalid session number or type of track\n");
+ pr_err("Invalid session number or type of track\n");
}
i = ioctl_by_bdev(bdev, CDROMMULTISESSION, (unsigned long) &ms_info);
if (session > 0)
- printk(KERN_ERR "ISOFS: Invalid session number\n");
+ pr_err("Invalid session number\n");
#if 0
- printk(KERN_DEBUG "isofs.inode: CDROMMULTISESSION: rc=%d\n",i);
+ pr_debug("isofs.inode: CDROMMULTISESSION: rc=%d\n", i);
if (i==0) {
- printk(KERN_DEBUG "isofs.inode: XA disk: %s\n",ms_info.xa_flag?"yes":"no");
- printk(KERN_DEBUG "isofs.inode: vol_desc_start = %d\n", ms_info.addr.lba);
+ pr_debug("isofs.inode: XA disk: %s\n",
+ ms_info.xa_flag?"yes":"no");
+ pr_debug("isofs.inode: vol_desc_start = %d\n",
+ ms_info.addr.lba);
}
#endif
if (i==0)
@@ -672,8 +676,7 @@ static int isofs_fill_super(struct super_block *s, void *data, int silent)
else if (sec->escape[2] == 0x45)
joliet_level = 3;
- printk(KERN_DEBUG "ISO 9660 Extensions: "
- "Microsoft Joliet Level %d\n",
+ pr_debug("ISO 9660 Extensions: Microsoft Joliet Level %d\n",
joliet_level);
}
goto root_found;
@@ -771,11 +774,11 @@ root_found:
isonum_711(rootp->ext_attr_length);
sbi->s_firstdatazone = first_data_zone;
#ifndef BEQUIET
- printk(KERN_DEBUG "ISOFS: Max size:%ld Log zone size:%ld\n",
+ pr_debug("Max size:%ld Log zone size:%ld\n",
sbi->s_max_size, 1UL << sbi->s_log_zone_size);
- printk(KERN_DEBUG "ISOFS: First datazone:%ld\n", sbi->s_firstdatazone);
+ pr_debug("First datazone:%ld\n", sbi->s_firstdatazone);
if(sbi->s_high_sierra)
- printk(KERN_DEBUG "ISOFS: Disc in High Sierra format.\n");
+ pr_debug("Disc in High Sierra format.\n");
#endif
/*
@@ -878,9 +881,7 @@ root_found:
*/
if (sbi->s_rock == 1 && joliet_level &&
rootdir_empty(s, sbi->s_firstdatazone)) {
- printk(KERN_NOTICE
- "ISOFS: primary root directory is empty. "
- "Disabling Rock Ridge and switching to Joliet.");
+ pr_notice("primary root directory is empty. Disabling Rock Ridge and switching to Joliet.");
sbi->s_rock = 0;
}
@@ -898,8 +899,7 @@ root_found:
sbi->s_rock = 0;
if (sbi->s_firstdatazone != first_data_zone) {
sbi->s_firstdatazone = first_data_zone;
- printk(KERN_DEBUG
- "ISOFS: changing to secondary root\n");
+ pr_debug("changing to secondary root\n");
iput(inode);
inode = isofs_iget(s, sbi->s_firstdatazone, 0);
if (IS_ERR(inode))
@@ -918,9 +918,8 @@ root_found:
/* Make sure the root inode is a directory */
if (!S_ISDIR(inode->i_mode)) {
- printk(KERN_WARNING
- "isofs_fill_super: root inode is not a directory. "
- "Corrupted media?\n");
+ pr_warn("%s: root inode is not a directory. Corrupted media?\n",
+ __func__);
goto out_iput;
}
@@ -952,27 +951,26 @@ out_iput:
out_no_root:
error = PTR_ERR(inode);
if (error != -ENOMEM)
- printk(KERN_WARNING "%s: get root inode failed\n", __func__);
+ pr_warn("%s: get root inode failed\n", __func__);
out_no_inode:
#ifdef CONFIG_JOLIET
unload_nls(sbi->s_nls_iocharset);
#endif
goto out_freesbi;
out_no_read:
- printk(KERN_WARNING "%s: bread failed, dev=%s, iso_blknum=%d, block=%d\n",
+ pr_warn("%s: bread failed, dev=%s, iso_blknum=%d, block=%d\n",
__func__, s->s_id, iso_blknum, block);
goto out_freebh;
out_bad_zone_size:
- printk(KERN_WARNING "ISOFS: Bad logical zone size %ld\n",
- sbi->s_log_zone_size);
+ pr_warn("Bad logical zone size %ld\n", sbi->s_log_zone_size);
goto out_freebh;
out_bad_size:
- printk(KERN_WARNING "ISOFS: Logical zone size(%d) < hardware blocksize(%u)\n",
+ pr_warn("Logical zone size(%d) < hardware blocksize(%u)\n",
orig_zonesize, opt.blocksize);
goto out_freebh;
out_unknown_format:
if (!silent)
- printk(KERN_WARNING "ISOFS: Unable to identify CD-ROM format.\n");
+ pr_warn("Unable to identify CD-ROM format.\n");
out_freebh:
brelse(bh);
@@ -1021,7 +1019,7 @@ int isofs_get_blocks(struct inode *inode, sector_t iblock,
error = -EIO;
rv = 0;
if (iblock != b_off) {
- printk(KERN_DEBUG "%s: block number too large\n", __func__);
+ pr_debug("%s: block number too large\n", __func__);
goto abort;
}
@@ -1042,7 +1040,7 @@ int isofs_get_blocks(struct inode *inode, sector_t iblock,
* I/O errors.
*/
if (b_off > ((inode->i_size + PAGE_CACHE_SIZE - 1) >> ISOFS_BUFFER_BITS(inode))) {
- printk(KERN_DEBUG "%s: block >= EOF (%lu, %llu)\n",
+ pr_debug("%s: block >= EOF (%lu, %llu)\n",
__func__, b_off,
(unsigned long long)inode->i_size);
goto abort;
@@ -1068,12 +1066,11 @@ int isofs_get_blocks(struct inode *inode, sector_t iblock,
iput(ninode);
if (++section > 100) {
- printk(KERN_DEBUG "%s: More than 100 file sections ?!?"
- " aborting...\n", __func__);
- printk(KERN_DEBUG "%s: block=%lu firstext=%u sect_size=%u "
- "nextblk=%lu nextoff=%lu\n", __func__,
- b_off, firstext, (unsigned) sect_size,
- nextblk, nextoff);
+ pr_debug("%s: More than 100 file sections ?!? aborting...\n",
+ __func__);
+ pr_debug("%s: block=%lu firstext=%u sect_size=%u nextblk=%lu nextoff=%lu\n",
+ __func__, b_off, firstext,
+ (unsigned) sect_size, nextblk, nextoff);
goto abort;
}
}
@@ -1105,7 +1102,7 @@ static int isofs_get_block(struct inode *inode, sector_t iblock,
int ret;
if (create) {
- printk(KERN_DEBUG "%s: Kernel tries to allocate a block\n", __func__);
+ pr_debug("%s: Kernel tries to allocate a block\n", __func__);
return -EROFS;
}
@@ -1248,13 +1245,12 @@ out_nomem:
return -ENOMEM;
out_noread:
- printk(KERN_INFO "ISOFS: unable to read i-node block %lu\n", block);
+ pr_info("unable to read i-node block %lu\n", block);
kfree(tmpde);
return -EIO;
out_toomany:
- printk(KERN_INFO "%s: More than 100 file sections ?!?, aborting...\n"
- "isofs_read_level3_size: inode=%lu\n",
+ pr_info("%s: More than 100 file sections ?!?, aborting...\n isofs_read_level3_size: inode=%lu\n",
__func__, inode->i_ino);
goto out;
}
@@ -1289,7 +1285,7 @@ static int isofs_read_inode(struct inode *inode)
tmpde = kmalloc(de_len, GFP_KERNEL);
if (tmpde == NULL) {
- printk(KERN_INFO "%s: out of memory\n", __func__);
+ pr_info("%s: out of memory\n", __func__);
ret = -ENOMEM;
goto fail;
}
@@ -1364,24 +1360,23 @@ static int isofs_read_inode(struct inode *inode)
inode->i_size &= 0x00ffffff;
if (de->interleave[0]) {
- printk(KERN_DEBUG "ISOFS: Interleaved files not (yet) supported.\n");
+ pr_debug("Interleaved files not (yet) supported.\n");
inode->i_size = 0;
}
/* I have no idea what file_unit_size is used for, so
we will flag it for now */
if (de->file_unit_size[0] != 0) {
- printk(KERN_DEBUG "ISOFS: File unit size != 0 for ISO file (%ld).\n",
- inode->i_ino);
+ pr_debug("File unit size != 0 for ISO file (%ld).\n",
+ inode->i_ino);
}
/* I have no idea what other flag bits are used for, so
we will flag it for now */
-#ifdef DEBUG
+#ifdef DEBUG_FLAGS
if((de->flags[-high_sierra] & ~2)!= 0){
- printk(KERN_DEBUG "ISOFS: Unusual flag settings for ISO file "
- "(%ld %x).\n",
- inode->i_ino, de->flags[-high_sierra]);
+ pr_debug("Unusual flag settings for ISO file (%ld %x).\n",
+ inode->i_ino, de->flags[-high_sierra]);
}
#endif
@@ -1450,7 +1445,7 @@ out:
return ret;
out_badread:
- printk(KERN_WARNING "ISOFS: unable to read i-node block\n");
+ pr_warn("unable to read i-node block\n");
fail:
goto out;
}
@@ -1541,6 +1536,7 @@ MODULE_ALIAS("iso9660");
static int __init init_iso9660_fs(void)
{
int err = init_inodecache();
+
if (err)
goto out;
#ifdef CONFIG_ZISOFS
diff --git a/fs/isofs/namei.c b/fs/isofs/namei.c
index 95295640d9c8..c5ed09733112 100644
--- a/fs/isofs/namei.c
+++ b/fs/isofs/namei.c
@@ -113,9 +113,8 @@ isofs_find_entry(struct inode *dir, struct dentry *dentry,
dpnt = de->name;
/* Basic sanity check, whether name doesn't exceed dir entry */
if (de_len < dlen + sizeof(struct iso_directory_record)) {
- printk(KERN_NOTICE "iso9660: Corrupted directory entry"
- " in block %lu of inode %lu\n", block,
- dir->i_ino);
+ pr_notice("Corrupted directory entry in block %lu of inode %lu\n",
+ block, dir->i_ino);
return 0;
}
diff --git a/fs/isofs/rock.c b/fs/isofs/rock.c
index c0bf42472e40..b13119556e5d 100644
--- a/fs/isofs/rock.c
+++ b/fs/isofs/rock.c
@@ -5,6 +5,8 @@
*
* Rock Ridge Extensions to iso9660
*/
+#define DEBUG
+#define pr_fmt(fmt) "ISOFS: rock: " fmt
#include <linux/slab.h>
#include <linux/pagemap.h>
@@ -89,9 +91,8 @@ static int rock_continue(struct rock_state *rs)
if ((unsigned)rs->cont_offset > blocksize - min_de_size ||
(unsigned)rs->cont_size > blocksize ||
(unsigned)(rs->cont_offset + rs->cont_size) > blocksize) {
- printk(KERN_NOTICE "rock: corrupted directory entry. "
- "extent=%d, offset=%d, size=%d\n",
- rs->cont_extent, rs->cont_offset, rs->cont_size);
+ pr_notice("corrupted directory entry. extent=%d, offset=%d, size=%d\n",
+ rs->cont_extent, rs->cont_offset, rs->cont_size);
ret = -EIO;
goto out;
}
@@ -117,7 +118,7 @@ static int rock_continue(struct rock_state *rs)
rs->cont_offset = 0;
return 0;
}
- printk("Unable to read rock-ridge attributes\n");
+ pr_warn("Unable to read rock-ridge attributes\n");
}
out:
kfree(rs->buffer);
@@ -176,10 +177,9 @@ static int rock_check_overflow(struct rock_state *rs, int sig)
}
len += offsetof(struct rock_ridge, u);
if (len > rs->len) {
- printk(KERN_NOTICE "rock: directory entry would overflow "
- "storage\n");
- printk(KERN_NOTICE "rock: sig=0x%02x, size=%d, remaining=%d\n",
- sig, len, rs->len);
+ pr_notice("directory entry would overflow storage\n");
+ pr_notice("sig=0x%02x, size=%d, remaining=%d\n",
+ sig, len, rs->len);
return -EIO;
}
return 0;
@@ -257,7 +257,7 @@ repeat:
break;
if (rr->u.NM.flags & ~1) {
- printk("Unsupported NM flag settings (%d)\n",
+ pr_warn("Unsupported NM flag settings (%d)\n",
rr->u.NM.flags);
break;
}
@@ -353,13 +353,13 @@ repeat:
break;
case SIG('E', 'R'):
ISOFS_SB(inode->i_sb)->s_rock = 1;
- printk(KERN_DEBUG "ISO 9660 Extensions: ");
+ pr_debug("ISO 9660 Extensions: ");
{
int p;
for (p = 0; p < rr->u.ER.len_id; p++)
- printk("%c", rr->u.ER.data[p]);
+ pr_warn("%c", rr->u.ER.data[p]);
}
- printk("\n");
+ pr_warn("\n");
break;
case SIG('P', 'X'):
inode->i_mode = isonum_733(rr->u.PX.mode);
@@ -450,8 +450,7 @@ repeat:
inode->i_size += 1;
break;
default:
- printk("Symlink component flag "
- "not implemented\n");
+ pr_warn("Symlink component flag not implemented\n");
}
slen -= slp->len + 2;
oldslp = slp;
@@ -481,8 +480,7 @@ repeat:
symlink_len = inode->i_size;
break;
case SIG('R', 'E'):
- printk(KERN_WARNING "Attempt to read inode for "
- "relocated directory\n");
+ pr_warn("Attempt to read inode for relocated directory\n");
goto out;
case SIG('C', 'L'):
ISOFS_I(inode)->i_first_extent =
@@ -518,9 +516,7 @@ repeat:
int block_shift =
isonum_711(&rr->u.ZF.parms[1]);
if (block_shift > 17) {
- printk(KERN_WARNING "isofs: "
- "Can't handle ZF block "
- "size of 2^%d\n",
+ pr_warn("Can't handle ZF block size of 2^%d\n",
block_shift);
} else {
/*
@@ -543,9 +539,7 @@ repeat:
real_size);
}
} else {
- printk(KERN_WARNING
- "isofs: Unknown ZF compression "
- "algorithm: %c%c\n",
+ pr_warn("Unknown ZF compression algorithm: %c%c\n",
rr->u.ZF.algorithm[0],
rr->u.ZF.algorithm[1]);
}
@@ -604,7 +598,7 @@ static char *get_symlink_chunk(char *rpnt, struct rock_ridge *rr, char *plimit)
*rpnt++ = '/';
break;
default:
- printk("Symlink component flag not implemented (%d)\n",
+ pr_warn("Symlink component flag not implemented (%d)\n",
slp->flags);
}
slen -= slp->len + 2;
@@ -757,10 +751,10 @@ out:
kfree(rs.buffer);
goto fail;
out_noread:
- printk("unable to read i-node block");
+ pr_warn("unable to read i-node block");
goto fail;
out_bad_span:
- printk("symlink spans iso9660 blocks\n");
+ pr_warn("symlink spans iso9660 blocks\n");
fail:
brelse(bh);
error:
diff --git a/fs/jffs2/background.c b/fs/jffs2/background.c
index 2b60ce1996aa..bb9cebc9ca8a 100644
--- a/fs/jffs2/background.c
+++ b/fs/jffs2/background.c
@@ -75,10 +75,13 @@ void jffs2_stop_garbage_collect_thread(struct jffs2_sb_info *c)
static int jffs2_garbage_collect_thread(void *_c)
{
struct jffs2_sb_info *c = _c;
+ sigset_t hupmask;
+ siginitset(&hupmask, sigmask(SIGHUP));
allow_signal(SIGKILL);
allow_signal(SIGSTOP);
allow_signal(SIGCONT);
+ allow_signal(SIGHUP);
c->gc_task = current;
complete(&c->gc_thread_start);
@@ -87,7 +90,7 @@ static int jffs2_garbage_collect_thread(void *_c)
set_freezable();
for (;;) {
- allow_signal(SIGHUP);
+ sigprocmask(SIG_UNBLOCK, &hupmask, NULL);
again:
spin_lock(&c->erase_completion_lock);
if (!jffs2_thread_should_wake(c)) {
@@ -95,10 +98,9 @@ static int jffs2_garbage_collect_thread(void *_c)
spin_unlock(&c->erase_completion_lock);
jffs2_dbg(1, "%s(): sleeping...\n", __func__);
schedule();
- } else
+ } else {
spin_unlock(&c->erase_completion_lock);
-
-
+ }
/* Problem - immediately after bootup, the GCD spends a lot
* of time in places like jffs2_kill_fragtree(); so much so
* that userspace processes (like gdm and X) are starved
@@ -150,7 +152,7 @@ static int jffs2_garbage_collect_thread(void *_c)
}
}
/* We don't want SIGHUP to interrupt us. STOP and KILL are OK though. */
- disallow_signal(SIGHUP);
+ sigprocmask(SIG_BLOCK, &hupmask, NULL);
jffs2_dbg(1, "%s(): pass\n", __func__);
if (jffs2_garbage_collect_pass(c) == -ENOSPC) {
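The jffs2 change above stops toggling SIGHUP with allow_signal()/disallow_signal() on every loop iteration; the signal is allowed once at thread start, and only its blocked state is flipped around the GC pass. The pattern, condensed from the hunks:

sigset_t hupmask;

siginitset(&hupmask, sigmask(SIGHUP));	/* a set containing only SIGHUP */
allow_signal(SIGHUP);			/* once, at thread start */

for (;;) {
	sigprocmask(SIG_UNBLOCK, &hupmask, NULL);	/* deliverable while idle */
	/* ... sleep until there is GC work ... */
	sigprocmask(SIG_BLOCK, &hupmask, NULL);		/* held off during the pass */
	/* ... jffs2_garbage_collect_pass() ... */
}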
diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c
index 8d811e02b4b9..0acddf60af55 100644
--- a/fs/jfs/jfs_logmgr.c
+++ b/fs/jfs/jfs_logmgr.c
@@ -167,7 +167,7 @@ do { \
* Global list of active external journals
*/
static LIST_HEAD(jfs_external_logs);
-static struct jfs_log *dummy_log = NULL;
+static struct jfs_log *dummy_log;
static DEFINE_MUTEX(jfs_log_mutex);
/*
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index 97f7fda51890..d1096fed5a62 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -50,14 +50,14 @@ MODULE_DESCRIPTION("The Journaled Filesystem (JFS)");
MODULE_AUTHOR("Steve Best/Dave Kleikamp/Barry Arndt, IBM");
MODULE_LICENSE("GPL");
-static struct kmem_cache * jfs_inode_cachep;
+static struct kmem_cache *jfs_inode_cachep;
static const struct super_operations jfs_super_operations;
static const struct export_operations jfs_export_operations;
static struct file_system_type jfs_fs_type;
#define MAX_COMMIT_THREADS 64
-static int commit_threads = 0;
+static int commit_threads;
module_param(commit_threads, int, 0);
MODULE_PARM_DESC(commit_threads, "Number of commit threads");
@@ -84,8 +84,7 @@ static void jfs_handle_error(struct super_block *sb)
panic("JFS (device %s): panic forced after error\n",
sb->s_id);
else if (sbi->flag & JFS_ERR_REMOUNT_RO) {
- jfs_err("ERROR: (device %s): remounting filesystem "
- "as read-only\n",
+ jfs_err("ERROR: (device %s): remounting filesystem as read-only\n",
sb->s_id);
sb->s_flags |= MS_RDONLY;
}
@@ -363,12 +362,10 @@ static int parse_options(char *options, struct super_block *sb, s64 *newLVSize,
* -> user has more control over the online trimming
*/
sbi->minblks_trim = 64;
- if (blk_queue_discard(q)) {
+ if (blk_queue_discard(q))
*flag |= JFS_DISCARD;
- } else {
- pr_err("JFS: discard option " \
- "not supported on device\n");
- }
+ else
+ pr_err("JFS: discard option not supported on device\n");
break;
}
@@ -385,15 +382,14 @@ static int parse_options(char *options, struct super_block *sb, s64 *newLVSize,
sbi->minblks_trim = simple_strtoull(
minblks_trim, &minblks_trim, 0);
} else {
- pr_err("JFS: discard option " \
- "not supported on device\n");
+ pr_err("JFS: discard option not supported on device\n");
}
break;
}
default:
- printk("jfs: Unrecognized mount option \"%s\" "
- " or missing value\n", p);
+ printk("jfs: Unrecognized mount option \"%s\" or missing value\n",
+ p);
goto cleanup;
}
}
@@ -419,14 +415,12 @@ static int jfs_remount(struct super_block *sb, int *flags, char *data)
int ret;
sync_filesystem(sb);
- if (!parse_options(data, sb, &newLVSize, &flag)) {
+ if (!parse_options(data, sb, &newLVSize, &flag))
return -EINVAL;
- }
if (newLVSize) {
if (sb->s_flags & MS_RDONLY) {
- pr_err("JFS: resize requires volume" \
- " to be mounted read-write\n");
+ pr_err("JFS: resize requires volume to be mounted read-write\n");
return -EROFS;
}
rc = jfs_extendfs(sb, newLVSize, 0);
@@ -452,9 +446,8 @@ static int jfs_remount(struct super_block *sb, int *flags, char *data)
}
if ((!(sb->s_flags & MS_RDONLY)) && (*flags & MS_RDONLY)) {
rc = dquot_suspend(sb, -1);
- if (rc < 0) {
+ if (rc < 0)
return rc;
- }
rc = jfs_umount_rw(sb);
JFS_SBI(sb)->flag = flag;
return rc;
@@ -487,7 +480,7 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent)
if (!new_valid_dev(sb->s_bdev->bd_dev))
return -EOVERFLOW;
- sbi = kzalloc(sizeof (struct jfs_sb_info), GFP_KERNEL);
+ sbi = kzalloc(sizeof(struct jfs_sb_info), GFP_KERNEL);
if (!sbi)
return -ENOMEM;
@@ -548,9 +541,8 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent)
rc = jfs_mount(sb);
if (rc) {
- if (!silent) {
+ if (!silent)
jfs_err("jfs_mount failed w/return code = %d", rc);
- }
goto out_mount_failed;
}
if (sb->s_flags & MS_RDONLY)
@@ -587,7 +579,8 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent)
* Page cache is indexed by long.
* I would use MAX_LFS_FILESIZE, but it's only half as big
*/
- sb->s_maxbytes = min(((u64) PAGE_CACHE_SIZE << 32) - 1, (u64)sb->s_maxbytes);
+ sb->s_maxbytes = min(((u64) PAGE_CACHE_SIZE << 32) - 1,
+ (u64)sb->s_maxbytes);
#endif
sb->s_time_gran = 1;
return 0;
@@ -597,9 +590,8 @@ out_no_root:
out_no_rw:
rc = jfs_umount(sb);
- if (rc) {
+ if (rc)
jfs_err("jfs_umount failed with return code %d", rc);
- }
out_mount_failed:
filemap_write_and_wait(sbi->direct_inode->i_mapping);
truncate_inode_pages(sbi->direct_inode->i_mapping, 0);
@@ -924,7 +916,8 @@ static int __init init_jfs_fs(void)
commit_threads = MAX_COMMIT_THREADS;
for (i = 0; i < commit_threads; i++) {
- jfsCommitThread[i] = kthread_run(jfs_lazycommit, NULL, "jfsCommit");
+ jfsCommitThread[i] = kthread_run(jfs_lazycommit, NULL,
+ "jfsCommit");
if (IS_ERR(jfsCommitThread[i])) {
rc = PTR_ERR(jfsCommitThread[i]);
jfs_err("init_jfs_fs: fork failed w/rc = %d", rc);
diff --git a/fs/mpage.c b/fs/mpage.c
index 4979ffa60aaa..003f6fe3cdb6 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -48,23 +48,7 @@ static void mpage_end_io(struct bio *bio, int err)
bio_for_each_segment_all(bv, bio, i) {
struct page *page = bv->bv_page;
-
- if (bio_data_dir(bio) == READ) {
- if (!err) {
- SetPageUptodate(page);
- } else {
- ClearPageUptodate(page);
- SetPageError(page);
- }
- unlock_page(page);
- } else { /* bio_data_dir(bio) == WRITE */
- if (err) {
- SetPageError(page);
- if (page->mapping)
- set_bit(AS_EIO, &page->mapping->flags);
- }
- end_page_writeback(page);
- }
+ page_endio(page, bio_data_dir(bio), err);
}
bio_put(bio);
@@ -285,6 +269,11 @@ do_mpage_readpage(struct bio *bio, struct page *page, unsigned nr_pages,
alloc_new:
if (bio == NULL) {
+ if (first_hole == blocks_per_page) {
+ if (!bdev_read_page(bdev, blocks[0] << (blkbits - 9),
+ page))
+ goto out;
+ }
bio = mpage_alloc(bdev, blocks[0] << (blkbits - 9),
min_t(int, nr_pages, bio_get_nr_vecs(bdev)),
GFP_KERNEL);
@@ -439,6 +428,35 @@ struct mpage_data {
unsigned use_writepage;
};
+/*
+ * We have our BIO, so we can now mark the buffers clean. Make
+ * sure to only clean buffers which we know we'll be writing.
+ */
+static void clean_buffers(struct page *page, unsigned first_unmapped)
+{
+ unsigned buffer_counter = 0;
+ struct buffer_head *bh, *head;
+ if (!page_has_buffers(page))
+ return;
+ head = page_buffers(page);
+ bh = head;
+
+ do {
+ if (buffer_counter++ == first_unmapped)
+ break;
+ clear_buffer_dirty(bh);
+ bh = bh->b_this_page;
+ } while (bh != head);
+
+ /*
+ * we cannot drop the bh if the page is not uptodate or a concurrent
+ * readpage would fail to serialize with the bh and it would read from
+ * disk before we reach the platter.
+ */
+ if (buffer_heads_over_limit && PageUptodate(page))
+ try_to_free_buffers(page);
+}
+
static int __mpage_writepage(struct page *page, struct writeback_control *wbc,
void *data)
{
@@ -462,6 +480,7 @@ static int __mpage_writepage(struct page *page, struct writeback_control *wbc,
struct buffer_head map_bh;
loff_t i_size = i_size_read(inode);
int ret = 0;
+ int wr = (wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE);
if (page_has_buffers(page)) {
struct buffer_head *head = page_buffers(page);
@@ -570,10 +589,17 @@ page_is_mapped:
* This page will go to BIO. Do we need to send this BIO off first?
*/
if (bio && mpd->last_block_in_bio != blocks[0] - 1)
- bio = mpage_bio_submit(WRITE, bio);
+ bio = mpage_bio_submit(wr, bio);
alloc_new:
if (bio == NULL) {
+ if (first_unmapped == blocks_per_page) {
+ if (!bdev_write_page(bdev, blocks[0] << (blkbits - 9),
+ page, wbc)) {
+ clean_buffers(page, first_unmapped);
+ goto out;
+ }
+ }
bio = mpage_alloc(bdev, blocks[0] << (blkbits - 9),
bio_get_nr_vecs(bdev), GFP_NOFS|__GFP_HIGH);
if (bio == NULL)
@@ -587,40 +613,17 @@ alloc_new:
*/
length = first_unmapped << blkbits;
if (bio_add_page(bio, page, length, 0) < length) {
- bio = mpage_bio_submit(WRITE, bio);
+ bio = mpage_bio_submit(wr, bio);
goto alloc_new;
}
- /*
- * OK, we have our BIO, so we can now mark the buffers clean. Make
- * sure to only clean buffers which we know we'll be writing.
- */
- if (page_has_buffers(page)) {
- struct buffer_head *head = page_buffers(page);
- struct buffer_head *bh = head;
- unsigned buffer_counter = 0;
-
- do {
- if (buffer_counter++ == first_unmapped)
- break;
- clear_buffer_dirty(bh);
- bh = bh->b_this_page;
- } while (bh != head);
-
- /*
- * we cannot drop the bh if the page is not uptodate
- * or a concurrent readpage would fail to serialize with the bh
- * and it would read from disk before we reach the platter.
- */
- if (buffer_heads_over_limit && PageUptodate(page))
- try_to_free_buffers(page);
- }
+ clean_buffers(page, first_unmapped);
BUG_ON(PageWriteback(page));
set_page_writeback(page);
unlock_page(page);
if (boundary || (first_unmapped != blocks_per_page)) {
- bio = mpage_bio_submit(WRITE, bio);
+ bio = mpage_bio_submit(wr, bio);
if (boundary_block) {
write_boundary_block(boundary_bdev,
boundary_block, 1 << blkbits);
@@ -632,7 +635,7 @@ alloc_new:
confused:
if (bio)
- bio = mpage_bio_submit(WRITE, bio);
+ bio = mpage_bio_submit(wr, bio);
if (mpd->use_writepage) {
ret = mapping->a_ops->writepage(page, wbc);
@@ -688,8 +691,11 @@ mpage_writepages(struct address_space *mapping,
};
ret = write_cache_pages(mapping, wbc, __mpage_writepage, &mpd);
- if (mpd.bio)
- mpage_bio_submit(WRITE, mpd.bio);
+ if (mpd.bio) {
+ int wr = (wbc->sync_mode == WB_SYNC_ALL ?
+ WRITE_SYNC : WRITE);
+ mpage_bio_submit(wr, mpd.bio);
+ }
}
blk_finish_plug(&plug);
return ret;
@@ -706,8 +712,11 @@ int mpage_writepage(struct page *page, get_block_t get_block,
.use_writepage = 0,
};
int ret = __mpage_writepage(page, wbc, &mpd);
- if (mpd.bio)
- mpage_bio_submit(WRITE, mpd.bio);
+ if (mpd.bio) {
+ int wr = (wbc->sync_mode == WB_SYNC_ALL ?
+ WRITE_SYNC : WRITE);
+ mpage_bio_submit(wr, mpd.bio);
+ }
return ret;
}
EXPORT_SYMBOL(mpage_writepage);
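The mpage hunks above follow one pattern: derive the request type once from wbc->sync_mode, so WB_SYNC_ALL writeback reaches the block layer as WRITE_SYNC, and try the bdev_write_page() fast path when every block in the page is already mapped. A minimal kernel-context sketch of the selection step only:

/* Sketch: WB_SYNC_ALL means a caller is waiting on this I/O, so hint
 * the block layer with WRITE_SYNC; background writeback uses WRITE. */
static inline int mpage_write_op(struct writeback_control *wbc)
{
	return wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE;
}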
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index 732648b270dc..9163a6ed67d2 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -776,7 +776,10 @@ SYSCALL_DEFINE5(fanotify_mark, int, fanotify_fd, unsigned int, flags,
case FAN_MARK_REMOVE:
if (!mask)
return -EINVAL;
+ break;
case FAN_MARK_FLUSH:
+ if (flags & ~(FAN_MARK_MOUNT | FAN_MARK_FLUSH))
+ return -EINVAL;
break;
default:
return -EINVAL;
@@ -813,6 +816,15 @@ SYSCALL_DEFINE5(fanotify_mark, int, fanotify_fd, unsigned int, flags,
group->priority == FS_PRIO_0)
goto fput_and_out;
+ if (flags & FAN_MARK_FLUSH) {
+ ret = 0;
+ if (flags & FAN_MARK_MOUNT)
+ fsnotify_clear_vfsmount_marks_by_group(group);
+ else
+ fsnotify_clear_inode_marks_by_group(group);
+ goto fput_and_out;
+ }
+
ret = fanotify_find_path(dfd, pathname, &path, flags);
if (ret)
goto fput_and_out;
@@ -824,7 +836,7 @@ SYSCALL_DEFINE5(fanotify_mark, int, fanotify_fd, unsigned int, flags,
mnt = path.mnt;
/* create/update an inode mark */
- switch (flags & (FAN_MARK_ADD | FAN_MARK_REMOVE | FAN_MARK_FLUSH)) {
+ switch (flags & (FAN_MARK_ADD | FAN_MARK_REMOVE)) {
case FAN_MARK_ADD:
if (flags & FAN_MARK_MOUNT)
ret = fanotify_add_vfsmount_mark(group, mnt, mask, flags);
@@ -837,12 +849,6 @@ SYSCALL_DEFINE5(fanotify_mark, int, fanotify_fd, unsigned int, flags,
else
ret = fanotify_remove_inode_mark(group, inode, mask, flags);
break;
- case FAN_MARK_FLUSH:
- if (flags & FAN_MARK_MOUNT)
- fsnotify_clear_vfsmount_marks_by_group(group);
- else
- fsnotify_clear_inode_marks_by_group(group);
- break;
default:
ret = -EINVAL;
}
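After this change a flush is validated and executed before any path lookup, so only FAN_MARK_MOUNT may accompany FAN_MARK_FLUSH, and the mask and pathname arguments are ignored. A hedged userspace sketch of the resulting call (error handling elided):

#include <fcntl.h>
#include <sys/fanotify.h>

/* Remove every mark this group owns, without naming a path. */
static int flush_marks(int fanotify_fd, int mount_marks)
{
	unsigned int flags = FAN_MARK_FLUSH |
			     (mount_marks ? FAN_MARK_MOUNT : 0);

	/* mask and pathname are not consulted for a flush */
	return fanotify_mark(fanotify_fd, flags, 0, AT_FDCWD, NULL);
}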
diff --git a/fs/notify/mark.c b/fs/notify/mark.c
index 923fe4a5f503..d90deaa08e78 100644
--- a/fs/notify/mark.c
+++ b/fs/notify/mark.c
@@ -340,7 +340,7 @@ void fsnotify_init_mark(struct fsnotify_mark *mark,
static int fsnotify_mark_destroy(void *ignored)
{
struct fsnotify_mark *mark, *next;
- LIST_HEAD(private_destroy_list);
+ struct list_head private_destroy_list;
for (;;) {
spin_lock(&destroy_lock);
diff --git a/fs/ntfs/attrib.c b/fs/ntfs/attrib.c
index a27e3fecefaf..250ed5b20c8f 100644
--- a/fs/ntfs/attrib.c
+++ b/fs/ntfs/attrib.c
@@ -1748,7 +1748,6 @@ int ntfs_attr_make_non_resident(ntfs_inode *ni, const u32 data_size)
if (page) {
set_page_dirty(page);
unlock_page(page);
- mark_page_accessed(page);
page_cache_release(page);
}
ntfs_debug("Done.");
diff --git a/fs/ntfs/compress.c b/fs/ntfs/compress.c
index ee4144ce5d7c..f82498c35e78 100644
--- a/fs/ntfs/compress.c
+++ b/fs/ntfs/compress.c
@@ -58,7 +58,7 @@ typedef enum {
/**
* ntfs_compression_buffer - one buffer for the decompression engine
*/
-static u8 *ntfs_compression_buffer = NULL;
+static u8 *ntfs_compression_buffer;
/**
* ntfs_cb_lock - spinlock which protects ntfs_compression_buffer
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c
index 89b4d6663775..5c9e2c81cb11 100644
--- a/fs/ntfs/file.c
+++ b/fs/ntfs/file.c
@@ -2060,7 +2060,6 @@ static ssize_t ntfs_file_buffered_write(struct kiocb *iocb,
}
do {
unlock_page(pages[--do_pages]);
- mark_page_accessed(pages[do_pages]);
page_cache_release(pages[do_pages]);
} while (do_pages);
if (unlikely(status))
diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c
index 9de2491f2926..6c3296e546c3 100644
--- a/fs/ntfs/super.c
+++ b/fs/ntfs/super.c
@@ -50,8 +50,8 @@
static unsigned long ntfs_nr_compression_users;
/* A global default upcase table and a corresponding reference count. */
-static ntfschar *default_upcase = NULL;
-static unsigned long ntfs_nr_upcase_users = 0;
+static ntfschar *default_upcase;
+static unsigned long ntfs_nr_upcase_users;
/* Error constants/strings used in inode.c::ntfs_show_options(). */
typedef enum {
diff --git a/fs/ntfs/sysctl.c b/fs/ntfs/sysctl.c
index 79a89184cb5e..1927170a35ce 100644
--- a/fs/ntfs/sysctl.c
+++ b/fs/ntfs/sysctl.c
@@ -56,7 +56,7 @@ static ctl_table sysctls_root[] = {
};
/* Storage for the sysctls header. */
-static struct ctl_table_header *sysctls_root_table = NULL;
+static struct ctl_table_header *sysctls_root_table;
/**
* ntfs_sysctl - add or remove the debug sysctl
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c
index c6b90e670389..681691bc233a 100644
--- a/fs/ocfs2/cluster/tcp.c
+++ b/fs/ocfs2/cluster/tcp.c
@@ -108,7 +108,7 @@ static struct rb_root o2net_handler_tree = RB_ROOT;
static struct o2net_node o2net_nodes[O2NM_MAX_NODES];
/* XXX someday we'll need better accounting */
-static struct socket *o2net_listen_sock = NULL;
+static struct socket *o2net_listen_sock;
/*
* listen work is only queued by the listening socket callbacks on the
@@ -1799,7 +1799,7 @@ int o2net_register_hb_callbacks(void)
/* ------------------------------------------------------------ */
-static int o2net_accept_one(struct socket *sock)
+static int o2net_accept_one(struct socket *sock, int *more)
{
int ret, slen;
struct sockaddr_in sin;
@@ -1810,6 +1810,7 @@ static int o2net_accept_one(struct socket *sock)
struct o2net_node *nn;
BUG_ON(sock == NULL);
+ *more = 0;
ret = sock_create_lite(sock->sk->sk_family, sock->sk->sk_type,
sock->sk->sk_protocol, &new_sock);
if (ret)
@@ -1821,6 +1822,7 @@ static int o2net_accept_one(struct socket *sock)
if (ret < 0)
goto out;
+ *more = 1;
new_sock->sk->sk_allocation = GFP_ATOMIC;
ret = o2net_set_nodelay(new_sock);
@@ -1919,11 +1921,36 @@ out:
return ret;
}
+/*
+ * This function is invoked in response to one or more
+ * pending accepts at softIRQ level. We must drain the
+ * entire queue before returning.
+ */
+
static void o2net_accept_many(struct work_struct *work)
{
struct socket *sock = o2net_listen_sock;
- while (o2net_accept_one(sock) == 0)
+ int more;
+ int err;
+
+ /*
+ * It is critical to note that due to interrupt moderation
+ * at the network driver level, we can't assume we will get a
+ * softIRQ for every single connection, since tcp SYN packets
+ * can arrive back-to-back, and therefore many pending
+ * accepts may result in just one softIRQ. If we terminated
+ * the o2net_accept_one() loop upon seeing an error, the rest
+ * of the connections in the queue would be stranded: if no new
+ * SYN arrives for hours, no softIRQ will be delivered,
+ * and the connections will just sit in the queue.
+ */
+
+ for (;;) {
+ err = o2net_accept_one(sock, &more);
+ if (!more)
+ break;
cond_resched();
+ }
}
static void o2net_listen_data_ready(struct sock *sk)
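The loop now terminates on the *more* out-parameter rather than on o2net_accept_one()'s return code, so one failed handshake cannot strand the connections queued behind it. Stripped of o2net specifics, the drain pattern reduces to this sketch (accept_one() is a hypothetical stand-in):

static void drain_backlog(struct socket *sock)
{
	for (;;) {
		int more = 0;
		int err = accept_one(sock, &more); /* hypothetical helper */

		if (!more)		/* backlog empty: done */
			break;
		(void)err;		/* per-connection errors are not fatal */
		cond_resched();
	}
}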
diff --git a/fs/ocfs2/dlm/dlmdebug.c b/fs/ocfs2/dlm/dlmdebug.c
index e33cd7a3c582..18f13c2e4a10 100644
--- a/fs/ocfs2/dlm/dlmdebug.c
+++ b/fs/ocfs2/dlm/dlmdebug.c
@@ -338,7 +338,7 @@ void dlm_print_one_mle(struct dlm_master_list_entry *mle)
#ifdef CONFIG_DEBUG_FS
-static struct dentry *dlm_debugfs_root = NULL;
+static struct dentry *dlm_debugfs_root;
#define DLM_DEBUGFS_DIR "o2dlm"
#define DLM_DEBUGFS_DLM_STATE "dlm_state"
diff --git a/fs/ocfs2/dlm/dlmlock.c b/fs/ocfs2/dlm/dlmlock.c
index 5d32f7511f74..66c2a491f68d 100644
--- a/fs/ocfs2/dlm/dlmlock.c
+++ b/fs/ocfs2/dlm/dlmlock.c
@@ -52,7 +52,7 @@
#define MLOG_MASK_PREFIX ML_DLM
#include "cluster/masklog.h"
-static struct kmem_cache *dlm_lock_cache = NULL;
+static struct kmem_cache *dlm_lock_cache;
static DEFINE_SPINLOCK(dlm_cookie_lock);
static u64 dlm_next_cookie = 1;
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index af3f7aa73e13..1256dc49f83f 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -82,9 +82,9 @@ static inline int dlm_mle_equal(struct dlm_ctxt *dlm,
return 1;
}
-static struct kmem_cache *dlm_lockres_cache = NULL;
-static struct kmem_cache *dlm_lockname_cache = NULL;
-static struct kmem_cache *dlm_mle_cache = NULL;
+static struct kmem_cache *dlm_lockres_cache;
+static struct kmem_cache *dlm_lockname_cache;
+static struct kmem_cache *dlm_mle_cache;
static void dlm_mle_release(struct kref *kref);
static void dlm_init_mle(struct dlm_master_list_entry *mle,
@@ -3084,11 +3084,15 @@ static int dlm_add_migration_mle(struct dlm_ctxt *dlm,
/* remove it so that only one mle will be found */
__dlm_unlink_mle(dlm, tmp);
__dlm_mle_detach_hb_events(dlm, tmp);
- ret = DLM_MIGRATE_RESPONSE_MASTERY_REF;
- mlog(0, "%s:%.*s: master=%u, newmaster=%u, "
- "telling master to get ref for cleared out mle "
- "during migration\n", dlm->name, namelen, name,
- master, new_master);
+ if (tmp->type == DLM_MLE_MASTER) {
+ ret = DLM_MIGRATE_RESPONSE_MASTERY_REF;
+ mlog(0, "%s:%.*s: master=%u, newmaster=%u, "
+ "telling master to get ref "
+ "for cleared out mle during "
+ "migration\n", dlm->name,
+ namelen, name, master,
+ new_master);
+ }
}
spin_unlock(&tmp->spinlock);
}
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 465c95016a39..2930e231f3f9 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -828,7 +828,7 @@ static int ocfs2_write_zero_page(struct inode *inode, u64 abs_from,
/*
* fs-writeback will release the dirty pages without page lock
* whose offset are over inode size, the release happens at
- * block_write_full_page_endio().
+ * block_write_full_page().
*/
i_size_write(inode, abs_to);
inode->i_blocks = ocfs2_inode_sector_count(inode);
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index 03ea9314fecd..4b0c68849b36 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -30,6 +30,7 @@
#include <linux/kthread.h>
#include <linux/time.h>
#include <linux/random.h>
+#include <linux/delay.h>
#include <cluster/masklog.h>
@@ -2185,8 +2186,20 @@ static int ocfs2_commit_thread(void *arg)
|| kthread_should_stop());
status = ocfs2_commit_cache(osb);
- if (status < 0)
- mlog_errno(status);
+ if (status < 0) {
+ static unsigned long abort_warn_time;
+
+ /* Warn about this once per minute */
+ if (printk_timed_ratelimit(&abort_warn_time, 60*HZ))
+ mlog(ML_ERROR, "status = %d, journal is "
+ "already aborted.\n", status);
+ /*
+ * After ocfs2_commit_cache() fails, j_num_trans has a
+ * non-zero value. Sleep here to avoid a busy-wait
+ * loop.
+ */
+ msleep_interruptible(1000);
+ }
if (kthread_should_stop() && atomic_read(&journal->j_num_trans)){
mlog(ML_KTHREAD,
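printk_timed_ratelimit() stores its last-fired time in the caller-supplied variable and returns true at most once per interval, which is what bounds the error output here; the msleep_interruptible() then keeps the commit thread from spinning on a persistent failure. A kernel-context sketch of the pattern (note the documented interval argument is in milliseconds):

static void report_persistent_failure(int status)
{
	static unsigned long last_warn;		/* jiffies of last message */

	/* fires at most once per 60000 ms for this call site */
	if (printk_timed_ratelimit(&last_warn, 60 * MSEC_PER_SEC))
		pr_err("commit failed, status = %d\n", status);

	msleep_interruptible(1000);		/* back off, don't busy-wait */
}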
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index 2060fc398445..ad8022431e09 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -231,6 +231,7 @@ static int ocfs2_mknod(struct inode *dir,
sigset_t oldset;
int did_block_signals = 0;
struct posix_acl *default_acl = NULL, *acl = NULL;
+ struct ocfs2_dentry_lock *dl = NULL;
trace_ocfs2_mknod(dir, dentry, dentry->d_name.len, dentry->d_name.name,
(unsigned long long)OCFS2_I(dir)->ip_blkno,
@@ -423,6 +424,8 @@ static int ocfs2_mknod(struct inode *dir,
goto leave;
}
+ dl = dentry->d_fsdata;
+
status = ocfs2_add_entry(handle, dentry, inode,
OCFS2_I(inode)->ip_blkno, parent_fe_bh,
&lookup);
@@ -469,6 +472,16 @@ leave:
* ocfs2_delete_inode will mutex_lock again.
*/
if ((status < 0) && inode) {
+ if (dl) {
+ ocfs2_simple_drop_lockres(osb, &dl->dl_lockres);
+ ocfs2_lock_res_free(&dl->dl_lockres);
+ BUG_ON(dl->dl_count != 1);
+ spin_lock(&dentry_attach_lock);
+ dentry->d_fsdata = NULL;
+ spin_unlock(&dentry_attach_lock);
+ kfree(dl);
+ iput(inode);
+ }
OCFS2_I(inode)->ip_flags |= OCFS2_INODE_SKIP_ORPHAN_DIR;
clear_nlink(inode);
iput(inode);
@@ -991,6 +1004,65 @@ leave:
return status;
}
+static int ocfs2_check_if_ancestor(struct ocfs2_super *osb,
+ u64 src_inode_no, u64 dest_inode_no)
+{
+ int ret = 0, i = 0;
+ u64 parent_inode_no = 0;
+ u64 child_inode_no = src_inode_no;
+ struct inode *child_inode;
+
+#define MAX_LOOKUP_TIMES 32
+ while (1) {
+ child_inode = ocfs2_iget(osb, child_inode_no, 0, 0);
+ if (IS_ERR(child_inode)) {
+ ret = PTR_ERR(child_inode);
+ break;
+ }
+
+ ret = ocfs2_inode_lock(child_inode, NULL, 0);
+ if (ret < 0) {
+ iput(child_inode);
+ if (ret != -ENOENT)
+ mlog_errno(ret);
+ break;
+ }
+
+ ret = ocfs2_lookup_ino_from_name(child_inode, "..", 2,
+ &parent_inode_no);
+ ocfs2_inode_unlock(child_inode, 0);
+ iput(child_inode);
+ if (ret < 0) {
+ ret = -ENOENT;
+ break;
+ }
+
+ if (parent_inode_no == dest_inode_no) {
+ ret = 1;
+ break;
+ }
+
+ if (parent_inode_no == osb->root_inode->i_ino) {
+ ret = 0;
+ break;
+ }
+
+ child_inode_no = parent_inode_no;
+
+ if (++i >= MAX_LOOKUP_TIMES) {
+ mlog(ML_NOTICE, "max lookup times reached, filesystem "
+ "may have nested directories, "
+ "src inode: %llu, dest inode: %llu.\n",
+ (unsigned long long)src_inode_no,
+ (unsigned long long)dest_inode_no);
+ ret = 0;
+ break;
+ }
+ }
+
+ return ret;
+}
+
/*
* The only place this should be used is rename!
* if they have the same id, then the 1st one is the only one locked.
@@ -1002,6 +1074,7 @@ static int ocfs2_double_lock(struct ocfs2_super *osb,
struct inode *inode2)
{
int status;
+ int inode1_is_ancestor, inode2_is_ancestor;
struct ocfs2_inode_info *oi1 = OCFS2_I(inode1);
struct ocfs2_inode_info *oi2 = OCFS2_I(inode2);
struct buffer_head **tmpbh;
@@ -1015,9 +1088,26 @@ static int ocfs2_double_lock(struct ocfs2_super *osb,
if (*bh2)
*bh2 = NULL;
- /* we always want to lock the one with the lower lockid first. */
+ /* we always want to lock the one with the lower lockid first,
+ * and if they are nested, we lock the ancestor first */
if (oi1->ip_blkno != oi2->ip_blkno) {
- if (oi1->ip_blkno < oi2->ip_blkno) {
+ inode1_is_ancestor = ocfs2_check_if_ancestor(osb, oi2->ip_blkno,
+ oi1->ip_blkno);
+ if (inode1_is_ancestor < 0) {
+ status = inode1_is_ancestor;
+ goto bail;
+ }
+
+ inode2_is_ancestor = ocfs2_check_if_ancestor(osb, oi1->ip_blkno,
+ oi2->ip_blkno);
+ if (inode2_is_ancestor < 0) {
+ status = inode2_is_ancestor;
+ goto bail;
+ }
+
+ if ((inode1_is_ancestor == 1) ||
+ (oi1->ip_blkno < oi2->ip_blkno &&
+ inode2_is_ancestor == 0)) {
/* switch id1 and id2 around */
tmpbh = bh2;
bh2 = bh1;
@@ -1098,6 +1188,7 @@ static int ocfs2_rename(struct inode *old_dir,
struct ocfs2_dir_lookup_result old_entry_lookup = { NULL, };
struct ocfs2_dir_lookup_result orphan_insert = { NULL, };
struct ocfs2_dir_lookup_result target_insert = { NULL, };
+ bool should_add_orphan = false;
/* At some point it might be nice to break this function up a
* bit. */
@@ -1134,6 +1225,22 @@ static int ocfs2_rename(struct inode *old_dir,
goto bail;
}
rename_lock = 1;
+
+ /* here we cannot guarantee the inodes haven't just been
+ * changed, so check again whether they are nested */
+ status = ocfs2_check_if_ancestor(osb, new_dir->i_ino,
+ old_inode->i_ino);
+ if (status < 0) {
+ mlog_errno(status);
+ goto bail;
+ } else if (status == 1) {
+ status = -EPERM;
+ mlog(ML_ERROR, "src inode %llu should not be ancestor "
+ "of new dir inode %llu\n",
+ (unsigned long long)old_inode->i_ino,
+ (unsigned long long)new_dir->i_ino);
+ goto bail;
+ }
}
/* if old and new are the same, this'll just do one lock. */
@@ -1304,6 +1411,7 @@ static int ocfs2_rename(struct inode *old_dir,
mlog_errno(status);
goto bail;
}
+ should_add_orphan = true;
}
} else {
BUG_ON(new_dentry->d_parent->d_inode != new_dir);
@@ -1348,17 +1456,6 @@ static int ocfs2_rename(struct inode *old_dir,
goto bail;
}
- if (S_ISDIR(new_inode->i_mode) ||
- (ocfs2_read_links_count(newfe) == 1)) {
- status = ocfs2_orphan_add(osb, handle, new_inode,
- newfe_bh, orphan_name,
- &orphan_insert, orphan_dir);
- if (status < 0) {
- mlog_errno(status);
- goto bail;
- }
- }
-
/* change the dirent to point to the correct inode */
status = ocfs2_update_entry(new_dir, handle, &target_lookup_res,
old_inode);
@@ -1373,6 +1470,15 @@ static int ocfs2_rename(struct inode *old_dir,
else
ocfs2_add_links_count(newfe, -1);
ocfs2_journal_dirty(handle, newfe_bh);
+ if (should_add_orphan) {
+ status = ocfs2_orphan_add(osb, handle, new_inode,
+ newfe_bh, orphan_name,
+ &orphan_insert, orphan_dir);
+ if (status < 0) {
+ mlog_errno(status);
+ goto bail;
+ }
+ }
} else {
/* if the name was not found in new_dir, add it now */
status = ocfs2_add_entry(handle, new_dentry, old_inode,
@@ -1642,6 +1748,7 @@ static int ocfs2_symlink(struct inode *dir,
struct ocfs2_dir_lookup_result lookup = { NULL, };
sigset_t oldset;
int did_block_signals = 0;
+ struct ocfs2_dentry_lock *dl = NULL;
trace_ocfs2_symlink_begin(dir, dentry, symname,
dentry->d_name.len, dentry->d_name.name);
@@ -1830,6 +1937,8 @@ static int ocfs2_symlink(struct inode *dir,
goto bail;
}
+ dl = dentry->d_fsdata;
+
status = ocfs2_add_entry(handle, dentry, inode,
le64_to_cpu(fe->i_blkno), parent_fe_bh,
&lookup);
@@ -1864,6 +1973,16 @@ bail:
if (xattr_ac)
ocfs2_free_alloc_context(xattr_ac);
if ((status < 0) && inode) {
+ if (dl) {
+ ocfs2_simple_drop_lockres(osb, &dl->dl_lockres);
+ ocfs2_lock_res_free(&dl->dl_lockres);
+ BUG_ON(dl->dl_count != 1);
+ spin_lock(&dentry_attach_lock);
+ dentry->d_fsdata = NULL;
+ spin_unlock(&dentry_attach_lock);
+ kfree(dl);
+ iput(inode);
+ }
OCFS2_I(inode)->ip_flags |= OCFS2_INODE_SKIP_ORPHAN_DIR;
clear_nlink(inode);
iput(inode);
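Two ideas run through the namei.c changes: ocfs2_check_if_ancestor() climbs ".." from a child toward the root (capped at MAX_LOOKUP_TIMES) so that rename and ocfs2_double_lock() can order nested inodes ancestor-first, and the error paths in mknod and symlink now tear down the dentry lock attached earlier before dropping the inode. The ancestry walk, stripped of cluster locking, reduces to this sketch (parent_of() is a hypothetical ".." lookup):

/* Sketch: return 1 if 'dest' is an ancestor of 'src', 0 otherwise. */
static int is_ancestor(u64 src, u64 dest, u64 root)
{
	int hops;

	for (hops = 0; hops < 32; hops++) {	/* mirrors MAX_LOOKUP_TIMES */
		u64 parent = parent_of(src);	/* hypothetical ".." lookup */

		if (parent == dest)
			return 1;		/* found dest above src */
		if (parent == root)
			return 0;		/* hit the root first */
		src = parent;
	}
	return 0;	/* give up on implausibly deep nesting */
}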
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index 6ba4bcbc4796..714e53b9cc66 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -1408,10 +1408,9 @@ static void swap_refcount_rec(void *a, void *b, int size)
{
struct ocfs2_refcount_rec *l = a, *r = b, tmp;
- tmp = *(struct ocfs2_refcount_rec *)l;
- *(struct ocfs2_refcount_rec *)l =
- *(struct ocfs2_refcount_rec *)r;
- *(struct ocfs2_refcount_rec *)r = tmp;
+ tmp = *l;
+ *l = *r;
+ *r = tmp;
}
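The rewritten swap helper leans on plain struct assignment: once l and r are typed pointers, the compiler copies the whole record, and the old casts were noise. The same idiom works for any fixed-size sort() swap callback; a sketch with a hypothetical record type:

struct rec { u64 key; u32 val; };

/* Sketch: three struct assignments swap two records in place. */
static void swap_rec(void *a, void *b, int size)
{
	struct rec *l = a, *r = b, tmp;

	tmp = *l;	/* struct assignment copies every member */
	*l = *r;
	*r = tmp;
}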
/*
diff --git a/fs/ocfs2/stackglue.c b/fs/ocfs2/stackglue.c
index 83f1a665ae97..5d965e83bd43 100644
--- a/fs/ocfs2/stackglue.c
+++ b/fs/ocfs2/stackglue.c
@@ -709,7 +709,7 @@ static struct ctl_table ocfs2_root_table[] = {
{ }
};
-static struct ctl_table_header *ocfs2_table_header = NULL;
+static struct ctl_table_header *ocfs2_table_header;
/*
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index a7cdd56f4c79..c7a89cea5c5d 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -75,7 +75,7 @@
#include "buffer_head_io.h"
-static struct kmem_cache *ocfs2_inode_cachep = NULL;
+static struct kmem_cache *ocfs2_inode_cachep;
struct kmem_cache *ocfs2_dquot_cachep;
struct kmem_cache *ocfs2_qf_chunk_cachep;
@@ -85,7 +85,7 @@ struct kmem_cache *ocfs2_qf_chunk_cachep;
* workqueue and schedule on our own. */
struct workqueue_struct *ocfs2_wq = NULL;
-static struct dentry *ocfs2_debugfs_root = NULL;
+static struct dentry *ocfs2_debugfs_root;
MODULE_AUTHOR("Oracle");
MODULE_LICENSE("GPL");
@@ -2292,8 +2292,8 @@ static int ocfs2_initialize_super(struct super_block *sb,
goto bail;
}
- strncpy(osb->vol_label, di->id2.i_super.s_label, 63);
- osb->vol_label[63] = '\0';
+ strlcpy(osb->vol_label, di->id2.i_super.s_label,
+ OCFS2_MAX_VOL_LABEL_LEN);
osb->root_blkno = le64_to_cpu(di->id2.i_super.s_root_blkno);
osb->system_dir_blkno = le64_to_cpu(di->id2.i_super.s_system_dir_blkno);
osb->first_cluster_group_blkno =
diff --git a/fs/ocfs2/uptodate.c b/fs/ocfs2/uptodate.c
index 52eaf33d346f..82e17b076ce7 100644
--- a/fs/ocfs2/uptodate.c
+++ b/fs/ocfs2/uptodate.c
@@ -67,7 +67,7 @@ struct ocfs2_meta_cache_item {
sector_t c_block;
};
-static struct kmem_cache *ocfs2_uptodate_cachep = NULL;
+static struct kmem_cache *ocfs2_uptodate_cachep;
u64 ocfs2_metadata_cache_owner(struct ocfs2_caching_info *ci)
{
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 442177b1119a..fa6d6a4e85b3 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -424,7 +424,6 @@ const struct file_operations proc_tid_maps_operations = {
#ifdef CONFIG_PROC_PAGE_MONITOR
struct mem_size_stats {
- struct vm_area_struct *vma;
unsigned long resident;
unsigned long shared_clean;
unsigned long shared_dirty;
@@ -438,15 +437,16 @@ struct mem_size_stats {
u64 pss;
};
-
-static void smaps_pte_entry(pte_t ptent, unsigned long addr,
- unsigned long ptent_size, struct mm_walk *walk)
+static int smaps_pte(pte_t *pte, unsigned long addr, unsigned long end,
+ struct mm_walk *walk)
{
struct mem_size_stats *mss = walk->private;
- struct vm_area_struct *vma = mss->vma;
+ struct vm_area_struct *vma = walk->vma;
pgoff_t pgoff = linear_page_index(vma, addr);
struct page *page = NULL;
int mapcount;
+ pte_t ptent = *pte;
+ unsigned long ptent_size = end - addr;
if (pte_present(ptent)) {
page = vm_normal_page(vma, addr, ptent);
@@ -463,7 +463,7 @@ static void smaps_pte_entry(pte_t ptent, unsigned long addr,
}
if (!page)
- return;
+ return 0;
if (PageAnon(page))
mss->anonymous += ptent_size;
@@ -489,35 +489,22 @@ static void smaps_pte_entry(pte_t ptent, unsigned long addr,
mss->private_clean += ptent_size;
mss->pss += (ptent_size << PSS_SHIFT);
}
+ return 0;
}
-static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
- struct mm_walk *walk)
+static int smaps_pmd(pmd_t *pmd, unsigned long addr, unsigned long end,
+ struct mm_walk *walk)
{
struct mem_size_stats *mss = walk->private;
- struct vm_area_struct *vma = mss->vma;
- pte_t *pte;
spinlock_t *ptl;
- if (pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
- smaps_pte_entry(*(pte_t *)pmd, addr, HPAGE_PMD_SIZE, walk);
+ if (pmd_trans_huge_lock(pmd, walk->vma, &ptl) == 1) {
+ smaps_pte((pte_t *)pmd, addr, addr + HPAGE_PMD_SIZE, walk);
spin_unlock(ptl);
mss->anonymous_thp += HPAGE_PMD_SIZE;
- return 0;
+ /* don't call smaps_pte() */
+ walk->skip = 1;
}
-
- if (pmd_trans_unstable(pmd))
- return 0;
- /*
- * The mmap_sem held all the way back in m_start() is what
- * keeps khugepaged out of here and from collapsing things
- * in here.
- */
- pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
- for (; addr != end; pte++, addr += PAGE_SIZE)
- smaps_pte_entry(*pte, addr, PAGE_SIZE, walk);
- pte_unmap_unlock(pte - 1, ptl);
- cond_resched();
return 0;
}
@@ -582,16 +569,16 @@ static int show_smap(struct seq_file *m, void *v, int is_pid)
struct vm_area_struct *vma = v;
struct mem_size_stats mss;
struct mm_walk smaps_walk = {
- .pmd_entry = smaps_pte_range,
+ .pmd_entry = smaps_pmd,
+ .pte_entry = smaps_pte,
.mm = vma->vm_mm,
+ .vma = vma,
.private = &mss,
};
memset(&mss, 0, sizeof mss);
- mss.vma = vma;
/* mmap_sem is held in m_start */
- if (vma->vm_mm && !is_vm_hugetlb_page(vma))
- walk_page_range(vma->vm_start, vma->vm_end, &smaps_walk);
+ walk_page_vma(vma, &smaps_walk);
show_map_vma(m, vma, is_pid);
@@ -712,7 +699,6 @@ enum clear_refs_types {
};
struct clear_refs_private {
- struct vm_area_struct *vma;
enum clear_refs_types type;
};
@@ -737,48 +723,52 @@ static inline void clear_soft_dirty(struct vm_area_struct *vma,
ptent = pte_file_clear_soft_dirty(ptent);
}
- if (vma->vm_flags & VM_SOFTDIRTY)
- vma->vm_flags &= ~VM_SOFTDIRTY;
-
set_pte_at(vma->vm_mm, addr, pte, ptent);
#endif
}
-static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr,
+static int clear_refs_pte(pte_t *pte, unsigned long addr,
unsigned long end, struct mm_walk *walk)
{
struct clear_refs_private *cp = walk->private;
- struct vm_area_struct *vma = cp->vma;
- pte_t *pte, ptent;
- spinlock_t *ptl;
+ struct vm_area_struct *vma = walk->vma;
struct page *page;
- split_huge_page_pmd(vma, addr, pmd);
- if (pmd_trans_unstable(pmd))
+ if (cp->type == CLEAR_REFS_SOFT_DIRTY) {
+ clear_soft_dirty(vma, addr, pte);
return 0;
+ }
+ if (!pte_present(*pte))
+ return 0;
+ page = vm_normal_page(vma, addr, *pte);
+ if (!page)
+ return 0;
+ /* Clear accessed and referenced bits. */
+ ptep_test_and_clear_young(vma, addr, pte);
+ ClearPageReferenced(page);
+ return 0;
+}
- pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
- for (; addr != end; pte++, addr += PAGE_SIZE) {
- ptent = *pte;
-
- if (cp->type == CLEAR_REFS_SOFT_DIRTY) {
- clear_soft_dirty(vma, addr, pte);
- continue;
- }
-
- if (!pte_present(ptent))
- continue;
-
- page = vm_normal_page(vma, addr, ptent);
- if (!page)
- continue;
+static int clear_refs_test_walk(unsigned long start, unsigned long end,
+ struct mm_walk *walk)
+{
+ struct clear_refs_private *cp = walk->private;
+ struct vm_area_struct *vma = walk->vma;
- /* Clear accessed and referenced bits. */
- ptep_test_and_clear_young(vma, addr, pte);
- ClearPageReferenced(page);
+ /*
+ * Writing 1 to /proc/pid/clear_refs affects all pages.
+ * Writing 2 to /proc/pid/clear_refs only affects anonymous pages.
+ * Writing 3 to /proc/pid/clear_refs only affects file mapped pages.
+ * Writing 4 to /proc/pid/clear_refs affects all pages.
+ */
+ if (cp->type == CLEAR_REFS_ANON && vma->vm_file)
+ walk->skip = 1;
+ if (cp->type == CLEAR_REFS_MAPPED && !vma->vm_file)
+ walk->skip = 1;
+ if (cp->type == CLEAR_REFS_SOFT_DIRTY) {
+ if (vma->vm_flags & VM_SOFTDIRTY)
+ vma->vm_flags &= ~VM_SOFTDIRTY;
}
- pte_unmap_unlock(pte - 1, ptl);
- cond_resched();
return 0;
}
@@ -807,8 +797,9 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
if (type == CLEAR_REFS_SOFT_DIRTY) {
soft_dirty_cleared = true;
- pr_warn_once("The pagemap bits 55-60 has changed their meaning! "
- "See the linux/Documentation/vm/pagemap.txt for details.\n");
+ pr_warn_once("The pagemap bits 55-60 has changed their meaning!"
+ " See the linux/Documentation/vm/pagemap.txt for "
+ "details.\n");
}
task = get_proc_task(file_inode(file));
@@ -820,33 +811,16 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
.type = type,
};
struct mm_walk clear_refs_walk = {
- .pmd_entry = clear_refs_pte_range,
+ .pte_entry = clear_refs_pte,
+ .test_walk = clear_refs_test_walk,
.mm = mm,
.private = &cp,
};
down_read(&mm->mmap_sem);
if (type == CLEAR_REFS_SOFT_DIRTY)
mmu_notifier_invalidate_range_start(mm, 0, -1);
- for (vma = mm->mmap; vma; vma = vma->vm_next) {
- cp.vma = vma;
- if (is_vm_hugetlb_page(vma))
- continue;
- /*
- * Writing 1 to /proc/pid/clear_refs affects all pages.
- *
- * Writing 2 to /proc/pid/clear_refs only affects
- * Anonymous pages.
- *
- * Writing 3 to /proc/pid/clear_refs only affects file
- * mapped pages.
- */
- if (type == CLEAR_REFS_ANON && vma->vm_file)
- continue;
- if (type == CLEAR_REFS_MAPPED && !vma->vm_file)
- continue;
- walk_page_range(vma->vm_start, vma->vm_end,
- &clear_refs_walk);
- }
+ for (vma = mm->mmap; vma; vma = vma->vm_next)
+ walk_page_vma(vma, &clear_refs_walk);
if (type == CLEAR_REFS_SOFT_DIRTY)
mmu_notifier_invalidate_range_end(mm, 0, -1);
flush_tlb_mm(mm);
@@ -987,19 +961,33 @@ static inline void thp_pmd_to_pagemap_entry(pagemap_entry_t *pme, struct pagemap
}
#endif
-static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
+static int pagemap_pte(pte_t *pte, unsigned long addr, unsigned long end,
struct mm_walk *walk)
{
- struct vm_area_struct *vma;
+ struct vm_area_struct *vma = walk->vma;
struct pagemapread *pm = walk->private;
- spinlock_t *ptl;
- pte_t *pte;
+ pagemap_entry_t pme = make_pme(PM_NOT_PRESENT(pm->v2));
+
+ if (vma && vma->vm_start <= addr && end <= vma->vm_end) {
+ pte_to_pagemap_entry(&pme, pm, vma, addr, *pte);
+ /* unmap before userspace copy */
+ pte_unmap(pte);
+ }
+ return add_to_pagemap(addr, &pme, pm);
+}
+
+static int pagemap_pmd(pmd_t *pmd, unsigned long addr, unsigned long end,
+ struct mm_walk *walk)
+{
int err = 0;
+ struct vm_area_struct *vma = walk->vma;
+ struct pagemapread *pm = walk->private;
pagemap_entry_t pme = make_pme(PM_NOT_PRESENT(pm->v2));
+ spinlock_t *ptl;
- /* find the first VMA at or above 'addr' */
- vma = find_vma(walk->mm, addr);
- if (vma && pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
+ if (!vma)
+ return err;
+ if (pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
int pmd_flags2;
if ((vma->vm_flags & VM_SOFTDIRTY) || pmd_soft_dirty(*pmd))
@@ -1018,41 +1006,9 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
break;
}
spin_unlock(ptl);
- return err;
+ /* don't call pagemap_pte() */
+ walk->skip = 1;
}
-
- if (pmd_trans_unstable(pmd))
- return 0;
- for (; addr != end; addr += PAGE_SIZE) {
- int flags2;
-
- /* check to see if we've left 'vma' behind
- * and need a new, higher one */
- if (vma && (addr >= vma->vm_end)) {
- vma = find_vma(walk->mm, addr);
- if (vma && (vma->vm_flags & VM_SOFTDIRTY))
- flags2 = __PM_SOFT_DIRTY;
- else
- flags2 = 0;
- pme = make_pme(PM_NOT_PRESENT(pm->v2) | PM_STATUS2(pm->v2, flags2));
- }
-
- /* check that 'vma' actually covers this address,
- * and that it isn't a huge page vma */
- if (vma && (vma->vm_start <= addr) &&
- !is_vm_hugetlb_page(vma)) {
- pte = pte_offset_map(pmd, addr);
- pte_to_pagemap_entry(&pme, pm, vma, addr, *pte);
- /* unmap before userspace copy */
- pte_unmap(pte);
- }
- err = add_to_pagemap(addr, &pme, pm);
- if (err)
- return err;
- }
-
- cond_resched();
-
return err;
}
@@ -1070,24 +1026,22 @@ static void huge_pte_to_pagemap_entry(pagemap_entry_t *pme, struct pagemapread *
}
/* This function walks within one hugetlb entry in the single call */
-static int pagemap_hugetlb_range(pte_t *pte, unsigned long hmask,
- unsigned long addr, unsigned long end,
- struct mm_walk *walk)
+static int pagemap_hugetlb(pte_t *pte, unsigned long addr, unsigned long end,
+ struct mm_walk *walk)
{
struct pagemapread *pm = walk->private;
- struct vm_area_struct *vma;
+ struct vm_area_struct *vma = walk->vma;
int err = 0;
int flags2;
pagemap_entry_t pme;
+ unsigned long hmask;
- vma = find_vma(walk->mm, addr);
- WARN_ON_ONCE(!vma);
-
- if (vma && (vma->vm_flags & VM_SOFTDIRTY))
+ if (vma->vm_flags & VM_SOFTDIRTY)
flags2 = __PM_SOFT_DIRTY;
else
flags2 = 0;
+ hmask = huge_page_mask(hstate_vma(vma));
for (; addr != end; addr += PAGE_SIZE) {
int offset = (addr & ~hmask) >> PAGE_SHIFT;
huge_pte_to_pagemap_entry(&pme, pm, *pte, offset, flags2);
@@ -1095,9 +1049,6 @@ static int pagemap_hugetlb_range(pte_t *pte, unsigned long hmask,
if (err)
return err;
}
-
- cond_resched();
-
return err;
}
#endif /* HUGETLB_PAGE */
@@ -1164,10 +1115,11 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
if (!mm || IS_ERR(mm))
goto out_free;
- pagemap_walk.pmd_entry = pagemap_pte_range;
+ pagemap_walk.pte_entry = pagemap_pte;
+ pagemap_walk.pmd_entry = pagemap_pmd;
pagemap_walk.pte_hole = pagemap_pte_hole;
#ifdef CONFIG_HUGETLB_PAGE
- pagemap_walk.hugetlb_entry = pagemap_hugetlb_range;
+ pagemap_walk.hugetlb_entry = pagemap_hugetlb;
#endif
pagemap_walk.mm = mm;
pagemap_walk.private = &pm;
@@ -1243,7 +1195,6 @@ const struct file_operations proc_pagemap_operations = {
#ifdef CONFIG_NUMA
struct numa_maps {
- struct vm_area_struct *vma;
unsigned long pages;
unsigned long anon;
unsigned long active;
@@ -1309,44 +1260,42 @@ static struct page *can_gather_numa_stats(pte_t pte, struct vm_area_struct *vma,
return page;
}
-static int gather_pte_stats(pmd_t *pmd, unsigned long addr,
+static int gather_pte_stats(pte_t *pte, unsigned long addr,
unsigned long end, struct mm_walk *walk)
{
- struct numa_maps *md;
- spinlock_t *ptl;
- pte_t *orig_pte;
- pte_t *pte;
+ struct numa_maps *md = walk->private;
- md = walk->private;
+ struct page *page = can_gather_numa_stats(*pte, walk->vma, addr);
+ if (!page)
+ return 0;
+ gather_stats(page, md, pte_dirty(*pte), 1);
+ return 0;
+}
- if (pmd_trans_huge_lock(pmd, md->vma, &ptl) == 1) {
+static int gather_pmd_stats(pmd_t *pmd, unsigned long addr,
+ unsigned long end, struct mm_walk *walk)
+{
+ struct numa_maps *md = walk->private;
+ struct vm_area_struct *vma = walk->vma;
+ spinlock_t *ptl;
+
+ if (pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
pte_t huge_pte = *(pte_t *)pmd;
struct page *page;
- page = can_gather_numa_stats(huge_pte, md->vma, addr);
+ page = can_gather_numa_stats(huge_pte, vma, addr);
if (page)
gather_stats(page, md, pte_dirty(huge_pte),
HPAGE_PMD_SIZE/PAGE_SIZE);
spin_unlock(ptl);
- return 0;
+ /* don't call gather_pte_stats() */
+ walk->skip = 1;
}
-
- if (pmd_trans_unstable(pmd))
- return 0;
- orig_pte = pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
- do {
- struct page *page = can_gather_numa_stats(*pte, md->vma, addr);
- if (!page)
- continue;
- gather_stats(page, md, pte_dirty(*pte), 1);
-
- } while (pte++, addr += PAGE_SIZE, addr != end);
- pte_unmap_unlock(orig_pte, ptl);
return 0;
}
#ifdef CONFIG_HUGETLB_PAGE
-static int gather_hugetbl_stats(pte_t *pte, unsigned long hmask,
- unsigned long addr, unsigned long end, struct mm_walk *walk)
+static int gather_hugetlb_stats(pte_t *pte, unsigned long addr,
+ unsigned long end, struct mm_walk *walk)
{
struct numa_maps *md;
struct page *page;
@@ -1354,6 +1303,9 @@ static int gather_hugetbl_stats(pte_t *pte, unsigned long hmask,
if (pte_none(*pte))
return 0;
+ if (!pte_present(*pte))
+ return 0;
+
page = pte_page(*pte);
if (!page)
return 0;
@@ -1364,8 +1316,8 @@ static int gather_hugetbl_stats(pte_t *pte, unsigned long hmask,
}
#else
-static int gather_hugetbl_stats(pte_t *pte, unsigned long hmask,
- unsigned long addr, unsigned long end, struct mm_walk *walk)
+static int gather_hugetlb_stats(pte_t *pte, unsigned long addr,
+ unsigned long end, struct mm_walk *walk)
{
return 0;
}
@@ -1394,12 +1346,12 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid)
/* Ensure we start with an empty set of numa_maps statistics. */
memset(md, 0, sizeof(*md));
- md->vma = vma;
-
- walk.hugetlb_entry = gather_hugetbl_stats;
- walk.pmd_entry = gather_pte_stats;
+ walk.hugetlb_entry = gather_hugetlb_stats;
+ walk.pmd_entry = gather_pmd_stats;
+ walk.pte_entry = gather_pte_stats;
walk.private = md;
walk.mm = mm;
+ walk.vma = vma;
pol = get_vma_policy(task, vma, vma->vm_start);
mpol_to_str(buffer, sizeof(buffer), pol);
@@ -1430,6 +1382,7 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid)
if (is_vm_hugetlb_page(vma))
seq_printf(m, " huge");
+ /* mmap_sem is held by m_start */
walk_page_range(vma->vm_start, vma->vm_end, &walk);
if (!md->pages)
diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c
index 46d269e38706..0a9b72cdfeca 100644
--- a/fs/pstore/platform.c
+++ b/fs/pstore/platform.c
@@ -18,6 +18,8 @@
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
+#define pr_fmt(fmt) "pstore: " fmt
+
#include <linux/atomic.h>
#include <linux/types.h>
#include <linux/errno.h>
@@ -224,14 +226,12 @@ static void allocate_buf_for_compression(void)
zlib_inflate_workspacesize());
stream.workspace = kmalloc(size, GFP_KERNEL);
if (!stream.workspace) {
- pr_err("pstore: No memory for compression workspace; "
- "skipping compression\n");
+ pr_err("No memory for compression workspace; skipping compression\n");
kfree(big_oops_buf);
big_oops_buf = NULL;
}
} else {
- pr_err("No memory for uncompressed data; "
- "skipping compression\n");
+ pr_err("No memory for uncompressed data; skipping compression\n");
stream.workspace = NULL;
}
@@ -455,8 +455,7 @@ int pstore_register(struct pstore_info *psi)
add_timer(&pstore_timer);
}
- pr_info("pstore: Registered %s as persistent store backend\n",
- psi->name);
+ pr_info("Registered %s as persistent store backend\n", psi->name);
return 0;
}
@@ -502,8 +501,8 @@ void pstore_get_records(int quiet)
size = unzipped_len;
compressed = false;
} else {
- pr_err("pstore: decompression failed;"
- "returned %d\n", unzipped_len);
+ pr_err("decompression failed;returned %d\n",
+ unzipped_len);
compressed = true;
}
}
@@ -524,8 +523,8 @@ out:
mutex_unlock(&psi->read_mutex);
if (failed)
- printk(KERN_WARNING "pstore: failed to load %d record(s) from '%s'\n",
- failed, psi->name);
+ pr_warn("failed to load %d record(s) from '%s'\n",
+ failed, psi->name);
}
static void pstore_dowork(struct work_struct *work)
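Both pstore files rely on the pr_fmt convention: defining it before the first include makes every pr_err/pr_info/pr_debug in the translation unit prepend the prefix at compile time, which is why the literal "pstore: " strings could be dropped from each call. The pattern in isolation:

/* Must appear before any #include that pulls in <linux/printk.h>. */
#define pr_fmt(fmt) "mydriver: " fmt

#include <linux/module.h>
#include <linux/printk.h>

static int __init mydriver_init(void)
{
	pr_info("loaded\n");	/* logs as "mydriver: loaded" */
	return 0;
}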
diff --git a/fs/pstore/ram_core.c b/fs/pstore/ram_core.c
index ff7e3d4df5a1..34a1e5aa848c 100644
--- a/fs/pstore/ram_core.c
+++ b/fs/pstore/ram_core.c
@@ -12,6 +12,8 @@
*
*/
+#define pr_fmt(fmt) "persistent_ram: " fmt
+
#include <linux/device.h>
#include <linux/err.h>
#include <linux/errno.h>
@@ -205,12 +207,10 @@ static void persistent_ram_ecc_old(struct persistent_ram_zone *prz)
size = buffer->data + prz->buffer_size - block;
numerr = persistent_ram_decode_rs8(prz, block, size, par);
if (numerr > 0) {
- pr_devel("persistent_ram: error in block %p, %d\n",
- block, numerr);
+ pr_devel("error in block %p, %d\n", block, numerr);
prz->corrected_bytes += numerr;
} else if (numerr < 0) {
- pr_devel("persistent_ram: uncorrectable error in block %p\n",
- block);
+ pr_devel("uncorrectable error in block %p\n", block);
prz->bad_blocks++;
}
block += prz->ecc_info.block_size;
@@ -257,7 +257,7 @@ static int persistent_ram_init_ecc(struct persistent_ram_zone *prz,
prz->rs_decoder = init_rs(prz->ecc_info.symsize, prz->ecc_info.poly,
0, 1, prz->ecc_info.ecc_size);
if (prz->rs_decoder == NULL) {
- pr_info("persistent_ram: init_rs failed\n");
+ pr_info("init_rs failed\n");
return -EINVAL;
}
@@ -267,10 +267,10 @@ static int persistent_ram_init_ecc(struct persistent_ram_zone *prz,
numerr = persistent_ram_decode_rs8(prz, buffer, sizeof(*buffer),
prz->par_header);
if (numerr > 0) {
- pr_info("persistent_ram: error in header, %d\n", numerr);
+ pr_info("error in header, %d\n", numerr);
prz->corrected_bytes += numerr;
} else if (numerr < 0) {
- pr_info("persistent_ram: uncorrectable error in header\n");
+ pr_info("uncorrectable error in header\n");
prz->bad_blocks++;
}
@@ -317,7 +317,7 @@ void persistent_ram_save_old(struct persistent_ram_zone *prz)
prz->old_log = kmalloc(size, GFP_KERNEL);
}
if (!prz->old_log) {
- pr_err("persistent_ram: failed to allocate buffer\n");
+ pr_err("failed to allocate buffer\n");
return;
}
@@ -396,8 +396,8 @@ static void *persistent_ram_vmap(phys_addr_t start, size_t size)
pages = kmalloc(sizeof(struct page *) * page_count, GFP_KERNEL);
if (!pages) {
- pr_err("%s: Failed to allocate array for %u pages\n", __func__,
- page_count);
+ pr_err("%s: Failed to allocate array for %u pages\n",
+ __func__, page_count);
return NULL;
}
@@ -462,19 +462,17 @@ static int persistent_ram_post_init(struct persistent_ram_zone *prz, u32 sig,
if (prz->buffer->sig == sig) {
if (buffer_size(prz) > prz->buffer_size ||
buffer_start(prz) > buffer_size(prz))
- pr_info("persistent_ram: found existing invalid buffer,"
- " size %zu, start %zu\n",
- buffer_size(prz), buffer_start(prz));
+ pr_info("found existing invalid buffer, size %zu, start %zu\n",
+ buffer_size(prz), buffer_start(prz));
else {
- pr_debug("persistent_ram: found existing buffer,"
- " size %zu, start %zu\n",
- buffer_size(prz), buffer_start(prz));
+ pr_debug("found existing buffer, size %zu, start %zu\n",
+ buffer_size(prz), buffer_start(prz));
persistent_ram_save_old(prz);
return 0;
}
} else {
- pr_debug("persistent_ram: no valid data in buffer"
- " (sig = 0x%08x)\n", prz->buffer->sig);
+ pr_debug("no valid data in buffer (sig = 0x%08x)\n",
+ prz->buffer->sig);
}
prz->buffer->sig = sig;
@@ -509,7 +507,7 @@ struct persistent_ram_zone *persistent_ram_new(phys_addr_t start, size_t size,
prz = kzalloc(sizeof(struct persistent_ram_zone), GFP_KERNEL);
if (!prz) {
- pr_err("persistent_ram: failed to allocate persistent ram zone\n");
+ pr_err("failed to allocate persistent ram zone\n");
goto err;
}
diff --git a/fs/readdir.c b/fs/readdir.c
index 5b53d995cae6..33fd92208cb7 100644
--- a/fs/readdir.c
+++ b/fs/readdir.c
@@ -13,6 +13,7 @@
#include <linux/stat.h>
#include <linux/file.h>
#include <linux/fs.h>
+#include <linux/fsnotify.h>
#include <linux/dirent.h>
#include <linux/security.h>
#include <linux/syscalls.h>
@@ -40,6 +41,7 @@ int iterate_dir(struct file *file, struct dir_context *ctx)
ctx->pos = file->f_pos;
res = file->f_op->iterate(file, ctx);
file->f_pos = ctx->pos;
+ fsnotify_access(file);
file_accessed(file);
}
mutex_unlock(&inode->i_mutex);
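Adding fsnotify_access() to iterate_dir() means directory reads raise an access event again, so inotify and fanotify listeners can observe readdir activity. A hedged userspace sketch (error handling elided):

#include <sys/inotify.h>
#include <dirent.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	char buf[4096];
	int fd = inotify_init1(0);
	DIR *d;

	inotify_add_watch(fd, "/tmp", IN_ACCESS);

	d = opendir("/tmp");
	readdir(d);		/* generates IN_ACCESS after this change */
	closedir(d);

	if (read(fd, buf, sizeof(buf)) > 0)
		puts("observed an access event for the readdir");
	return 0;
}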
diff --git a/fs/reiserfs/bitmap.c b/fs/reiserfs/bitmap.c
index dc9a6829f7c6..1bcffeab713c 100644
--- a/fs/reiserfs/bitmap.c
+++ b/fs/reiserfs/bitmap.c
@@ -142,7 +142,6 @@ static int scan_bitmap_block(struct reiserfs_transaction_handle *th,
int org = *beg;
BUG_ON(!th->t_trans_id);
-
RFALSE(bmap_n >= reiserfs_bmap_count(s), "Bitmap %u is out of "
"range (0..%u)", bmap_n, reiserfs_bmap_count(s) - 1);
PROC_INFO_INC(s, scan_bitmap.bmap);
@@ -321,7 +320,6 @@ static int scan_bitmap(struct reiserfs_transaction_handle *th,
unsigned int off_max = s->s_blocksize << 3;
BUG_ON(!th->t_trans_id);
-
PROC_INFO_INC(s, scan_bitmap.call);
if (SB_FREE_BLOCKS(s) <= 0)
return 0; // No point in looking for more free blocks
@@ -388,9 +386,7 @@ static void _reiserfs_free_block(struct reiserfs_transaction_handle *th,
unsigned int nr, offset;
BUG_ON(!th->t_trans_id);
-
PROC_INFO_INC(s, free_block);
-
rs = SB_DISK_SUPER_BLOCK(s);
sbh = SB_BUFFER_WITH_SB(s);
apbi = SB_AP_BITMAP(s);
@@ -435,8 +431,8 @@ void reiserfs_free_block(struct reiserfs_transaction_handle *th,
int for_unformatted)
{
struct super_block *s = th->t_super;
- BUG_ON(!th->t_trans_id);
+ BUG_ON(!th->t_trans_id);
RFALSE(!s, "vs-4061: trying to free block on nonexistent device");
if (!is_reusable(s, block, 1))
return;
@@ -471,6 +467,7 @@ static void __discard_prealloc(struct reiserfs_transaction_handle *th,
unsigned long save = ei->i_prealloc_block;
int dirty = 0;
struct inode *inode = &ei->vfs_inode;
+
BUG_ON(!th->t_trans_id);
#ifdef CONFIG_REISERFS_CHECK
if (ei->i_prealloc_count < 0)
@@ -494,6 +491,7 @@ void reiserfs_discard_prealloc(struct reiserfs_transaction_handle *th,
struct inode *inode)
{
struct reiserfs_inode_info *ei = REISERFS_I(inode);
+
BUG_ON(!th->t_trans_id);
if (ei->i_prealloc_count)
__discard_prealloc(th, ei);
@@ -504,7 +502,6 @@ void reiserfs_discard_all_prealloc(struct reiserfs_transaction_handle *th)
struct list_head *plist = &SB_JOURNAL(th->t_super)->j_prealloc_list;
BUG_ON(!th->t_trans_id);
-
while (!list_empty(plist)) {
struct reiserfs_inode_info *ei;
ei = list_entry(plist->next, struct reiserfs_inode_info,
@@ -562,7 +559,7 @@ int reiserfs_parse_alloc_options(struct super_block *s, char *options)
if (!strcmp(this_char, "displacing_new_packing_localities")) {
SET_OPTION(displacing_new_packing_localities);
continue;
- };
+ }
if (!strcmp(this_char, "old_hashed_relocation")) {
SET_OPTION(old_hashed_relocation);
@@ -729,6 +726,7 @@ void show_alloc_options(struct seq_file *seq, struct super_block *s)
static inline void new_hashed_relocation(reiserfs_blocknr_hint_t * hint)
{
char *hash_in;
+
if (hint->formatted_node) {
hash_in = (char *)&hint->key.k_dir_id;
} else {
@@ -757,6 +755,7 @@ static void dirid_groups(reiserfs_blocknr_hint_t * hint)
__u32 dirid = 0;
int bm = 0;
struct super_block *sb = hint->th->t_super;
+
if (hint->inode)
dirid = le32_to_cpu(INODE_PKEY(hint->inode)->k_dir_id);
else if (hint->formatted_node)
diff --git a/fs/reiserfs/stree.c b/fs/reiserfs/stree.c
index b14706a05d52..615cd9ab7940 100644
--- a/fs/reiserfs/stree.c
+++ b/fs/reiserfs/stree.c
@@ -228,10 +228,10 @@ const struct reiserfs_key MIN_KEY = { 0, 0, {{0, 0},} };
/* Maximal possible key. It is never in the tree. */
static const struct reiserfs_key MAX_KEY = {
- __constant_cpu_to_le32(0xffffffff),
- __constant_cpu_to_le32(0xffffffff),
- {{__constant_cpu_to_le32(0xffffffff),
- __constant_cpu_to_le32(0xffffffff)},}
+ cpu_to_le32(0xffffffff),
+ cpu_to_le32(0xffffffff),
+ {{cpu_to_le32(0xffffffff),
+ cpu_to_le32(0xffffffff)},}
};
/* Get delimiting key of the buffer by looking for it in the buffers in the path, starting from the bottom
diff --git a/fs/squashfs/squashfs.h b/fs/squashfs/squashfs.h
index 9e1bb79f7e6f..887d6d270080 100644
--- a/fs/squashfs/squashfs.h
+++ b/fs/squashfs/squashfs.h
@@ -25,7 +25,7 @@
#define ERROR(s, args...) pr_err("SQUASHFS error: "s, ## args)
-#define WARNING(s, args...) pr_warning("SQUASHFS: "s, ## args)
+#define WARNING(s, args...) pr_warn("SQUASHFS: "s, ## args)
/* block.c */
extern int squashfs_read_data(struct super_block *, u64, int, u64 *,
diff --git a/fs/ufs/balloc.c b/fs/ufs/balloc.c
index 0ab1de4b39a5..184323cac1a4 100644
--- a/fs/ufs/balloc.c
+++ b/fs/ufs/balloc.c
@@ -24,7 +24,7 @@
#define INVBLOCK ((u64)-1L)
-static u64 ufs_add_fragments(struct inode *, u64, unsigned, unsigned, int *);
+static u64 ufs_add_fragments(struct inode *, u64, unsigned, unsigned);
static u64 ufs_alloc_fragments(struct inode *, unsigned, u64, unsigned, int *);
static u64 ufs_alloccg_block(struct inode *, struct ufs_cg_private_info *, u64, int *);
static u64 ufs_bitmap_search (struct super_block *, struct ufs_cg_private_info *, u64, unsigned);
@@ -432,7 +432,7 @@ u64 ufs_new_fragments(struct inode *inode, void *p, u64 fragment,
/*
* resize block
*/
- result = ufs_add_fragments (inode, tmp, oldcount, newcount, err);
+ result = ufs_add_fragments(inode, tmp, oldcount, newcount);
if (result) {
*err = 0;
UFS_I(inode)->i_lastfrag = max(UFS_I(inode)->i_lastfrag,
@@ -491,7 +491,7 @@ u64 ufs_new_fragments(struct inode *inode, void *p, u64 fragment,
}
static u64 ufs_add_fragments(struct inode *inode, u64 fragment,
- unsigned oldcount, unsigned newcount, int *err)
+ unsigned oldcount, unsigned newcount)
{
struct super_block * sb;
struct ufs_sb_private_info * uspi;
diff --git a/include/asm-generic/ioctl.h b/include/asm-generic/ioctl.h
index d17295b290fa..297fb0d7cd6c 100644
--- a/include/asm-generic/ioctl.h
+++ b/include/asm-generic/ioctl.h
@@ -3,10 +3,15 @@
#include <uapi/asm-generic/ioctl.h>
+#ifdef __CHECKER__
+#define _IOC_TYPECHECK(t) (sizeof(t))
+#else
/* provoke compile error for invalid uses of size argument */
extern unsigned int __invalid_size_argument_for_IOC;
#define _IOC_TYPECHECK(t) \
((sizeof(t) == sizeof(t[1]) && \
sizeof(t) < (1 << _IOC_SIZEBITS)) ? \
sizeof(t) : __invalid_size_argument_for_IOC)
+#endif
+
#endif /* _ASM_GENERIC_IOCTL_H */
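sparse defines __CHECKER__ and cannot cope with the link-time trap, so the header now hands it a plain sizeof; real compilers keep the trick where an invalid or oversized size argument evaluates an extern that is defined nowhere, failing the build. The guard pattern in isolation (names hypothetical):

#ifdef __CHECKER__
/* sparse: evaluate to the size with no trap */
#define SIZE_CHECK(t)	(sizeof(t))
#else
extern unsigned int __bad_size_argument;	/* deliberately undefined */
#define SIZE_CHECK(t) \
	((sizeof(t) == sizeof(t[1])) ? sizeof(t) : __bad_size_argument)
#endif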
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index a8015a7a55bb..53b2acc38213 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -233,6 +233,10 @@ static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b)
# define pte_accessible(mm, pte) ((void)(pte), 1)
#endif
+#ifndef pte_present_nonuma
+#define pte_present_nonuma(pte) pte_present(pte)
+#endif
+
#ifndef flush_tlb_fix_spurious_fault
#define flush_tlb_fix_spurious_fault(vma, address) flush_tlb_page(vma, address)
#endif
@@ -670,7 +674,7 @@ static inline int pmd_trans_unstable(pmd_t *pmd)
static inline int pte_numa(pte_t pte)
{
return (pte_flags(pte) &
- (_PAGE_NUMA|_PAGE_PRESENT)) == _PAGE_NUMA;
+ (_PAGE_NUMA|_PAGE_PROTNONE|_PAGE_PRESENT)) == _PAGE_NUMA;
}
#endif
@@ -678,7 +682,7 @@ static inline int pte_numa(pte_t pte)
static inline int pmd_numa(pmd_t pmd)
{
return (pmd_flags(pmd) &
- (_PAGE_NUMA|_PAGE_PRESENT)) == _PAGE_NUMA;
+ (_PAGE_NUMA|_PAGE_PROTNONE|_PAGE_PRESENT)) == _PAGE_NUMA;
}
#endif
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index f342bf65681e..1f1ecc711612 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1582,6 +1582,7 @@ static inline bool blk_integrity_is_initialized(struct gendisk *g)
struct block_device_operations {
int (*open) (struct block_device *, fmode_t);
void (*release) (struct gendisk *, fmode_t);
+ int (*rw_page)(struct block_device *, sector_t, struct page *, int rw);
int (*ioctl) (struct block_device *, fmode_t, unsigned, unsigned long);
int (*compat_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long);
int (*direct_access) (struct block_device *, sector_t,
@@ -1600,6 +1601,9 @@ struct block_device_operations {
extern int __blkdev_driver_ioctl(struct block_device *, fmode_t, unsigned int,
unsigned long);
+extern int bdev_read_page(struct block_device *, sector_t, struct page *);
+extern int bdev_write_page(struct block_device *, sector_t, struct page *,
+ struct writeback_control *);
#else /* CONFIG_BLOCK */
/*
* stubs for when the block layer is configured out
diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h
index db51fe4fe317..4e2bd4c95b66 100644
--- a/include/linux/bootmem.h
+++ b/include/linux/bootmem.h
@@ -58,9 +58,9 @@ extern void free_bootmem_late(unsigned long physaddr, unsigned long size);
* Flags for reserve_bootmem (also if CONFIG_HAVE_ARCH_BOOTMEM_NODE,
* the architecture-specific code should honor this).
*
- * If flags is 0, then the return value is always 0 (success). If
- * flags contains BOOTMEM_EXCLUSIVE, then -EBUSY is returned if the
- * memory already was reserved.
+ * If flags is BOOTMEM_DEFAULT, then the return value is always 0 (success).
+ * If flags contains BOOTMEM_EXCLUSIVE, then -EBUSY is returned if the memory
+ * was already reserved.
*/
#define BOOTMEM_DEFAULT 0
#define BOOTMEM_EXCLUSIVE (1<<0)
diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
index 7cbf837a279c..324329ceea1e 100644
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@ -207,8 +207,6 @@ void block_invalidatepage(struct page *page, unsigned int offset,
unsigned int length);
int block_write_full_page(struct page *page, get_block_t *get_block,
struct writeback_control *wbc);
-int block_write_full_page_endio(struct page *page, get_block_t *get_block,
- struct writeback_control *wbc, bh_end_io_t *handler);
int block_read_full_page(struct page*, get_block_t*);
int block_is_partially_uptodate(struct page *page, unsigned long from,
unsigned long count);
diff --git a/include/linux/compaction.h b/include/linux/compaction.h
index 7e1c76e3cd68..01e3132820da 100644
--- a/include/linux/compaction.h
+++ b/include/linux/compaction.h
@@ -22,7 +22,7 @@ extern int sysctl_extfrag_handler(struct ctl_table *table, int write,
extern int fragmentation_index(struct zone *zone, unsigned int order);
extern unsigned long try_to_compact_pages(struct zonelist *zonelist,
int order, gfp_t gfp_mask, nodemask_t *mask,
- bool sync, bool *contended);
+ enum migrate_mode mode, bool *contended);
extern void compact_pgdat(pg_data_t *pgdat, int order);
extern void reset_isolation_suitable(pg_data_t *pgdat);
extern unsigned long compaction_suitable(struct zone *zone, int order);
@@ -91,7 +91,7 @@ static inline bool compaction_restarting(struct zone *zone, int order)
#else
static inline unsigned long try_to_compact_pages(struct zonelist *zonelist,
int order, gfp_t gfp_mask, nodemask_t *nodemask,
- bool sync, bool *contended)
+ enum migrate_mode mode, bool *contended)
{
return COMPACT_CONTINUE;
}
diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h
index b19d3dc2e651..ade2390ffe92 100644
--- a/include/linux/cpuset.h
+++ b/include/linux/cpuset.h
@@ -12,10 +12,31 @@
#include <linux/cpumask.h>
#include <linux/nodemask.h>
#include <linux/mm.h>
+#include <linux/jump_label.h>
#ifdef CONFIG_CPUSETS
-extern int number_of_cpusets; /* How many cpusets are defined in system? */
+extern struct static_key cpusets_enabled_key;
+static inline bool cpusets_enabled(void)
+{
+ return static_key_false(&cpusets_enabled_key);
+}
+
+static inline int nr_cpusets(void)
+{
+ /* jump label reference count + the top-level cpuset */
+ return static_key_count(&cpusets_enabled_key) + 1;
+}
+
+static inline void cpuset_inc(void)
+{
+ static_key_slow_inc(&cpusets_enabled_key);
+}
+
+static inline void cpuset_dec(void)
+{
+ static_key_slow_dec(&cpusets_enabled_key);
+}
extern int cpuset_init(void);
extern void cpuset_init_smp(void);
@@ -32,13 +53,13 @@ extern int __cpuset_node_allowed_hardwall(int node, gfp_t gfp_mask);
static inline int cpuset_node_allowed_softwall(int node, gfp_t gfp_mask)
{
- return number_of_cpusets <= 1 ||
+ return nr_cpusets() <= 1 ||
__cpuset_node_allowed_softwall(node, gfp_mask);
}
static inline int cpuset_node_allowed_hardwall(int node, gfp_t gfp_mask)
{
- return number_of_cpusets <= 1 ||
+ return nr_cpusets() <= 1 ||
__cpuset_node_allowed_hardwall(node, gfp_mask);
}
@@ -124,6 +145,8 @@ static inline void set_mems_allowed(nodemask_t nodemask)
#else /* !CONFIG_CPUSETS */
+static inline bool cpusets_enabled(void) { return false; }
+
static inline int cpuset_init(void) { return 0; }
static inline void cpuset_init_smp(void) {}
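Switching from an integer counter to a static key turns the common no-cpusets check into a patched no-op branch instead of a load and compare, and nr_cpusets() recovers the old count as the key's reference count plus the always-present root cpuset. A sketch of the same jump-label pattern for any rarely enabled feature:

#include <linux/jump_label.h>

static struct static_key feature_key = STATIC_KEY_INIT_FALSE;

static inline bool feature_enabled(void)
{
	/* compiles to a no-op branch until the key goes nonzero */
	return static_key_false(&feature_key);
}

void feature_register(void)
{
	static_key_slow_inc(&feature_key);	/* patches the branch in */
}

void feature_unregister(void)
{
	static_key_slow_dec(&feature_key);
}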
diff --git a/include/linux/crc64_ecma.h b/include/linux/crc64_ecma.h
new file mode 100644
index 000000000000..bba7a4d692b3
--- /dev/null
+++ b/include/linux/crc64_ecma.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright 2013 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Freescale Semiconductor nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __CRC64_ECMA_H_
+#define __CRC64_ECMA_H_
+
+#include <linux/types.h>
+
+
+#define CRC64_DEFAULT_INITVAL 0xFFFFFFFFFFFFFFFFULL
+
+
+/*
+ * crc64_ecma_seed - Initializes the CRC64 ECMA seed.
+ */
+u64 crc64_ecma_seed(void);
+
+/*
+ * crc64_ecma - Computes the 64 bit ECMA CRC.
+ *
+ * @pdata: pointer to the data to compute checksum for.
+ * @nbytes: number of bytes in data buffer.
+ * @seed: CRC seed.
+ */
+u64 crc64_ecma(u8 const *pdata, u32 nbytes, u64 seed);
+
+#endif /* __CRC64_ECMA_H_ */
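The header's contract is a seed helper plus a one-shot routine; assuming the seed parameter accepts a previous return value (the usual contract for such APIs, not stated explicitly here), a buffer can be checksummed in pieces:

#include <linux/crc64_ecma.h>

static u64 checksum_two_chunks(const u8 *a, u32 alen, const u8 *b, u32 blen)
{
	u64 crc = crc64_ecma_seed();	/* CRC64_DEFAULT_INITVAL */

	crc = crc64_ecma(a, alen, crc);	/* first chunk */
	crc = crc64_ecma(b, blen, crc);	/* assumed chaining via the seed */
	return crc;
}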
diff --git a/include/linux/dma-contiguous.h b/include/linux/dma-contiguous.h
index 3b28f937d959..772eab5d524a 100644
--- a/include/linux/dma-contiguous.h
+++ b/include/linux/dma-contiguous.h
@@ -88,7 +88,8 @@ static inline void dma_contiguous_set_default(struct cma *cma)
void dma_contiguous_reserve(phys_addr_t addr_limit);
int __init dma_contiguous_reserve_area(phys_addr_t size, phys_addr_t base,
- phys_addr_t limit, struct cma **res_cma);
+ phys_addr_t limit, struct cma **res_cma,
+ bool fixed);
/**
* dma_declare_contiguous() - reserve area for contiguous memory handling
@@ -108,7 +109,7 @@ static inline int dma_declare_contiguous(struct device *dev, phys_addr_t size,
{
struct cma *cma;
int ret;
- ret = dma_contiguous_reserve_area(size, base, limit, &cma);
+ ret = dma_contiguous_reserve_area(size, base, limit, &cma, true);
if (ret == 0)
dev_set_cma_area(dev, cma);
@@ -136,7 +137,9 @@ static inline void dma_contiguous_set_default(struct cma *cma) { }
static inline void dma_contiguous_reserve(phys_addr_t limit) { }
static inline int dma_contiguous_reserve_area(phys_addr_t size, phys_addr_t base,
- phys_addr_t limit, struct cma **res_cma) {
+ phys_addr_t limit, struct cma **res_cma,
+ bool fixed)
+{
return -ENOSYS;
}
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 4e92d551518d..7d8978dfc481 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2408,8 +2408,12 @@ extern int sb_min_blocksize(struct super_block *, int);
extern int generic_file_mmap(struct file *, struct vm_area_struct *);
extern int generic_file_readonly_mmap(struct file *, struct vm_area_struct *);
-extern int generic_file_remap_pages(struct vm_area_struct *, unsigned long addr,
- unsigned long size, pgoff_t pgoff);
+static inline int generic_file_remap_pages(struct vm_area_struct *vma,
+ unsigned long addr, unsigned long size, pgoff_t pgoff)
+{
+ BUG();
+ return 0;
+}
int generic_write_checks(struct file *file, loff_t *pos, size_t *count, int isblk);
extern ssize_t generic_file_read_iter(struct kiocb *, struct iov_iter *);
extern ssize_t __generic_file_write_iter(struct kiocb *, struct iov_iter *);
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 39b81dc7d01a..454c99fdb79d 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -31,7 +31,6 @@ struct vm_area_struct;
#define ___GFP_HARDWALL 0x20000u
#define ___GFP_THISNODE 0x40000u
#define ___GFP_RECLAIMABLE 0x80000u
-#define ___GFP_KMEMCG 0x100000u
#define ___GFP_NOTRACK 0x200000u
#define ___GFP_NO_KSWAPD 0x400000u
#define ___GFP_OTHER_NODE 0x800000u
@@ -91,7 +90,6 @@ struct vm_area_struct;
#define __GFP_NO_KSWAPD ((__force gfp_t)___GFP_NO_KSWAPD)
#define __GFP_OTHER_NODE ((__force gfp_t)___GFP_OTHER_NODE) /* On behalf of other node */
-#define __GFP_KMEMCG ((__force gfp_t)___GFP_KMEMCG) /* Allocation comes from a memcg-accounted resource */
#define __GFP_WRITE ((__force gfp_t)___GFP_WRITE) /* Allocator intends to dirty page */
/*
@@ -353,6 +351,10 @@ extern struct page *alloc_pages_vma(gfp_t gfp_mask, int order,
#define alloc_page_vma_node(gfp_mask, vma, addr, node) \
alloc_pages_vma(gfp_mask, 0, vma, addr, node)
+extern struct page *alloc_kmem_pages(gfp_t gfp_mask, unsigned int order);
+extern struct page *alloc_kmem_pages_node(int nid, gfp_t gfp_mask,
+ unsigned int order);
+
extern unsigned long __get_free_pages(gfp_t gfp_mask, unsigned int order);
extern unsigned long get_zeroed_page(gfp_t gfp_mask);
@@ -369,11 +371,11 @@ void *alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask);
extern void __free_pages(struct page *page, unsigned int order);
extern void free_pages(unsigned long addr, unsigned int order);
-extern void free_hot_cold_page(struct page *page, int cold);
-extern void free_hot_cold_page_list(struct list_head *list, int cold);
+extern void free_hot_cold_page(struct page *page, bool cold);
+extern void free_hot_cold_page_list(struct list_head *list, bool cold);
-extern void __free_memcg_kmem_pages(struct page *page, unsigned int order);
-extern void free_memcg_kmem_pages(unsigned long addr, unsigned int order);
+extern void __free_kmem_pages(struct page *page, unsigned int order);
+extern void free_kmem_pages(unsigned long addr, unsigned int order);
#define __free_page(page) __free_pages((page), 0)
#define free_page(addr) free_pages((addr), 0)
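
The renamed kmem page helpers pair up as follows; a minimal sketch, assuming kernel context, with my_alloc()/my_free() as hypothetical wrappers:

#include <linux/gfp.h>

static struct page *my_alloc(unsigned int order)
{
	/* replaces the memcg-accounted allocation formerly requested
	 * with __GFP_KMEMCG */
	return alloc_kmem_pages(GFP_KERNEL, order);
}

static void my_free(struct page *page, unsigned int order)
{
	/* replaces __free_memcg_kmem_pages() */
	if (page)
		__free_kmem_pages(page, order);
}
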
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index b65166de1d9d..0683f55cb2f4 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -41,8 +41,6 @@ extern int hugetlb_max_hstate __read_mostly;
struct hugepage_subpool *hugepage_new_subpool(long nr_blocks);
void hugepage_put_subpool(struct hugepage_subpool *spool);
-int PageHuge(struct page *page);
-
void reset_vma_resv_huge_pages(struct vm_area_struct *vma);
int hugetlb_sysctl_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *);
int hugetlb_overcommit_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *);
@@ -109,11 +107,6 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
#else /* !CONFIG_HUGETLB_PAGE */
-static inline int PageHuge(struct page *page)
-{
- return 0;
-}
-
static inline void reset_vma_resv_huge_pages(struct vm_area_struct *vma)
{
}
@@ -343,6 +336,11 @@ static inline unsigned huge_page_shift(struct hstate *h)
return h->order + PAGE_SHIFT;
}
+static inline bool hstate_is_gigantic(struct hstate *h)
+{
+ return huge_page_order(h) >= MAX_ORDER;
+}
+
static inline unsigned int pages_per_huge_page(struct hstate *h)
{
return 1 << h->order;
diff --git a/include/linux/hugetlb_inline.h b/include/linux/hugetlb_inline.h
index 2bb681fbeb35..4d60c82e9fda 100644
--- a/include/linux/hugetlb_inline.h
+++ b/include/linux/hugetlb_inline.h
@@ -10,6 +10,8 @@ static inline int is_vm_hugetlb_page(struct vm_area_struct *vma)
return !!(vma->vm_flags & VM_HUGETLB);
}
+int PageHuge(struct page *page);
+
#else
static inline int is_vm_hugetlb_page(struct vm_area_struct *vma)
@@ -17,6 +19,11 @@ static inline int is_vm_hugetlb_page(struct vm_area_struct *vma)
return 0;
}
+static inline int PageHuge(struct page *page)
+{
+ return 0;
+}
+
#endif
#endif
diff --git a/include/linux/idr.h b/include/linux/idr.h
index 6af3400b9b2f..013fd9bc4cb6 100644
--- a/include/linux/idr.h
+++ b/include/linux/idr.h
@@ -29,21 +29,24 @@
struct idr_layer {
int prefix; /* the ID prefix of this idr_layer */
- DECLARE_BITMAP(bitmap, IDR_SIZE); /* A zero bit means "space here" */
+ int layer; /* distance from leaf */
struct idr_layer __rcu *ary[1<<IDR_BITS];
int count; /* When zero, we can release it */
- int layer; /* distance from leaf */
- struct rcu_head rcu_head;
+ union {
+ /* A zero bit means "space here" */
+ DECLARE_BITMAP(bitmap, IDR_SIZE);
+ struct rcu_head rcu_head;
+ };
};
struct idr {
struct idr_layer __rcu *hint; /* the last layer allocated from */
struct idr_layer __rcu *top;
- struct idr_layer *id_free;
int layers; /* only valid w/o concurrent changes */
- int id_free_cnt;
int cur; /* current pos for cyclic allocation */
spinlock_t lock;
+ int id_free_cnt;
+ struct idr_layer *id_free;
};
#define IDR_INIT(name) \
diff --git a/include/linux/input.h b/include/linux/input.h
index 82ce323b9986..6453b22372ac 100644
--- a/include/linux/input.h
+++ b/include/linux/input.h
@@ -79,6 +79,7 @@ struct input_value {
* @led: reflects current state of device's LEDs
* @snd: reflects current state of sound effects
* @sw: reflects current state of device's switches
+ * @leds: leds objects for the device's LEDs
* @open: this method is called when the very first user calls
* input_open_device(). The driver must prepare the device
* to start generating events (start polling thread,
@@ -164,6 +165,8 @@ struct input_dev {
unsigned long snd[BITS_TO_LONGS(SND_CNT)];
unsigned long sw[BITS_TO_LONGS(SW_CNT)];
+ struct led_classdev *leds;
+
int (*open)(struct input_dev *dev);
void (*close)(struct input_dev *dev);
int (*flush)(struct input_dev *dev, struct file *file);
@@ -531,4 +534,22 @@ int input_ff_erase(struct input_dev *dev, int effect_id, struct file *file);
int input_ff_create_memless(struct input_dev *dev, void *data,
int (*play_effect)(struct input_dev *, void *, struct ff_effect *));
+#ifdef CONFIG_INPUT_LEDS
+
+int input_led_connect(struct input_dev *dev);
+void input_led_disconnect(struct input_dev *dev);
+
+#else
+
+static inline int input_led_connect(struct input_dev *dev)
+{
+ return 0;
+}
+
+static inline void input_led_disconnect(struct input_dev *dev)
+{
+}
+
+#endif
+
#endif
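
With the CONFIG_INPUT_LEDS stubs above, callers can invoke the hooks unconditionally. A minimal sketch, assuming kernel context; my_register()/my_unregister() are hypothetical:

#include <linux/input.h>

static int my_register(struct input_dev *dev)
{
	/* a no-op returning 0 when CONFIG_INPUT_LEDS is disabled */
	return input_led_connect(dev);
}

static void my_unregister(struct input_dev *dev)
{
	input_led_disconnect(dev);
}
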
diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h
index 5c1dfb2a9e73..784304b222b3 100644
--- a/include/linux/jump_label.h
+++ b/include/linux/jump_label.h
@@ -69,6 +69,10 @@ struct static_key {
# include <asm/jump_label.h>
# define HAVE_JUMP_LABEL
+#else
+struct static_key {
+ atomic_t enabled;
+};
#endif /* CC_HAVE_ASM_GOTO && CONFIG_JUMP_LABEL */
enum jump_label_type {
@@ -79,6 +83,12 @@ enum jump_label_type {
struct module;
#include <linux/atomic.h>
+
+static inline int static_key_count(struct static_key *key)
+{
+ return atomic_read(&key->enabled);
+}
+
#ifdef HAVE_JUMP_LABEL
#define JUMP_LABEL_TYPE_FALSE_BRANCH 0UL
@@ -134,10 +144,6 @@ extern void jump_label_apply_nops(struct module *mod);
#else /* !HAVE_JUMP_LABEL */
-struct static_key {
- atomic_t enabled;
-};
-
static __always_inline void jump_label_init(void)
{
static_key_initialized = true;
@@ -145,14 +151,14 @@ static __always_inline void jump_label_init(void)
static __always_inline bool static_key_false(struct static_key *key)
{
- if (unlikely(atomic_read(&key->enabled) > 0))
+ if (unlikely(static_key_count(key) > 0))
return true;
return false;
}
static __always_inline bool static_key_true(struct static_key *key)
{
- if (likely(atomic_read(&key->enabled) > 0))
+ if (likely(static_key_count(key) > 0))
return true;
return false;
}
@@ -194,7 +200,7 @@ static inline int jump_label_apply_nops(struct module *mod)
static inline bool static_key_enabled(struct static_key *key)
{
- return (atomic_read(&key->enabled) > 0);
+ return static_key_count(key) > 0;
}
#endif /* _LINUX_JUMP_LABEL_H */
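
Call sites are unchanged by this refactor; all of the !HAVE_JUMP_LABEL helpers now read the key through static_key_count(). A minimal usage sketch, assuming kernel context; my_feature is hypothetical:

#include <linux/jump_label.h>
#include <linux/printk.h>

static struct static_key my_feature = STATIC_KEY_INIT_FALSE;

static void my_hot_path(void)
{
	/* cheap when disabled; flip with static_key_slow_inc(&my_feature) */
	if (static_key_false(&my_feature))
		pr_debug("my_feature enabled\n");
}
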
diff --git a/include/linux/kmemleak.h b/include/linux/kmemleak.h
index 5bb424659c04..057e95971014 100644
--- a/include/linux/kmemleak.h
+++ b/include/linux/kmemleak.h
@@ -30,6 +30,7 @@ extern void kmemleak_alloc_percpu(const void __percpu *ptr, size_t size) __ref;
extern void kmemleak_free(const void *ptr) __ref;
extern void kmemleak_free_part(const void *ptr, size_t size) __ref;
extern void kmemleak_free_percpu(const void __percpu *ptr) __ref;
+extern void kmemleak_update_trace(const void *ptr) __ref;
extern void kmemleak_not_leak(const void *ptr) __ref;
extern void kmemleak_ignore(const void *ptr) __ref;
extern void kmemleak_scan_area(const void *ptr, size_t size, gfp_t gfp) __ref;
@@ -83,6 +84,9 @@ static inline void kmemleak_free_recursive(const void *ptr, unsigned long flags)
static inline void kmemleak_free_percpu(const void __percpu *ptr)
{
}
+static inline void kmemleak_update_trace(const void *ptr)
+{
+}
static inline void kmemleak_not_leak(const void *ptr)
{
}
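
kmemleak_update_trace() re-points the recorded allocation stack at the current call site, which is useful when an object allocated by a helper is handed to its real owner. A hedged sketch, assuming kernel context; struct my_dev and my_attach() are hypothetical:

#include <linux/errno.h>
#include <linux/slab.h>
#include <linux/kmemleak.h>

struct my_dev {
	void *priv;
};

static int my_attach(struct my_dev *dev)
{
	void *buf = kmalloc(64, GFP_KERNEL);

	if (!buf)
		return -ENOMEM;
	dev->priv = buf;
	/* attribute the object to this owner in kmemleak reports */
	kmemleak_update_trace(buf);
	return 0;
}
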
diff --git a/include/linux/mc146818rtc.h b/include/linux/mc146818rtc.h
index 2f4e957af656..433e0c74d643 100644
--- a/include/linux/mc146818rtc.h
+++ b/include/linux/mc146818rtc.h
@@ -31,6 +31,10 @@ struct cmos_rtc_board_info {
void (*wake_on)(struct device *dev);
void (*wake_off)(struct device *dev);
+ u32 flags;
+#define CMOS_RTC_FLAGS_NOFREQ (1 << 0)
+ int address_space;
+
u8 rtc_day_alarm; /* zero, or register index */
u8 rtc_mon_alarm; /* zero, or register index */
u8 rtc_century; /* zero, or register index */
diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index 73dc382e72d8..b660e05b63d4 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -272,6 +272,8 @@ static inline bool memblock_bottom_up(void) { return false; }
#define MEMBLOCK_ALLOC_ANYWHERE (~(phys_addr_t)0)
#define MEMBLOCK_ALLOC_ACCESSIBLE 0
+phys_addr_t __init memblock_alloc_range(phys_addr_t size, phys_addr_t align,
+ phys_addr_t start, phys_addr_t end);
phys_addr_t memblock_alloc_base(phys_addr_t size, phys_addr_t align,
phys_addr_t max_addr);
phys_addr_t __memblock_alloc_base(phys_addr_t size, phys_addr_t align,
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index b569b8be5c5a..c3a53cbb88eb 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -92,6 +92,9 @@ bool __mem_cgroup_same_or_subtree(const struct mem_cgroup *root_memcg,
bool task_in_mem_cgroup(struct task_struct *task,
const struct mem_cgroup *memcg);
+extern bool mem_cgroup_within_guarantee(struct mem_cgroup *memcg,
+ struct mem_cgroup *root);
+
extern struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page);
extern struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p);
@@ -288,6 +291,12 @@ static inline struct lruvec *mem_cgroup_page_lruvec(struct page *page,
return &zone->lruvec;
}
+static inline bool mem_cgroup_within_guarantee(struct mem_cgroup *memcg,
+ struct mem_cgroup *root)
+{
+ return false;
+}
+
static inline struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page)
{
return NULL;
@@ -492,13 +501,9 @@ void __memcg_kmem_uncharge_pages(struct page *page, int order);
int memcg_cache_id(struct mem_cgroup *memcg);
-char *memcg_create_cache_name(struct mem_cgroup *memcg,
- struct kmem_cache *root_cache);
int memcg_alloc_cache_params(struct mem_cgroup *memcg, struct kmem_cache *s,
struct kmem_cache *root_cache);
void memcg_free_cache_params(struct kmem_cache *s);
-void memcg_register_cache(struct kmem_cache *s);
-void memcg_unregister_cache(struct kmem_cache *s);
int memcg_update_cache_size(struct kmem_cache *s, int num_groups);
void memcg_update_array_size(int num_groups);
@@ -506,8 +511,10 @@ void memcg_update_array_size(int num_groups);
struct kmem_cache *
__memcg_kmem_get_cache(struct kmem_cache *cachep, gfp_t gfp);
-void mem_cgroup_destroy_cache(struct kmem_cache *cachep);
-int __kmem_cache_destroy_memcg_children(struct kmem_cache *s);
+int __memcg_charge_slab(struct kmem_cache *cachep, gfp_t gfp, int order);
+void __memcg_uncharge_slab(struct kmem_cache *cachep, int order);
+
+int __memcg_cleanup_cache_params(struct kmem_cache *s);
/**
* memcg_kmem_newpage_charge: verify if a new kmem allocation is allowed.
@@ -534,7 +541,7 @@ memcg_kmem_newpage_charge(gfp_t gfp, struct mem_cgroup **memcg, int order)
* res_counter_charge_nofail, but we hope those allocations are rare,
* and won't be worth the trouble.
*/
- if (!(gfp & __GFP_KMEMCG) || (gfp & __GFP_NOFAIL))
+ if (gfp & __GFP_NOFAIL)
return true;
if (in_interrupt() || (!current->mm) || (current->flags & PF_KTHREAD))
return true;
@@ -583,17 +590,7 @@ memcg_kmem_commit_charge(struct page *page, struct mem_cgroup *memcg, int order)
* @cachep: the original global kmem cache
* @gfp: allocation flags.
*
- * This function assumes that the task allocating, which determines the memcg
- * in the page allocator, belongs to the same cgroup throughout the whole
- * process. Misacounting can happen if the task calls memcg_kmem_get_cache()
- * while belonging to a cgroup, and later on changes. This is considered
- * acceptable, and should only happen upon task migration.
- *
- * Before the cache is created by the memcg core, there is also a possible
- * imbalance: the task belongs to a memcg, but the cache being allocated from
- * is the global cache, since the child cache is not yet guaranteed to be
- * ready. This case is also fine, since in this case the GFP_KMEMCG will not be
- * passed and the page allocator will not attempt any cgroup accounting.
+ * All memory allocated from a per-memcg cache is charged to the owner memcg.
*/
static __always_inline struct kmem_cache *
memcg_kmem_get_cache(struct kmem_cache *cachep, gfp_t gfp)
@@ -648,14 +645,6 @@ static inline void memcg_free_cache_params(struct kmem_cache *s)
{
}
-static inline void memcg_register_cache(struct kmem_cache *s)
-{
-}
-
-static inline void memcg_unregister_cache(struct kmem_cache *s)
-{
-}
-
static inline struct kmem_cache *
memcg_kmem_get_cache(struct kmem_cache *cachep, gfp_t gfp)
{
diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index 4ca3d951fe91..010d125bffbf 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -187,14 +187,8 @@ extern void put_page_bootmem(struct page *page);
extern void get_page_bootmem(unsigned long info, struct page *page,
unsigned long type);
-/*
- * Lock for memory hotplug guarantees 1) all callbacks for memory hotplug
- * notifier will be called under this. 2) offline/online/add/remove memory
- * will not run simultaneously.
- */
-
-void lock_memory_hotplug(void);
-void unlock_memory_hotplug(void);
+void get_online_mems(void);
+void put_online_mems(void);
#else /* ! CONFIG_MEMORY_HOTPLUG */
/*
@@ -232,8 +226,8 @@ static inline int try_online_node(int nid)
return 0;
}
-static inline void lock_memory_hotplug(void) {}
-static inline void unlock_memory_hotplug(void) {}
+static inline void get_online_mems(void) {}
+static inline void put_online_mems(void) {}
#endif /* ! CONFIG_MEMORY_HOTPLUG */
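
The rename follows the get/put_online_cpus() convention: readers hold the hotplug lock briefly around state that offline/online may change, such as present_pages (see the mmzone.h comment further down). A minimal sketch, assuming kernel context; my_stable_present_pages() is hypothetical:

#include <linux/memory_hotplug.h>
#include <linux/mmzone.h>

static unsigned long my_stable_present_pages(struct zone *zone)
{
	unsigned long pages;

	get_online_mems();	/* was lock_memory_hotplug() */
	pages = zone->present_pages;
	put_online_mems();	/* was unlock_memory_hotplug() */

	return pages;
}
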
diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index 84a31ad0b791..a2901c414664 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -5,7 +5,9 @@
#include <linux/mempolicy.h>
#include <linux/migrate_mode.h>
-typedef struct page *new_page_t(struct page *, unsigned long private, int **);
+typedef struct page *new_page_t(struct page *page, unsigned long private,
+ int **reason);
+typedef void free_page_t(struct page *page, unsigned long private);
/*
 * Return values from address_space_operations.migratepage():
@@ -38,7 +40,7 @@ enum migrate_reason {
extern void putback_movable_pages(struct list_head *l);
extern int migrate_page(struct address_space *,
struct page *, struct page *, enum migrate_mode);
-extern int migrate_pages(struct list_head *l, new_page_t x,
+extern int migrate_pages(struct list_head *l, new_page_t new, free_page_t free,
unsigned long private, enum migrate_mode mode, int reason);
extern int migrate_prep(void);
@@ -56,8 +58,9 @@ extern int migrate_page_move_mapping(struct address_space *mapping,
#else
static inline void putback_movable_pages(struct list_head *l) {}
-static inline int migrate_pages(struct list_head *l, new_page_t x,
- unsigned long private, enum migrate_mode mode, int reason)
+static inline int migrate_pages(struct list_head *l, new_page_t new,
+ free_page_t free, unsigned long private, enum migrate_mode mode,
+ int reason)
{ return -ENOSYS; }
static inline int migrate_prep(void) { return -ENOSYS; }
diff --git a/include/linux/mm.h b/include/linux/mm.h
index d6777060449f..b265a3f13f31 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -407,20 +407,25 @@ static inline void compound_unlock_irqrestore(struct page *page,
#endif
}
+static inline struct page *compound_head_by_tail(struct page *tail)
+{
+ struct page *head = tail->first_page;
+
+ /*
+ * page->first_page may be a dangling pointer to an old
+ * compound page, so recheck that it is still a tail
+ * page before returning.
+ */
+ smp_rmb();
+ if (likely(PageTail(tail)))
+ return head;
+ return tail;
+}
+
static inline struct page *compound_head(struct page *page)
{
- if (unlikely(PageTail(page))) {
- struct page *head = page->first_page;
-
- /*
- * page->first_page may be a dangling pointer to an old
- * compound page, so recheck that it is still a tail
- * page before returning.
- */
- smp_rmb();
- if (likely(PageTail(page)))
- return head;
- }
+ if (unlikely(PageTail(page)))
+ return compound_head_by_tail(page);
return page;
}
@@ -1098,10 +1103,18 @@ void unmap_vmas(struct mmu_gather *tlb, struct vm_area_struct *start_vma,
* @pte_entry: if set, called for each non-empty PTE (4th-level) entry
* @pte_hole: if set, called for each hole at all levels
* @hugetlb_entry: if set, called for each hugetlb entry
- * *Caution*: The caller must hold mmap_sem() if @hugetlb_entry
- * is used.
+ * @test_walk: caller specific callback function to determine whether
+ * we walk over the current vma or not. A positive returned
+ * value means "do page table walk over the current vma,"
+ * and a negative one means "abort current page table walk
+ * right now." 0 means "skip the current vma."
+ * @mm: mm_struct representing the target process of page table walk
+ * @vma: vma currently walked
+ * @skip: internal control flag which is set when we skip the lower
+ * level entries.
+ * @private: private data for callbacks' use
*
- * (see walk_page_range for more details)
+ * (see the comment on walk_page_range() for more details)
*/
struct mm_walk {
int (*pgd_entry)(pgd_t *pgd, unsigned long addr,
@@ -1114,15 +1127,19 @@ struct mm_walk {
unsigned long next, struct mm_walk *walk);
int (*pte_hole)(unsigned long addr, unsigned long next,
struct mm_walk *walk);
- int (*hugetlb_entry)(pte_t *pte, unsigned long hmask,
- unsigned long addr, unsigned long next,
- struct mm_walk *walk);
+ int (*hugetlb_entry)(pte_t *pte, unsigned long addr,
+ unsigned long next, struct mm_walk *walk);
+ int (*test_walk)(unsigned long addr, unsigned long next,
+ struct mm_walk *walk);
struct mm_struct *mm;
+ struct vm_area_struct *vma;
+ int skip;
void *private;
};
int walk_page_range(unsigned long addr, unsigned long end,
struct mm_walk *walk);
+int walk_page_vma(struct vm_area_struct *vma, struct mm_walk *walk);
void free_pgd_range(struct mmu_gather *tlb, unsigned long addr,
unsigned long end, unsigned long floor, unsigned long ceiling);
int copy_page_range(struct mm_struct *dst, struct mm_struct *src,
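
The new test_walk callback gates each VMA before its tables are walked: per the kerneldoc above, a positive return walks the VMA, 0 skips it, and a negative value aborts. A hedged sketch using walk_page_vma(), assuming kernel context with mmap_sem held; the my_* names are hypothetical:

#include <linux/mm.h>

static int my_test_walk(unsigned long addr, unsigned long next,
			struct mm_walk *walk)
{
	/* skip PFN mappings, walk everything else */
	return (walk->vma->vm_flags & VM_PFNMAP) ? 0 : 1;
}

static int my_pte_entry(pte_t *pte, unsigned long addr,
			unsigned long next, struct mm_walk *walk)
{
	/* examine *pte here */
	return 0;
}

static int my_walk(struct vm_area_struct *vma)
{
	struct mm_walk walk = {
		.pte_entry	= my_pte_entry,
		.test_walk	= my_test_walk,
		.mm		= vma->vm_mm,
	};

	return walk_page_vma(vma, &walk);
}
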
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 8967e20cbe57..de1627232af0 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -406,7 +406,7 @@ struct mm_struct {
spinlock_t ioctx_lock;
struct kioctx_table __rcu *ioctx_table;
#endif
-#ifdef CONFIG_MM_OWNER
+#ifdef CONFIG_MEMCG
/*
* "owner" points to a task that is regarded as the canonical
* user/owner of this mm. All of the following must be true in
diff --git a/include/linux/mmdebug.h b/include/linux/mmdebug.h
index 2d57efa64cc1..edd82a105220 100644
--- a/include/linux/mmdebug.h
+++ b/include/linux/mmdebug.h
@@ -1,6 +1,8 @@
#ifndef LINUX_MM_DEBUG_H
#define LINUX_MM_DEBUG_H 1
+#include <linux/stringify.h>
+
struct page;
extern void dump_page(struct page *page, const char *reason);
@@ -9,11 +11,20 @@ extern void dump_page_badflags(struct page *page, const char *reason,
#ifdef CONFIG_DEBUG_VM
#define VM_BUG_ON(cond) BUG_ON(cond)
-#define VM_BUG_ON_PAGE(cond, page) \
- do { if (unlikely(cond)) { dump_page(page, NULL); BUG(); } } while (0)
+#define VM_BUG_ON_PAGE(cond, page) \
+ do { \
+ if (unlikely(cond)) { \
+ dump_page(page, "VM_BUG_ON_PAGE(" __stringify(cond)")");\
+ BUG(); \
+ } \
+ } while (0)
+#define VM_WARN_ON(cond) WARN_ON(cond)
+#define VM_WARN_ON_ONCE(cond) WARN_ON_ONCE(cond)
#else
#define VM_BUG_ON(cond) BUILD_BUG_ON_INVALID(cond)
#define VM_BUG_ON_PAGE(cond, page) VM_BUG_ON(cond)
+#define VM_WARN_ON(cond) BUILD_BUG_ON_INVALID(cond)
+#define VM_WARN_ON_ONCE(cond) BUILD_BUG_ON_INVALID(cond)
#endif
#ifdef CONFIG_DEBUG_VIRTUAL
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index fac5509c18f0..6cbd1b6c3d20 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -75,9 +75,18 @@ enum {
extern int page_group_by_mobility_disabled;
-static inline int get_pageblock_migratetype(struct page *page)
+#define NR_MIGRATETYPE_BITS (PB_migrate_end - PB_migrate + 1)
+#define MIGRATETYPE_MASK ((1UL << NR_MIGRATETYPE_BITS) - 1)
+
+#define get_pageblock_migratetype(page) \
+ get_pfnblock_flags_mask(page, page_to_pfn(page), \
+ PB_migrate_end, MIGRATETYPE_MASK)
+
+static inline int get_pfnblock_migratetype(struct page *page, unsigned long pfn)
{
- return get_pageblock_flags_group(page, PB_migrate, PB_migrate_end);
+ BUILD_BUG_ON(PB_migrate_end - PB_migrate != 2);
+ return get_pfnblock_flags_mask(page, pfn, PB_migrate_end,
+ MIGRATETYPE_MASK);
}
struct free_area {
@@ -360,9 +369,10 @@ struct zone {
/* Set to true when the PG_migrate_skip bits should be cleared */
bool compact_blockskip_flush;
- /* pfns where compaction scanners should start */
+ /* pfn where compaction free scanner should start */
unsigned long compact_cached_free_pfn;
- unsigned long compact_cached_migrate_pfn;
+ /* pfn where async and sync compaction migration scanner should start */
+ unsigned long compact_cached_migrate_pfn[2];
#endif
#ifdef CONFIG_MEMORY_HOTPLUG
/* see spanned/present_pages for more description */
@@ -481,9 +491,8 @@ struct zone {
* give them a chance of being in the same cacheline.
*
* Write access to present_pages at runtime should be protected by
- * lock_memory_hotplug()/unlock_memory_hotplug(). Any reader who can't
- * tolerant drift of present_pages should hold memory hotplug lock to
- * get a stable value.
+ * mem_hotplug_begin/end(). Any reader who can't tolerate drift of
+ * present_pages should call get_online_mems() to get a stable value.
*
* Read access to managed_pages should be safe because it's unsigned
* long. Write access to zone->managed_pages and totalram_pages are
@@ -763,10 +772,10 @@ typedef struct pglist_data {
unsigned long node_spanned_pages; /* total size of physical page
range, including holes */
int node_id;
- nodemask_t reclaim_nodes; /* Nodes allowed to reclaim from */
wait_queue_head_t kswapd_wait;
wait_queue_head_t pfmemalloc_wait;
- struct task_struct *kswapd; /* Protected by lock_memory_hotplug() */
+ struct task_struct *kswapd; /* Protected by
+ mem_hotplug_begin/end() */
int kswapd_max_order;
enum zone_type classzone_idx;
#ifdef CONFIG_NUMA_BALANCING
@@ -808,10 +817,10 @@ static inline bool pgdat_is_empty(pg_data_t *pgdat)
extern struct mutex zonelists_mutex;
void build_all_zonelists(pg_data_t *pgdat, struct zone *zone);
void wakeup_kswapd(struct zone *zone, int order, enum zone_type classzone_idx);
-bool zone_watermark_ok(struct zone *z, int order, unsigned long mark,
- int classzone_idx, int alloc_flags);
-bool zone_watermark_ok_safe(struct zone *z, int order, unsigned long mark,
- int classzone_idx, int alloc_flags);
+bool zone_watermark_ok(struct zone *z, unsigned int order,
+ unsigned long mark, int classzone_idx, int alloc_flags);
+bool zone_watermark_ok_safe(struct zone *z, unsigned int order,
+ unsigned long mark, int classzone_idx, int alloc_flags);
enum memmap_context {
MEMMAP_EARLY,
MEMMAP_HOTPLUG,
diff --git a/include/linux/nmi.h b/include/linux/nmi.h
index 6a45fb583ff1..a17ab6398d7c 100644
--- a/include/linux/nmi.h
+++ b/include/linux/nmi.h
@@ -32,15 +32,24 @@ static inline void touch_nmi_watchdog(void)
#ifdef arch_trigger_all_cpu_backtrace
static inline bool trigger_all_cpu_backtrace(void)
{
- arch_trigger_all_cpu_backtrace();
+ arch_trigger_all_cpu_backtrace(true);
return true;
}
+static inline bool trigger_allbutself_cpu_backtrace(void)
+{
+ arch_trigger_all_cpu_backtrace(false);
+ return true;
+}
#else
static inline bool trigger_all_cpu_backtrace(void)
{
return false;
}
+static inline bool trigger_allbutself_cpu_backtrace(void)
+{
+ return false;
+}
#endif
#ifdef CONFIG_LOCKUP_DETECTOR
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index ca71a1d347a0..8304959ad336 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -198,6 +198,7 @@ struct page; /* forward declaration */
TESTPAGEFLAG(Locked, locked)
PAGEFLAG(Error, error) TESTCLEARFLAG(Error, error)
PAGEFLAG(Referenced, referenced) TESTCLEARFLAG(Referenced, referenced)
+ __SETPAGEFLAG(Referenced, referenced)
PAGEFLAG(Dirty, dirty) TESTSCFLAG(Dirty, dirty) __CLEARPAGEFLAG(Dirty, dirty)
PAGEFLAG(LRU, lru) __CLEARPAGEFLAG(LRU, lru)
PAGEFLAG(Active, active) __CLEARPAGEFLAG(Active, active)
@@ -208,6 +209,7 @@ PAGEFLAG(Pinned, pinned) TESTSCFLAG(Pinned, pinned) /* Xen */
PAGEFLAG(SavePinned, savepinned); /* Xen */
PAGEFLAG(Reserved, reserved) __CLEARPAGEFLAG(Reserved, reserved)
PAGEFLAG(SwapBacked, swapbacked) __CLEARPAGEFLAG(SwapBacked, swapbacked)
+ __SETPAGEFLAG(SwapBacked, swapbacked)
__PAGEFLAG(SlobFree, slob_free)
@@ -358,6 +360,9 @@ static inline void ClearPageCompound(struct page *page)
ClearPageHead(page);
}
#endif
+
+#define PG_head_mask ((1L << PG_head))
+
#else
/*
* Reduce page flag use as much as possible by overlapping
diff --git a/include/linux/pageblock-flags.h b/include/linux/pageblock-flags.h
index 2ee8cd2466b5..2baeee12f48e 100644
--- a/include/linux/pageblock-flags.h
+++ b/include/linux/pageblock-flags.h
@@ -30,9 +30,12 @@ enum pageblock_bits {
PB_migrate,
PB_migrate_end = PB_migrate + 3 - 1,
/* 3 bits required for migrate types */
-#ifdef CONFIG_COMPACTION
PB_migrate_skip,/* If set the block is skipped by compaction */
-#endif /* CONFIG_COMPACTION */
+
+ /*
+ * Assume the bits will always align on a word. If this assumption
+ * changes then get/set pageblock needs updating.
+ */
NR_PAGEBLOCK_BITS
};
@@ -62,11 +65,26 @@ extern int pageblock_order;
/* Forward declaration */
struct page;
+unsigned long get_pfnblock_flags_mask(struct page *page,
+ unsigned long pfn,
+ unsigned long end_bitidx,
+ unsigned long mask);
+
+void set_pfnblock_flags_mask(struct page *page,
+ unsigned long flags,
+ unsigned long pfn,
+ unsigned long end_bitidx,
+ unsigned long mask);
+
/* Declarations for getting and setting flags. See mm/page_alloc.c */
-unsigned long get_pageblock_flags_group(struct page *page,
- int start_bitidx, int end_bitidx);
-void set_pageblock_flags_group(struct page *page, unsigned long flags,
- int start_bitidx, int end_bitidx);
+#define get_pageblock_flags_group(page, start_bitidx, end_bitidx) \
+ get_pfnblock_flags_mask(page, page_to_pfn(page), \
+ end_bitidx, \
+ (1 << (end_bitidx - start_bitidx + 1)) - 1)
+#define set_pageblock_flags_group(page, flags, start_bitidx, end_bitidx) \
+ set_pfnblock_flags_mask(page, flags, page_to_pfn(page), \
+ end_bitidx, \
+ (1 << (end_bitidx - start_bitidx + 1)) - 1)
#ifdef CONFIG_COMPACTION
#define get_pageblock_skip(page) \
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 45598f1e9aa3..c74f8bbef87a 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -110,7 +110,7 @@ static inline void mapping_set_gfp_mask(struct address_space *m, gfp_t mask)
#define page_cache_get(page) get_page(page)
#define page_cache_release(page) put_page(page)
-void release_pages(struct page **pages, int nr, int cold);
+void release_pages(struct page **pages, int nr, bool cold);
/*
* speculatively take a reference to a page.
@@ -259,12 +259,109 @@ pgoff_t page_cache_next_hole(struct address_space *mapping,
pgoff_t page_cache_prev_hole(struct address_space *mapping,
pgoff_t index, unsigned long max_scan);
+#define FGP_ACCESSED 0x00000001
+#define FGP_LOCK 0x00000002
+#define FGP_CREAT 0x00000004
+#define FGP_WRITE 0x00000008
+#define FGP_NOFS 0x00000010
+#define FGP_NOWAIT 0x00000020
+
+struct page *pagecache_get_page(struct address_space *mapping, pgoff_t offset,
+ int fgp_flags, gfp_t cache_gfp_mask, gfp_t radix_gfp_mask);
+
+/**
+ * find_get_page - find and get a page reference
+ * @mapping: the address_space to search
+ * @offset: the page index
+ *
+ * Looks up the page cache slot at @mapping & @offset. If there is a
+ * page cache page, it is returned with an increased refcount.
+ *
+ * Otherwise, %NULL is returned.
+ */
+static inline struct page *find_get_page(struct address_space *mapping,
+ pgoff_t offset)
+{
+ return pagecache_get_page(mapping, offset, 0, 0, 0);
+}
+
+static inline struct page *find_get_page_flags(struct address_space *mapping,
+ pgoff_t offset, int fgp_flags)
+{
+ return pagecache_get_page(mapping, offset, fgp_flags, 0, 0);
+}
+
+/**
+ * find_lock_page - locate, pin and lock a pagecache page
+ * @mapping: the address_space to search
+ * @offset: the page index
+ *
+ * Looks up the page cache slot at @mapping & @offset. If there is a
+ * page cache page, it is returned locked and with an increased
+ * refcount.
+ *
+ * Otherwise, %NULL is returned.
+ *
+ * find_lock_page() may sleep.
+ */
+static inline struct page *find_lock_page(struct address_space *mapping,
+ pgoff_t offset)
+{
+ return pagecache_get_page(mapping, offset, FGP_LOCK, 0, 0);
+}
+
+/**
+ * find_or_create_page - locate or add a pagecache page
+ * @mapping: the page's address_space
+ * @offset: the page's index into the mapping
+ * @gfp_mask: page allocation mode
+ *
+ * Looks up the page cache slot at @mapping & @offset. If there is a
+ * page cache page, it is returned locked and with an increased
+ * refcount.
+ *
+ * If the page is not present, a new page is allocated using @gfp_mask
+ * and added to the page cache and the VM's LRU list. The page is
+ * returned locked and with an increased refcount.
+ *
+ * On memory exhaustion, %NULL is returned.
+ *
+ * find_or_create_page() may sleep, even if @gfp_mask specifies an
+ * atomic allocation!
+ */
+static inline struct page *find_or_create_page(struct address_space *mapping,
+ pgoff_t offset, gfp_t gfp_mask)
+{
+ return pagecache_get_page(mapping, offset,
+ FGP_LOCK|FGP_ACCESSED|FGP_CREAT,
+ gfp_mask, gfp_mask & GFP_RECLAIM_MASK);
+}
+
+/**
+ * grab_cache_page_nowait - returns locked page at given index in given cache
+ * @mapping: target address_space
+ * @index: the page index
+ *
+ * Same as grab_cache_page(), but do not wait if the page is unavailable.
+ * This is intended for speculative data generators, where the data can
+ * be regenerated if the page couldn't be grabbed. This routine should
+ * be safe to call while holding the lock for another page.
+ *
+ * Clear __GFP_FS when allocating the page to avoid recursion into the fs
+ * and deadlock against the caller's locked page.
+ */
+static inline struct page *grab_cache_page_nowait(struct address_space *mapping,
+ pgoff_t index)
+{
+ return pagecache_get_page(mapping, index,
+ FGP_LOCK|FGP_CREAT|FGP_NOFS|FGP_NOWAIT,
+ mapping_gfp_mask(mapping),
+ GFP_NOFS);
+}
+
struct page *find_get_entry(struct address_space *mapping, pgoff_t offset);
-struct page *find_get_page(struct address_space *mapping, pgoff_t offset);
struct page *find_lock_entry(struct address_space *mapping, pgoff_t offset);
-struct page *find_lock_page(struct address_space *mapping, pgoff_t offset);
-struct page *find_or_create_page(struct address_space *mapping, pgoff_t index,
- gfp_t gfp_mask);
unsigned find_get_entries(struct address_space *mapping, pgoff_t start,
unsigned int nr_entries, struct page **entries,
pgoff_t *indices);
@@ -287,8 +384,6 @@ static inline struct page *grab_cache_page(struct address_space *mapping,
return find_or_create_page(mapping, index, mapping_gfp_mask(mapping));
}
-extern struct page * grab_cache_page_nowait(struct address_space *mapping,
- pgoff_t index);
extern struct page * read_cache_page(struct address_space *mapping,
pgoff_t index, filler_t *filler, void *data);
extern struct page * read_cache_page_gfp(struct address_space *mapping,
@@ -316,6 +411,34 @@ static inline loff_t page_file_offset(struct page *page)
return ((loff_t)page_file_index(page)) << PAGE_CACHE_SHIFT;
}
+/*
+ * Get the order of a given page in the context of the pagecache which it
+ * belongs to.
+ *
+ * Pagecache unit size is not a fixed value (hugetlbfs is an example), but the
+ * vma_interval_tree and anon_vma_interval_tree APIs assume that indices are in
+ * PAGE_SIZE units. So this function helps us to get normalized indices.
+ *
+ * page_size_order() should be called only for pagecache pages/hugepages and
+ * anonymous pages/hugepages, because pagecache unit size is irrelevant except
+ * for those pages.
+ */
+static inline unsigned int page_size_order(struct page *page)
+{
+ return unlikely(PageHuge(page)) ?
+ compound_order(compound_head(page)) :
+ (PAGE_CACHE_SHIFT - PAGE_SHIFT);
+}
+
+/*
+ * page->index stores the pagecache index, whose unit is not always PAGE_SIZE.
+ * This function converts it into a PAGE_SIZE-based offset.
+ */
+static inline pgoff_t page_pgoff(struct page *page)
+{
+ return page->index << page_size_order(page);
+}
+
extern pgoff_t linear_hugepage_index(struct vm_area_struct *vma,
unsigned long address);
@@ -425,6 +548,8 @@ static inline void wait_on_page_writeback(struct page *page)
extern void end_page_writeback(struct page *page);
void wait_for_stable_page(struct page *page);
+void page_endio(struct page *page, int rw, int err);
+
/*
* Add an arbitrary waiter to a page's wait queue
*/
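
All of the wrappers above funnel into pagecache_get_page(), and the FGP_* flags compose freely. A hedged sketch of a direct call, assuming kernel context: like find_or_create_page() but without FGP_ACCESSED, so the caller controls page aging itself:

#include <linux/pagemap.h>

static struct page *my_grab_page(struct address_space *mapping, pgoff_t offset)
{
	gfp_t gfp = mapping_gfp_mask(mapping);

	/* locked page, created if absent; no mark_page_accessed() side effect */
	return pagecache_get_page(mapping, offset, FGP_LOCK | FGP_CREAT,
				  gfp, gfp & GFP_RECLAIM_MASK);
}
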
diff --git a/include/linux/plist.h b/include/linux/plist.h
index aa0fb390bd29..8b6c970cff6c 100644
--- a/include/linux/plist.h
+++ b/include/linux/plist.h
@@ -98,6 +98,13 @@ struct plist_node {
}
/**
+ * PLIST_HEAD - declare and init plist_head
+ * @head: name for struct plist_head variable
+ */
+#define PLIST_HEAD(head) \
+ struct plist_head head = PLIST_HEAD_INIT(head)
+
+/**
* PLIST_NODE_INIT - static struct plist_node initializer
* @node: struct plist_node variable name
* @__prio: initial node priority
@@ -134,6 +141,8 @@ static inline void plist_node_init(struct plist_node *node, int prio)
extern void plist_add(struct plist_node *node, struct plist_head *head);
extern void plist_del(struct plist_node *node, struct plist_head *head);
+extern void plist_requeue(struct plist_node *node, struct plist_head *head);
+
/**
* plist_for_each - iterate over the plist
* @pos: the type * to use as a loop counter
@@ -143,6 +152,16 @@ extern void plist_del(struct plist_node *node, struct plist_head *head);
list_for_each_entry(pos, &(head)->node_list, node_list)
/**
+ * plist_for_each_continue - continue iteration over the plist
+ * @pos: the type * to use as a loop cursor
+ * @head: the head for your list
+ *
+ * Continue to iterate over plist, continuing after the current position.
+ */
+#define plist_for_each_continue(pos, head) \
+ list_for_each_entry_continue(pos, &(head)->node_list, node_list)
+
+/**
* plist_for_each_safe - iterate safely over a plist of given type
* @pos: the type * to use as a loop counter
* @n: another type * to use as temporary storage
@@ -163,6 +182,18 @@ extern void plist_del(struct plist_node *node, struct plist_head *head);
list_for_each_entry(pos, &(head)->node_list, mem.node_list)
/**
+ * plist_for_each_entry_continue - continue iteration over list of given type
+ * @pos: the type * to use as a loop cursor
+ * @head: the head for your list
+ * @m: the name of the list_struct within the struct
+ *
+ * Continue to iterate over list of given type, continuing after
+ * the current position.
+ */
+#define plist_for_each_entry_continue(pos, head, m) \
+ list_for_each_entry_continue(pos, &(head)->node_list, m.node_list)
+
+/**
* plist_for_each_entry_safe - iterate safely over list of given type
* @pos: the type * to use as a loop counter
* @n: another type * to use as temporary storage
@@ -229,6 +260,20 @@ static inline int plist_node_empty(const struct plist_node *node)
#endif
/**
+ * plist_next - get the next entry in list
+ * @pos: the type * to cursor
+ */
+#define plist_next(pos) \
+ list_next_entry(pos, node_list)
+
+/**
+ * plist_prev - get the prev entry in list
+ * @pos: the type * to cursor
+ */
+#define plist_prev(pos) \
+ list_prev_entry(pos, node_list)
+
+/**
* plist_first - return the first node (and thus, highest priority)
* @head: the &struct plist_head pointer
*
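
The new plist cursor helpers let iteration resume mid-list, and plist_requeue() cycles a node behind peers of equal priority, the pattern the swap changes below rely on for swap_avail_head. A minimal sketch, assuming kernel context; the my_* names are hypothetical:

#include <linux/plist.h>

static PLIST_HEAD(my_head);

/* Continue scanning after @start rather than from the list head. */
static struct plist_node *my_next_after(struct plist_node *start)
{
	struct plist_node *pos = start;

	plist_for_each_continue(pos, &my_head)
		return pos;

	return NULL;
}

/* Cycle @node to the tail of its priority run for fairness. */
static void my_round_robin(struct plist_node *node)
{
	plist_requeue(node, &my_head);
}
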
diff --git a/include/linux/printk.h b/include/linux/printk.h
index 8752f7595b27..37f3a6589c1c 100644
--- a/include/linux/printk.h
+++ b/include/linux/printk.h
@@ -128,9 +128,9 @@ asmlinkage __printf(1, 2) __cold
int printk(const char *fmt, ...);
/*
- * Special printk facility for scheduler use only, _DO_NOT_USE_ !
+ * Special printk facility for scheduler/timekeeping use only, _DO_NOT_USE_ !
*/
-__printf(1, 2) __cold int printk_sched(const char *fmt, ...);
+__printf(1, 2) __cold int printk_deferred(const char *fmt, ...);
/*
* Please don't use printk_ratelimit(), because it shares ratelimiting state
@@ -165,7 +165,7 @@ int printk(const char *s, ...)
return 0;
}
static inline __printf(1, 2) __cold
-int printk_sched(const char *s, ...)
+int printk_deferred(const char *s, ...)
{
return 0;
}
@@ -210,6 +210,12 @@ extern asmlinkage void dump_stack(void) __cold;
#define pr_fmt(fmt) fmt
#endif
+/*
+ * These can be used to print at the various log levels.
+ * All of these will print unconditionally, although note that pr_debug()
+ * and other debug macros are compiled out unless either DEBUG is defined
+ * or CONFIG_DYNAMIC_DEBUG is set.
+ */
#define pr_emerg(fmt, ...) \
printk(KERN_EMERG pr_fmt(fmt), ##__VA_ARGS__)
#define pr_alert(fmt, ...) \
@@ -266,9 +272,20 @@ extern asmlinkage void dump_stack(void) __cold;
printk(fmt, ##__VA_ARGS__); \
} \
})
+#define printk_deferred_once(fmt, ...) \
+({ \
+ static bool __print_once __read_mostly; \
+ \
+ if (!__print_once) { \
+ __print_once = true; \
+ printk_deferred(fmt, ##__VA_ARGS__); \
+ } \
+})
#else
#define printk_once(fmt, ...) \
no_printk(fmt, ##__VA_ARGS__)
+#define printk_deferred_once(fmt, ...) \
+ no_printk(fmt, ##__VA_ARGS__)
#endif
#define pr_emerg_once(fmt, ...) \
diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
index 608e60a74c3c..9d117f61d976 100644
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -44,6 +44,10 @@ extern int remove_proc_subtree(const char *, struct proc_dir_entry *);
#else /* CONFIG_PROC_FS */
+static inline void proc_root_init(void)
+{
+}
+
static inline void proc_flush_task(struct task_struct *task)
{
}
diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h
index 07d0df6bf768..077904c8b70d 100644
--- a/include/linux/ptrace.h
+++ b/include/linux/ptrace.h
@@ -5,6 +5,7 @@
#include <linux/sched.h> /* For struct task_struct. */
#include <linux/err.h> /* for IS_ERR_VALUE */
#include <linux/bug.h> /* For BUG_ON. */
+#include <linux/pid_namespace.h> /* For task_active_pid_ns. */
#include <uapi/linux/ptrace.h>
/*
@@ -129,6 +130,37 @@ static inline void ptrace_event(int event, unsigned long message)
}
/**
+ * ptrace_event_pid - possibly stop for a ptrace event notification
+ * @event: %PTRACE_EVENT_* value to report
+ * @pid: process identifier for %PTRACE_GETEVENTMSG to return
+ *
+ * Check whether @event is enabled and, if so, report @event and @pid
+ * to the ptrace parent. @pid is reported as the pid_t seen from the
+ * ptrace parent's pid namespace.
+ *
+ * Called without locks.
+ */
+static inline void ptrace_event_pid(int event, struct pid *pid)
+{
+ /*
+ * FIXME: There's a potential race if a ptracer in a different pid
+ * namespace than parent attaches between computing message below and
+ * when we acquire tasklist_lock in ptrace_stop(). If this happens,
+ * the ptracer will get a bogus pid from PTRACE_GETEVENTMSG.
+ */
+ unsigned long message = 0;
+ struct pid_namespace *ns;
+
+ rcu_read_lock();
+ ns = task_active_pid_ns(rcu_dereference(current->parent));
+ if (ns)
+ message = pid_nr_ns(pid, ns);
+ rcu_read_unlock();
+
+ ptrace_event(event, message);
+}
+
+/**
* ptrace_init_task - initialize ptrace state for a new child
* @child: new child task
* @ptrace: true if child should be ptrace'd by parent's tracer
diff --git a/include/linux/res_counter.h b/include/linux/res_counter.h
index 56b7bc32db4f..b810855024f9 100644
--- a/include/linux/res_counter.h
+++ b/include/linux/res_counter.h
@@ -40,6 +40,11 @@ struct res_counter {
*/
unsigned long long soft_limit;
/*
+ * the limit under which the usage cannot be pushed
+ * due to external pressure.
+ */
+ unsigned long long low_limit;
+ /*
* the number of unsuccessful attempts to consume the resource
*/
unsigned long long failcnt;
@@ -88,6 +93,7 @@ enum {
RES_LIMIT,
RES_FAILCNT,
RES_SOFT_LIMIT,
+ RES_LOW_LIMIT,
};
/*
@@ -175,6 +181,28 @@ res_counter_soft_limit_excess(struct res_counter *cnt)
return excess;
}
+/**
+ * res_counter_low_limit_excess - get the excess of usage over the low limit
+ * @cnt: The counter
+ *
+ * Returns 0 if usage is less than or equal to the low limit;
+ * otherwise, the difference between usage and the low limit.
+ */
+static inline unsigned long long
+res_counter_low_limit_excess(struct res_counter *cnt)
+{
+ unsigned long long excess;
+ unsigned long flags;
+
+ spin_lock_irqsave(&cnt->lock, flags);
+ if (cnt->usage <= cnt->low_limit)
+ excess = 0;
+ else
+ excess = cnt->usage - cnt->low_limit;
+ spin_unlock_irqrestore(&cnt->lock, flags);
+ return excess;
+}
+
static inline void res_counter_reset_max(struct res_counter *cnt)
{
unsigned long flags;
@@ -220,4 +248,16 @@ res_counter_set_soft_limit(struct res_counter *cnt,
return 0;
}
+static inline int
+res_counter_set_low_limit(struct res_counter *cnt,
+ unsigned long long low_limit)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&cnt->lock, flags);
+ cnt->low_limit = low_limit;
+ spin_unlock_irqrestore(&cnt->lock, flags);
+ return 0;
+}
+
#endif
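
The low limit complements the soft limit: usage below it is meant to be protected from external reclaim pressure. A hedged sketch of the new accessors, assuming kernel context; the my_* helpers are hypothetical:

#include <linux/types.h>
#include <linux/res_counter.h>

static void my_set_guarantee(struct res_counter *cnt, unsigned long long low)
{
	res_counter_set_low_limit(cnt, low);
}

static bool my_within_guarantee(struct res_counter *cnt)
{
	/* zero excess means usage is at or below the low limit */
	return res_counter_low_limit_excess(cnt) == 0;
}
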
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index b66c2110cb1f..9be55c7617da 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -183,14 +183,10 @@ static inline void page_dup_rmap(struct page *page)
*/
int page_referenced(struct page *, int is_locked,
struct mem_cgroup *memcg, unsigned long *vm_flags);
-int page_referenced_one(struct page *, struct vm_area_struct *,
- unsigned long address, void *arg);
#define TTU_ACTION(x) ((x) & TTU_ACTION_MASK)
int try_to_unmap(struct page *, enum ttu_flags flags);
-int try_to_unmap_one(struct page *, struct vm_area_struct *,
- unsigned long address, void *arg);
/*
* Called from mm/filemap_xip.c to unmap empty zero page
diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h
index a964f7285600..4b152c81c5fa 100644
--- a/include/linux/scatterlist.h
+++ b/include/linux/scatterlist.h
@@ -136,7 +136,7 @@ static inline void sg_set_buf(struct scatterlist *sg, const void *buf,
static inline void sg_chain(struct scatterlist *prv, unsigned int prv_nents,
struct scatterlist *sgl)
{
-#ifndef ARCH_HAS_SG_CHAIN
+#ifndef CONFIG_ARCH_HAS_SG_CHAIN
BUG();
#endif
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 59002c5dd9f5..9112646911bc 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -137,12 +137,6 @@ struct filename;
#define VMACACHE_MASK (VMACACHE_SIZE - 1)
/*
- * List of flags we want to share for kernel threads,
- * if only because they are not used by them anyway.
- */
-#define CLONE_KERNEL (CLONE_FS | CLONE_FILES | CLONE_SIGHAND)
-
-/*
* These are the constant used to fake the fixed-point load-average
* counting. Some notes:
* - 11 bit fractions expand to 22 bits by the multiplies: this gives
@@ -745,7 +739,6 @@ static inline int signal_group_exit(const struct signal_struct *sig)
struct user_struct {
atomic_t __count; /* reference count */
atomic_t processes; /* How many processes does this user have? */
- atomic_t files; /* How many open files does this user have? */
atomic_t sigpending; /* How many pending signals does this user have? */
#ifdef CONFIG_INOTIFY_USER
atomic_t inotify_watches; /* How many inotify watches does this user have? */
@@ -2421,9 +2414,6 @@ extern void flush_itimer_signals(void);
extern void do_group_exit(int);
-extern int allow_signal(int);
-extern int disallow_signal(int);
-
extern int do_execve(struct filename *,
const char __user * const __user *,
const char __user * const __user *);
@@ -2967,7 +2957,7 @@ static inline void inc_syscw(struct task_struct *tsk)
#define TASK_SIZE_OF(tsk) TASK_SIZE
#endif
-#ifdef CONFIG_MM_OWNER
+#ifdef CONFIG_MEMCG
extern void mm_update_next_owner(struct mm_struct *mm);
extern void mm_init_owner(struct mm_struct *mm, struct task_struct *p);
#else
@@ -2978,7 +2968,7 @@ static inline void mm_update_next_owner(struct mm_struct *mm)
static inline void mm_init_owner(struct mm_struct *mm, struct task_struct *p)
{
}
-#endif /* CONFIG_MM_OWNER */
+#endif /* CONFIG_MEMCG */
static inline unsigned long task_rlimit(const struct task_struct *tsk,
unsigned int limit)
diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h
index 8045a554cafb..596a0e007c62 100644
--- a/include/linux/sched/sysctl.h
+++ b/include/linux/sched/sysctl.h
@@ -25,6 +25,10 @@ enum { sysctl_hung_task_timeout_secs = 0 };
* Because the kernel adds some informative sections to a image of program at
* generating coredump, we need some margin. The number of extra sections is
* 1-3 now and depends on arch. We use "5" as safe margin, here.
+ *
+ * ELF extended numbering allows more than 65535 sections, so the 16-bit bound
+ * is not a hard limit any more, although some userspace tools may be surprised
+ * by that.
*/
#define MAPCOUNT_ELF_CORE_MARGIN (5)
#define DEFAULT_MAX_MAP_COUNT (USHRT_MAX - MAPCOUNT_ELF_CORE_MARGIN)
diff --git a/include/linux/shm.h b/include/linux/shm.h
index 1e2cd2e6b540..57d77709fbe2 100644
--- a/include/linux/shm.h
+++ b/include/linux/shm.h
@@ -3,9 +3,8 @@
#include <asm/page.h>
#include <uapi/linux/shm.h>
-
-#define SHMALL (SHMMAX/PAGE_SIZE*(SHMMNI/16)) /* max shm system wide (pages) */
#include <asm/shmparam.h>
+
struct shmid_kernel /* private to the kernel */
{
struct kern_ipc_perm shm_perm;
diff --git a/include/linux/signal.h b/include/linux/signal.h
index 2ac423bdb676..c9e65360c49a 100644
--- a/include/linux/signal.h
+++ b/include/linux/signal.h
@@ -63,11 +63,6 @@ static inline int sigismember(sigset_t *set, int _sig)
return 1 & (set->sig[sig / _NSIG_BPW] >> (sig % _NSIG_BPW));
}
-static inline int sigfindinword(unsigned long word)
-{
- return ffz(~word);
-}
-
#endif /* __HAVE_ARCH_SIG_BITOPS */
static inline int sigisemptyset(sigset_t *set)
@@ -289,6 +284,22 @@ extern int get_signal_to_deliver(siginfo_t *info, struct k_sigaction *return_ka,
extern void signal_setup_done(int failed, struct ksignal *ksig, int stepping);
extern void signal_delivered(int sig, siginfo_t *info, struct k_sigaction *ka, struct pt_regs *regs, int stepping);
extern void exit_signals(struct task_struct *tsk);
+extern void kernel_sigaction(int, __sighandler_t);
+
+static inline void allow_signal(int sig)
+{
+ /*
+ * Kernel threads handle their own signals. Let the signal code
+ * know the signal will be handled, so that it doesn't get converted
+ * to SIGKILL or silently dropped.
+ */
+ kernel_sigaction(sig, (__force __sighandler_t)2);
+}
+
+static inline void disallow_signal(int sig)
+{
+ kernel_sigaction(sig, SIG_IGN);
+}
/*
* Eventually that'll replace get_signal_to_deliver(); macro for now,
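
allow_signal()/disallow_signal() are now inline wrappers around the new kernel_sigaction(); the typical consumer is a kernel thread that opts in to a signal. A minimal sketch, assuming kernel context; my_thread_fn() is hypothetical:

#include <linux/kthread.h>
#include <linux/sched.h>
#include <linux/signal.h>

static int my_thread_fn(void *data)
{
	allow_signal(SIGTERM);	/* kthreads drop signals by default */

	while (!kthread_should_stop()) {
		if (signal_pending(current))
			break;	/* SIGTERM received */
		schedule_timeout_interruptible(HZ);
	}

	disallow_signal(SIGTERM);
	return 0;
}
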
diff --git a/include/linux/slab.h b/include/linux/slab.h
index 307bfbe62387..1d9abb7d22a0 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -116,7 +116,9 @@ struct kmem_cache *kmem_cache_create(const char *, size_t, size_t,
unsigned long,
void (*)(void *));
#ifdef CONFIG_MEMCG_KMEM
-void kmem_cache_create_memcg(struct mem_cgroup *, struct kmem_cache *);
+struct kmem_cache *memcg_create_kmem_cache(struct mem_cgroup *,
+ struct kmem_cache *,
+ const char *);
#endif
void kmem_cache_destroy(struct kmem_cache *);
int kmem_cache_shrink(struct kmem_cache *);
@@ -369,16 +371,7 @@ kmem_cache_alloc_node_trace(struct kmem_cache *s,
#include <linux/slub_def.h>
#endif
-static __always_inline void *
-kmalloc_order(size_t size, gfp_t flags, unsigned int order)
-{
- void *ret;
-
- flags |= (__GFP_COMP | __GFP_KMEMCG);
- ret = (void *) __get_free_pages(flags, order);
- kmemleak_alloc(ret, size, 1, flags);
- return ret;
-}
+extern void *kmalloc_order(size_t size, gfp_t flags, unsigned int order);
#ifdef CONFIG_TRACING
extern void *kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order);
@@ -533,10 +526,7 @@ static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node)
* @memcg: pointer to the memcg this cache belongs to
* @list: list_head for the list of all caches in this memcg
* @root_cache: pointer to the global, root cache, this cache was derived from
- * @dead: set to true after the memcg dies; the cache may still be around.
* @nr_pages: number of pages that belongs to this cache.
- * @destroy: worker to be called whenever we are ready, or believe we may be
- * ready, to destroy this cache.
*/
struct memcg_cache_params {
bool is_root_cache;
@@ -549,9 +539,7 @@ struct memcg_cache_params {
struct mem_cgroup *memcg;
struct list_head list;
struct kmem_cache *root_cache;
- bool dead;
atomic_t nr_pages;
- struct work_struct destroy;
};
};
};
diff --git a/include/linux/string.h b/include/linux/string.h
index ac889c5ea11b..f29f9a0b7265 100644
--- a/include/linux/string.h
+++ b/include/linux/string.h
@@ -114,6 +114,7 @@ void *memchr_inv(const void *s, int c, size_t n);
extern char *kstrdup(const char *s, gfp_t gfp);
extern char *kstrndup(const char *s, size_t len, gfp_t gfp);
+extern char *kstrimdup(const char *s, gfp_t gfp);
extern void *kmemdup(const void *src, size_t len, gfp_t gfp);
extern char **argv_split(gfp_t gfp, const char *str, int *argcp);
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 350711560753..4348d95e571f 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -214,8 +214,9 @@ struct percpu_cluster {
struct swap_info_struct {
unsigned long flags; /* SWP_USED etc: see above */
signed short prio; /* swap priority of this type */
+ struct plist_node list; /* entry in swap_active_head */
+ struct plist_node avail_list; /* entry in swap_avail_head */
signed char type; /* strange name for an index */
- signed char next; /* next type on the swap list */
unsigned int max; /* extent of the swap_map */
unsigned char *swap_map; /* vmalloc'ed array of usage counts */
struct swap_cluster_info *cluster_info; /* cluster info. Only for SSD */
@@ -255,11 +256,6 @@ struct swap_info_struct {
struct swap_cluster_info discard_cluster_tail; /* list tail of discard clusters */
};
-struct swap_list_t {
- int head; /* head of priority-ordered swapfile list */
- int next; /* swapfile to be used next */
-};
-
/* linux/mm/workingset.c */
void *workingset_eviction(struct address_space *mapping, struct page *page);
bool workingset_refault(void *shadow);
@@ -308,12 +304,14 @@ extern unsigned long nr_free_pagecache_pages(void);
/* linux/mm/swap.c */
-extern void __lru_cache_add(struct page *);
extern void lru_cache_add(struct page *);
+extern void lru_cache_add_anon(struct page *page);
+extern void lru_cache_add_file(struct page *page);
extern void lru_add_page_tail(struct page *page, struct page *page_tail,
struct lruvec *lruvec, struct list_head *head);
extern void activate_page(struct page *);
extern void mark_page_accessed(struct page *);
+extern void init_page_accessed(struct page *page);
extern void lru_add_drain(void);
extern void lru_add_drain_cpu(int cpu);
extern void lru_add_drain_all(void);
@@ -323,22 +321,6 @@ extern void swap_setup(void);
extern void add_page_to_unevictable_list(struct page *page);
-/**
- * lru_cache_add: add a page to the page lists
- * @page: the page to add
- */
-static inline void lru_cache_add_anon(struct page *page)
-{
- ClearPageActive(page);
- __lru_cache_add(page);
-}
-
-static inline void lru_cache_add_file(struct page *page)
-{
- ClearPageActive(page);
- __lru_cache_add(page);
-}
-
/* linux/mm/vmscan.c */
extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
gfp_t gfp_mask, nodemask_t *mask);
@@ -496,7 +478,7 @@ mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent, bool swapout)
#define free_page_and_swap_cache(page) \
page_cache_release(page)
#define free_pages_and_swap_cache(pages, nr) \
- release_pages((pages), (nr), 0);
+ release_pages((pages), (nr), false);
static inline void show_swap_cache_info(void)
{
diff --git a/include/linux/swapfile.h b/include/linux/swapfile.h
index e282624e8c10..388293a91e8c 100644
--- a/include/linux/swapfile.h
+++ b/include/linux/swapfile.h
@@ -6,7 +6,7 @@
* want to expose them to the dozens of source files that include swap.h
*/
extern spinlock_t swap_lock;
-extern struct swap_list_t swap_list;
+extern struct plist_head swap_active_head;
extern struct swap_info_struct *swap_info[];
extern int try_to_unuse(unsigned int, bool, unsigned long);
diff --git a/include/linux/swapops.h b/include/linux/swapops.h
index c0f75261a728..6adfb7bfbf44 100644
--- a/include/linux/swapops.h
+++ b/include/linux/swapops.h
@@ -54,7 +54,7 @@ static inline pgoff_t swp_offset(swp_entry_t entry)
/* check whether a pte points to a swap entry */
static inline int is_swap_pte(pte_t pte)
{
- return !pte_none(pte) && !pte_present(pte) && !pte_file(pte);
+ return !pte_none(pte) && !pte_present_nonuma(pte) && !pte_file(pte);
}
#endif
diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index a5ffd32642fd..e7a018eaf3a2 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -116,4 +116,6 @@ static inline void swiotlb_free(void) { }
#endif
extern void swiotlb_print_info(void);
+extern int is_swiotlb_buffer(phys_addr_t paddr);
+
#endif /* __LINUX_SWIOTLB_H */
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index a4a0588c5397..b0881a0ed322 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -711,7 +711,7 @@ asmlinkage long sys_keyctl(int cmd, unsigned long arg2, unsigned long arg3,
asmlinkage long sys_ioprio_set(int which, int who, int ioprio);
asmlinkage long sys_ioprio_get(int which, int who);
-asmlinkage long sys_set_mempolicy(int mode, unsigned long __user *nmask,
+asmlinkage long sys_set_mempolicy(int mode, const unsigned long __user *nmask,
unsigned long maxnode);
asmlinkage long sys_migrate_pages(pid_t pid, unsigned long maxnode,
const unsigned long __user *from,
@@ -723,7 +723,7 @@ asmlinkage long sys_move_pages(pid_t pid, unsigned long nr_pages,
int flags);
asmlinkage long sys_mbind(unsigned long start, unsigned long len,
unsigned long mode,
- unsigned long __user *nmask,
+ const unsigned long __user *nmask,
unsigned long maxnode,
unsigned flags);
asmlinkage long sys_get_mempolicy(int __user *policy,
diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h
index cb0cec94fda3..ff307b548ed3 100644
--- a/include/linux/thread_info.h
+++ b/include/linux/thread_info.h
@@ -61,8 +61,6 @@ extern long do_no_restart_syscall(struct restart_block *parm);
# define THREADINFO_GFP (GFP_KERNEL | __GFP_NOTRACK)
#endif
-#define THREADINFO_GFP_ACCOUNTED (THREADINFO_GFP | __GFP_KMEMCG)
-
/*
* flag set/clear/test wrappers
* - pass TIF_xxxx constants to these functions
diff --git a/include/linux/topology.h b/include/linux/topology.h
index 973671ff9e7d..dda6ee521e74 100644
--- a/include/linux/topology.h
+++ b/include/linux/topology.h
@@ -58,7 +58,8 @@ int arch_update_cpu_topology(void);
/*
* If the distance between nodes in a system is larger than RECLAIM_DISTANCE
* (in whatever arch specific measurement units returned by node_distance())
- * then switch on zone reclaim on boot.
+ * and zone_reclaim_mode is enabled then the VM will only call zone_reclaim()
+ * on nodes within this distance.
*/
#define RECLAIM_DISTANCE 30
#endif
diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h
index 486c3972c0be..ced92345c963 100644
--- a/include/linux/vm_event_item.h
+++ b/include/linux/vm_event_item.h
@@ -80,6 +80,10 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
NR_TLB_LOCAL_FLUSH_ALL,
NR_TLB_LOCAL_FLUSH_ONE,
#endif /* CONFIG_DEBUG_TLBFLUSH */
+#ifdef CONFIG_DEBUG_VM_VMACACHE
+ VMACACHE_FIND_CALLS,
+ VMACACHE_FIND_HITS,
+#endif
NR_VM_EVENT_ITEMS
};
diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
index 45c9cd1daf7a..82e7db7f7100 100644
--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -95,6 +95,12 @@ static inline void vm_events_fold_cpu(int cpu)
#define count_vm_tlb_events(x, y) do { (void)(y); } while (0)
#endif
+#ifdef CONFIG_DEBUG_VM_VMACACHE
+#define count_vm_vmacache_event(x) count_vm_event(x)
+#else
+#define count_vm_vmacache_event(x) do {} while (0)
+#endif
+
#define __count_zone_vm_events(item, zone, delta) \
__count_vm_events(item##_NORMAL - ZONE_NORMAL + \
zone_idx(zone), delta)
diff --git a/include/linux/zbud.h b/include/linux/zbud.h
index 2571a5cfa5fc..13af0d450bf6 100644
--- a/include/linux/zbud.h
+++ b/include/linux/zbud.h
@@ -11,7 +11,7 @@ struct zbud_ops {
struct zbud_pool *zbud_create_pool(gfp_t gfp, struct zbud_ops *ops);
void zbud_destroy_pool(struct zbud_pool *pool);
-int zbud_alloc(struct zbud_pool *pool, int size, gfp_t gfp,
+int zbud_alloc(struct zbud_pool *pool, unsigned int size, gfp_t gfp,
unsigned long *handle);
void zbud_free(struct zbud_pool *pool, unsigned long handle);
int zbud_reclaim_page(struct zbud_pool *pool, unsigned int retries);
diff --git a/include/scsi/scsi.h b/include/scsi/scsi.h
index 0a4edfe8af51..d34cf2df093b 100644
--- a/include/scsi/scsi.h
+++ b/include/scsi/scsi.h
@@ -31,7 +31,7 @@ enum scsi_timeouts {
* Like SCSI_MAX_SG_SEGMENTS, but for archs that have sg chaining. This limit
* is totally arbitrary, a setting of 2048 will get you at least 8mb ios.
*/
-#ifdef ARCH_HAS_SG_CHAIN
+#ifdef CONFIG_ARCH_HAS_SG_CHAIN
#define SCSI_MAX_SG_CHAIN_SEGMENTS 2048
#else
#define SCSI_MAX_SG_CHAIN_SEGMENTS SCSI_MAX_SG_SEGMENTS
diff --git a/include/trace/events/compaction.h b/include/trace/events/compaction.h
index 06f544ef2f6f..c6814b917bdf 100644
--- a/include/trace/events/compaction.h
+++ b/include/trace/events/compaction.h
@@ -5,6 +5,7 @@
#define _TRACE_COMPACTION_H
#include <linux/types.h>
+#include <linux/list.h>
#include <linux/tracepoint.h>
#include <trace/events/gfpflags.h>
@@ -47,10 +48,11 @@ DEFINE_EVENT(mm_compaction_isolate_template, mm_compaction_isolate_freepages,
TRACE_EVENT(mm_compaction_migratepages,
- TP_PROTO(unsigned long nr_migrated,
- unsigned long nr_failed),
+ TP_PROTO(unsigned long nr_all,
+ int migrate_rc,
+ struct list_head *migratepages),
- TP_ARGS(nr_migrated, nr_failed),
+ TP_ARGS(nr_all, migrate_rc, migratepages),
TP_STRUCT__entry(
__field(unsigned long, nr_migrated)
@@ -58,7 +60,22 @@ TRACE_EVENT(mm_compaction_migratepages,
),
TP_fast_assign(
- __entry->nr_migrated = nr_migrated;
+ unsigned long nr_failed = 0;
+ struct list_head *page_lru;
+
+ /*
+ * migrate_pages() returns either a non-negative number
+ * with the number of pages that failed migration, or an
+ * error code, in which case we need to count the remaining
+ * pages manually
+ */
+ if (migrate_rc >= 0)
+ nr_failed = migrate_rc;
+ else
+ list_for_each(page_lru, migratepages)
+ nr_failed++;
+
+ __entry->nr_migrated = nr_all - nr_failed;
__entry->nr_failed = nr_failed;
),
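
The reworked prototype moves the failure accounting into the tracepoint itself: callers now hand over the raw migrate_pages() return value together with the leftover page list. A hedged sketch of the expected call-site shape in mm/compaction.c (the exact migrate_pages() arguments vary between kernel versions):

        nr_all = cc->nr_migratepages;
        migrate_rc = migrate_pages(&cc->migratepages, compaction_alloc,
                                   (unsigned long)cc, MIGRATE_SYNC_LIGHT,
                                   MR_COMPACTION);
        trace_mm_compaction_migratepages(nr_all, migrate_rc,
                                         &cc->migratepages);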
diff --git a/include/trace/events/gfpflags.h b/include/trace/events/gfpflags.h
index 1eddbf1557f2..d6fd8e5b14b7 100644
--- a/include/trace/events/gfpflags.h
+++ b/include/trace/events/gfpflags.h
@@ -34,7 +34,6 @@
{(unsigned long)__GFP_HARDWALL, "GFP_HARDWALL"}, \
{(unsigned long)__GFP_THISNODE, "GFP_THISNODE"}, \
{(unsigned long)__GFP_RECLAIMABLE, "GFP_RECLAIMABLE"}, \
- {(unsigned long)__GFP_KMEMCG, "GFP_KMEMCG"}, \
{(unsigned long)__GFP_MOVABLE, "GFP_MOVABLE"}, \
{(unsigned long)__GFP_NOTRACK, "GFP_NOTRACK"}, \
{(unsigned long)__GFP_NO_KSWAPD, "GFP_NO_KSWAPD"}, \
diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h
index 132a985aba8b..69590b6ffc09 100644
--- a/include/trace/events/vmscan.h
+++ b/include/trace/events/vmscan.h
@@ -191,6 +191,7 @@ TRACE_EVENT(mm_shrink_slab_start,
TP_STRUCT__entry(
__field(struct shrinker *, shr)
__field(void *, shrink)
+ __field(int, nid)
__field(long, nr_objects_to_shrink)
__field(gfp_t, gfp_flags)
__field(unsigned long, pgs_scanned)
@@ -203,6 +204,7 @@ TRACE_EVENT(mm_shrink_slab_start,
TP_fast_assign(
__entry->shr = shr;
__entry->shrink = shr->scan_objects;
+ __entry->nid = sc->nid;
__entry->nr_objects_to_shrink = nr_objects_to_shrink;
__entry->gfp_flags = sc->gfp_mask;
__entry->pgs_scanned = pgs_scanned;
@@ -212,9 +214,10 @@ TRACE_EVENT(mm_shrink_slab_start,
__entry->total_scan = total_scan;
),
- TP_printk("%pF %p: objects to shrink %ld gfp_flags %s pgs_scanned %ld lru_pgs %ld cache items %ld delta %lld total_scan %ld",
+ TP_printk("%pF %p: nid: %d objects to shrink %ld gfp_flags %s pgs_scanned %ld lru_pgs %ld cache items %ld delta %lld total_scan %ld",
__entry->shrink,
__entry->shr,
+ __entry->nid,
__entry->nr_objects_to_shrink,
show_gfp_flags(__entry->gfp_flags),
__entry->pgs_scanned,
@@ -225,13 +228,15 @@ TRACE_EVENT(mm_shrink_slab_start,
);
TRACE_EVENT(mm_shrink_slab_end,
- TP_PROTO(struct shrinker *shr, int shrinker_retval,
- long unused_scan_cnt, long new_scan_cnt),
+ TP_PROTO(struct shrinker *shr, int nid, int shrinker_retval,
+ long unused_scan_cnt, long new_scan_cnt, long total_scan),
- TP_ARGS(shr, shrinker_retval, unused_scan_cnt, new_scan_cnt),
+ TP_ARGS(shr, nid, shrinker_retval, unused_scan_cnt, new_scan_cnt,
+ total_scan),
TP_STRUCT__entry(
__field(struct shrinker *, shr)
+ __field(int, nid)
__field(void *, shrink)
__field(long, unused_scan)
__field(long, new_scan)
@@ -241,16 +246,18 @@ TRACE_EVENT(mm_shrink_slab_end,
TP_fast_assign(
__entry->shr = shr;
+ __entry->nid = nid;
__entry->shrink = shr->scan_objects;
__entry->unused_scan = unused_scan_cnt;
__entry->new_scan = new_scan_cnt;
__entry->retval = shrinker_retval;
- __entry->total_scan = new_scan_cnt - unused_scan_cnt;
+ __entry->total_scan = total_scan;
),
- TP_printk("%pF %p: unused scan count %ld new scan count %ld total_scan %ld last shrinker return val %d",
+ TP_printk("%pF %p: nid: %d unused scan count %ld new scan count %ld total_scan %ld last shrinker return val %d",
__entry->shrink,
__entry->shr,
+ __entry->nid,
__entry->unused_scan,
__entry->new_scan,
__entry->total_scan,
diff --git a/include/uapi/linux/shm.h b/include/uapi/linux/shm.h
index 78b69413f582..3400b6eb1fbe 100644
--- a/include/uapi/linux/shm.h
+++ b/include/uapi/linux/shm.h
@@ -8,19 +8,20 @@
#endif
/*
- * SHMMAX, SHMMNI and SHMALL are upper limits are defaults which can
- * be increased by sysctl
+ * SHMMNI, SHMMAX and SHMALL are the default upper limits which can be
+ * modified by sysctl. Both SHMMAX and SHMALL have their default values
+ * set as large as they can be without making it easy for userspace to
+ * overflow the values when adjusting the limits. There is really
+ * nothing more the kernel can do to prevent that. It is not advised to
+ * make them any larger. These limits are suitable for both 32 and
+ * 64-bit systems.
*/
-
-#define SHMMAX 0x2000000 /* max shared seg size (bytes) */
#define SHMMIN 1 /* min shared seg size (bytes) */
#define SHMMNI 4096 /* max num of segs system wide */
-#ifndef __KERNEL__
-#define SHMALL (SHMMAX/getpagesize()*(SHMMNI/16))
-#endif
+#define SHMMAX (ULONG_MAX - (1UL << 24)) /* max shared seg size (bytes) */
+#define SHMALL (ULONG_MAX - (1UL << 24)) /* max shm system wide (pages) */
#define SHMSEG SHMMNI /* max shared segs per process */
-
/* Obsolete, used only for backwards compatibility and libc5 compiles */
struct shmid_ds {
struct ipc_perm shm_perm; /* operation perms */
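
The overflow the new comment alludes to is the read-modify-write tuning pattern below; the (1UL << 24) of headroom in the defaults keeps such adjustments from wrapping. A hedged userspace-style illustration (read_sysctl_ulong()/write_sysctl_ulong() are hypothetical helpers):

static void grow_shmall(void)
{
        unsigned long shmall = read_sysctl_ulong("kernel.shmall");

        /*
         * Were the default exactly ULONG_MAX, this innocuous bump
         * would wrap past zero; the (1UL << 24) headroom in the new
         * defaults is what keeps patterns like this safe.
         */
        shmall += 1UL << 20;
        write_sysctl_ulong("kernel.shmall", shmall);
}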
diff --git a/init/Kconfig b/init/Kconfig
index af75a277eead..d0af1da2f469 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -933,7 +933,6 @@ config RESOURCE_COUNTERS
config MEMCG
bool "Memory Resource Controller for Control Groups"
depends on RESOURCE_COUNTERS
- select MM_OWNER
select EVENTFD
help
Provides a memory resource controller that manages both anonymous
@@ -951,9 +950,6 @@ config MEMCG
disable memory resource controller and you can avoid overheads.
(and lose benefits of memory resource controller)
- This config option also selects MM_OWNER config option, which
- could in turn add some fork/exit overhead.
-
config MEMCG_SWAP
bool "Memory Resource Controller Swap Extension"
depends on MEMCG && SWAP
@@ -996,6 +992,12 @@ config MEMCG_KMEM
the kmem extension can use it to guarantee that no group of processes
will ever exhaust kernel resources alone.
+ WARNING: Current implementation lacks reclaim support. That means
+ allocation attempts will fail when close to the limit even if there
+ is plenty of kmem available for reclaim. That makes this option
+ unusable in real life, so DO NOT SELECT IT except for development
+ purposes.
+
config CGROUP_HUGETLB
bool "HugeTLB Resource Controller for Control Groups"
depends on RESOURCE_COUNTERS && HUGETLB_PAGE
@@ -1173,9 +1175,6 @@ config SCHED_AUTOGROUP
desktop applications. Task group autogeneration is currently based
upon task session.
-config MM_OWNER
- bool
-
config SYSFS_DEPRECATED
bool "Enable deprecated sysfs features to support old userspace tools"
depends on SYSFS
@@ -1375,6 +1374,16 @@ config UID16
help
This enables the legacy 16-bit UID syscall wrappers.
+config SGETMASK_SYSCALL
+ bool "sgetmask/ssetmask syscalls support" if EXPERT
+ def_bool PARISC || MN10300 || BLACKFIN || M68K || PPC || MIPS || X86 || SPARC || CRIS || MICROBLAZE || SUPERH
+ ---help---
+ sys_sgetmask and sys_ssetmask are obsolete system calls
+ no longer supported in libc but still enabled by default on some
+ architectures.
+
+ If unsure, leave the default option here.
+
config SYSFS_SYSCALL
bool "Sysfs syscall support" if EXPERT
default y
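
For kernels built with SGETMASK_SYSCALL=y, the pair can still be reached via syscall(2); a hedged example on an architecture that wires the calls up (the legacy mask covers only the first 32 signals):

#include <signal.h>
#include <sys/syscall.h>
#include <unistd.h>

int main(void)
{
        long old_mask = syscall(SYS_sgetmask);

        /* Block SIGINT: the legacy mask uses bit (signum - 1). */
        syscall(SYS_ssetmask, old_mask | (1L << (SIGINT - 1)));
        return 0;
}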
diff --git a/init/main.c b/init/main.c
index 3190d653224a..855c601fbc70 100644
--- a/init/main.c
+++ b/init/main.c
@@ -77,6 +77,7 @@
#include <linux/sched_clock.h>
#include <linux/context_tracking.h>
#include <linux/random.h>
+#include <linux/list.h>
#include <asm/io.h>
#include <asm/bugs.h>
@@ -400,7 +401,7 @@ static noinline void __init_refok rest_init(void)
* the init task will end up wanting to create kthreads, which, if
* we schedule it before we create kthreadd, will OOPS.
*/
- kernel_thread(kernel_init, NULL, CLONE_FS | CLONE_SIGHAND);
+ kernel_thread(kernel_init, NULL, CLONE_FS);
numa_default_policy();
pid = kernel_thread(kthreadd, NULL, CLONE_FS | CLONE_FILES);
rcu_read_lock();
@@ -528,7 +529,6 @@ asmlinkage __visible void __init start_kernel(void)
page_address_init();
pr_notice("%s", linux_banner);
setup_arch(&command_line);
- mm_init_owner(&init_mm, &init_task);
mm_init_cpumask(&init_mm);
setup_command_line(command_line);
setup_nr_cpu_ids();
@@ -658,9 +658,7 @@ asmlinkage __visible void __init start_kernel(void)
signals_init();
/* rootfs populating might need page-writeback */
page_writeback_init();
-#ifdef CONFIG_PROC_FS
proc_root_init();
-#endif
cgroup_init();
cpuset_init();
taskstats_init_early();
@@ -695,19 +693,83 @@ static void __init do_ctors(void)
bool initcall_debug;
core_param(initcall_debug, initcall_debug, bool, 0644);
+#ifdef CONFIG_KALLSYMS
+struct blacklist_entry {
+ struct list_head next;
+ char *buf;
+};
+
+static __initdata_or_module LIST_HEAD(blacklisted_initcalls);
+
+static int __init initcall_blacklist(char *str)
+{
+ char *str_entry;
+ struct blacklist_entry *entry;
+
+ /* str argument is a comma-separated list of functions */
+ do {
+ str_entry = strsep(&str, ",");
+ if (str_entry) {
+ pr_debug("blacklisting initcall %s\n", str_entry);
+ entry = alloc_bootmem(sizeof(*entry));
+ entry->buf = alloc_bootmem(strlen(str_entry) + 1);
+ strcpy(entry->buf, str_entry);
+ list_add(&entry->next, &blacklisted_initcalls);
+ }
+ } while (str_entry);
+
+ return 0;
+}
+
+static bool __init_or_module initcall_blacklisted(initcall_t fn)
+{
+ struct list_head *tmp;
+ struct blacklist_entry *entry;
+ char *fn_name;
+
+ fn_name = kasprintf(GFP_KERNEL, "%pf", fn);
+ if (!fn_name)
+ return false;
+
+ list_for_each(tmp, &blacklisted_initcalls) {
+ entry = list_entry(tmp, struct blacklist_entry, next);
+ if (!strcmp(fn_name, entry->buf)) {
+ pr_debug("initcall %s blacklisted\n", fn_name);
+ kfree(fn_name);
+ return true;
+ }
+ }
+
+ kfree(fn_name);
+ return false;
+}
+#else
+static int __init initcall_blacklist(char *str)
+{
+ pr_warn("initcall_blacklist requires CONFIG_KALLSYMS\n");
+ return 0;
+}
+
+static bool __init_or_module initcall_blacklisted(initcall_t fn)
+{
+ return false;
+}
+#endif
+__setup("initcall_blacklist=", initcall_blacklist);
+
static int __init_or_module do_one_initcall_debug(initcall_t fn)
{
ktime_t calltime, delta, rettime;
unsigned long long duration;
int ret;
- pr_debug("calling %pF @ %i\n", fn, task_pid_nr(current));
+ printk(KERN_DEBUG "calling %pF @ %i\n", fn, task_pid_nr(current));
calltime = ktime_get();
ret = fn();
rettime = ktime_get();
delta = ktime_sub(rettime, calltime);
duration = (unsigned long long) ktime_to_ns(delta) >> 10;
- pr_debug("initcall %pF returned %d after %lld usecs\n",
+ printk(KERN_DEBUG "initcall %pF returned %d after %lld usecs\n",
fn, ret, duration);
return ret;
@@ -719,6 +781,9 @@ int __init_or_module do_one_initcall(initcall_t fn)
int ret;
char msgbuf[64];
+ if (initcall_blacklisted(fn))
+ return -EPERM;
+
if (initcall_debug)
ret = do_one_initcall_debug(fn);
else
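
The blacklist is driven entirely from the boot command line; the function names below are placeholders:

        initcall_blacklist=foo_driver_init,bar_subsys_init

Entries must match what "%pf" prints for the initcall, the lookup needs CONFIG_KALLSYMS, and a blacklisted initcall is reported by do_one_initcall() as having returned -EPERM.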
diff --git a/ipc/compat.c b/ipc/compat.c
index 45d035d4cedc..b5ef4f7946dc 100644
--- a/ipc/compat.c
+++ b/ipc/compat.c
@@ -30,7 +30,7 @@
#include <linux/ptrace.h>
#include <linux/mutex.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include "util.h"
diff --git a/ipc/compat_mq.c b/ipc/compat_mq.c
index 90d29f59cac6..ef6f91cc4490 100644
--- a/ipc/compat_mq.c
+++ b/ipc/compat_mq.c
@@ -12,7 +12,7 @@
#include <linux/mqueue.h>
#include <linux/syscalls.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
struct compat_mq_attr {
compat_long_t mq_flags; /* message queue flags */
diff --git a/ipc/msg.c b/ipc/msg.c
index 649853105a5d..7d267d0ba1ad 100644
--- a/ipc/msg.c
+++ b/ipc/msg.c
@@ -39,20 +39,25 @@
#include <linux/ipc_namespace.h>
#include <asm/current.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include "util.h"
-/*
- * one msg_receiver structure for each sleeping receiver:
- */
+/* one msg_receiver structure for each sleeping receiver */
struct msg_receiver {
struct list_head r_list;
struct task_struct *r_tsk;
int r_mode;
+ int r_msgflg;
long r_msgtype;
long r_maxsize;
+ /*
+ * Mark r_msg volatile so that the compiler
+ * does not try to get smart and optimize
+ * it. We rely on this for the lockless
+ * receive algorithm.
+ */
struct msg_msg *volatile r_msg;
};
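
The volatile qualifier matters because the sleeping receiver inspects r_msg without retaking the queue lock. A condensed, hedged sketch of that reader side in do_msgrcv() (the real code has more states and pairs this load with the smp_mb() in pipelined_send(), further below):

        for (;;) {
                struct msg_msg *msg = msr_d.r_msg;      /* volatile load */

                if (msg != ERR_PTR(-EAGAIN))
                        break;          /* a sender published a result */
                if (signal_pending(current))
                        break;          /* bail out via the locked path */
                schedule();
        }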
@@ -70,75 +75,6 @@ struct msg_sender {
#define msg_ids(ns) ((ns)->ids[IPC_MSG_IDS])
-static void freeque(struct ipc_namespace *, struct kern_ipc_perm *);
-static int newque(struct ipc_namespace *, struct ipc_params *);
-#ifdef CONFIG_PROC_FS
-static int sysvipc_msg_proc_show(struct seq_file *s, void *it);
-#endif
-
-/*
- * Scale msgmni with the available lowmem size: the memory dedicated to msg
- * queues should occupy at most 1/MSG_MEM_SCALE of lowmem.
- * Also take into account the number of nsproxies created so far.
- * This should be done staying within the (MSGMNI , IPCMNI/nr_ipc_ns) range.
- */
-void recompute_msgmni(struct ipc_namespace *ns)
-{
- struct sysinfo i;
- unsigned long allowed;
- int nb_ns;
-
- si_meminfo(&i);
- allowed = (((i.totalram - i.totalhigh) / MSG_MEM_SCALE) * i.mem_unit)
- / MSGMNB;
- nb_ns = atomic_read(&nr_ipc_ns);
- allowed /= nb_ns;
-
- if (allowed < MSGMNI) {
- ns->msg_ctlmni = MSGMNI;
- return;
- }
-
- if (allowed > IPCMNI / nb_ns) {
- ns->msg_ctlmni = IPCMNI / nb_ns;
- return;
- }
-
- ns->msg_ctlmni = allowed;
-}
-
-void msg_init_ns(struct ipc_namespace *ns)
-{
- ns->msg_ctlmax = MSGMAX;
- ns->msg_ctlmnb = MSGMNB;
-
- recompute_msgmni(ns);
-
- atomic_set(&ns->msg_bytes, 0);
- atomic_set(&ns->msg_hdrs, 0);
- ipc_init_ids(&ns->ids[IPC_MSG_IDS]);
-}
-
-#ifdef CONFIG_IPC_NS
-void msg_exit_ns(struct ipc_namespace *ns)
-{
- free_ipcs(ns, &msg_ids(ns), freeque);
- idr_destroy(&ns->ids[IPC_MSG_IDS].ipcs_idr);
-}
-#endif
-
-void __init msg_init(void)
-{
- msg_init_ns(&init_ipc_ns);
-
- printk(KERN_INFO "msgmni has been set to %d\n",
- init_ipc_ns.msg_ctlmni);
-
- ipc_init_proc_interface("sysvipc/msg",
- " key msqid perms cbytes qnum lspid lrpid uid gid cuid cgid stime rtime ctime\n",
- IPC_MSG_IDS, sysvipc_msg_proc_show);
-}
-
static inline struct msg_queue *msq_obtain_object(struct ipc_namespace *ns, int id)
{
struct kern_ipc_perm *ipcp = ipc_obtain_object(&msg_ids(ns), id);
@@ -227,7 +163,7 @@ static int newque(struct ipc_namespace *ns, struct ipc_params *params)
static inline void ss_add(struct msg_queue *msq, struct msg_sender *mss)
{
mss->tsk = current;
- current->state = TASK_INTERRUPTIBLE;
+ __set_current_state(TASK_INTERRUPTIBLE);
list_add_tail(&mss->list, &msq->q_senders);
}
@@ -306,15 +242,14 @@ static inline int msg_security(struct kern_ipc_perm *ipcp, int msgflg)
SYSCALL_DEFINE2(msgget, key_t, key, int, msgflg)
{
struct ipc_namespace *ns;
- struct ipc_ops msg_ops;
+ static const struct ipc_ops msg_ops = {
+ .getnew = newque,
+ .associate = msg_security,
+ };
struct ipc_params msg_params;
ns = current->nsproxy->ipc_ns;
- msg_ops.getnew = newque;
- msg_ops.associate = msg_security;
- msg_ops.more_checks = NULL;
-
msg_params.key = key;
msg_params.flg = msgflg;
@@ -612,63 +547,67 @@ SYSCALL_DEFINE3(msgctl, int, msqid, int, cmd, struct msqid_ds __user *, buf)
static int testmsg(struct msg_msg *msg, long type, int mode)
{
- switch (mode)
- {
- case SEARCH_ANY:
- case SEARCH_NUMBER:
+ switch (mode) {
+ case SEARCH_ANY:
+ case SEARCH_NUMBER:
+ return 1;
+ case SEARCH_LESSEQUAL:
+ if (msg->m_type <= type)
+ return 1;
+ break;
+ case SEARCH_EQUAL:
+ if (msg->m_type == type)
return 1;
- case SEARCH_LESSEQUAL:
- if (msg->m_type <= type)
- return 1;
- break;
- case SEARCH_EQUAL:
- if (msg->m_type == type)
- return 1;
- break;
- case SEARCH_NOTEQUAL:
- if (msg->m_type != type)
- return 1;
- break;
+ break;
+ case SEARCH_NOTEQUAL:
+ if (msg->m_type != type)
+ return 1;
+ break;
}
return 0;
}
-static inline int pipelined_send(struct msg_queue *msq, struct msg_msg *msg)
+static inline bool pipelined_send(struct msg_queue *msq, struct msg_msg *msg)
{
struct msg_receiver *msr, *t;
list_for_each_entry_safe(msr, t, &msq->q_receivers, r_list) {
- if (testmsg(msg, msr->r_msgtype, msr->r_mode) &&
- !security_msg_queue_msgrcv(msq, msg, msr->r_tsk,
- msr->r_msgtype, msr->r_mode)) {
-
- list_del(&msr->r_list);
- if (msr->r_maxsize < msg->m_ts) {
- /* initialize pipelined send ordering */
- msr->r_msg = NULL;
- wake_up_process(msr->r_tsk);
- smp_mb(); /* see barrier comment below */
- msr->r_msg = ERR_PTR(-E2BIG);
- } else {
- msr->r_msg = NULL;
- msq->q_lrpid = task_pid_vnr(msr->r_tsk);
- msq->q_rtime = get_seconds();
- wake_up_process(msr->r_tsk);
- /*
- * Ensure that the wakeup is visible before
- * setting r_msg, as the receiving end depends
- * on it. See lockless receive part 1 and 2 in
- * do_msgrcv().
- */
- smp_mb();
- msr->r_msg = msg;
-
- return 1;
- }
+ if (!testmsg(msg, msr->r_msgtype, msr->r_mode))
+ continue;
+ if (security_msg_queue_msgrcv(msq, msg, msr->r_tsk,
+ msr->r_msgtype, msr->r_mode))
+ continue;
+
+ /* found a suitable receiver, time to dequeue and wake */
+ list_del(&msr->r_list);
+
+ /* initialize pipelined send ordering */
+ msr->r_msg = NULL;
+
+ if (msr->r_maxsize < msg->m_ts &&
+ !(msr->r_msgflg & MSG_NOERROR)) {
+ wake_up_process(msr->r_tsk);
+ smp_mb(); /* see barrier comment below */
+ msr->r_msg = ERR_PTR(-E2BIG);
+ } else {
+ msq->q_lrpid = task_pid_vnr(msr->r_tsk);
+ msq->q_rtime = get_seconds();
+ wake_up_process(msr->r_tsk);
+
+ /*
+ * Ensure that the wakeup is visible before
+ * setting r_msg, as the receiving end depends
+ * on it. See lockless receive part 1 and 2 in
+ * do_msgrcv().
+ */
+ smp_mb();
+ msr->r_msg = msg;
+
+ return true;
}
}
- return 0;
+ return false; /* no receivers on the other side */
}
long do_msgsnd(int msqid, long mtype, void __user *mtext,
@@ -719,10 +658,8 @@ long do_msgsnd(int msqid, long mtype, void __user *mtext,
if (err)
goto out_unlock0;
- if (msgsz + msq->q_cbytes <= msq->q_qbytes &&
- 1 + msq->q_qnum <= msq->q_qbytes) {
- break;
- }
+ if (msgsz + msq->q_cbytes <= msq->q_qbytes)
+ break; /* there is space in the queue for this msg */
/* queue full, wait: */
if (msgflg & IPC_NOWAIT) {
@@ -972,13 +909,14 @@ long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, int msgfl
list_add_tail(&msr_d.r_list, &msq->q_receivers);
msr_d.r_tsk = current;
msr_d.r_msgtype = msgtyp;
+ msr_d.r_msgflg = msgflg;
msr_d.r_mode = mode;
if (msgflg & MSG_NOERROR)
msr_d.r_maxsize = INT_MAX;
else
msr_d.r_maxsize = bufsz;
msr_d.r_msg = ERR_PTR(-EAGAIN);
- current->state = TASK_INTERRUPTIBLE;
+ __set_current_state(TASK_INTERRUPTIBLE);
ipc_unlock_object(&msq->q_perm);
rcu_read_unlock();
@@ -1056,6 +994,57 @@ SYSCALL_DEFINE5(msgrcv, int, msqid, struct msgbuf __user *, msgp, size_t, msgsz,
return do_msgrcv(msqid, msgp, msgsz, msgtyp, msgflg, do_msg_fill);
}
+/*
+ * Scale msgmni with the available lowmem size: the memory dedicated to msg
+ * queues should occupy at most 1/MSG_MEM_SCALE of lowmem.
+ * Also take into account the number of nsproxies created so far.
+ * This should be done staying within the (MSGMNI, IPCMNI/nr_ipc_ns) range.
+ */
+void recompute_msgmni(struct ipc_namespace *ns)
+{
+ struct sysinfo i;
+ unsigned long allowed;
+ int nb_ns;
+
+ si_meminfo(&i);
+ allowed = (((i.totalram - i.totalhigh) / MSG_MEM_SCALE) * i.mem_unit)
+ / MSGMNB;
+ nb_ns = atomic_read(&nr_ipc_ns);
+ allowed /= nb_ns;
+
+ if (allowed < MSGMNI) {
+ ns->msg_ctlmni = MSGMNI;
+ return;
+ }
+
+ if (allowed > IPCMNI / nb_ns) {
+ ns->msg_ctlmni = IPCMNI / nb_ns;
+ return;
+ }
+
+ ns->msg_ctlmni = allowed;
+}
+
+void msg_init_ns(struct ipc_namespace *ns)
+{
+ ns->msg_ctlmax = MSGMAX;
+ ns->msg_ctlmnb = MSGMNB;
+
+ recompute_msgmni(ns);
+
+ atomic_set(&ns->msg_bytes, 0);
+ atomic_set(&ns->msg_hdrs, 0);
+ ipc_init_ids(&ns->ids[IPC_MSG_IDS]);
+}
+
+#ifdef CONFIG_IPC_NS
+void msg_exit_ns(struct ipc_namespace *ns)
+{
+ free_ipcs(ns, &msg_ids(ns), freeque);
+ idr_destroy(&ns->ids[IPC_MSG_IDS].ipcs_idr);
+}
+#endif
+
#ifdef CONFIG_PROC_FS
static int sysvipc_msg_proc_show(struct seq_file *s, void *it)
{
@@ -1080,3 +1069,15 @@ static int sysvipc_msg_proc_show(struct seq_file *s, void *it)
msq->q_ctime);
}
#endif
+
+void __init msg_init(void)
+{
+ msg_init_ns(&init_ipc_ns);
+
+ printk(KERN_INFO "msgmni has been set to %d\n",
+ init_ipc_ns.msg_ctlmni);
+
+ ipc_init_proc_interface("sysvipc/msg",
+ " key msqid perms cbytes qnum lspid lrpid uid gid cuid cgid stime rtime ctime\n",
+ IPC_MSG_IDS, sysvipc_msg_proc_show);
+}
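
For concreteness, a worked example of the scaling in recompute_msgmni(), assuming the historical defaults MSG_MEM_SCALE = 32, MSGMNB = 16384, MSGMNI = 32 and IPCMNI = 32768:

/*
 * 1 GiB of lowmem, a single ipc namespace:
 *   allowed = (1073741824 / 32) / 16384 = 2048
 * 2048 lies between MSGMNI (32) and IPCMNI / 1 (32768),
 * so msg_ctlmni is set to 2048.
 */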
diff --git a/ipc/sem.c b/ipc/sem.c
index bee555417312..fe0928a3d08b 100644
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -87,7 +87,7 @@
#include <linux/nsproxy.h>
#include <linux/ipc_namespace.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include "util.h"
/* One semaphore structure for each semaphore in the system. */
@@ -160,7 +160,7 @@ static int sysvipc_sem_proc_show(struct seq_file *s, void *it);
* sem_array.pending{_alter,_cont},
* sem_array.sem_undo: global sem_lock() for read/write
* sem_undo.proc_next: only "current" is allowed to read/write that field.
- *
+ *
* sem_array.sem_base[i].pending_{const,alter}:
* global or semaphore sem_lock() for read/write
*/
@@ -564,7 +564,11 @@ static inline int sem_more_checks(struct kern_ipc_perm *ipcp,
SYSCALL_DEFINE3(semget, key_t, key, int, nsems, int, semflg)
{
struct ipc_namespace *ns;
- struct ipc_ops sem_ops;
+ static const struct ipc_ops sem_ops = {
+ .getnew = newary,
+ .associate = sem_security,
+ .more_checks = sem_more_checks,
+ };
struct ipc_params sem_params;
ns = current->nsproxy->ipc_ns;
@@ -572,10 +576,6 @@ SYSCALL_DEFINE3(semget, key_t, key, int, nsems, int, semflg)
if (nsems < 0 || nsems > ns->sc_semmsl)
return -EINVAL;
- sem_ops.getnew = newary;
- sem_ops.associate = sem_security;
- sem_ops.more_checks = sem_more_checks;
-
sem_params.key = key;
sem_params.flg = semflg;
sem_params.u.nsems = nsems;
@@ -1161,7 +1161,7 @@ static int semctl_nolock(struct ipc_namespace *ns, int semid,
err = security_sem_semctl(NULL, cmd);
if (err)
return err;
-
+
memset(&seminfo, 0, sizeof(seminfo));
seminfo.semmni = ns->sc_semmni;
seminfo.semmns = ns->sc_semmns;
@@ -1181,7 +1181,7 @@ static int semctl_nolock(struct ipc_namespace *ns, int semid,
}
max_id = ipc_get_maxid(&sem_ids(ns));
up_read(&sem_ids(ns).rwsem);
- if (copy_to_user(p, &seminfo, sizeof(struct seminfo)))
+ if (copy_to_user(p, &seminfo, sizeof(struct seminfo)))
return -EFAULT;
return (max_id < 0) ? 0 : max_id;
}
@@ -1883,7 +1883,7 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops,
/* We need to sleep on this operation, so we put the current
* task into the pending queue and go to sleep.
*/
-
+
queue.sops = sops;
queue.nsops = nsops;
queue.undo = un;
@@ -2016,7 +2016,7 @@ int copy_semundo(unsigned long clone_flags, struct task_struct *tsk)
return error;
atomic_inc(&undo_list->refcnt);
tsk->sysvsem.undo_list = undo_list;
- } else
+ } else
tsk->sysvsem.undo_list = NULL;
return 0;
diff --git a/ipc/shm.c b/ipc/shm.c
index 76459616a7fa..89fc354156cb 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -43,7 +43,7 @@
#include <linux/mount.h>
#include <linux/ipc_namespace.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include "util.h"
@@ -493,7 +493,11 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
if (size < SHMMIN || size > ns->shm_ctlmax)
return -EINVAL;
- if (ns->shm_tot + numpages > ns->shm_ctlall)
+ if (numpages << PAGE_SHIFT < size)
+ return -ENOSPC;
+
+ if (ns->shm_tot + numpages < ns->shm_tot ||
+ ns->shm_tot + numpages > ns->shm_ctlall)
return -ENOSPC;
shp = ipc_rcu_alloc(sizeof(*shp));
@@ -609,15 +613,15 @@ static inline int shm_more_checks(struct kern_ipc_perm *ipcp,
SYSCALL_DEFINE3(shmget, key_t, key, size_t, size, int, shmflg)
{
struct ipc_namespace *ns;
- struct ipc_ops shm_ops;
+ static const struct ipc_ops shm_ops = {
+ .getnew = newseg,
+ .associate = shm_security,
+ .more_checks = shm_more_checks,
+ };
struct ipc_params shm_params;
ns = current->nsproxy->ipc_ns;
- shm_ops.getnew = newseg;
- shm_ops.associate = shm_security;
- shm_ops.more_checks = shm_more_checks;
-
shm_params.key = key;
shm_params.flg = shmflg;
shm_params.u.size = size;
@@ -694,7 +698,7 @@ static inline unsigned long copy_shminfo_to_user(void __user *buf, struct shminf
out.shmmin = in->shmmin;
out.shmmni = in->shmmni;
out.shmseg = in->shmseg;
- out.shmall = in->shmall;
+ out.shmall = in->shmall;
return copy_to_user(buf, &out, sizeof(out));
}
@@ -1160,6 +1164,9 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr,
down_write(&current->mm->mmap_sem);
if (addr && !(shmflg & SHM_REMAP)) {
err = -EINVAL;
+ if (addr + size < addr)
+ goto invalid;
+
if (find_vma_intersection(current->mm, addr, addr + size))
goto invalid;
/*
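
The two added checks in newseg() guard against arithmetic wrap; a hedged illustration on a 32-bit build with PAGE_SHIFT == 12:

/*
 * size     = 0xfffff001
 * numpages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT   wraps to 0
 * numpages << PAGE_SHIFT == 0 < size                -> -ENOSPC
 *
 * Likewise, ns->shm_tot + numpages < ns->shm_tot catches the
 * system-wide page accounting wrapping past ULONG_MAX.
 */

The addr + size < addr test added to do_shmat() is the same idea applied to the requested mapping range.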
diff --git a/ipc/util.c b/ipc/util.c
index 2eb0d1eaa312..27d74e69fd57 100644
--- a/ipc/util.c
+++ b/ipc/util.c
@@ -183,7 +183,7 @@ void __init ipc_init_proc_interface(const char *path, const char *header,
* ipc_findkey - find a key in an ipc identifier set
* @ids: ipc identifier set
* @key: key to find
- *
+ *
* Returns the locked pointer to the ipc structure if found or NULL
* otherwise. If key is found ipc points to the owning ipc structure
*
@@ -317,7 +317,7 @@ int ipc_addid(struct ipc_ids *ids, struct kern_ipc_perm *new, int size)
* when the key is IPC_PRIVATE.
*/
static int ipcget_new(struct ipc_namespace *ns, struct ipc_ids *ids,
- struct ipc_ops *ops, struct ipc_params *params)
+ const struct ipc_ops *ops, struct ipc_params *params)
{
int err;
@@ -344,7 +344,7 @@ static int ipcget_new(struct ipc_namespace *ns, struct ipc_ids *ids,
*/
static int ipc_check_perms(struct ipc_namespace *ns,
struct kern_ipc_perm *ipcp,
- struct ipc_ops *ops,
+ const struct ipc_ops *ops,
struct ipc_params *params)
{
int err;
@@ -375,7 +375,7 @@ static int ipc_check_perms(struct ipc_namespace *ns,
* On success, the ipc id is returned.
*/
static int ipcget_public(struct ipc_namespace *ns, struct ipc_ids *ids,
- struct ipc_ops *ops, struct ipc_params *params)
+ const struct ipc_ops *ops, struct ipc_params *params)
{
struct kern_ipc_perm *ipcp;
int flg = params->flg;
@@ -538,7 +538,7 @@ int ipcperms(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp, short flag)
else if (in_group_p(ipcp->cgid) || in_group_p(ipcp->gid))
granted_mode >>= 3;
/* is there some bit set in requested_mode but not in granted_mode? */
- if ((requested_mode & ~granted_mode & 0007) &&
+ if ((requested_mode & ~granted_mode & 0007) &&
!ns_capable(ns->user_ns, CAP_IPC_OWNER))
return -1;
@@ -678,7 +678,7 @@ out:
* Common routine called by sys_msgget(), sys_semget() and sys_shmget().
*/
int ipcget(struct ipc_namespace *ns, struct ipc_ids *ids,
- struct ipc_ops *ops, struct ipc_params *params)
+ const struct ipc_ops *ops, struct ipc_params *params)
{
if (params->key == IPC_PRIVATE)
return ipcget_new(ns, ids, ops, params);
diff --git a/ipc/util.h b/ipc/util.h
index 9c47d6f6c7b4..1a5a0fcd099c 100644
--- a/ipc/util.h
+++ b/ipc/util.h
@@ -78,9 +78,9 @@ struct ipc_params {
* . routine to call for an extra check if needed
*/
struct ipc_ops {
- int (*getnew) (struct ipc_namespace *, struct ipc_params *);
- int (*associate) (struct kern_ipc_perm *, int);
- int (*more_checks) (struct kern_ipc_perm *, struct ipc_params *);
+ int (*getnew)(struct ipc_namespace *, struct ipc_params *);
+ int (*associate)(struct kern_ipc_perm *, int);
+ int (*more_checks)(struct kern_ipc_perm *, struct ipc_params *);
};
struct seq_file;
@@ -142,7 +142,7 @@ struct kern_ipc_perm *ipcctl_pre_down_nolock(struct ipc_namespace *ns,
struct ipc64_perm *perm, int extra_perm);
#ifndef CONFIG_ARCH_WANT_IPC_PARSE_VERSION
- /* On IA-64, we always use the "64-bit version" of the IPC structures. */
+/* On IA-64, we always use the "64-bit version" of the IPC structures. */
# define ipc_parse_version(cmd) IPC_64
#else
int ipc_parse_version(int *cmd);
@@ -201,7 +201,7 @@ static inline bool ipc_valid_object(struct kern_ipc_perm *perm)
struct kern_ipc_perm *ipc_obtain_object_check(struct ipc_ids *ids, int id);
int ipcget(struct ipc_namespace *ns, struct ipc_ids *ids,
- struct ipc_ops *ops, struct ipc_params *params);
+ const struct ipc_ops *ops, struct ipc_params *params);
void free_ipcs(struct ipc_namespace *ns, struct ipc_ids *ids,
void (*free)(struct ipc_namespace *, struct kern_ipc_perm *));
#endif
diff --git a/kernel/acct.c b/kernel/acct.c
index 1853dd4a1d01..d17810f056cf 100644
--- a/kernel/acct.c
+++ b/kernel/acct.c
@@ -55,7 +55,7 @@
#include <linux/times.h>
#include <linux/syscalls.h>
#include <linux/mount.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <asm/div64.h>
#include <linux/blkdev.h> /* sector_div */
#include <linux/pid_namespace.h>
@@ -135,7 +135,7 @@ static int check_free_space(struct bsd_acct_struct *acct, struct file *file)
spin_lock(&acct_lock);
if (file != acct->file) {
if (act)
- res = act>0;
+ res = act > 0;
goto out;
}
@@ -263,7 +263,7 @@ SYSCALL_DEFINE1(acct, const char __user *, name)
if (name) {
struct filename *tmp = getname(name);
if (IS_ERR(tmp))
- return (PTR_ERR(tmp));
+ return PTR_ERR(tmp);
error = acct_on(tmp);
putname(tmp);
} else {
diff --git a/kernel/audit.c b/kernel/audit.c
index 81f5f49479da..3ef2e0e797e8 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -44,7 +44,7 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/init.h>
-#include <asm/types.h>
+#include <linux/types.h>
#include <linux/atomic.h>
#include <linux/mm.h>
#include <linux/export.h>
diff --git a/kernel/backtracetest.c b/kernel/backtracetest.c
index a5e026bc45c4..1323360d90e3 100644
--- a/kernel/backtracetest.c
+++ b/kernel/backtracetest.c
@@ -19,8 +19,8 @@
static void backtrace_test_normal(void)
{
- printk("Testing a backtrace from process context.\n");
- printk("The following trace is a kernel self test and not a bug!\n");
+ pr_info("Testing a backtrace from process context.\n");
+ pr_info("The following trace is a kernel self test and not a bug!\n");
dump_stack();
}
@@ -37,8 +37,8 @@ static DECLARE_TASKLET(backtrace_tasklet, &backtrace_test_irq_callback, 0);
static void backtrace_test_irq(void)
{
- printk("Testing a backtrace from irq context.\n");
- printk("The following trace is a kernel self test and not a bug!\n");
+ pr_info("Testing a backtrace from irq context.\n");
+ pr_info("The following trace is a kernel self test and not a bug!\n");
init_completion(&backtrace_work);
tasklet_schedule(&backtrace_tasklet);
@@ -51,8 +51,8 @@ static void backtrace_test_saved(void)
struct stack_trace trace;
unsigned long entries[8];
- printk("Testing a saved backtrace.\n");
- printk("The following trace is a kernel self test and not a bug!\n");
+ pr_info("Testing a saved backtrace.\n");
+ pr_info("The following trace is a kernel self test and not a bug!\n");
trace.nr_entries = 0;
trace.max_entries = ARRAY_SIZE(entries);
@@ -65,19 +65,19 @@ static void backtrace_test_saved(void)
#else
static void backtrace_test_saved(void)
{
- printk("Saved backtrace test skipped.\n");
+ pr_info("Saved backtrace test skipped.\n");
}
#endif
static int backtrace_regression_test(void)
{
- printk("====[ backtrace testing ]===========\n");
+ pr_info("====[ backtrace testing ]===========\n");
backtrace_test_normal();
backtrace_test_irq();
backtrace_test_saved();
- printk("====[ end of backtrace testing ]====\n");
+ pr_info("====[ end of backtrace testing ]====\n");
return 0;
}
diff --git a/kernel/capability.c b/kernel/capability.c
index a8d63df0c322..84b2bbf443e7 100644
--- a/kernel/capability.c
+++ b/kernel/capability.c
@@ -24,7 +24,6 @@
*/
const kernel_cap_t __cap_empty_set = CAP_EMPTY_SET;
-
EXPORT_SYMBOL(__cap_empty_set);
int file_caps_enabled = 1;
@@ -189,7 +188,7 @@ SYSCALL_DEFINE2(capget, cap_user_header_t, header, cap_user_data_t, dataptr)
*
* An alternative would be to return an error here
* (-ERANGE), but that causes legacy applications to
- * unexpectidly fail; the capget/modify/capset aborts
+ * unexpectedly fail; the capget/modify/capset aborts
* before modification is attempted and the application
* fails.
*/
@@ -395,7 +394,8 @@ EXPORT_SYMBOL(ns_capable);
* This does not set PF_SUPERPRIV because the caller may not
* actually be privileged.
*/
-bool file_ns_capable(const struct file *file, struct user_namespace *ns, int cap)
+bool file_ns_capable(const struct file *file, struct user_namespace *ns,
+ int cap)
{
if (WARN_ON_ONCE(!cap_valid(cap)))
return false;
diff --git a/kernel/compat.c b/kernel/compat.c
index e40b0430b562..633394f442f8 100644
--- a/kernel/compat.c
+++ b/kernel/compat.c
@@ -157,7 +157,7 @@ static int __compat_put_timespec(const struct timespec *ts, struct compat_timesp
int compat_get_timeval(struct timeval *tv, const void __user *utv)
{
if (COMPAT_USE_64BIT_TIME)
- return copy_from_user(tv, utv, sizeof *tv) ? -EFAULT : 0;
+ return copy_from_user(tv, utv, sizeof(*tv)) ? -EFAULT : 0;
else
return __compat_get_timeval(tv, utv);
}
@@ -166,7 +166,7 @@ EXPORT_SYMBOL_GPL(compat_get_timeval);
int compat_put_timeval(const struct timeval *tv, void __user *utv)
{
if (COMPAT_USE_64BIT_TIME)
- return copy_to_user(utv, tv, sizeof *tv) ? -EFAULT : 0;
+ return copy_to_user(utv, tv, sizeof(*tv)) ? -EFAULT : 0;
else
return __compat_put_timeval(tv, utv);
}
@@ -175,7 +175,7 @@ EXPORT_SYMBOL_GPL(compat_put_timeval);
int compat_get_timespec(struct timespec *ts, const void __user *uts)
{
if (COMPAT_USE_64BIT_TIME)
- return copy_from_user(ts, uts, sizeof *ts) ? -EFAULT : 0;
+ return copy_from_user(ts, uts, sizeof(*ts)) ? -EFAULT : 0;
else
return __compat_get_timespec(ts, uts);
}
@@ -184,7 +184,7 @@ EXPORT_SYMBOL_GPL(compat_get_timespec);
int compat_put_timespec(const struct timespec *ts, void __user *uts)
{
if (COMPAT_USE_64BIT_TIME)
- return copy_to_user(uts, ts, sizeof *ts) ? -EFAULT : 0;
+ return copy_to_user(uts, ts, sizeof(*ts)) ? -EFAULT : 0;
else
return __compat_put_timespec(ts, uts);
}
diff --git a/kernel/cpu.c b/kernel/cpu.c
index a9e710eef0e2..c4a146d3699d 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -283,8 +283,7 @@ static inline void check_for_tasks(int cpu)
task_cputime(p, &utime, &stime);
if (task_cpu(p) == cpu && p->state == TASK_RUNNING &&
(utime || stime))
- printk(KERN_WARNING "Task %s (pid = %d) is on cpu %d "
- "(state = %ld, flags = %x)\n",
+ pr_warn("Task %s (pid = %d) is on cpu %d (state = %ld, flags = %x)\n",
p->comm, task_pid_nr(p), cpu,
p->state, p->flags);
}
@@ -336,8 +335,8 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
if (err) {
nr_calls--;
__cpu_notify(CPU_DOWN_FAILED | mod, hcpu, nr_calls, NULL);
- printk("%s: attempt to take down CPU %u failed\n",
- __func__, cpu);
+ pr_warn("%s: attempt to take down CPU %u failed\n",
+ __func__, cpu);
goto out_release;
}
@@ -444,8 +443,8 @@ static int _cpu_up(unsigned int cpu, int tasks_frozen)
ret = __cpu_notify(CPU_UP_PREPARE | mod, hcpu, -1, &nr_calls);
if (ret) {
nr_calls--;
- printk(KERN_WARNING "%s: attempt to bring up CPU %u failed\n",
- __func__, cpu);
+ pr_warn("%s: attempt to bring up CPU %u failed\n",
+ __func__, cpu);
goto out_notify;
}
@@ -475,11 +474,10 @@ int cpu_up(unsigned int cpu)
int err = 0;
if (!cpu_possible(cpu)) {
- printk(KERN_ERR "can't online cpu %d because it is not "
- "configured as may-hotadd at boot time\n", cpu);
+ pr_err("can't online cpu %d because it is not configured as may-hotadd at boot time\n",
+ cpu);
#if defined(CONFIG_IA64)
- printk(KERN_ERR "please check additional_cpus= boot "
- "parameter\n");
+ pr_err("please check additional_cpus= boot parameter\n");
#endif
return -EINVAL;
}
@@ -518,7 +516,7 @@ int disable_nonboot_cpus(void)
*/
cpumask_clear(frozen_cpus);
- printk("Disabling non-boot CPUs ...\n");
+ pr_info("Disabling non-boot CPUs ...\n");
for_each_online_cpu(cpu) {
if (cpu == first_cpu)
continue;
@@ -526,8 +524,7 @@ int disable_nonboot_cpus(void)
if (!error)
cpumask_set_cpu(cpu, frozen_cpus);
else {
- printk(KERN_ERR "Error taking CPU%d down: %d\n",
- cpu, error);
+ pr_err("Error taking CPU%d down: %d\n", cpu, error);
break;
}
}
@@ -537,7 +534,7 @@ int disable_nonboot_cpus(void)
/* Make sure the CPUs won't be enabled by someone else */
cpu_hotplug_disabled = 1;
} else {
- printk(KERN_ERR "Non-boot CPUs are not disabled\n");
+ pr_err("Non-boot CPUs are not disabled\n");
}
cpu_maps_update_done();
return error;
@@ -561,17 +558,17 @@ void __ref enable_nonboot_cpus(void)
if (cpumask_empty(frozen_cpus))
goto out;
- printk(KERN_INFO "Enabling non-boot CPUs ...\n");
+ pr_info("Enabling non-boot CPUs ...\n");
arch_enable_nonboot_cpus_begin();
for_each_cpu(cpu, frozen_cpus) {
error = _cpu_up(cpu, 1);
if (!error) {
- printk(KERN_INFO "CPU%d is up\n", cpu);
+ pr_info("CPU%d is up\n", cpu);
continue;
}
- printk(KERN_WARNING "Error taking CPU%d up: %d\n", cpu, error);
+ pr_warn("Error taking CPU%d up: %d\n", cpu, error);
}
arch_enable_nonboot_cpus_end();
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 2f4b08b8db24..be9b48359a31 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -61,12 +61,7 @@
#include <linux/cgroup.h>
#include <linux/wait.h>
-/*
- * Tracks how many cpusets are currently defined in system.
- * When there is only one cpuset (the root cpuset) we can
- * short circuit some hooks.
- */
-int number_of_cpusets __read_mostly;
+struct static_key cpusets_enabled_key __read_mostly = STATIC_KEY_INIT_FALSE;
/* See "Frequency meter" comments, below. */
@@ -611,7 +606,7 @@ static int generate_sched_domains(cpumask_var_t **domains,
goto done;
}
- csa = kmalloc(number_of_cpusets * sizeof(cp), GFP_KERNEL);
+ csa = kmalloc(nr_cpusets() * sizeof(cp), GFP_KERNEL);
if (!csa)
goto done;
csn = 0;
@@ -1888,7 +1883,7 @@ static int cpuset_css_online(struct cgroup_subsys_state *css)
if (is_spread_slab(parent))
set_bit(CS_SPREAD_SLAB, &cs->flags);
- number_of_cpusets++;
+ cpuset_inc();
if (!test_bit(CGRP_CPUSET_CLONE_CHILDREN, &css->cgroup->flags))
goto out_unlock;
@@ -1939,7 +1934,7 @@ static void cpuset_css_offline(struct cgroup_subsys_state *css)
if (is_sched_load_balance(cs))
update_flag(CS_SCHED_LOAD_BALANCE, cs, 0);
- number_of_cpusets--;
+ cpuset_dec();
clear_bit(CS_ONLINE, &cs->flags);
mutex_unlock(&cpuset_mutex);
@@ -1992,7 +1987,6 @@ int __init cpuset_init(void)
if (!alloc_cpumask_var(&cpus_attach, GFP_KERNEL))
BUG();
- number_of_cpusets = 1;
return 0;
}
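
number_of_cpusets becomes a jump label so the hot allocator paths pay nothing while only the root cpuset exists. A hedged sketch of the assumed include/linux/cpuset.h counterparts introduced alongside this change:

static inline bool cpusets_enabled(void)
{
        return static_key_false(&cpusets_enabled_key);
}

static inline int nr_cpusets(void)
{
        /* jump label reference count + the root cpuset */
        return static_key_count(&cpusets_enabled_key) + 1;
}

static inline void cpuset_inc(void)
{
        static_key_slow_inc(&cpusets_enabled_key);
}

static inline void cpuset_dec(void)
{
        static_key_slow_dec(&cpusets_enabled_key);
}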
diff --git a/kernel/exec_domain.c b/kernel/exec_domain.c
index 0dbeae374225..83d4382f5699 100644
--- a/kernel/exec_domain.c
+++ b/kernel/exec_domain.c
@@ -37,7 +37,7 @@ static unsigned long ident_map[32] = {
struct exec_domain default_exec_domain = {
.name = "Linux", /* name */
.handler = default_handler, /* lcall7 causes a seg fault. */
- .pers_low = 0, /* PER_LINUX personality. */
+ .pers_low = 0, /* PER_LINUX personality. */
.pers_high = 0, /* PER_LINUX personality. */
.signal_map = ident_map, /* Identity map signals. */
.signal_invmap = ident_map, /* - both ways. */
@@ -83,7 +83,7 @@ lookup_exec_domain(unsigned int personality)
ep = &default_exec_domain;
out:
read_unlock(&exec_domains_lock);
- return (ep);
+ return ep;
}
int
@@ -110,8 +110,9 @@ register_exec_domain(struct exec_domain *ep)
out:
write_unlock(&exec_domains_lock);
- return (err);
+ return err;
}
+EXPORT_SYMBOL(register_exec_domain);
int
unregister_exec_domain(struct exec_domain *ep)
@@ -133,6 +134,7 @@ unregister:
write_unlock(&exec_domains_lock);
return 0;
}
+EXPORT_SYMBOL(unregister_exec_domain);
int __set_personality(unsigned int personality)
{
@@ -144,6 +146,7 @@ int __set_personality(unsigned int personality)
return 0;
}
+EXPORT_SYMBOL(__set_personality);
#ifdef CONFIG_PROC_FS
static int execdomains_proc_show(struct seq_file *m, void *v)
@@ -188,8 +191,3 @@ SYSCALL_DEFINE1(personality, unsigned int, personality)
return old;
}
-
-
-EXPORT_SYMBOL(register_exec_domain);
-EXPORT_SYMBOL(unregister_exec_domain);
-EXPORT_SYMBOL(__set_personality);
diff --git a/kernel/exit.c b/kernel/exit.c
index 6ed6a1d552b5..e5c4668f1799 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -313,46 +313,7 @@ kill_orphaned_pgrp(struct task_struct *tsk, struct task_struct *parent)
}
}
-/*
- * Let kernel threads use this to say that they allow a certain signal.
- * Must not be used if kthread was cloned with CLONE_SIGHAND.
- */
-int allow_signal(int sig)
-{
- if (!valid_signal(sig) || sig < 1)
- return -EINVAL;
-
- spin_lock_irq(&current->sighand->siglock);
- /* This is only needed for daemonize()'ed kthreads */
- sigdelset(&current->blocked, sig);
- /*
- * Kernel threads handle their own signals. Let the signal code
- * know it'll be handled, so that they don't get converted to
- * SIGKILL or just silently dropped.
- */
- current->sighand->action[(sig)-1].sa.sa_handler = (void __user *)2;
- recalc_sigpending();
- spin_unlock_irq(&current->sighand->siglock);
- return 0;
-}
-
-EXPORT_SYMBOL(allow_signal);
-
-int disallow_signal(int sig)
-{
- if (!valid_signal(sig) || sig < 1)
- return -EINVAL;
-
- spin_lock_irq(&current->sighand->siglock);
- current->sighand->action[(sig)-1].sa.sa_handler = SIG_IGN;
- recalc_sigpending();
- spin_unlock_irq(&current->sighand->siglock);
- return 0;
-}
-
-EXPORT_SYMBOL(disallow_signal);
-
-#ifdef CONFIG_MM_OWNER
+#ifdef CONFIG_MEMCG
/*
* A task is exiting. If it owned this mm, find a new owner for the mm.
*/
@@ -395,14 +356,18 @@ retry:
}
/*
- * Search through everything else. We should not get
- * here often
+ * Search through everything else, we should not get here often.
*/
- do_each_thread(g, c) {
- if (c->mm == mm)
- goto assign_new_owner;
- } while_each_thread(g, c);
-
+ for_each_process(g) {
+ if (g->flags & PF_KTHREAD)
+ continue;
+ for_each_thread(g, c) {
+ if (c->mm == mm)
+ goto assign_new_owner;
+ if (c->mm)
+ break;
+ }
+ }
read_unlock(&tasklist_lock);
/*
* We found no owner yet mm_users > 1: this implies that we are
@@ -434,7 +399,7 @@ assign_new_owner:
task_unlock(c);
put_task_struct(c);
}
-#endif /* CONFIG_MM_OWNER */
+#endif /* CONFIG_MEMCG */
/*
* Turn us into a lazy TLB process if we
diff --git a/kernel/fork.c b/kernel/fork.c
index 54a8d26f612f..d2799d1fc952 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -150,15 +150,15 @@ void __weak arch_release_thread_info(struct thread_info *ti)
static struct thread_info *alloc_thread_info_node(struct task_struct *tsk,
int node)
{
- struct page *page = alloc_pages_node(node, THREADINFO_GFP_ACCOUNTED,
- THREAD_SIZE_ORDER);
+ struct page *page = alloc_kmem_pages_node(node, THREADINFO_GFP,
+ THREAD_SIZE_ORDER);
return page ? page_address(page) : NULL;
}
static inline void free_thread_info(struct thread_info *ti)
{
- free_memcg_kmem_pages((unsigned long)ti, THREAD_SIZE_ORDER);
+ free_kmem_pages((unsigned long)ti, THREAD_SIZE_ORDER);
}
# else
static struct kmem_cache *thread_info_cache;
@@ -1099,12 +1099,12 @@ static void rt_mutex_init_task(struct task_struct *p)
#endif
}
-#ifdef CONFIG_MM_OWNER
+#ifdef CONFIG_MEMCG
void mm_init_owner(struct mm_struct *mm, struct task_struct *p)
{
mm->owner = p;
}
-#endif /* CONFIG_MM_OWNER */
+#endif /* CONFIG_MEMCG */
/*
* Initialize POSIX timer handling for a single task.
@@ -1606,10 +1606,12 @@ long do_fork(unsigned long clone_flags,
*/
if (!IS_ERR(p)) {
struct completion vfork;
+ struct pid *pid;
trace_sched_process_fork(current, p);
- nr = task_pid_vnr(p);
+ pid = get_task_pid(p, PIDTYPE_PID);
+ nr = pid_vnr(pid);
if (clone_flags & CLONE_PARENT_SETTID)
put_user(nr, parent_tidptr);
@@ -1624,12 +1626,14 @@ long do_fork(unsigned long clone_flags,
/* forking complete and child started to run, tell ptracer */
if (unlikely(trace))
- ptrace_event(trace, nr);
+ ptrace_event_pid(trace, pid);
if (clone_flags & CLONE_VFORK) {
if (!wait_for_vfork_done(p, &vfork))
- ptrace_event(PTRACE_EVENT_VFORK_DONE, nr);
+ ptrace_event_pid(PTRACE_EVENT_VFORK_DONE, pid);
}
+
+ put_pid(pid);
} else {
nr = PTR_ERR(p);
}
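
Pinning a struct pid across the tail of do_fork() means later ptrace reports cannot hand the tracer a recycled numeric id. One plausible shape for the assumed ptrace_event_pid() helper (hedged; the real helper must also translate into the ptracer's pid namespace):

static inline void ptrace_event_pid(int event, struct pid *pid)
{
        /*
         * Resolve the number only at report time, from the pinned
         * struct pid, so a task_pid_vnr() value sampled at fork can
         * never go stale in between.
         */
        ptrace_event(event, pid_vnr(pid));
}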
diff --git a/kernel/gcov/base.c b/kernel/gcov/base.c
index f45b75b713c0..b358a802fd18 100644
--- a/kernel/gcov/base.c
+++ b/kernel/gcov/base.c
@@ -85,6 +85,12 @@ void __gcov_merge_ior(gcov_type *counters, unsigned int n_counters)
}
EXPORT_SYMBOL(__gcov_merge_ior);
+void __gcov_merge_time_profile(gcov_type *counters, unsigned int n_counters)
+{
+ /* Unused. */
+}
+EXPORT_SYMBOL(__gcov_merge_time_profile);
+
/**
* gcov_enable_events - enable event reporting through gcov_event()
*
diff --git a/kernel/gcov/gcc_4_7.c b/kernel/gcov/gcc_4_7.c
index 2c6e4631c814..826ba9fb5e32 100644
--- a/kernel/gcov/gcc_4_7.c
+++ b/kernel/gcov/gcc_4_7.c
@@ -18,7 +18,12 @@
#include <linux/vmalloc.h>
#include "gcov.h"
+#if __GNUC__ == 4 && __GNUC_MINOR__ >= 9
+#define GCOV_COUNTERS 9
+#else
#define GCOV_COUNTERS 8
+#endif
+
#define GCOV_TAG_FUNCTION_LENGTH 3
static struct gcov_info *gcov_info_head;
diff --git a/kernel/hung_task.c b/kernel/hung_task.c
index 06bb1417b063..06db12434d72 100644
--- a/kernel/hung_task.c
+++ b/kernel/hung_task.c
@@ -52,8 +52,10 @@ unsigned int __read_mostly sysctl_hung_task_panic =
static int __init hung_task_panic_setup(char *str)
{
- sysctl_hung_task_panic = simple_strtoul(str, NULL, 0);
+ int rc = kstrtouint(str, 0, &sysctl_hung_task_panic);
+ if (rc)
+ return rc;
return 1;
}
__setup("hung_task_panic=", hung_task_panic_setup);
diff --git a/kernel/kexec.c b/kernel/kexec.c
index c8380ad203bc..bf0b929e7f94 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -125,8 +125,8 @@ static struct page *kimage_alloc_page(struct kimage *image,
unsigned long dest);
static int do_kimage_alloc(struct kimage **rimage, unsigned long entry,
- unsigned long nr_segments,
- struct kexec_segment __user *segments)
+ unsigned long nr_segments,
+ struct kexec_segment __user *segments)
{
size_t segment_bytes;
struct kimage *image;
@@ -257,13 +257,13 @@ static int kimage_normal_alloc(struct kimage **rimage, unsigned long entry,
image->control_code_page = kimage_alloc_control_pages(image,
get_order(KEXEC_CONTROL_PAGE_SIZE));
if (!image->control_code_page) {
- printk(KERN_ERR "Could not allocate control_code_buffer\n");
+ pr_err("Could not allocate control_code_buffer\n");
goto out_free;
}
image->swap_page = kimage_alloc_control_pages(image, 0);
if (!image->swap_page) {
- printk(KERN_ERR "Could not allocate swap buffer\n");
+ pr_err("Could not allocate swap buffer\n");
goto out_free;
}
@@ -332,7 +332,7 @@ static int kimage_crash_alloc(struct kimage **rimage, unsigned long entry,
image->control_code_page = kimage_alloc_control_pages(image,
get_order(KEXEC_CONTROL_PAGE_SIZE));
if (!image->control_code_page) {
- printk(KERN_ERR "Could not allocate control_code_buffer\n");
+ pr_err("Could not allocate control_code_buffer\n");
goto out_free;
}
@@ -621,8 +621,8 @@ static void kimage_terminate(struct kimage *image)
#define for_each_kimage_entry(image, ptr, entry) \
for (ptr = &image->head; (entry = *ptr) && !(entry & IND_DONE); \
- ptr = (entry & IND_INDIRECTION)? \
- phys_to_virt((entry & PAGE_MASK)): ptr +1)
+ ptr = (entry & IND_INDIRECTION) ? \
+ phys_to_virt((entry & PAGE_MASK)) : ptr + 1)
static void kimage_free_entry(kimage_entry_t entry)
{
@@ -650,8 +650,7 @@ static void kimage_free(struct kimage *image)
* done with it.
*/
ind = entry;
- }
- else if (entry & IND_SOURCE)
+ } else if (entry & IND_SOURCE)
kimage_free_entry(entry);
}
/* Free the final indirection page */
@@ -774,8 +773,7 @@ static struct page *kimage_alloc_page(struct kimage *image,
addr = old_addr;
page = old_page;
break;
- }
- else {
+ } else {
/* Place the page on the destination list I
* will use it later.
*/
@@ -1059,7 +1057,7 @@ COMPAT_SYSCALL_DEFINE4(kexec_load, compat_ulong_t, entry,
return -EINVAL;
ksegments = compat_alloc_user_space(nr_segments * sizeof(out));
- for (i=0; i < nr_segments; i++) {
+ for (i = 0; i < nr_segments; i++) {
result = copy_from_user(&in, &segments[i], sizeof(in));
if (result)
return -EFAULT;
@@ -1214,14 +1212,14 @@ void crash_save_cpu(struct pt_regs *regs, int cpu)
* squirrelled away. ELF notes happen to provide
* all of that, so there is no need to invent something new.
*/
- buf = (u32*)per_cpu_ptr(crash_notes, cpu);
+ buf = (u32 *)per_cpu_ptr(crash_notes, cpu);
if (!buf)
return;
memset(&prstatus, 0, sizeof(prstatus));
prstatus.pr_pid = current->pid;
elf_core_copy_kernel_regs(&prstatus.pr_reg, regs);
buf = append_elf_note(buf, KEXEC_CORE_NOTE_NAME, NT_PRSTATUS,
- &prstatus, sizeof(prstatus));
+ &prstatus, sizeof(prstatus));
final_note(buf);
}
@@ -1230,8 +1228,7 @@ static int __init crash_notes_memory_init(void)
/* Allocate memory for saving cpu registers. */
crash_notes = alloc_percpu(note_buf_t);
if (!crash_notes) {
- printk("Kexec: Memory allocation for saving cpu register"
- " states failed\n");
+ pr_warn("Kexec: Memory allocation for saving cpu register states failed\n");
return -ENOMEM;
}
return 0;
@@ -1253,10 +1250,10 @@ subsys_initcall(crash_notes_memory_init);
*
* The function returns 0 on success and -EINVAL on failure.
*/
-static int __init parse_crashkernel_mem(char *cmdline,
- unsigned long long system_ram,
- unsigned long long *crash_size,
- unsigned long long *crash_base)
+static int __init parse_crashkernel_mem(char *cmdline,
+ unsigned long long system_ram,
+ unsigned long long *crash_size,
+ unsigned long long *crash_base)
{
char *cur = cmdline, *tmp;
@@ -1267,12 +1264,12 @@ static int __init parse_crashkernel_mem(char *cmdline,
/* get the start of the range */
start = memparse(cur, &tmp);
if (cur == tmp) {
- pr_warning("crashkernel: Memory value expected\n");
+ pr_warn("crashkernel: Memory value expected\n");
return -EINVAL;
}
cur = tmp;
if (*cur != '-') {
- pr_warning("crashkernel: '-' expected\n");
+ pr_warn("crashkernel: '-' expected\n");
return -EINVAL;
}
cur++;
@@ -1281,31 +1278,30 @@ static int __init parse_crashkernel_mem(char *cmdline,
if (*cur != ':') {
end = memparse(cur, &tmp);
if (cur == tmp) {
- pr_warning("crashkernel: Memory "
- "value expected\n");
+ pr_warn("crashkernel: Memory value expected\n");
return -EINVAL;
}
cur = tmp;
if (end <= start) {
- pr_warning("crashkernel: end <= start\n");
+ pr_warn("crashkernel: end <= start\n");
return -EINVAL;
}
}
if (*cur != ':') {
- pr_warning("crashkernel: ':' expected\n");
+ pr_warn("crashkernel: ':' expected\n");
return -EINVAL;
}
cur++;
size = memparse(cur, &tmp);
if (cur == tmp) {
- pr_warning("Memory value expected\n");
+ pr_warn("Memory value expected\n");
return -EINVAL;
}
cur = tmp;
if (size >= system_ram) {
- pr_warning("crashkernel: invalid size\n");
+ pr_warn("crashkernel: invalid size\n");
return -EINVAL;
}
@@ -1323,8 +1319,7 @@ static int __init parse_crashkernel_mem(char *cmdline,
cur++;
*crash_base = memparse(cur, &tmp);
if (cur == tmp) {
- pr_warning("Memory value expected "
- "after '@'\n");
+ pr_warn("Memory value expected after '@'\n");
return -EINVAL;
}
}
@@ -1336,26 +1331,26 @@ static int __init parse_crashkernel_mem(char *cmdline,
/*
* That function parses "simple" (old) crashkernel command lines like
*
- * crashkernel=size[@offset]
+ * crashkernel=size[@offset]
*
* It returns 0 on success and -EINVAL on failure.
*/
-static int __init parse_crashkernel_simple(char *cmdline,
- unsigned long long *crash_size,
- unsigned long long *crash_base)
+static int __init parse_crashkernel_simple(char *cmdline,
+ unsigned long long *crash_size,
+ unsigned long long *crash_base)
{
char *cur = cmdline;
*crash_size = memparse(cmdline, &cur);
if (cmdline == cur) {
- pr_warning("crashkernel: memory value expected\n");
+ pr_warn("crashkernel: memory value expected\n");
return -EINVAL;
}
if (*cur == '@')
*crash_base = memparse(cur+1, &cur);
else if (*cur != ' ' && *cur != '\0') {
- pr_warning("crashkernel: unrecognized char\n");
+ pr_warn("crashkernel: unrecognized char\n");
return -EINVAL;
}
@@ -1622,6 +1617,7 @@ static int __init crash_save_vmcoreinfo_init(void)
#ifdef CONFIG_MEMORY_FAILURE
VMCOREINFO_NUMBER(PG_hwpoison);
#endif
+ VMCOREINFO_NUMBER(PG_head_mask);
VMCOREINFO_NUMBER(PAGE_BUDDY_MAPCOUNT_VALUE);
arch_crash_save_vmcoreinfo();
@@ -1683,7 +1679,7 @@ int kernel_kexec(void)
kexec_in_progress = true;
kernel_restart_prepare(NULL);
migrate_to_reboot_cpu();
- printk(KERN_EMERG "Starting new kernel\n");
+ pr_emerg("Starting new kernel\n");
machine_shutdown();
}
diff --git a/kernel/kmod.c b/kernel/kmod.c
index 0ac67a5861c5..8637e041a247 100644
--- a/kernel/kmod.c
+++ b/kernel/kmod.c
@@ -285,10 +285,7 @@ static int wait_for_helper(void *data)
pid_t pid;
/* If SIGCLD is ignored sys_wait4 won't populate the status. */
- spin_lock_irq(&current->sighand->siglock);
- current->sighand->action[SIGCHLD-1].sa.sa_handler = SIG_DFL;
- spin_unlock_irq(&current->sighand->siglock);
-
+ kernel_sigaction(SIGCHLD, SIG_DFL);
pid = kernel_thread(____call_usermodehelper, sub_info, SIGCHLD);
if (pid < 0) {
sub_info->retval = pid;
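
kernel_sigaction() presumably centralizes the open-coded siglock dance this hunk deletes; a hedged sketch of the minimal equivalent:

void kernel_sigaction(int sig, __sighandler_t action)
{
        spin_lock_irq(&current->sighand->siglock);
        current->sighand->action[sig - 1].sa.sa_handler = action;
        recalc_sigpending();
        spin_unlock_irq(&current->sighand->siglock);
}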
diff --git a/kernel/latencytop.c b/kernel/latencytop.c
index a462b317f9a0..a02812743a7e 100644
--- a/kernel/latencytop.c
+++ b/kernel/latencytop.c
@@ -88,7 +88,8 @@ static void clear_global_latency_tracing(void)
}
static void __sched
-account_global_scheduler_latency(struct task_struct *tsk, struct latency_record *lat)
+account_global_scheduler_latency(struct task_struct *tsk,
+ struct latency_record *lat)
{
int firstnonnull = MAXLR + 1;
int i;
@@ -255,7 +256,7 @@ static int lstats_show(struct seq_file *m, void *v)
break;
seq_printf(m, " %ps", (void *)bt);
}
- seq_printf(m, "\n");
+ seq_puts(m, "\n");
}
}
return 0;
diff --git a/kernel/panic.c b/kernel/panic.c
index d02fa9fef46a..62e16cef9cc2 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -32,6 +32,7 @@ static unsigned long tainted_mask;
static int pause_on_oops;
static int pause_on_oops_flag;
static DEFINE_SPINLOCK(pause_on_oops_lock);
+static bool crash_kexec_post_notifiers;
int panic_timeout = CONFIG_PANIC_TIMEOUT;
EXPORT_SYMBOL_GPL(panic_timeout);
@@ -112,9 +113,11 @@ void panic(const char *fmt, ...)
/*
* If we have crashed and we have a crash kernel loaded let it handle
* everything else.
- * Do we want to call this before we try to display a message?
+ * If we want to run this after calling panic_notifiers, pass
+ * the "crash_kexec_post_notifiers" option to the kernel.
*/
- crash_kexec(NULL);
+ if (!crash_kexec_post_notifiers)
+ crash_kexec(NULL);
/*
* Note smp_send_stop is the usual smp shutdown function, which
@@ -131,6 +134,15 @@ void panic(const char *fmt, ...)
kmsg_dump(KMSG_DUMP_PANIC);
+ /*
+ * If you doubt that kdump always works in every situation,
+ * "crash_kexec_post_notifiers" offers a chance to run the
+ * panic notifiers and dump the kmsg before kdump.
+ * Note: some panic notifiers can make the crashed kernel even
+ * more unstable, so this can also raise the risk of kdump failing.
+ */
+ crash_kexec(NULL);
+
bust_spinlocks(0);
if (!panic_blink)
@@ -472,6 +484,13 @@ EXPORT_SYMBOL(__stack_chk_fail);
core_param(panic, panic_timeout, int, 0644);
core_param(pause_on_oops, pause_on_oops, int, 0644);
+static int __init setup_crash_kexec_post_notifiers(char *s)
+{
+ crash_kexec_post_notifiers = true;
+ return 0;
+}
+early_param("crash_kexec_post_notifiers", setup_crash_kexec_post_notifiers);
+
static int __init oops_setup(char *s)
{
if (!s)
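
The new ordering is opt-in via a flag-style boot parameter that takes no value, e.g.:

        crashkernel=256M crash_kexec_post_notifiers

With the flag unset, the early crash_kexec() fires as before and the later call is only reached when no crash kernel is loaded; with it set, the panic notifiers and kmsg dumpers run first, at the cost of a higher risk that kdump itself then fails.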
diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c
index 424c2d4265c9..86535c06a154 100644
--- a/kernel/posix-timers.c
+++ b/kernel/posix-timers.c
@@ -71,7 +71,7 @@ static DEFINE_SPINLOCK(hash_lock);
* SIGEV values. Here we put out an error if this assumption fails.
*/
#if SIGEV_THREAD_ID != (SIGEV_THREAD_ID & \
- ~(SIGEV_SIGNAL | SIGEV_NONE | SIGEV_THREAD))
+ ~(SIGEV_SIGNAL | SIGEV_NONE | SIGEV_THREAD))
#error "SIGEV_THREAD_ID must not share bit with other SIGEV values!"
#endif
@@ -252,7 +252,8 @@ static int posix_get_monotonic_coarse(clockid_t which_clock,
return 0;
}
-static int posix_get_coarse_res(const clockid_t which_clock, struct timespec *tp)
+static int posix_get_coarse_res(const clockid_t which_clock,
+ struct timespec *tp)
{
*tp = ktime_to_timespec(KTIME_LOW_RES);
return 0;
@@ -333,14 +334,16 @@ static __init int init_posix_timers(void)
posix_timers_register_clock(CLOCK_REALTIME, &clock_realtime);
posix_timers_register_clock(CLOCK_MONOTONIC, &clock_monotonic);
posix_timers_register_clock(CLOCK_MONOTONIC_RAW, &clock_monotonic_raw);
- posix_timers_register_clock(CLOCK_REALTIME_COARSE, &clock_realtime_coarse);
- posix_timers_register_clock(CLOCK_MONOTONIC_COARSE, &clock_monotonic_coarse);
+ posix_timers_register_clock(CLOCK_REALTIME_COARSE,
+ &clock_realtime_coarse);
+ posix_timers_register_clock(CLOCK_MONOTONIC_COARSE,
+ &clock_monotonic_coarse);
posix_timers_register_clock(CLOCK_BOOTTIME, &clock_boottime);
posix_timers_register_clock(CLOCK_TAI, &clock_tai);
posix_timers_cache = kmem_cache_create("posix_timers_cache",
- sizeof (struct k_itimer), 0, SLAB_PANIC,
- NULL);
+ sizeof(struct k_itimer), 0,
+ SLAB_PANIC, NULL);
return 0;
}
@@ -494,11 +497,11 @@ static enum hrtimer_restart posix_timer_fn(struct hrtimer *timer)
return ret;
}
-static struct pid *good_sigevent(sigevent_t * event)
+static struct pid *good_sigevent(sigevent_t *event)
{
struct task_struct *rtn = current->group_leader;
- if ((event->sigev_notify & SIGEV_THREAD_ID ) &&
+ if ((event->sigev_notify & SIGEV_THREAD_ID) &&
(!(rtn = find_task_by_vpid(event->sigev_notify_thread_id)) ||
!same_thread_group(rtn, current) ||
(event->sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_SIGNAL))
@@ -515,18 +518,18 @@ void posix_timers_register_clock(const clockid_t clock_id,
struct k_clock *new_clock)
{
if ((unsigned) clock_id >= MAX_CLOCKS) {
- printk(KERN_WARNING "POSIX clock register failed for clock_id %d\n",
+ pr_warn("POSIX clock register failed for clock_id %d\n",
clock_id);
return;
}
if (!new_clock->clock_get) {
- printk(KERN_WARNING "POSIX clock id %d lacks clock_get()\n",
+ pr_warn("POSIX clock id %d lacks clock_get()\n",
clock_id);
return;
}
if (!new_clock->clock_getres) {
- printk(KERN_WARNING "POSIX clock id %d lacks clock_getres()\n",
+ pr_warn("POSIX clock id %d lacks clock_getres()\n",
clock_id);
return;
}
@@ -535,7 +538,7 @@ void posix_timers_register_clock(const clockid_t clock_id,
}
EXPORT_SYMBOL_GPL(posix_timers_register_clock);
-static struct k_itimer * alloc_posix_timer(void)
+static struct k_itimer *alloc_posix_timer(void)
{
struct k_itimer *tmr;
tmr = kmem_cache_zalloc(posix_timers_cache, GFP_KERNEL);
@@ -622,7 +625,7 @@ SYSCALL_DEFINE3(timer_create, const clockid_t, which_clock,
new_timer->it_overrun = -1;
if (timer_event_spec) {
- if (copy_from_user(&event, timer_event_spec, sizeof (event))) {
+ if (copy_from_user(&event, timer_event_spec, sizeof(event))) {
error = -EFAULT;
goto out;
}
@@ -647,7 +650,7 @@ SYSCALL_DEFINE3(timer_create, const clockid_t, which_clock,
new_timer->sigq->info.si_code = SI_TIMER;
if (copy_to_user(created_timer_id,
- &new_timer_id, sizeof (new_timer_id))) {
+ &new_timer_id, sizeof(new_timer_id))) {
error = -EFAULT;
goto out;
}
@@ -748,7 +751,8 @@ common_timer_get(struct k_itimer *timr, struct itimerspec *cur_setting)
*/
if (iv.tv64 && (timr->it_requeue_pending & REQUEUE_PENDING ||
(timr->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE))
- timr->it_overrun += (unsigned int) hrtimer_forward(timer, now, iv);
+ timr->it_overrun += (unsigned int) hrtimer_forward(timer, now,
+ iv);
remaining = ktime_sub(hrtimer_get_expires(timer), now);
/* Return 0 only, when the timer is expired and not pending */
@@ -785,7 +789,7 @@ SYSCALL_DEFINE2(timer_gettime, timer_t, timer_id,
unlock_timer(timr, flags);
- if (!ret && copy_to_user(setting, &cur_setting, sizeof (cur_setting)))
+ if (!ret && copy_to_user(setting, &cur_setting, sizeof(cur_setting)))
return -EFAULT;
return ret;
@@ -837,7 +841,7 @@ common_timer_set(struct k_itimer *timr, int flags,
if (hrtimer_try_to_cancel(timer) < 0)
return TIMER_RETRY;
- timr->it_requeue_pending = (timr->it_requeue_pending + 2) &
+ timr->it_requeue_pending = (timr->it_requeue_pending + 2) &
~REQUEUE_PENDING;
timr->it_overrun_last = 0;
@@ -857,9 +861,8 @@ common_timer_set(struct k_itimer *timr, int flags,
/* SIGEV_NONE timers are not queued ! See common_timer_get */
if (((timr->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE)) {
/* Setup correct expiry time for relative timers */
- if (mode == HRTIMER_MODE_REL) {
+ if (mode == HRTIMER_MODE_REL)
hrtimer_add_expires(timer, timer->base->get_time());
- }
return 0;
}
@@ -882,7 +885,7 @@ SYSCALL_DEFINE4(timer_settime, timer_t, timer_id, int, flags,
if (!new_setting)
return -EINVAL;
- if (copy_from_user(&new_spec, new_setting, sizeof (new_spec)))
+ if (copy_from_user(&new_spec, new_setting, sizeof(new_spec)))
return -EFAULT;
if (!timespec_valid(&new_spec.it_interval) ||
@@ -901,12 +904,12 @@ retry:
unlock_timer(timr, flag);
if (error == TIMER_RETRY) {
- rtn = NULL; // We already got the old time...
+ rtn = NULL; /* We already got the old time... */
goto retry;
}
if (old_setting && !error &&
- copy_to_user(old_setting, &old_spec, sizeof (old_spec)))
+ copy_to_user(old_setting, &old_spec, sizeof(old_spec)))
error = -EFAULT;
return error;
@@ -1008,14 +1011,14 @@ SYSCALL_DEFINE2(clock_settime, const clockid_t, which_clock,
if (!kc || !kc->clock_set)
return -EINVAL;
- if (copy_from_user(&new_tp, tp, sizeof (*tp)))
+ if (copy_from_user(&new_tp, tp, sizeof(*tp)))
return -EFAULT;
return kc->clock_set(which_clock, &new_tp);
}
SYSCALL_DEFINE2(clock_gettime, const clockid_t, which_clock,
- struct timespec __user *,tp)
+ struct timespec __user *, tp)
{
struct k_clock *kc = clockid_to_kclock(which_clock);
struct timespec kernel_tp;
@@ -1026,7 +1029,7 @@ SYSCALL_DEFINE2(clock_gettime, const clockid_t, which_clock,
error = kc->clock_get(which_clock, &kernel_tp);
- if (!error && copy_to_user(tp, &kernel_tp, sizeof (kernel_tp)))
+ if (!error && copy_to_user(tp, &kernel_tp, sizeof(kernel_tp)))
error = -EFAULT;
return error;
@@ -1067,7 +1070,7 @@ SYSCALL_DEFINE2(clock_getres, const clockid_t, which_clock,
error = kc->clock_getres(which_clock, &rtn_tp);
- if (!error && tp && copy_to_user(tp, &rtn_tp, sizeof (rtn_tp)))
+ if (!error && tp && copy_to_user(tp, &rtn_tp, sizeof(rtn_tp)))
error = -EFAULT;
return error;
@@ -1096,7 +1099,7 @@ SYSCALL_DEFINE4(clock_nanosleep, const clockid_t, which_clock, int, flags,
if (!kc->nsleep)
return -ENANOSLEEP_NOTSUP;
- if (copy_from_user(&t, rqtp, sizeof (struct timespec)))
+ if (copy_from_user(&t, rqtp, sizeof(struct timespec)))
return -EFAULT;
if (!timespec_valid(&t))
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index 7228258b85ec..b030cb2843b6 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -68,6 +68,9 @@ int console_printk[4] = {
DEFAULT_CONSOLE_LOGLEVEL, /* default_console_loglevel */
};
+/* Deferred messages from sched code are marked by this special level */
+#define SCHED_MESSAGE_LOGLEVEL -2
+
/*
* Low level drivers may need that to know if they can schedule in
* their unblank() callback or not. So let's export it.
@@ -91,6 +94,29 @@ static struct lockdep_map console_lock_dep_map = {
#endif
/*
+ * Helper macros to handle lockdep when locking/unlocking console_sem. We use
+ * macros instead of functions so that _RET_IP_ contains useful information.
+ */
+#define down_console_sem() do { \
+ down(&console_sem);\
+ mutex_acquire(&console_lock_dep_map, 0, 0, _RET_IP_);\
+} while (0)
+
+static int __down_trylock_console_sem(unsigned long ip)
+{
+ if (down_trylock(&console_sem))
+ return 1;
+ mutex_acquire(&console_lock_dep_map, 0, 1, ip);
+ return 0;
+}
+#define down_trylock_console_sem() __down_trylock_console_sem(_RET_IP_)
+
+#define up_console_sem() do { \
+ mutex_release(&console_lock_dep_map, 1, _RET_IP_);\
+ up(&console_sem);\
+} while (0)
+
+/*
* This is used for debugging the mess that is the VT code by
* keeping track if we have the console semaphore held. It's
* definitely not the perfect debug tool (we don't know if _WE_
@@ -206,8 +232,9 @@ struct printk_log {
};
/*
- * The logbuf_lock protects kmsg buffer, indices, counters. It is also
- * used in interesting ways to provide interlocking in console_unlock();
+ * The logbuf_lock protects kmsg buffer, indices, counters. This can be taken
+ * within the scheduler's rq lock. It must be released before calling
+ * console_unlock() or anything else that might wake up a process.
*/
static DEFINE_RAW_SPINLOCK(logbuf_lock);
@@ -250,9 +277,6 @@ static char __log_buf[__LOG_BUF_LEN] __aligned(LOG_ALIGN);
static char *log_buf = __log_buf;
static u32 log_buf_len = __LOG_BUF_LEN;
-/* cpu currently holding logbuf_lock */
-static volatile unsigned int logbuf_cpu = UINT_MAX;
-
/* human readable text of the record */
static char *log_text(const struct printk_log *msg)
{
@@ -297,34 +321,106 @@ static u32 log_next(u32 idx)
return idx + msg->len;
}
-/* insert record into the buffer, discard old ones, update heads */
-static void log_store(int facility, int level,
- enum log_flags flags, u64 ts_nsec,
- const char *dict, u16 dict_len,
- const char *text, u16 text_len)
+/*
+ * Check whether there is enough free space for the given message.
+ *
+ * The same values of first_idx and next_idx mean that the buffer
+ * is either empty or full.
+ *
+ * If the buffer is empty, we must respect the position of the indexes.
+ * They cannot be reset to the beginning of the buffer.
+ */
+static int logbuf_has_space(u32 msg_size, bool empty)
{
- struct printk_log *msg;
- u32 size, pad_len;
+ u32 free;
- /* number of '\0' padding bytes to next message */
- size = sizeof(struct printk_log) + text_len + dict_len;
- pad_len = (-size) & (LOG_ALIGN - 1);
- size += pad_len;
+ if (log_next_idx > log_first_idx || empty)
+ free = max(log_buf_len - log_next_idx, log_first_idx);
+ else
+ free = log_first_idx - log_next_idx;
+
+ /*
+ * We also need space for an empty header that signals wrapping
+ * of the buffer.
+ */
+ return free >= msg_size + sizeof(struct printk_log);
+}
+static int log_make_free_space(u32 msg_size)
+{
while (log_first_seq < log_next_seq) {
- u32 free;
+ if (logbuf_has_space(msg_size, false))
+ return 0;
+ /* drop old messages until we have enough contiguous space */
+ log_first_idx = log_next(log_first_idx);
+ log_first_seq++;
+ }
- if (log_next_idx > log_first_idx)
- free = max(log_buf_len - log_next_idx, log_first_idx);
- else
- free = log_first_idx - log_next_idx;
+ /* sequence numbers are equal, so the log buffer is empty */
+ if (logbuf_has_space(msg_size, true))
+ return 0;
- if (free >= size + sizeof(struct printk_log))
- break;
+ return -ENOMEM;
+}
- /* drop old messages until we have enough contiuous space */
- log_first_idx = log_next(log_first_idx);
- log_first_seq++;
+/* compute the message size including the padding bytes */
+static u32 msg_used_size(u16 text_len, u16 dict_len, u32 *pad_len)
+{
+ u32 size;
+
+ size = sizeof(struct printk_log) + text_len + dict_len;
+ *pad_len = (-size) & (LOG_ALIGN - 1);
+ size += *pad_len;
+
+ return size;
+}
+
+/*
+ * Define the maximum fraction of the log buffer a single message may
+ * take. The divisor must be greater than two; note that only half of
+ * the buffer is available when the head index points to the middle.
+ */
+#define MAX_LOG_TAKE_PART 4
+static const char trunc_msg[] = "<truncated>";
+
+static u32 truncate_msg(u16 *text_len, u16 *trunc_msg_len,
+ u16 *dict_len, u32 *pad_len)
+{
+ /*
+ * The message should not take the whole buffer. Otherwise, it might
+ * get removed too soon.
+ */
+ u32 max_text_len = log_buf_len / MAX_LOG_TAKE_PART;
+ if (*text_len > max_text_len)
+ *text_len = max_text_len;
+ /* enable the warning message */
+ *trunc_msg_len = strlen(trunc_msg);
+ /* disable the "dict" completely */
+ *dict_len = 0;
+ /* compute the size again, count also the warning message */
+ return msg_used_size(*text_len + *trunc_msg_len, 0, pad_len);
+}
+
+/* insert record into the buffer, discard old ones, update heads */
+static int log_store(int facility, int level,
+ enum log_flags flags, u64 ts_nsec,
+ const char *dict, u16 dict_len,
+ const char *text, u16 text_len)
+{
+ struct printk_log *msg;
+ u32 size, pad_len;
+ u16 trunc_msg_len = 0;
+
+ /* record size including '\0' padding bytes to the next message */
+ size = msg_used_size(text_len, dict_len, &pad_len);
+
+ if (log_make_free_space(size)) {
+ /* truncate the message if it is too long for empty buffer */
+ size = truncate_msg(&text_len, &trunc_msg_len,
+ &dict_len, &pad_len);
+ /* survive when the log buffer is too small for trunc_msg */
+ if (log_make_free_space(size))
+ return 0;
}
if (log_next_idx + size + sizeof(struct printk_log) > log_buf_len) {
@@ -341,6 +437,10 @@ static void log_store(int facility, int level,
msg = (struct printk_log *)(log_buf + log_next_idx);
memcpy(log_text(msg), text, text_len);
msg->text_len = text_len;
+ if (trunc_msg_len) {
+ memcpy(log_text(msg) + text_len, trunc_msg, trunc_msg_len);
+ msg->text_len += trunc_msg_len;
+ }
memcpy(log_dict(msg), dict, dict_len);
msg->dict_len = dict_len;
msg->facility = facility;
@@ -356,6 +456,8 @@ static void log_store(int facility, int level,
/* insert message */
log_next_idx += msg->len;
log_next_seq++;
+
+ return msg->text_len;
}
#ifdef CONFIG_SECURITY_DMESG_RESTRICT
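
A worked example of the free-space test introduced above, assuming log_buf_len = 16, log_first_idx = 6 and log_next_idx = 10 (next > first, buffer not empty):

    free = max(log_buf_len - log_next_idx, log_first_idx)
         = max(16 - 10, 6) = 6

Only the larger of the tail gap and the head gap counts, because a record must be contiguous; when log_next_idx <= log_first_idx on a non-empty buffer, free is simply log_first_idx - log_next_idx. The extra sizeof(struct printk_log) reserves room for the empty header that marks a wrap.
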
@@ -1303,7 +1405,10 @@ static void zap_locks(void)
sema_init(&console_sem, 1);
}
-/* Check if we have any console registered that can be called early in boot. */
+/*
+ * Check if we have any console that is capable of printing while cpu is
+ * booting or shutting down. Requires console_sem.
+ */
static int have_callable_console(void)
{
struct console *con;
@@ -1318,10 +1423,9 @@ static int have_callable_console(void)
/*
* Can we actually use the console at this time on this cpu?
*
- * Console drivers may assume that per-cpu resources have
- * been allocated. So unless they're explicitly marked as
- * being able to cope (CON_ANYTIME) don't call them until
- * this CPU is officially up.
+ * Console drivers may assume that per-cpu resources have been allocated. So
+ * unless they're explicitly marked as being able to cope (CON_ANYTIME) don't
+ * call them until this CPU is officially up.
*/
static inline int can_use_console(unsigned int cpu)
{
@@ -1333,36 +1437,24 @@ static inline int can_use_console(unsigned int cpu)
* messages from a 'printk'. Return true (and with the
* console_lock held, and 'console_locked' set) if it
* is successful, false otherwise.
- *
- * This gets called with the 'logbuf_lock' spinlock held and
- * interrupts disabled. It should return with 'lockbuf_lock'
- * released but interrupts still disabled.
*/
-static int console_trylock_for_printk(unsigned int cpu)
- __releases(&logbuf_lock)
+static int console_trylock_for_printk(void)
{
- int retval = 0, wake = 0;
-
- if (console_trylock()) {
- retval = 1;
+ unsigned int cpu = smp_processor_id();
- /*
- * If we can't use the console, we need to release
- * the console semaphore by hand to avoid flushing
- * the buffer. We need to hold the console semaphore
- * in order to do this test safely.
- */
- if (!can_use_console(cpu)) {
- console_locked = 0;
- wake = 1;
- retval = 0;
- }
+ if (!console_trylock())
+ return 0;
+ /*
+ * If we can't use the console, we need to release the console
+ * semaphore by hand to avoid flushing the buffer. We need to hold the
+ * console semaphore in order to do this test safely.
+ */
+ if (!can_use_console(cpu)) {
+ console_locked = 0;
+ up_console_sem();
+ return 0;
}
- logbuf_cpu = UINT_MAX;
- raw_spin_unlock(&logbuf_lock);
- if (wake)
- up(&console_sem);
- return retval;
+ return 1;
}
int printk_delay_msec __read_mostly;
@@ -1490,11 +1582,19 @@ asmlinkage int vprintk_emit(int facility, int level,
static int recursion_bug;
static char textbuf[LOG_LINE_MAX];
char *text = textbuf;
- size_t text_len;
+ size_t text_len = 0;
enum log_flags lflags = 0;
unsigned long flags;
int this_cpu;
int printed_len = 0;
+ bool in_sched = false;
+ /* cpu currently holding logbuf_lock in this function */
+ static volatile unsigned int logbuf_cpu = UINT_MAX;
+
+ if (level == SCHED_MESSAGE_LOGLEVEL) {
+ level = -1;
+ in_sched = true;
+ }
boot_delay_msec(level);
printk_delay();
@@ -1516,7 +1616,8 @@ asmlinkage int vprintk_emit(int facility, int level,
*/
if (!oops_in_progress && !lockdep_recursing(current)) {
recursion_bug = 1;
- goto out_restore_irqs;
+ local_irq_restore(flags);
+ return 0;
}
zap_locks();
}
@@ -1530,17 +1631,22 @@ asmlinkage int vprintk_emit(int facility, int level,
"BUG: recent printk recursion!";
recursion_bug = 0;
- printed_len += strlen(recursion_msg);
+ text_len = strlen(recursion_msg);
/* emit KERN_CRIT message */
- log_store(0, 2, LOG_PREFIX|LOG_NEWLINE, 0,
- NULL, 0, recursion_msg, printed_len);
+ printed_len += log_store(0, 2, LOG_PREFIX|LOG_NEWLINE, 0,
+ NULL, 0, recursion_msg, text_len);
}
/*
* The printf needs to come first; we need the syslog
* prefix which might be passed-in as a parameter.
*/
- text_len = vscnprintf(text, sizeof(textbuf), fmt, args);
+ if (in_sched)
+ text_len = scnprintf(text, sizeof(textbuf),
+ KERN_WARNING "[sched_delayed] ");
+
+ text_len += vscnprintf(text + text_len,
+ sizeof(textbuf) - text_len, fmt, args);
/* mark and strip a trailing newline */
if (text_len && text[text_len-1] == '\n') {
@@ -1586,9 +1692,12 @@ asmlinkage int vprintk_emit(int facility, int level,
cont_flush(LOG_NEWLINE);
/* buffer line if possible, otherwise store it right away */
- if (!cont_add(facility, level, text, text_len))
- log_store(facility, level, lflags | LOG_CONT, 0,
- dict, dictlen, text, text_len);
+ if (cont_add(facility, level, text, text_len))
+ printed_len += text_len;
+ else
+ printed_len += log_store(facility, level,
+ lflags | LOG_CONT, 0,
+ dict, dictlen, text, text_len);
} else {
bool stored = false;
@@ -1607,26 +1716,35 @@ asmlinkage int vprintk_emit(int facility, int level,
cont_flush(LOG_NEWLINE);
}
- if (!stored)
- log_store(facility, level, lflags, 0,
- dict, dictlen, text, text_len);
+ if (stored)
+ printed_len += text_len;
+ else
+ printed_len += log_store(facility, level, lflags, 0,
+ dict, dictlen, text, text_len);
}
- printed_len += text_len;
+ logbuf_cpu = UINT_MAX;
+ raw_spin_unlock(&logbuf_lock);
+ lockdep_on();
+ local_irq_restore(flags);
+
+ /* If called from the scheduler, we cannot call up(). */
+ if (in_sched)
+ return printed_len;
+
+ /*
+ * Disable preemption to avoid being preempted while holding
+ * console_sem which would prevent anyone from printing to console
+ */
+ preempt_disable();
/*
* Try to acquire and then immediately release the console semaphore.
* The release will print out buffers and wake up /dev/kmsg and syslog()
* users.
- *
- * The console_trylock_for_printk() function will release 'logbuf_lock'
- * regardless of whether it actually gets the console semaphore or not.
*/
- if (console_trylock_for_printk(this_cpu))
+ if (console_trylock_for_printk())
console_unlock();
-
- lockdep_on();
-out_restore_irqs:
- local_irq_restore(flags);
+ preempt_enable();
return printed_len;
}
@@ -1882,16 +2000,14 @@ void suspend_console(void)
printk("Suspending console(s) (use no_console_suspend to debug)\n");
console_lock();
console_suspended = 1;
- up(&console_sem);
- mutex_release(&console_lock_dep_map, 1, _RET_IP_);
+ up_console_sem();
}
void resume_console(void)
{
if (!console_suspend_enabled)
return;
- down(&console_sem);
- mutex_acquire(&console_lock_dep_map, 0, 0, _RET_IP_);
+ down_console_sem();
console_suspended = 0;
console_unlock();
}
@@ -1933,12 +2049,11 @@ void console_lock(void)
{
might_sleep();
- down(&console_sem);
+ down_console_sem();
if (console_suspended)
return;
console_locked = 1;
console_may_schedule = 1;
- mutex_acquire(&console_lock_dep_map, 0, 0, _RET_IP_);
}
EXPORT_SYMBOL(console_lock);
@@ -1952,15 +2067,14 @@ EXPORT_SYMBOL(console_lock);
*/
int console_trylock(void)
{
- if (down_trylock(&console_sem))
+ if (down_trylock_console_sem())
return 0;
if (console_suspended) {
- up(&console_sem);
+ up_console_sem();
return 0;
}
console_locked = 1;
console_may_schedule = 0;
- mutex_acquire(&console_lock_dep_map, 0, 1, _RET_IP_);
return 1;
}
EXPORT_SYMBOL(console_trylock);
@@ -2022,7 +2136,7 @@ void console_unlock(void)
bool retry;
if (console_suspended) {
- up(&console_sem);
+ up_console_sem();
return;
}
@@ -2043,10 +2157,15 @@ again:
}
if (console_seq < log_first_seq) {
+ len = sprintf(text, "** %u printk messages dropped ** ",
+ (unsigned)(log_first_seq - console_seq));
+
/* messages are gone, move to first one */
console_seq = log_first_seq;
console_idx = log_first_idx;
console_prev = 0;
+ } else {
+ len = 0;
}
skip:
if (console_seq == log_next_seq)
@@ -2071,8 +2190,8 @@ skip:
}
level = msg->level;
- len = msg_print_text(msg, console_prev, false,
- text, sizeof(text));
+ len += msg_print_text(msg, console_prev, false,
+ text + len, sizeof(text) - len);
console_idx = log_next(console_idx);
console_seq++;
console_prev = msg->flags;
@@ -2084,7 +2203,6 @@ skip:
local_irq_restore(flags);
}
console_locked = 0;
- mutex_release(&console_lock_dep_map, 1, _RET_IP_);
/* Release the exclusive_console once it is used */
if (unlikely(exclusive_console))
@@ -2092,7 +2210,7 @@ skip:
raw_spin_unlock(&logbuf_lock);
- up(&console_sem);
+ up_console_sem();
/*
* Someone could have filled up the buffer again, so re-check if there's
@@ -2137,7 +2255,7 @@ void console_unblank(void)
* oops_in_progress is set to 1..
*/
if (oops_in_progress) {
- if (down_trylock(&console_sem) != 0)
+ if (down_trylock_console_sem() != 0)
return;
} else
console_lock();
@@ -2437,21 +2555,19 @@ late_initcall(printk_late_init);
/*
* Delayed printk version, for scheduler-internal messages:
*/
-#define PRINTK_BUF_SIZE 512
-
#define PRINTK_PENDING_WAKEUP 0x01
-#define PRINTK_PENDING_SCHED 0x02
+#define PRINTK_PENDING_OUTPUT 0x02
static DEFINE_PER_CPU(int, printk_pending);
-static DEFINE_PER_CPU(char [PRINTK_BUF_SIZE], printk_sched_buf);
static void wake_up_klogd_work_func(struct irq_work *irq_work)
{
int pending = __this_cpu_xchg(printk_pending, 0);
- if (pending & PRINTK_PENDING_SCHED) {
- char *buf = __get_cpu_var(printk_sched_buf);
- pr_warn("[sched_delayed] %s", buf);
+ if (pending & PRINTK_PENDING_OUTPUT) {
+ /* If trylock fails, someone else is doing the printing */
+ if (console_trylock())
+ console_unlock();
}
if (pending & PRINTK_PENDING_WAKEUP)
@@ -2473,23 +2589,19 @@ void wake_up_klogd(void)
preempt_enable();
}
-int printk_sched(const char *fmt, ...)
+int printk_deferred(const char *fmt, ...)
{
- unsigned long flags;
va_list args;
- char *buf;
int r;
- local_irq_save(flags);
- buf = __get_cpu_var(printk_sched_buf);
-
+ preempt_disable();
va_start(args, fmt);
- r = vsnprintf(buf, PRINTK_BUF_SIZE, fmt, args);
+ r = vprintk_emit(0, SCHED_MESSAGE_LOGLEVEL, NULL, 0, fmt, args);
va_end(args);
- __this_cpu_or(printk_pending, PRINTK_PENDING_SCHED);
+ __this_cpu_or(printk_pending, PRINTK_PENDING_OUTPUT);
irq_work_queue(&__get_cpu_var(wake_up_klogd_work));
- local_irq_restore(flags);
+ preempt_enable();
return r;
}
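
The vprintk_emit() rework is what makes printk_deferred() safe: a SCHED_MESSAGE_LOGLEVEL message is only stored under logbuf_lock, and console flushing is handed to irq_work. A hedged usage sketch from the scheduler's side (rq->lock being the runqueue lock; the message text is illustrative):

    raw_spin_lock(&rq->lock);
    /*
     * A plain printk() here could deadlock: console_unlock() may wake
     * a task, which in turn needs rq->lock. The deferred variant only
     * appends to the log buffer.
     */
    printk_deferred("sched: example deferred message on cpu%d\n", cpu);
    raw_spin_unlock(&rq->lock);
    /* the queued irq_work later takes console_sem and flushes */
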
diff --git a/kernel/profile.c b/kernel/profile.c
index cb980f0c731b..54bf5ba26420 100644
--- a/kernel/profile.c
+++ b/kernel/profile.c
@@ -52,9 +52,9 @@ static DEFINE_MUTEX(profile_flip_mutex);
int profile_setup(char *str)
{
- static char schedstr[] = "schedule";
- static char sleepstr[] = "sleep";
- static char kvmstr[] = "kvm";
+ static const char schedstr[] = "schedule";
+ static const char sleepstr[] = "sleep";
+ static const char kvmstr[] = "kvm";
int par;
if (!strncmp(str, sleepstr, strlen(sleepstr))) {
@@ -64,12 +64,10 @@ int profile_setup(char *str)
str += strlen(sleepstr) + 1;
if (get_option(&str, &par))
prof_shift = par;
- printk(KERN_INFO
- "kernel sleep profiling enabled (shift: %ld)\n",
+ pr_info("kernel sleep profiling enabled (shift: %ld)\n",
prof_shift);
#else
- printk(KERN_WARNING
- "kernel sleep profiling requires CONFIG_SCHEDSTATS\n");
+ pr_warn("kernel sleep profiling requires CONFIG_SCHEDSTATS\n");
#endif /* CONFIG_SCHEDSTATS */
} else if (!strncmp(str, schedstr, strlen(schedstr))) {
prof_on = SCHED_PROFILING;
@@ -77,8 +75,7 @@ int profile_setup(char *str)
str += strlen(schedstr) + 1;
if (get_option(&str, &par))
prof_shift = par;
- printk(KERN_INFO
- "kernel schedule profiling enabled (shift: %ld)\n",
+ pr_info("kernel schedule profiling enabled (shift: %ld)\n",
prof_shift);
} else if (!strncmp(str, kvmstr, strlen(kvmstr))) {
prof_on = KVM_PROFILING;
@@ -86,13 +83,12 @@ int profile_setup(char *str)
str += strlen(kvmstr) + 1;
if (get_option(&str, &par))
prof_shift = par;
- printk(KERN_INFO
- "kernel KVM profiling enabled (shift: %ld)\n",
+ pr_info("kernel KVM profiling enabled (shift: %ld)\n",
prof_shift);
} else if (get_option(&str, &par)) {
prof_shift = par;
prof_on = CPU_PROFILING;
- printk(KERN_INFO "kernel profiling enabled (shift: %ld)\n",
+ pr_info("kernel profiling enabled (shift: %ld)\n",
prof_shift);
}
return 1;
diff --git a/kernel/reboot.c b/kernel/reboot.c
index 662c83fc16b7..a3a9e240fcdb 100644
--- a/kernel/reboot.c
+++ b/kernel/reboot.c
@@ -388,15 +388,22 @@ static int __init reboot_setup(char *str)
break;
case 's':
- if (isdigit(*(str+1)))
- reboot_cpu = simple_strtoul(str+1, NULL, 0);
- else if (str[1] == 'm' && str[2] == 'p' &&
- isdigit(*(str+3)))
- reboot_cpu = simple_strtoul(str+3, NULL, 0);
- else
+ {
+ int rc;
+
+ if (isdigit(*(str+1))) {
+ rc = kstrtoint(str+1, 0, &reboot_cpu);
+ if (rc)
+ return rc;
+ } else if (str[1] == 'm' && str[2] == 'p' &&
+ isdigit(*(str+3))) {
+ rc = kstrtoint(str+3, 0, &reboot_cpu);
+ if (rc)
+ return rc;
+ } else
reboot_mode = REBOOT_SOFT;
break;
-
+ }
case 'g':
reboot_mode = REBOOT_GPIO;
break;
diff --git a/kernel/res_counter.c b/kernel/res_counter.c
index 51dbac6a3633..85ea94305015 100644
--- a/kernel/res_counter.c
+++ b/kernel/res_counter.c
@@ -136,6 +136,8 @@ res_counter_member(struct res_counter *counter, int member)
return &counter->failcnt;
case RES_SOFT_LIMIT:
return &counter->soft_limit;
+ case RES_LOW_LIMIT:
+ return &counter->low_limit;
};
BUG();
@@ -186,8 +188,11 @@ int res_counter_memparse_write_strategy(const char *buf,
/* return RES_COUNTER_MAX(unlimited) if "-1" is specified */
if (*buf == '-') {
- res = simple_strtoull(buf + 1, &end, 10);
- if (res != 1 || *end != '\0')
+ int rc = kstrtoull(buf + 1, 10, &res);
+
+ if (rc)
+ return rc;
+ if (res != 1)
return -EINVAL;
*resp = RES_COUNTER_MAX;
return 0;
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 9fb705b163a1..1e5c71ca3011 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1367,7 +1367,7 @@ out:
* leave kernel.
*/
if (p->mm && printk_ratelimit()) {
- printk_sched("process %d (%s) no longer affine to cpu%d\n",
+ printk_deferred("process %d (%s) no longer affine to cpu%d\n",
task_pid_nr(p), p->comm, cpu);
}
}
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 800e99b99075..3fa3fac9d819 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -348,12 +348,7 @@ static void replenish_dl_entity(struct sched_dl_entity *dl_se,
* entity.
*/
if (dl_time_before(dl_se->deadline, rq_clock(rq))) {
- static bool lag_once = false;
-
- if (!lag_once) {
- lag_once = true;
- printk_sched("sched: DL replenish lagged to much\n");
- }
+ printk_deferred_once("sched: DL replenish lagged to much\n");
dl_se->deadline = rq_clock(rq) + pi_se->dl_deadline;
dl_se->runtime = pi_se->dl_runtime;
}
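
This hunk and the rt.c hunk below drop their private 'static bool' once-guards in favor of printk_deferred_once(). Its definition lies outside this excerpt; a sketch of the printk_once-style shape it presumably has:

    #define printk_deferred_once(fmt, ...)                  \
    ({                                                      \
            static bool __print_once;                       \
                                                            \
            if (!__print_once) {                            \
                    __print_once = true;                    \
                    printk_deferred(fmt, ##__VA_ARGS__);    \
            }                                               \
    })
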
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 7795e292f4c9..ea4d50089631 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -890,14 +890,8 @@ static int sched_rt_runtime_exceeded(struct rt_rq *rt_rq)
* but accrue some time due to boosting.
*/
if (likely(rt_b->rt_runtime)) {
- static bool once = false;
-
rt_rq->rt_throttled = 1;
-
- if (!once) {
- once = true;
- printk_sched("sched: RT throttling activated\n");
- }
+ printk_deferred_once("sched: RT throttling activated\n");
} else {
/*
* In case we did anyway, make it go away,
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index 7e02d624cc50..27bf38be878b 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -39,7 +39,7 @@
* is only needed for handling filters shared across tasks.
* @prev: points to a previously installed, or inherited, filter
* @len: the number of instructions in the program
- * @insns: the BPF program instructions to evaluate
+ * @insnsi: the BPF program instructions to evaluate
*
* seccomp_filter objects are organized in a tree linked via the @prev
* pointer. For any task, it appears to be a singly-linked list starting
@@ -220,7 +220,7 @@ static long seccomp_attach_filter(struct sock_fprog *fprog)
return -ENOMEM;
/*
- * Installing a seccomp filter requires that the task have
+ * Installing a seccomp filter requires that the task has
* CAP_SYS_ADMIN in its namespace or be running with no_new_privs.
* This avoids scenarios where unprivileged tasks can affect the
* behavior of privileged children.
diff --git a/kernel/signal.c b/kernel/signal.c
index 513e8c252aa4..87e07e739977 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -705,11 +705,8 @@ void signal_wake_up_state(struct task_struct *t, unsigned int state)
* Returns 1 if any signals were found.
*
* All callers must be holding the siglock.
- *
- * This version takes a sigset mask and looks at all signals,
- * not just those in the first mask word.
*/
-static int rm_from_queue_full(sigset_t *mask, struct sigpending *s)
+static int flush_sigqueue_mask(sigset_t *mask, struct sigpending *s)
{
struct sigqueue *q, *n;
sigset_t m;
@@ -727,29 +724,6 @@ static int rm_from_queue_full(sigset_t *mask, struct sigpending *s)
}
return 1;
}
-/*
- * Remove signals in mask from the pending set and queue.
- * Returns 1 if any signals were found.
- *
- * All callers must be holding the siglock.
- */
-static int rm_from_queue(unsigned long mask, struct sigpending *s)
-{
- struct sigqueue *q, *n;
-
- if (!sigtestsetmask(&s->signal, mask))
- return 0;
-
- sigdelsetmask(&s->signal, mask);
- list_for_each_entry_safe(q, n, &s->list, list) {
- if (q->info.si_signo < SIGRTMIN &&
- (mask & sigmask(q->info.si_signo))) {
- list_del_init(&q->list);
- __sigqueue_free(q);
- }
- }
- return 1;
-}
static inline int is_si_special(const struct siginfo *info)
{
@@ -861,6 +835,7 @@ static bool prepare_signal(int sig, struct task_struct *p, bool force)
{
struct signal_struct *signal = p->signal;
struct task_struct *t;
+ sigset_t flush;
if (signal->flags & (SIGNAL_GROUP_EXIT | SIGNAL_GROUP_COREDUMP)) {
if (signal->flags & SIGNAL_GROUP_COREDUMP)
@@ -872,26 +847,25 @@ static bool prepare_signal(int sig, struct task_struct *p, bool force)
/*
* This is a stop signal. Remove SIGCONT from all queues.
*/
- rm_from_queue(sigmask(SIGCONT), &signal->shared_pending);
- t = p;
- do {
- rm_from_queue(sigmask(SIGCONT), &t->pending);
- } while_each_thread(p, t);
+ siginitset(&flush, sigmask(SIGCONT));
+ flush_sigqueue_mask(&flush, &signal->shared_pending);
+ for_each_thread(p, t)
+ flush_sigqueue_mask(&flush, &t->pending);
} else if (sig == SIGCONT) {
unsigned int why;
/*
* Remove all stop signals from all queues, wake all threads.
*/
- rm_from_queue(SIG_KERNEL_STOP_MASK, &signal->shared_pending);
- t = p;
- do {
+ siginitset(&flush, SIG_KERNEL_STOP_MASK);
+ flush_sigqueue_mask(&flush, &signal->shared_pending);
+ for_each_thread(p, t) {
+ flush_sigqueue_mask(&flush, &t->pending);
task_clear_jobctl_pending(t, JOBCTL_STOP_PENDING);
- rm_from_queue(SIG_KERNEL_STOP_MASK, &t->pending);
if (likely(!(t->ptrace & PT_SEIZED)))
wake_up_state(t, __TASK_STOPPED);
else
ptrace_trap_notify(t);
- } while_each_thread(p, t);
+ }
/*
* Notify the parent with CLD_CONTINUED if we were stopped.
@@ -2858,7 +2832,7 @@ int do_sigtimedwait(const sigset_t *which, siginfo_t *info,
spin_lock_irq(&tsk->sighand->siglock);
__set_task_blocked(tsk, &tsk->real_blocked);
- siginitset(&tsk->real_blocked, 0);
+ sigemptyset(&tsk->real_blocked);
sig = dequeue_signal(tsk, &mask, info);
}
spin_unlock_irq(&tsk->sighand->siglock);
@@ -3095,18 +3069,39 @@ COMPAT_SYSCALL_DEFINE4(rt_tgsigqueueinfo,
}
#endif
+/*
+ * For kthreads only, must not be used if cloned with CLONE_SIGHAND
+ */
+void kernel_sigaction(int sig, __sighandler_t action)
+{
+ spin_lock_irq(&current->sighand->siglock);
+ current->sighand->action[sig - 1].sa.sa_handler = action;
+ if (action == SIG_IGN) {
+ sigset_t mask;
+
+ sigemptyset(&mask);
+ sigaddset(&mask, sig);
+
+ flush_sigqueue_mask(&mask, &current->signal->shared_pending);
+ flush_sigqueue_mask(&mask, &current->pending);
+ recalc_sigpending();
+ }
+ spin_unlock_irq(&current->sighand->siglock);
+}
+EXPORT_SYMBOL(kernel_sigaction);
+
int do_sigaction(int sig, struct k_sigaction *act, struct k_sigaction *oact)
{
- struct task_struct *t = current;
+ struct task_struct *p = current, *t;
struct k_sigaction *k;
sigset_t mask;
if (!valid_signal(sig) || sig < 1 || (act && sig_kernel_only(sig)))
return -EINVAL;
- k = &t->sighand->action[sig-1];
+ k = &p->sighand->action[sig-1];
- spin_lock_irq(&current->sighand->siglock);
+ spin_lock_irq(&p->sighand->siglock);
if (oact)
*oact = *k;
@@ -3125,21 +3120,20 @@ int do_sigaction(int sig, struct k_sigaction *act, struct k_sigaction *oact)
* (for example, SIGCHLD), shall cause the pending signal to
* be discarded, whether or not it is blocked"
*/
- if (sig_handler_ignored(sig_handler(t, sig), sig)) {
+ if (sig_handler_ignored(sig_handler(p, sig), sig)) {
sigemptyset(&mask);
sigaddset(&mask, sig);
- rm_from_queue_full(&mask, &t->signal->shared_pending);
- do {
- rm_from_queue_full(&mask, &t->pending);
- } while_each_thread(current, t);
+ flush_sigqueue_mask(&mask, &p->signal->shared_pending);
+ for_each_thread(p, t)
+ flush_sigqueue_mask(&mask, &t->pending);
}
}
- spin_unlock_irq(&current->sighand->siglock);
+ spin_unlock_irq(&p->sighand->siglock);
return 0;
}
-static int
+static int
do_sigaltstack (const stack_t __user *uss, stack_t __user *uoss, unsigned long sp)
{
stack_t oss;
@@ -3500,7 +3494,7 @@ COMPAT_SYSCALL_DEFINE3(sigaction, int, sig,
}
#endif
-#ifdef __ARCH_WANT_SYS_SGETMASK
+#ifdef CONFIG_SGETMASK_SYSCALL
/*
* For backwards compatibility. Functionality superseded by sigprocmask.
@@ -3521,7 +3515,7 @@ SYSCALL_DEFINE1(ssetmask, int, newmask)
return old;
}
-#endif /* __ARCH_WANT_SGETMASK */
+#endif /* CONFIG_SGETMASK_SYSCALL */
#ifdef __ARCH_WANT_SYS_SIGNAL
/*
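
The dropped rm_from_queue() took a bare unsigned long and so could only describe signals in the first mask word; the surviving flush_sigqueue_mask() takes a full sigset_t. The replacement pattern, condensed from the hunks above:

    sigset_t flush;

    siginitset(&flush, sigmask(SIGCONT));   /* a full sigset, not a word */
    flush_sigqueue_mask(&flush, &p->signal->shared_pending);
    for_each_thread(p, t)
            flush_sigqueue_mask(&flush, &t->pending);
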
diff --git a/kernel/smp.c b/kernel/smp.c
index 7589be5b6a1d..6fc6dedda538 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -185,14 +185,24 @@ void generic_smp_call_function_single_interrupt(void)
{
struct llist_node *entry;
struct call_single_data *csd, *csd_next;
+ static bool warned;
+
+ entry = llist_del_all(&__get_cpu_var(call_single_queue));
+ entry = llist_reverse_order(entry);
/*
* Shouldn't receive this interrupt on a cpu that is not yet online.
*/
- WARN_ON_ONCE(!cpu_online(smp_processor_id()));
-
- entry = llist_del_all(&__get_cpu_var(call_single_queue));
- entry = llist_reverse_order(entry);
+ if (unlikely(!cpu_online(smp_processor_id()) && !warned)) {
+ warned = true;
+ WARN_ON(1);
+ /*
+ * We don't have to use the _safe() variant here
+ * because we are not invoking the IPI handlers yet.
+ */
+ llist_for_each_entry(csd, entry, llist)
+ pr_warn("SMP IPI Payload: %pS \n", csd->func);
+ }
llist_for_each_entry_safe(csd, csd_next, entry, llist) {
csd->func(csd->info);
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index 01fbae5b97b7..03c77ced3339 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -130,8 +130,10 @@ enum multi_stop_state {
MULTI_STOP_NONE,
/* Awaiting everyone to be scheduled. */
MULTI_STOP_PREPARE,
- /* Disable interrupts. */
- MULTI_STOP_DISABLE_IRQ,
+ /* Disable interrupts on CPUs not in ->active_cpus mask. */
+ MULTI_STOP_DISABLE_IRQ_INACTIVE,
+ /* Disable interrupts on CPUs in ->active_cpus mask. */
+ MULTI_STOP_DISABLE_IRQ_ACTIVE,
/* Run the function */
MULTI_STOP_RUN,
/* Exit */
@@ -189,10 +191,27 @@ static int multi_cpu_stop(void *data)
do {
/* Chill out and ensure we re-read multi_stop_state. */
cpu_relax();
+
+ /*
+ * In the case of CPU offline, we don't want the other CPUs to
+ * send IPIs to the active_cpu (the one going offline) after it
+ * has disabled interrupts in the _DISABLE_IRQ state (because,
+ * then it will notice the IPIs only after it goes offline). So
+ * we split this state into _INACTIVE and _ACTIVE, and thereby
+ * ensure that the active_cpu disables interrupts only after
+ * the other CPUs do the same thing.
+ */
+
if (msdata->state != curstate) {
curstate = msdata->state;
switch (curstate) {
- case MULTI_STOP_DISABLE_IRQ:
+ case MULTI_STOP_DISABLE_IRQ_INACTIVE:
+ if (is_active)
+ break;
+
+ /* Else, fall-through */
+
+ case MULTI_STOP_DISABLE_IRQ_ACTIVE:
local_irq_disable();
hard_irq_disable();
break;
@@ -307,6 +326,7 @@ int stop_two_cpus(unsigned int cpu1, unsigned int cpu2, cpu_stop_fn_t fn, void *
* @cpu: cpu to stop
* @fn: function to execute
* @arg: argument to @fn
+ * @work_buf: pointer to cpu_stop_work structure
*
* Similar to stop_one_cpu() but doesn't wait for completion. The
* caller is responsible for ensuring @work_buf is currently unused
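
The resulting state progression in multi_cpu_stop(), sketched with the states defined above:

    MULTI_STOP_PREPARE
      -> MULTI_STOP_DISABLE_IRQ_INACTIVE    /* CPUs outside ->active_cpus */
      -> MULTI_STOP_DISABLE_IRQ_ACTIVE      /* e.g. the CPU going offline */
      -> MULTI_STOP_RUN
      -> MULTI_STOP_EXIT

The active CPU therefore masks interrupts last and cannot miss an IPI sent by a peer that has not yet reached the _INACTIVE step.
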
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index bc8d1b74a6b9..36441b51b5df 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -135,6 +135,8 @@ cond_syscall(sys_setresgid16);
cond_syscall(sys_setresuid16);
cond_syscall(sys_setreuid16);
cond_syscall(sys_setuid16);
+cond_syscall(sys_sgetmask);
+cond_syscall(sys_ssetmask);
cond_syscall(sys_vm86old);
cond_syscall(sys_vm86);
cond_syscall(sys_ipc);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 580044080803..737547e4fba4 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -173,6 +173,13 @@ extern int no_unaligned_warning;
#endif
#ifdef CONFIG_PROC_SYSCTL
+
+#define SYSCTL_WRITES_LEGACY -1
+#define SYSCTL_WRITES_WARN 0
+#define SYSCTL_WRITES_STRICT 1
+
+static int sysctl_writes_strict = SYSCTL_WRITES_WARN;
+
static int proc_do_cad_pid(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos);
static int proc_taint(struct ctl_table *table, int write,
@@ -495,6 +502,15 @@ static struct ctl_table kern_table[] = {
.mode = 0644,
.proc_handler = proc_taint,
},
+ {
+ .procname = "sysctl_writes_strict",
+ .data = &sysctl_writes_strict,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &neg_one,
+ .extra2 = &one,
+ },
#endif
#ifdef CONFIG_LATENCYTOP
{
@@ -1703,8 +1719,8 @@ int __init sysctl_init(void)
#ifdef CONFIG_PROC_SYSCTL
-static int _proc_do_string(void* data, int maxlen, int write,
- void __user *buffer,
+static int _proc_do_string(char *data, int maxlen, int write,
+ char __user *buffer,
size_t *lenp, loff_t *ppos)
{
size_t len;
@@ -1717,21 +1733,30 @@ static int _proc_do_string(void* data, int maxlen, int write,
}
if (write) {
- len = 0;
+ if (sysctl_writes_strict == SYSCTL_WRITES_STRICT) {
+ /* Only continue writes not past the end of buffer. */
+ len = strlen(data);
+ if (len > maxlen - 1)
+ len = maxlen - 1;
+
+ if (*ppos > len)
+ return 0;
+ len = *ppos;
+ } else {
+ /* Start writing from beginning of buffer. */
+ len = 0;
+ }
+
+ *ppos += *lenp;
p = buffer;
- while (len < *lenp) {
+ while ((p - buffer) < *lenp && len < maxlen - 1) {
if (get_user(c, p++))
return -EFAULT;
if (c == 0 || c == '\n')
break;
- len++;
+ data[len++] = c;
}
- if (len >= maxlen)
- len = maxlen-1;
- if(copy_from_user(data, buffer, len))
- return -EFAULT;
- ((char *) data)[len] = 0;
- *ppos += *lenp;
+ data[len] = 0;
} else {
len = strlen(data);
if (len > maxlen)
@@ -1748,10 +1773,10 @@ static int _proc_do_string(void* data, int maxlen, int write,
if (len > *lenp)
len = *lenp;
if (len)
- if(copy_to_user(buffer, data, len))
+ if (copy_to_user(buffer, data, len))
return -EFAULT;
if (len < *lenp) {
- if(put_user('\n', ((char __user *) buffer) + len))
+ if (put_user('\n', buffer + len))
return -EFAULT;
len++;
}
@@ -1761,6 +1786,14 @@ static int _proc_do_string(void* data, int maxlen, int write,
return 0;
}
+static void warn_sysctl_write(struct ctl_table *table)
+{
+ pr_warn_once("%s wrote to %s when file position was not 0!\n"
+ "This will not be supported in the future. To silence this\n"
+ "warning, set kernel.sysctl_writes_strict = -1\n",
+ current->comm, table->procname);
+}
+
/**
* proc_dostring - read a string sysctl
* @table: the sysctl table
@@ -1781,8 +1814,11 @@ static int _proc_do_string(void* data, int maxlen, int write,
int proc_dostring(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
- return _proc_do_string(table->data, table->maxlen, write,
- buffer, lenp, ppos);
+ if (write && *ppos && sysctl_writes_strict == SYSCTL_WRITES_WARN)
+ warn_sysctl_write(table);
+
+ return _proc_do_string((char *)(table->data), table->maxlen, write,
+ (char __user *)buffer, lenp, ppos);
}
static size_t proc_skip_spaces(char **buf)
@@ -1956,6 +1992,18 @@ static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table,
conv = do_proc_dointvec_conv;
if (write) {
+ if (*ppos) {
+ switch (sysctl_writes_strict) {
+ case SYSCTL_WRITES_STRICT:
+ goto out;
+ case SYSCTL_WRITES_WARN:
+ warn_sysctl_write(table);
+ break;
+ default:
+ break;
+ }
+ }
+
if (left > PAGE_SIZE - 1)
left = PAGE_SIZE - 1;
page = __get_free_page(GFP_TEMPORARY);
@@ -2013,6 +2061,7 @@ free:
return err ? : -EINVAL;
}
*lenp -= left;
+out:
*ppos += *lenp;
return err;
}
@@ -2205,6 +2254,18 @@ static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table, int
left = *lenp;
if (write) {
+ if (*ppos) {
+ switch (sysctl_writes_strict) {
+ case SYSCTL_WRITES_STRICT:
+ goto out;
+ case SYSCTL_WRITES_WARN:
+ warn_sysctl_write(table);
+ break;
+ default:
+ break;
+ }
+ }
+
if (left > PAGE_SIZE - 1)
left = PAGE_SIZE - 1;
page = __get_free_page(GFP_TEMPORARY);
@@ -2260,6 +2321,7 @@ free:
return err ? : -EINVAL;
}
*lenp -= left;
+out:
*ppos += *lenp;
return err;
}
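
A worked example of the strict-mode string path in _proc_do_string() above, assuming table->data currently holds "abc" with maxlen = 8 and kernel.sysctl_writes_strict = 1:

    write "XY" at *ppos = 1  ->  len starts at 1, data becomes "aXY"
    write "Z"  at *ppos = 9  ->  *ppos > strlen(data), write is ignored

With the default value 0 the write still lands at offset 0, but warn_sysctl_write() fires; -1 keeps the silent legacy behavior.
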
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
index 419a52cecd20..3c6cb3116467 100644
--- a/kernel/time/ntp.c
+++ b/kernel/time/ntp.c
@@ -786,8 +786,9 @@ static long hardpps_update_freq(struct pps_normtime freq_norm)
time_status |= STA_PPSERROR;
pps_errcnt++;
pps_dec_freq_interval();
- pr_err("hardpps: PPSERROR: interval too long - %ld s\n",
- freq_norm.sec);
+ printk_deferred(KERN_ERR
+ "hardpps: PPSERROR: interval too long - %ld s\n",
+ freq_norm.sec);
return 0;
}
@@ -800,7 +801,8 @@ static long hardpps_update_freq(struct pps_normtime freq_norm)
delta = shift_right(ftemp - pps_freq, NTP_SCALE_SHIFT);
pps_freq = ftemp;
if (delta > PPS_MAXWANDER || delta < -PPS_MAXWANDER) {
- pr_warning("hardpps: PPSWANDER: change=%ld\n", delta);
+ printk_deferred(KERN_WARNING
+ "hardpps: PPSWANDER: change=%ld\n", delta);
time_status |= STA_PPSWANDER;
pps_stbcnt++;
pps_dec_freq_interval();
@@ -844,8 +846,9 @@ static void hardpps_update_phase(long error)
* the time offset is updated.
*/
if (jitter > (pps_jitter << PPS_POPCORN)) {
- pr_warning("hardpps: PPSJITTER: jitter=%ld, limit=%ld\n",
- jitter, (pps_jitter << PPS_POPCORN));
+ printk_deferred(KERN_WARNING
+ "hardpps: PPSJITTER: jitter=%ld, limit=%ld\n",
+ jitter, (pps_jitter << PPS_POPCORN));
time_status |= STA_PPSJITTER;
pps_jitcnt++;
} else if (time_status & STA_PPSTIME) {
@@ -902,7 +905,7 @@ void __hardpps(const struct timespec *phase_ts, const struct timespec *raw_ts)
time_status |= STA_PPSJITTER;
/* restart the frequency calibration interval */
pps_fbase = *raw_ts;
- pr_err("hardpps: PPSJITTER: bad pulse\n");
+ printk_deferred(KERN_ERR "hardpps: PPSJITTER: bad pulse\n");
return;
}
@@ -923,7 +926,10 @@ void __hardpps(const struct timespec *phase_ts, const struct timespec *raw_ts)
static int __init ntp_tick_adj_setup(char *str)
{
- ntp_tick_adj = simple_strtol(str, NULL, 0);
+ int rc = kstrtol(str, 0, (long *)&ntp_tick_adj);
+
+ if (rc)
+ return rc;
ntp_tick_adj <<= NTP_SCALE_SHIFT;
return 1;
diff --git a/kernel/time/sched_clock.c b/kernel/time/sched_clock.c
index 4d23dc4d8139..5038b4d3b76d 100644
--- a/kernel/time/sched_clock.c
+++ b/kernel/time/sched_clock.c
@@ -154,6 +154,10 @@ void __init sched_clock_register(u64 (*read)(void), int bits,
raw_write_seqcount_end(&cd.seq);
r = rate;
+ /*
+ * Use a 4 MHz threshold instead of 1 MHz so that a rate like
+ * 1.832 MHz is shown as 1832 KHz rather than 1 MHz.
+ */
if (r >= 4000000) {
r /= 1000000;
r_unit = 'M';
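
A worked example of the threshold in the comment above, assuming the usual else branch that divides by 1000 and sets r_unit = 'K':

    r = 1832000;    /* 1.832 MHz, below the 4000000 cutoff */
    r /= 1000;      /* -> 1832, r_unit = 'K': reported as "1832 KHz" */

With a 1 MHz threshold the same rate would have been rounded down to "1 MHz".
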
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index f7df8ea21707..32d8d6aaedb8 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -852,8 +852,9 @@ static void __timekeeping_inject_sleeptime(struct timekeeper *tk,
struct timespec *delta)
{
if (!timespec_valid_strict(delta)) {
- printk(KERN_WARNING "__timekeeping_inject_sleeptime: Invalid "
- "sleep delta value!\n");
+ printk_deferred(KERN_WARNING
+ "__timekeeping_inject_sleeptime: Invalid "
+ "sleep delta value!\n");
return;
}
tk_xtime_add(tk, delta);
@@ -1157,7 +1158,7 @@ static void timekeeping_adjust(struct timekeeper *tk, s64 offset)
if (unlikely(tk->clock->maxadj &&
(tk->mult + adj > tk->clock->mult + tk->clock->maxadj))) {
- printk_once(KERN_WARNING
+ printk_deferred_once(KERN_WARNING
"Adjusting %s more than 11%% (%ld vs %ld)\n",
tk->clock->name, (long)tk->mult + adj,
(long)tk->clock->mult + tk->clock->maxadj);
diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c
index 6620e5837ce2..33cbd8c203f8 100644
--- a/kernel/tracepoint.c
+++ b/kernel/tracepoint.c
@@ -239,6 +239,7 @@ static int tracepoint_remove_func(struct tracepoint *tp,
* tracepoint_probe_register - Connect a probe to a tracepoint
* @tp: tracepoint
* @probe: probe handler
+ * @data: tracepoint data
*
* Returns 0 if ok, error value on error.
* Note: if @tp is within a module, the caller is responsible for
@@ -264,6 +265,7 @@ EXPORT_SYMBOL_GPL(tracepoint_probe_register);
* tracepoint_probe_unregister - Disconnect a probe from a tracepoint
* @tp: tracepoint
* @probe: probe function pointer
+ * @data: tracepoint data
*
* Returns 0 if ok, error value on error.
*/
diff --git a/kernel/user.c b/kernel/user.c
index 294fc6a94168..4efa39350e44 100644
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -87,7 +87,6 @@ static DEFINE_SPINLOCK(uidhash_lock);
struct user_struct root_user = {
.__count = ATOMIC_INIT(1),
.processes = ATOMIC_INIT(1),
- .files = ATOMIC_INIT(0),
.sigpending = ATOMIC_INIT(0),
.locked_shm = 0,
.uid = GLOBAL_ROOT_UID,
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index bf71b4b2d632..fcc02560fd6b 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -286,7 +286,7 @@ EXPORT_SYMBOL(from_kuid_munged);
/**
* make_kgid - Map a user-namespace gid pair into a kgid.
* @ns: User namespace that the gid is in
- * @uid: group identifier
+ * @gid: group identifier
*
* Maps a user-namespace gid pair into a kernel internal kgid,
* and returns that kgid.
@@ -482,7 +482,8 @@ static int projid_m_show(struct seq_file *seq, void *v)
return 0;
}
-static void *m_start(struct seq_file *seq, loff_t *ppos, struct uid_gid_map *map)
+static void *m_start(struct seq_file *seq, loff_t *ppos,
+ struct uid_gid_map *map)
{
struct uid_gid_extent *extent = NULL;
loff_t pos = *ppos;
@@ -546,7 +547,8 @@ struct seq_operations proc_projid_seq_operations = {
.show = projid_m_show,
};
-static bool mappings_overlap(struct uid_gid_map *new_map, struct uid_gid_extent *extent)
+static bool mappings_overlap(struct uid_gid_map *new_map,
+ struct uid_gid_extent *extent)
{
u32 upper_first, lower_first, upper_last, lower_last;
unsigned idx;
@@ -653,7 +655,7 @@ static ssize_t map_write(struct file *file, const char __user *buf,
ret = -EINVAL;
pos = kbuf;
new_map.nr_extents = 0;
- for (;pos; pos = next_line) {
+ for (; pos; pos = next_line) {
extent = &new_map.extent[new_map.nr_extents];
/* Find the end of line and ensure I don't look past it */
@@ -687,13 +689,16 @@ static ssize_t map_write(struct file *file, const char __user *buf,
/* Verify we have been given valid starting values */
if ((extent->first == (u32) -1) ||
- (extent->lower_first == (u32) -1 ))
+ (extent->lower_first == (u32) -1))
goto out;
- /* Verify count is not zero and does not cause the extent to wrap */
+ /* Verify count is not zero and does not cause the
+ * extent to wrap
+ */
if ((extent->first + extent->count) <= extent->first)
goto out;
- if ((extent->lower_first + extent->count) <= extent->lower_first)
+ if ((extent->lower_first + extent->count) <=
+ extent->lower_first)
goto out;
/* Do the ranges in extent overlap any previous extents? */
@@ -751,7 +756,8 @@ out:
return ret;
}
-ssize_t proc_uid_map_write(struct file *file, const char __user *buf, size_t size, loff_t *ppos)
+ssize_t proc_uid_map_write(struct file *file, const char __user *buf,
+ size_t size, loff_t *ppos)
{
struct seq_file *seq = file->private_data;
struct user_namespace *ns = seq->private;
@@ -767,7 +773,8 @@ ssize_t proc_uid_map_write(struct file *file, const char __user *buf, size_t siz
&ns->uid_map, &ns->parent->uid_map);
}
-ssize_t proc_gid_map_write(struct file *file, const char __user *buf, size_t size, loff_t *ppos)
+ssize_t proc_gid_map_write(struct file *file, const char __user *buf,
+ size_t size, loff_t *ppos)
{
struct seq_file *seq = file->private_data;
struct user_namespace *ns = seq->private;
@@ -783,7 +790,8 @@ ssize_t proc_gid_map_write(struct file *file, const char __user *buf, size_t siz
&ns->gid_map, &ns->parent->gid_map);
}
-ssize_t proc_projid_map_write(struct file *file, const char __user *buf, size_t size, loff_t *ppos)
+ssize_t proc_projid_map_write(struct file *file, const char __user *buf,
+ size_t size, loff_t *ppos)
{
struct seq_file *seq = file->private_data;
struct user_namespace *ns = seq->private;
@@ -800,7 +808,7 @@ ssize_t proc_projid_map_write(struct file *file, const char __user *buf, size_t
&ns->projid_map, &ns->parent->projid_map);
}
-static bool new_idmap_permitted(const struct file *file,
+static bool new_idmap_permitted(const struct file *file,
struct user_namespace *ns, int cap_setid,
struct uid_gid_map *new_map)
{
@@ -811,8 +819,7 @@ static bool new_idmap_permitted(const struct file *file,
kuid_t uid = make_kuid(ns->parent, id);
if (uid_eq(uid, file->f_cred->fsuid))
return true;
- }
- else if (cap_setid == CAP_SETGID) {
+ } else if (cap_setid == CAP_SETGID) {
kgid_t gid = make_kgid(ns->parent, id);
if (gid_eq(gid, file->f_cred->fsgid))
return true;
diff --git a/kernel/utsname_sysctl.c b/kernel/utsname_sysctl.c
index 4f69f9a5e221..6fbe811c7ad1 100644
--- a/kernel/utsname_sysctl.c
+++ b/kernel/utsname_sysctl.c
@@ -51,7 +51,7 @@ static int proc_do_uts_string(ctl_table *table, int write,
int r;
memcpy(&uts_table, table, sizeof(uts_table));
uts_table.data = get_uts(table, write);
- r = proc_dostring(&uts_table,write,buffer,lenp, ppos);
+ r = proc_dostring(&uts_table, write, buffer, lenp, ppos);
put_uts(table, write, uts_table.data);
if (write)
@@ -135,4 +135,4 @@ static int __init utsname_sysctl_init(void)
return 0;
}
-__initcall(utsname_sysctl_init);
+device_initcall(utsname_sysctl_init);
diff --git a/lib/Kconfig b/lib/Kconfig
index 334f7722a999..325a8d4efda2 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -177,6 +177,13 @@ config CRC8
when they need to do cyclic redundancy check according CRC8
algorithm. Module will be called crc8.
+config CRC64_ECMA
+ tristate "CRC64 ECMA function"
+ help
+ This option provides the CRC64 ECMA function. Drivers may select
+ this when they need to perform a cyclic redundancy check according
+ to the CRC64 ECMA algorithm.
+
config AUDIT_GENERIC
bool
depends on AUDIT && !AUDIT_ARCH
@@ -474,4 +481,11 @@ config UCS2_STRING
source "lib/fonts/Kconfig"
+#
+# sg chaining option
+#
+
+config ARCH_HAS_SG_CHAIN
+ def_bool n
+
endmenu
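
The lib/crc64_ecma.c file selected by this option (added further below) carries a 256-entry reflected lookup table. Its exported entry points are not visible in this excerpt, so the following is only a generic sketch of how such a table is consumed; the function name is illustrative:

    /* one-byte-at-a-time reflected CRC64 over the table, sketch only */
    static u64 crc64_ecma_update_sketch(u64 crc, const u8 *buf, size_t len)
    {
            while (len--)
                    crc = CRC64_ECMA_182.table[(crc ^ *buf++) & CRC64_BYTE_MASK]
                          ^ (crc >> 8);
            return crc;
    }

Seeding with the struct's CRC64_DEFAULT_INITVAL seed is implied by the layout above; whether the module also inverts the result on input and output is not visible here.
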
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 182ea682bd8f..fb4889a769f6 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -501,6 +501,16 @@ config DEBUG_VM
If unsure, say N.
+config DEBUG_VM_VMACACHE
+ bool "Debug VMA caching"
+ depends on DEBUG_VM
+ help
+ Enable this to turn on VMA caching debug information. Doing so
+ can cause significant overhead, so only enable it in non-production
+ environments.
+
+ If unsure, say N.
+
config DEBUG_VM_RB
bool "Debug VM red-black trees"
depends on DEBUG_VM
@@ -823,11 +833,6 @@ config DEBUG_RT_MUTEXES
This allows rt mutex semantics violations and rt mutex related
deadlocks (lockups) to be detected and reported automatically.
-config DEBUG_PI_LIST
- bool
- default y
- depends on DEBUG_RT_MUTEXES
-
config RT_MUTEX_TESTER
bool "Built-in scriptable tester for rt-mutexes"
depends on DEBUG_KERNEL && RT_MUTEXES
@@ -1053,6 +1058,16 @@ config DEBUG_LIST
If unsure, say N.
+config DEBUG_PI_LIST
+ bool "Debug priority linked list manipulation"
+ depends on DEBUG_KERNEL
+ help
+ Enable this to turn on extended checks in the priority-ordered
+ linked-list (plist) walking routines. This checks the entire
+ list multiple times during each manipulation.
+
+ If unsure, say N.
+
config DEBUG_SG
bool "Debug SG table operations"
depends on DEBUG_KERNEL
diff --git a/lib/Makefile b/lib/Makefile
index 69e8bbfa07e5..5706c01266cc 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -71,6 +71,7 @@ obj-$(CONFIG_CRC32) += crc32.o
obj-$(CONFIG_CRC7) += crc7.o
obj-$(CONFIG_LIBCRC32C) += libcrc32c.o
obj-$(CONFIG_CRC8) += crc8.o
+obj-$(CONFIG_CRC64_ECMA) += crc64_ecma.o
obj-$(CONFIG_GENERIC_ALLOCATOR) += genalloc.o
obj-$(CONFIG_ZLIB_INFLATE) += zlib_inflate/
diff --git a/lib/btree.c b/lib/btree.c
index f9a484676cb6..4264871ea1a0 100644
--- a/lib/btree.c
+++ b/lib/btree.c
@@ -198,6 +198,7 @@ EXPORT_SYMBOL_GPL(btree_init);
void btree_destroy(struct btree_head *head)
{
+ mempool_free(head->node, head->mempool);
mempool_destroy(head->mempool);
head->mempool = NULL;
}
diff --git a/lib/crc64_ecma.c b/lib/crc64_ecma.c
new file mode 100644
index 000000000000..41629ea5a60c
--- /dev/null
+++ b/lib/crc64_ecma.c
@@ -0,0 +1,341 @@
+/*
+ * Copyright 2013 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Freescale Semiconductor nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/module.h>
+#include <linux/crc64_ecma.h>
+
+
+#define CRC64_BYTE_MASK 0xFF
+#define CRC64_TABLE_SIZE 256
+
+
+struct crc64_table {
+ u64 seed;
+ u64 table[CRC64_TABLE_SIZE];
+};
+
+
+static struct crc64_table CRC64_ECMA_182 = {
+ CRC64_DEFAULT_INITVAL,
+ {
+ 0x0000000000000000ULL,
+ 0xb32e4cbe03a75f6fULL,
+ 0xf4843657a840a05bULL,
+ 0x47aa7ae9abe7ff34ULL,
+ 0x7bd0c384ff8f5e33ULL,
+ 0xc8fe8f3afc28015cULL,
+ 0x8f54f5d357cffe68ULL,
+ 0x3c7ab96d5468a107ULL,
+ 0xf7a18709ff1ebc66ULL,
+ 0x448fcbb7fcb9e309ULL,
+ 0x0325b15e575e1c3dULL,
+ 0xb00bfde054f94352ULL,
+ 0x8c71448d0091e255ULL,
+ 0x3f5f08330336bd3aULL,
+ 0x78f572daa8d1420eULL,
+ 0xcbdb3e64ab761d61ULL,
+ 0x7d9ba13851336649ULL,
+ 0xceb5ed8652943926ULL,
+ 0x891f976ff973c612ULL,
+ 0x3a31dbd1fad4997dULL,
+ 0x064b62bcaebc387aULL,
+ 0xb5652e02ad1b6715ULL,
+ 0xf2cf54eb06fc9821ULL,
+ 0x41e11855055bc74eULL,
+ 0x8a3a2631ae2dda2fULL,
+ 0x39146a8fad8a8540ULL,
+ 0x7ebe1066066d7a74ULL,
+ 0xcd905cd805ca251bULL,
+ 0xf1eae5b551a2841cULL,
+ 0x42c4a90b5205db73ULL,
+ 0x056ed3e2f9e22447ULL,
+ 0xb6409f5cfa457b28ULL,
+ 0xfb374270a266cc92ULL,
+ 0x48190ecea1c193fdULL,
+ 0x0fb374270a266cc9ULL,
+ 0xbc9d3899098133a6ULL,
+ 0x80e781f45de992a1ULL,
+ 0x33c9cd4a5e4ecdceULL,
+ 0x7463b7a3f5a932faULL,
+ 0xc74dfb1df60e6d95ULL,
+ 0x0c96c5795d7870f4ULL,
+ 0xbfb889c75edf2f9bULL,
+ 0xf812f32ef538d0afULL,
+ 0x4b3cbf90f69f8fc0ULL,
+ 0x774606fda2f72ec7ULL,
+ 0xc4684a43a15071a8ULL,
+ 0x83c230aa0ab78e9cULL,
+ 0x30ec7c140910d1f3ULL,
+ 0x86ace348f355aadbULL,
+ 0x3582aff6f0f2f5b4ULL,
+ 0x7228d51f5b150a80ULL,
+ 0xc10699a158b255efULL,
+ 0xfd7c20cc0cdaf4e8ULL,
+ 0x4e526c720f7dab87ULL,
+ 0x09f8169ba49a54b3ULL,
+ 0xbad65a25a73d0bdcULL,
+ 0x710d64410c4b16bdULL,
+ 0xc22328ff0fec49d2ULL,
+ 0x85895216a40bb6e6ULL,
+ 0x36a71ea8a7ace989ULL,
+ 0x0adda7c5f3c4488eULL,
+ 0xb9f3eb7bf06317e1ULL,
+ 0xfe5991925b84e8d5ULL,
+ 0x4d77dd2c5823b7baULL,
+ 0x64b62bcaebc387a1ULL,
+ 0xd7986774e864d8ceULL,
+ 0x90321d9d438327faULL,
+ 0x231c512340247895ULL,
+ 0x1f66e84e144cd992ULL,
+ 0xac48a4f017eb86fdULL,
+ 0xebe2de19bc0c79c9ULL,
+ 0x58cc92a7bfab26a6ULL,
+ 0x9317acc314dd3bc7ULL,
+ 0x2039e07d177a64a8ULL,
+ 0x67939a94bc9d9b9cULL,
+ 0xd4bdd62abf3ac4f3ULL,
+ 0xe8c76f47eb5265f4ULL,
+ 0x5be923f9e8f53a9bULL,
+ 0x1c4359104312c5afULL,
+ 0xaf6d15ae40b59ac0ULL,
+ 0x192d8af2baf0e1e8ULL,
+ 0xaa03c64cb957be87ULL,
+ 0xeda9bca512b041b3ULL,
+ 0x5e87f01b11171edcULL,
+ 0x62fd4976457fbfdbULL,
+ 0xd1d305c846d8e0b4ULL,
+ 0x96797f21ed3f1f80ULL,
+ 0x2557339fee9840efULL,
+ 0xee8c0dfb45ee5d8eULL,
+ 0x5da24145464902e1ULL,
+ 0x1a083bacedaefdd5ULL,
+ 0xa9267712ee09a2baULL,
+ 0x955cce7fba6103bdULL,
+ 0x267282c1b9c65cd2ULL,
+ 0x61d8f8281221a3e6ULL,
+ 0xd2f6b4961186fc89ULL,
+ 0x9f8169ba49a54b33ULL,
+ 0x2caf25044a02145cULL,
+ 0x6b055fede1e5eb68ULL,
+ 0xd82b1353e242b407ULL,
+ 0xe451aa3eb62a1500ULL,
+ 0x577fe680b58d4a6fULL,
+ 0x10d59c691e6ab55bULL,
+ 0xa3fbd0d71dcdea34ULL,
+ 0x6820eeb3b6bbf755ULL,
+ 0xdb0ea20db51ca83aULL,
+ 0x9ca4d8e41efb570eULL,
+ 0x2f8a945a1d5c0861ULL,
+ 0x13f02d374934a966ULL,
+ 0xa0de61894a93f609ULL,
+ 0xe7741b60e174093dULL,
+ 0x545a57dee2d35652ULL,
+ 0xe21ac88218962d7aULL,
+ 0x5134843c1b317215ULL,
+ 0x169efed5b0d68d21ULL,
+ 0xa5b0b26bb371d24eULL,
+ 0x99ca0b06e7197349ULL,
+ 0x2ae447b8e4be2c26ULL,
+ 0x6d4e3d514f59d312ULL,
+ 0xde6071ef4cfe8c7dULL,
+ 0x15bb4f8be788911cULL,
+ 0xa6950335e42fce73ULL,
+ 0xe13f79dc4fc83147ULL,
+ 0x521135624c6f6e28ULL,
+ 0x6e6b8c0f1807cf2fULL,
+ 0xdd45c0b11ba09040ULL,
+ 0x9aefba58b0476f74ULL,
+ 0x29c1f6e6b3e0301bULL,
+ 0xc96c5795d7870f42ULL,
+ 0x7a421b2bd420502dULL,
+ 0x3de861c27fc7af19ULL,
+ 0x8ec62d7c7c60f076ULL,
+ 0xb2bc941128085171ULL,
+ 0x0192d8af2baf0e1eULL,
+ 0x4638a2468048f12aULL,
+ 0xf516eef883efae45ULL,
+ 0x3ecdd09c2899b324ULL,
+ 0x8de39c222b3eec4bULL,
+ 0xca49e6cb80d9137fULL,
+ 0x7967aa75837e4c10ULL,
+ 0x451d1318d716ed17ULL,
+ 0xf6335fa6d4b1b278ULL,
+ 0xb199254f7f564d4cULL,
+ 0x02b769f17cf11223ULL,
+ 0xb4f7f6ad86b4690bULL,
+ 0x07d9ba1385133664ULL,
+ 0x4073c0fa2ef4c950ULL,
+ 0xf35d8c442d53963fULL,
+ 0xcf273529793b3738ULL,
+ 0x7c0979977a9c6857ULL,
+ 0x3ba3037ed17b9763ULL,
+ 0x888d4fc0d2dcc80cULL,
+ 0x435671a479aad56dULL,
+ 0xf0783d1a7a0d8a02ULL,
+ 0xb7d247f3d1ea7536ULL,
+ 0x04fc0b4dd24d2a59ULL,
+ 0x3886b22086258b5eULL,
+ 0x8ba8fe9e8582d431ULL,
+ 0xcc0284772e652b05ULL,
+ 0x7f2cc8c92dc2746aULL,
+ 0x325b15e575e1c3d0ULL,
+ 0x8175595b76469cbfULL,
+ 0xc6df23b2dda1638bULL,
+ 0x75f16f0cde063ce4ULL,
+ 0x498bd6618a6e9de3ULL,
+ 0xfaa59adf89c9c28cULL,
+ 0xbd0fe036222e3db8ULL,
+ 0x0e21ac88218962d7ULL,
+ 0xc5fa92ec8aff7fb6ULL,
+ 0x76d4de52895820d9ULL,
+ 0x317ea4bb22bfdfedULL,
+ 0x8250e80521188082ULL,
+ 0xbe2a516875702185ULL,
+ 0x0d041dd676d77eeaULL,
+ 0x4aae673fdd3081deULL,
+ 0xf9802b81de97deb1ULL,
+ 0x4fc0b4dd24d2a599ULL,
+ 0xfceef8632775faf6ULL,
+ 0xbb44828a8c9205c2ULL,
+ 0x086ace348f355aadULL,
+ 0x34107759db5dfbaaULL,
+ 0x873e3be7d8faa4c5ULL,
+ 0xc094410e731d5bf1ULL,
+ 0x73ba0db070ba049eULL,
+ 0xb86133d4dbcc19ffULL,
+ 0x0b4f7f6ad86b4690ULL,
+ 0x4ce50583738cb9a4ULL,
+ 0xffcb493d702be6cbULL,
+ 0xc3b1f050244347ccULL,
+ 0x709fbcee27e418a3ULL,
+ 0x3735c6078c03e797ULL,
+ 0x841b8ab98fa4b8f8ULL,
+ 0xadda7c5f3c4488e3ULL,
+ 0x1ef430e13fe3d78cULL,
+ 0x595e4a08940428b8ULL,
+ 0xea7006b697a377d7ULL,
+ 0xd60abfdbc3cbd6d0ULL,
+ 0x6524f365c06c89bfULL,
+ 0x228e898c6b8b768bULL,
+ 0x91a0c532682c29e4ULL,
+ 0x5a7bfb56c35a3485ULL,
+ 0xe955b7e8c0fd6beaULL,
+ 0xaeffcd016b1a94deULL,
+ 0x1dd181bf68bdcbb1ULL,
+ 0x21ab38d23cd56ab6ULL,
+ 0x9285746c3f7235d9ULL,
+ 0xd52f0e859495caedULL,
+ 0x6601423b97329582ULL,
+ 0xd041dd676d77eeaaULL,
+ 0x636f91d96ed0b1c5ULL,
+ 0x24c5eb30c5374ef1ULL,
+ 0x97eba78ec690119eULL,
+ 0xab911ee392f8b099ULL,
+ 0x18bf525d915feff6ULL,
+ 0x5f1528b43ab810c2ULL,
+ 0xec3b640a391f4fadULL,
+ 0x27e05a6e926952ccULL,
+ 0x94ce16d091ce0da3ULL,
+ 0xd3646c393a29f297ULL,
+ 0x604a2087398eadf8ULL,
+ 0x5c3099ea6de60cffULL,
+ 0xef1ed5546e415390ULL,
+ 0xa8b4afbdc5a6aca4ULL,
+ 0x1b9ae303c601f3cbULL,
+ 0x56ed3e2f9e224471ULL,
+ 0xe5c372919d851b1eULL,
+ 0xa26908783662e42aULL,
+ 0x114744c635c5bb45ULL,
+ 0x2d3dfdab61ad1a42ULL,
+ 0x9e13b115620a452dULL,
+ 0xd9b9cbfcc9edba19ULL,
+ 0x6a978742ca4ae576ULL,
+ 0xa14cb926613cf817ULL,
+ 0x1262f598629ba778ULL,
+ 0x55c88f71c97c584cULL,
+ 0xe6e6c3cfcadb0723ULL,
+ 0xda9c7aa29eb3a624ULL,
+ 0x69b2361c9d14f94bULL,
+ 0x2e184cf536f3067fULL,
+ 0x9d36004b35545910ULL,
+ 0x2b769f17cf112238ULL,
+ 0x9858d3a9ccb67d57ULL,
+ 0xdff2a94067518263ULL,
+ 0x6cdce5fe64f6dd0cULL,
+ 0x50a65c93309e7c0bULL,
+ 0xe388102d33392364ULL,
+ 0xa4226ac498dedc50ULL,
+ 0x170c267a9b79833fULL,
+ 0xdcd7181e300f9e5eULL,
+ 0x6ff954a033a8c131ULL,
+ 0x28532e49984f3e05ULL,
+ 0x9b7d62f79be8616aULL,
+ 0xa707db9acf80c06dULL,
+ 0x14299724cc279f02ULL,
+ 0x5383edcd67c06036ULL,
+ 0xe0ada17364673f59ULL
+ }
+};
+
+/**
+ * crc64_ecma_seed - Return the default CRC64 ECMA seed.
+ */
+u64 crc64_ecma_seed(void)
+{
+ return CRC64_ECMA_182.seed;
+}
+EXPORT_SYMBOL(crc64_ecma_seed);
+
+/**
+ * crc64_ecma - Compute the 64-bit ECMA-182 CRC.
+ * @pdata: pointer to the data to compute the checksum for
+ * @nbytes: number of bytes in the data buffer
+ * @seed: CRC seed
+ */
+u64 crc64_ecma(u8 const *pdata, u32 nbytes, u64 seed)
+{
+ unsigned int i;
+ u64 crc = seed;
+
+ for (i = 0; i < nbytes; i++)
+ crc = CRC64_ECMA_182.table[(crc ^ pdata[i]) & CRC64_BYTE_MASK] ^
+ (crc >> 8);
+
+ return crc;
+}
+EXPORT_SYMBOL(crc64_ecma);
+
+MODULE_DESCRIPTION("CRC64 ECMA function");
+MODULE_AUTHOR("Freescale Semiconductor Inc.");
+MODULE_LICENSE("GPL");
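
For reference, a minimal sketch of how a caller might use this interface; checksum_frame() and its arguments are hypothetical, not part of the patch:

	#include <linux/crc64_ecma.h>

	static u64 checksum_frame(const u8 *frame, u32 len)
	{
		/* Fold the whole buffer into the default ECMA-182 seed. */
		return crc64_ecma(frame, len, crc64_ecma_seed());
	}
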
diff --git a/lib/idr.c b/lib/idr.c
index 2642fa8e424d..39158abebad1 100644
--- a/lib/idr.c
+++ b/lib/idr.c
@@ -18,12 +18,6 @@
 * pointer or whatever, we treat it as a (void *). You can pass this
 * id to a user for them to pass back at a later time. You then pass
* that id to this code and it returns your pointer.
-
- * You can release ids at any time. When all ids are released, most of
- * the memory is returned (we keep MAX_IDR_FREE) in a local pool so we
- * don't need to go to the memory "store" during an id allocate, just
- * so you don't need to be too concerned about locking and conflicts
- * with the slab allocator.
*/
#ifndef TEST // to test in user space...
@@ -151,7 +145,7 @@ static void idr_layer_rcu_free(struct rcu_head *head)
static inline void free_layer(struct idr *idr, struct idr_layer *p)
{
- if (idr->hint && idr->hint == p)
+ if (idr->hint == p)
RCU_INIT_POINTER(idr->hint, NULL);
call_rcu(&p->rcu_head, idr_layer_rcu_free);
}
@@ -249,7 +243,7 @@ static int sub_alloc(struct idr *idp, int *starting_id, struct idr_layer **pa,
id = (id | ((1 << (IDR_BITS * l)) - 1)) + 1;
/* if already at the top layer, we need to grow */
- if (id >= 1 << (idp->layers * IDR_BITS)) {
+ if (id > idr_max(idp->layers)) {
*starting_id = id;
return -EAGAIN;
}
@@ -562,6 +556,11 @@ void idr_remove(struct idr *idp, int id)
if (id < 0)
return;
+ if (id > idr_max(idp->layers)) {
+ idr_remove_warning(id);
+ return;
+ }
+
sub_remove(idp, (idp->layers - 1) * IDR_BITS, id);
if (idp->top && idp->top->count == 1 && (idp->layers > 1) &&
idp->top->ary[0]) {
@@ -579,16 +578,6 @@ void idr_remove(struct idr *idp, int id)
bitmap_clear(to_free->bitmap, 0, IDR_SIZE);
free_layer(idp, to_free);
}
- while (idp->id_free_cnt >= MAX_IDR_FREE) {
- p = get_from_free_list(idp);
- /*
- * Note: we don't call the rcu callback here, since the only
- * layers that fall into the freelist are those that have been
- * preallocated.
- */
- kmem_cache_free(idr_layer_cache, p);
- }
- return;
}
EXPORT_SYMBOL(idr_remove);
@@ -809,14 +798,12 @@ void *idr_replace(struct idr *idp, void *ptr, int id)
p = idp->top;
if (!p)
- return ERR_PTR(-EINVAL);
-
- n = (p->layer+1) * IDR_BITS;
+ return ERR_PTR(-ENOENT);
- if (id >= (1 << n))
- return ERR_PTR(-EINVAL);
+ if (id > idr_max(p->layer + 1))
+ return ERR_PTR(-ENOENT);
- n -= IDR_BITS;
+ n = p->layer * IDR_BITS;
while ((n > 0) && p) {
p = p->ary[(id >> n) & IDR_MASK];
n -= IDR_BITS;
@@ -1027,6 +1014,9 @@ void ida_remove(struct ida *ida, int id)
int n;
struct ida_bitmap *bitmap;
+ if (idr_id > idr_max(ida->idr.layers))
+ goto err;
+
/* clear full bits while looking up the leaf idr_layer */
while ((shift > 0) && p) {
n = (idr_id >> shift) & IDR_MASK;
@@ -1042,7 +1032,7 @@ void ida_remove(struct ida *ida, int id)
__clear_bit(n, p->bitmap);
bitmap = (void *)p->ary[n];
- if (!test_bit(offset, bitmap->bitmap))
+ if (!bitmap || !test_bit(offset, bitmap->bitmap))
goto err;
/* update bitmap and remove it if empty */
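
The user-visible effect of the idr_max() checks is that ids beyond the current tree height fail fast: idr_remove() warns and returns, and idr_replace() now reports -ENOENT (rather than -EINVAL) for any id that was never allocated. A hedged sketch of a caller coping with the new error; my_idr, new_ptr and id are hypothetical:

	void *old = idr_replace(&my_idr, new_ptr, id);

	if (IS_ERR(old))
		return PTR_ERR(old);	/* -ENOENT: id was never allocated */
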
diff --git a/lib/libcrc32c.c b/lib/libcrc32c.c
index 244f5480c898..b3131f5cf8a2 100644
--- a/lib/libcrc32c.c
+++ b/lib/libcrc32c.c
@@ -62,10 +62,7 @@ EXPORT_SYMBOL(crc32c);
static int __init libcrc32c_mod_init(void)
{
tfm = crypto_alloc_shash("crc32c", 0, 0);
- if (IS_ERR(tfm))
- return PTR_ERR(tfm);
-
- return 0;
+ return PTR_ERR_OR_ZERO(tfm);
}
static void __exit libcrc32c_mod_fini(void)
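
PTR_ERR_OR_ZERO() folds the IS_ERR()/PTR_ERR() tail into a single call: it returns PTR_ERR(ptr) when ptr is an error pointer and 0 otherwise, so the pattern generalizes to any init path that only needs to propagate an allocation error. A sketch, with the allocator name hypothetical:

	obj = alloc_that_returns_err_ptr();	/* hypothetical allocator */
	return PTR_ERR_OR_ZERO(obj);
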
diff --git a/lib/plist.c b/lib/plist.c
index 1ebc95f7a46f..d408e774b746 100644
--- a/lib/plist.c
+++ b/lib/plist.c
@@ -134,6 +134,46 @@ void plist_del(struct plist_node *node, struct plist_head *head)
plist_check_head(head);
}
+/**
+ * plist_requeue - Requeue @node at end of same-prio entries.
+ * @node: &struct plist_node pointer - entry to be moved
+ * @head: &struct plist_head pointer - list head
+ *
+ * This is essentially an optimized plist_del() followed by
+ * plist_add(). It moves an entry already in the plist to
+ * after any other same-priority entries.
+ */
+void plist_requeue(struct plist_node *node, struct plist_head *head)
+{
+ struct plist_node *iter;
+ struct list_head *node_next = &head->node_list;
+
+ plist_check_head(head);
+ BUG_ON(plist_head_empty(head));
+ BUG_ON(plist_node_empty(node));
+
+ if (node == plist_last(head))
+ return;
+
+ iter = plist_next(node);
+
+ if (node->prio != iter->prio)
+ return;
+
+ plist_del(node, head);
+
+ plist_for_each_continue(iter, head) {
+ if (node->prio != iter->prio) {
+ node_next = &iter->node_list;
+ break;
+ }
+ }
+ list_add_tail(&node->node_list, node_next);
+
+ plist_check_head(head);
+}
+
#ifdef CONFIG_DEBUG_PI_LIST
#include <linux/sched.h>
#include <linux/module.h>
@@ -170,12 +210,20 @@ static void __init plist_test_check(int nr_expect)
BUG_ON(prio_pos->prio_list.next != &first->prio_list);
}
+static void __init plist_test_requeue(struct plist_node *node)
+{
+ plist_requeue(node, &test_head);
+
+ if (node != plist_last(&test_head))
+ BUG_ON(node->prio == plist_next(node)->prio);
+}
+
static int __init plist_test(void)
{
int nr_expect = 0, i, loop;
unsigned int r = local_clock();
- pr_debug("start plist test\n");
+ printk(KERN_DEBUG "start plist test\n");
plist_head_init(&test_head);
for (i = 0; i < ARRAY_SIZE(test_node); i++)
plist_node_init(test_node + i, 0);
@@ -193,6 +241,10 @@ static int __init plist_test(void)
nr_expect--;
}
plist_test_check(nr_expect);
+ if (!plist_node_empty(test_node + i)) {
+ plist_test_requeue(test_node + i);
+ plist_test_check(nr_expect);
+ }
}
for (i = 0; i < ARRAY_SIZE(test_node); i++) {
@@ -203,7 +255,7 @@ static int __init plist_test(void)
plist_test_check(nr_expect);
}
- pr_debug("end plist test\n");
+ printk(KERN_DEBUG "end plist test\n");
return 0;
}
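
plist_requeue() enables round-robin scheduling among entries that share a priority: pop the head, process it, then rotate it behind its peers. A sketch of that calling pattern; head, node and serve() are hypothetical:

	struct plist_node *node = plist_first(&head);

	serve(node);
	/* Rotate node behind any other entries of the same priority. */
	plist_requeue(node, &head);
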
diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index 9599aa72d7a0..6ad8130f2053 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -27,6 +27,7 @@
#include <linux/radix-tree.h>
#include <linux/percpu.h>
#include <linux/slab.h>
+#include <linux/kmemleak.h>
#include <linux/notifier.h>
#include <linux/cpu.h>
#include <linux/string.h>
@@ -194,12 +195,17 @@ radix_tree_node_alloc(struct radix_tree_root *root)
* succeed in getting a node here (and never reach
* kmem_cache_alloc)
*/
- rtp = &__get_cpu_var(radix_tree_preloads);
+ rtp = this_cpu_ptr(&radix_tree_preloads);
if (rtp->nr) {
ret = rtp->nodes[rtp->nr - 1];
rtp->nodes[rtp->nr - 1] = NULL;
rtp->nr--;
}
+ /*
+ * Update the allocation stack trace as this is more useful
+ * for debugging.
+ */
+ kmemleak_update_trace(ret);
}
if (ret == NULL)
ret = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask);
@@ -250,14 +256,14 @@ static int __radix_tree_preload(gfp_t gfp_mask)
int ret = -ENOMEM;
preempt_disable();
- rtp = &__get_cpu_var(radix_tree_preloads);
+ rtp = this_cpu_ptr(&radix_tree_preloads);
while (rtp->nr < ARRAY_SIZE(rtp->nodes)) {
preempt_enable();
node = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask);
if (node == NULL)
goto out;
preempt_disable();
- rtp = &__get_cpu_var(radix_tree_preloads);
+ rtp = this_cpu_ptr(&radix_tree_preloads);
if (rtp->nr < ARRAY_SIZE(rtp->nodes))
rtp->nodes[rtp->nr++] = node;
else
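
The __get_cpu_var() conversions here are mechanical: __get_cpu_var(var) names the per-CPU variable itself, while this_cpu_ptr(&var) yields a pointer to it, so the two lines below are equivalent and only the second form survives:

	rtp = &__get_cpu_var(radix_tree_preloads);	/* old form */
	rtp = this_cpu_ptr(&radix_tree_preloads);	/* preferred form */
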
diff --git a/lib/scatterlist.c b/lib/scatterlist.c
index 3a8e8e8fb2a5..4251cbd5becb 100644
--- a/lib/scatterlist.c
+++ b/lib/scatterlist.c
@@ -73,7 +73,7 @@ EXPORT_SYMBOL(sg_nents);
**/
struct scatterlist *sg_last(struct scatterlist *sgl, unsigned int nents)
{
-#ifndef ARCH_HAS_SG_CHAIN
+#ifndef CONFIG_ARCH_HAS_SG_CHAIN
struct scatterlist *ret = &sgl[nents - 1];
#else
struct scatterlist *sg, *ret = NULL;
@@ -251,7 +251,7 @@ int __sg_alloc_table(struct sg_table *table, unsigned int nents,
if (nents == 0)
return -EINVAL;
-#ifndef ARCH_HAS_SG_CHAIN
+#ifndef CONFIG_ARCH_HAS_SG_CHAIN
if (WARN_ON_ONCE(nents > max_ents))
return -EINVAL;
#endif
diff --git a/lib/string.c b/lib/string.c
index 9b1f9062a202..89ad0f035f48 100644
--- a/lib/string.c
+++ b/lib/string.c
@@ -107,7 +107,7 @@ EXPORT_SYMBOL(strcpy);
#ifndef __HAVE_ARCH_STRNCPY
/**
- * strncpy - Copy a length-limited, %NUL-terminated string
+ * strncpy - Copy a length-limited, C-string
* @dest: Where to copy the string to
* @src: Where to copy the string from
* @count: The maximum number of bytes to copy
@@ -136,7 +136,7 @@ EXPORT_SYMBOL(strncpy);
#ifndef __HAVE_ARCH_STRLCPY
/**
- * strlcpy - Copy a %NUL terminated string into a sized buffer
+ * strlcpy - Copy a C-string into a sized buffer
* @dest: Where to copy the string to
* @src: Where to copy the string from
* @size: size of destination buffer
@@ -182,7 +182,7 @@ EXPORT_SYMBOL(strcat);
#ifndef __HAVE_ARCH_STRNCAT
/**
- * strncat - Append a length-limited, %NUL-terminated string to another
+ * strncat - Append a length-limited, C-string to another
* @dest: The string to be appended to
* @src: The string to append to it
* @count: The maximum numbers of bytes to copy
@@ -211,7 +211,7 @@ EXPORT_SYMBOL(strncat);
#ifndef __HAVE_ARCH_STRLCAT
/**
- * strlcat - Append a length-limited, %NUL-terminated string to another
+ * strlcat - Append a length-limited, C-string to another
* @dest: The string to be appended to
* @src: The string to append to it
* @count: The size of the destination buffer.
diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index b604b831f4d1..649d097853a1 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -374,7 +374,7 @@ void __init swiotlb_free(void)
io_tlb_nslabs = 0;
}
-static int is_swiotlb_buffer(phys_addr_t paddr)
+int is_swiotlb_buffer(phys_addr_t paddr)
{
return paddr >= io_tlb_start && paddr < io_tlb_end;
}
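
With the static removed, code outside lib/swiotlb.c can test whether a physical address falls inside the bounce pool; a hedged sketch, where the phys_addr_t in hand comes from whatever translation the caller already performs:

	if (is_swiotlb_buffer(paddr)) {
		/* paddr points into the swiotlb bounce buffer */
	}
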
diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index 0648291cdafe..0eced40344dd 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -1183,6 +1183,21 @@ char *address_val(char *buf, char *end, const void *addr,
return number(buf, end, num, spec);
}
+static noinline_for_stack
+char *comm_name(char *buf, char *end, struct task_struct *tsk,
+ struct printf_spec spec, const char *fmt)
+{
+ char name[TASK_COMM_LEN];
+
+ /* Caller can pass NULL instead of current. */
+ if (!tsk)
+ tsk = current;
+ /* Not using get_task_comm() in case we are in IRQ context. */
+ memcpy(name, tsk->comm, TASK_COMM_LEN);
+ name[sizeof(name) - 1] = '\0';
+ return string(buf, end, name, spec);
+}
+
int kptr_restrict __read_mostly;
/*
@@ -1250,6 +1265,7 @@ int kptr_restrict __read_mostly;
* (default assumed to be phys_addr_t, passed by reference)
* - 'd[234]' For a dentry name (optionally 2-4 last components)
* - 'D[234]' Same as 'd' but for a struct file
+ * - 'T' task_struct->comm
*
* Note: The difference between 'S' and 'F' is that on ia64 and ppc64
* function pointers are really function descriptors, which contain a
@@ -1261,7 +1277,7 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr,
{
int default_width = 2 * sizeof(void *) + (spec.flags & SPECIAL ? 2 : 0);
- if (!ptr && *fmt != 'K') {
+ if (!ptr && *fmt != 'K' && *fmt != 'T') {
/*
* Print (null) with the same width as a pointer so it makes
* tabular output look nice.
@@ -1389,6 +1405,8 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr,
return dentry_name(buf, end,
((const struct file *)ptr)->f_path.dentry,
spec, fmt);
+ case 'T':
+ return comm_name(buf, end, ptr, spec, fmt);
}
spec.flags |= SMALL;
if (spec.field_width == -1) {
@@ -2347,7 +2365,7 @@ int vsscanf(const char *buf, const char *fmt, va_list args)
break;
base = 10;
- is_sign = 0;
+ is_sign = false;
switch (*fmt++) {
case 'c':
@@ -2386,7 +2404,7 @@ int vsscanf(const char *buf, const char *fmt, va_list args)
case 'i':
base = 0;
case 'd':
- is_sign = 1;
+ is_sign = true;
case 'u':
break;
case '%':
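
With the new 'T' conversion, a task's comm can be printed without first copying it out, and a NULL argument is treated as current. A sketch (tsk is hypothetical):

	pr_info("killed by %pT (pid %d)\n", tsk, task_pid_nr(tsk));
	pr_info("running in %pT\n", NULL);	/* NULL means current */
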
diff --git a/lib/xz/Kconfig b/lib/xz/Kconfig
index 08837db52d94..12d2d777f36b 100644
--- a/lib/xz/Kconfig
+++ b/lib/xz/Kconfig
@@ -9,33 +9,33 @@ config XZ_DEC
if XZ_DEC
config XZ_DEC_X86
- bool "x86 BCJ filter decoder"
- default y if X86
+ bool "x86 BCJ filter decoder" if EXPERT
+ default y
select XZ_DEC_BCJ
config XZ_DEC_POWERPC
- bool "PowerPC BCJ filter decoder"
- default y if PPC
+ bool "PowerPC BCJ filter decoder" if EXPERT
+ default y
select XZ_DEC_BCJ
config XZ_DEC_IA64
- bool "IA-64 BCJ filter decoder"
- default y if IA64
+ bool "IA-64 BCJ filter decoder" if EXPERT
+ default y
select XZ_DEC_BCJ
config XZ_DEC_ARM
- bool "ARM BCJ filter decoder"
- default y if ARM
+ bool "ARM BCJ filter decoder" if EXPERT
+ default y
select XZ_DEC_BCJ
config XZ_DEC_ARMTHUMB
- bool "ARM-Thumb BCJ filter decoder"
- default y if (ARM && ARM_THUMB)
+ bool "ARM-Thumb BCJ filter decoder" if EXPERT
+ default y
select XZ_DEC_BCJ
config XZ_DEC_SPARC
- bool "SPARC BCJ filter decoder"
- default y if SPARC
+ bool "SPARC BCJ filter decoder" if EXPERT
+ default y
select XZ_DEC_BCJ
endif
diff --git a/lib/xz/xz_dec_lzma2.c b/lib/xz/xz_dec_lzma2.c
index a6cdc969ea42..08c3c8049998 100644
--- a/lib/xz/xz_dec_lzma2.c
+++ b/lib/xz/xz_dec_lzma2.c
@@ -1043,6 +1043,8 @@ XZ_EXTERN enum xz_ret xz_dec_lzma2_run(struct xz_dec_lzma2 *s,
s->lzma2.sequence = SEQ_LZMA_PREPARE;
+ /* Fall through */
+
case SEQ_LZMA_PREPARE:
if (s->lzma2.compressed < RC_INIT_BYTES)
return XZ_DATA_ERROR;
@@ -1053,6 +1055,8 @@ XZ_EXTERN enum xz_ret xz_dec_lzma2_run(struct xz_dec_lzma2 *s,
s->lzma2.compressed -= RC_INIT_BYTES;
s->lzma2.sequence = SEQ_LZMA_RUN;
+ /* Fall through */
+
case SEQ_LZMA_RUN:
/*
* Set dictionary limit to indicate how much we want
diff --git a/mm/Makefile b/mm/Makefile
index b484452dac57..af8eb38c2388 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -3,7 +3,7 @@
#
mmu-y := nommu.o
-mmu-$(CONFIG_MMU) := fremap.o highmem.o madvise.o memory.o mincore.o \
+mmu-$(CONFIG_MMU) := gup.o highmem.o madvise.o memory.o mincore.o \
mlock.o mmap.o mprotect.o mremap.o msync.o rmap.o \
vmalloc.o pagewalk.o pgtable-generic.o
diff --git a/mm/bounce.c b/mm/bounce.c
index 523918b8c6dc..ab21ba203d5c 100644
--- a/mm/bounce.c
+++ b/mm/bounce.c
@@ -3,6 +3,8 @@
* - Split from highmem.c
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/mm.h>
#include <linux/export.h>
#include <linux/swap.h>
@@ -15,6 +17,7 @@
#include <linux/hash.h>
#include <linux/highmem.h>
#include <linux/bootmem.h>
+#include <linux/printk.h>
#include <asm/tlbflush.h>
#include <trace/events/block.h>
@@ -34,7 +37,7 @@ static __init int init_emergency_pool(void)
page_pool = mempool_create_page_pool(POOL_SIZE, 0);
BUG_ON(!page_pool);
- printk("bounce pool size: %d pages\n", POOL_SIZE);
+ pr_info("pool size: %d pages\n", POOL_SIZE);
return 0;
}
@@ -86,7 +89,7 @@ int init_emergency_isa_pool(void)
mempool_free_pages, (void *) 0);
BUG_ON(!isa_page_pool);
- printk("isa bounce pool size: %d pages\n", ISA_POOL_SIZE);
+ pr_info("isa pool size: %d pages\n", ISA_POOL_SIZE);
return 0;
}
diff --git a/mm/compaction.c b/mm/compaction.c
index 627dc2e4320f..83ca6f9138e2 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -89,7 +89,8 @@ static void __reset_isolation_suitable(struct zone *zone)
unsigned long end_pfn = zone_end_pfn(zone);
unsigned long pfn;
- zone->compact_cached_migrate_pfn = start_pfn;
+ zone->compact_cached_migrate_pfn[0] = start_pfn;
+ zone->compact_cached_migrate_pfn[1] = start_pfn;
zone->compact_cached_free_pfn = end_pfn;
zone->compact_blockskip_flush = false;
@@ -131,9 +132,10 @@ void reset_isolation_suitable(pg_data_t *pgdat)
*/
static void update_pageblock_skip(struct compact_control *cc,
struct page *page, unsigned long nr_isolated,
- bool migrate_scanner)
+ bool set_unsuitable, bool migrate_scanner)
{
struct zone *zone = cc->zone;
+ unsigned long pfn;
if (cc->ignore_skip_hint)
return;
@@ -141,20 +143,32 @@ static void update_pageblock_skip(struct compact_control *cc,
if (!page)
return;
- if (!nr_isolated) {
- unsigned long pfn = page_to_pfn(page);
+ if (nr_isolated)
+ return;
+
+ /*
+ * Only skip pageblocks when all forms of compaction are known to
+ * fail in the near future.
+ */
+ if (set_unsuitable)
set_pageblock_skip(page);
- /* Update where compaction should restart */
- if (migrate_scanner) {
- if (!cc->finished_update_migrate &&
- pfn > zone->compact_cached_migrate_pfn)
- zone->compact_cached_migrate_pfn = pfn;
- } else {
- if (!cc->finished_update_free &&
- pfn < zone->compact_cached_free_pfn)
- zone->compact_cached_free_pfn = pfn;
- }
+ pfn = page_to_pfn(page);
+
+ /* Update where async and sync compaction should restart */
+ if (migrate_scanner) {
+ if (cc->finished_update_migrate)
+ return;
+ if (pfn > zone->compact_cached_migrate_pfn[0])
+ zone->compact_cached_migrate_pfn[0] = pfn;
+ if (cc->mode != MIGRATE_ASYNC &&
+ pfn > zone->compact_cached_migrate_pfn[1])
+ zone->compact_cached_migrate_pfn[1] = pfn;
+ } else {
+ if (cc->finished_update_free)
+ return;
+ if (pfn < zone->compact_cached_free_pfn)
+ zone->compact_cached_free_pfn = pfn;
}
}
#else
@@ -166,7 +180,7 @@ static inline bool isolation_suitable(struct compact_control *cc,
static void update_pageblock_skip(struct compact_control *cc,
struct page *page, unsigned long nr_isolated,
- bool migrate_scanner)
+ bool set_unsuitable, bool migrate_scanner)
{
}
#endif /* CONFIG_COMPACTION */
@@ -195,7 +209,7 @@ static bool compact_checklock_irqsave(spinlock_t *lock, unsigned long *flags,
}
/* async aborts if taking too long or contended */
- if (!cc->sync) {
+ if (cc->mode == MIGRATE_ASYNC) {
cc->contended = true;
return false;
}
@@ -208,12 +222,6 @@ static bool compact_checklock_irqsave(spinlock_t *lock, unsigned long *flags,
return true;
}
-static inline bool compact_trylock_irqsave(spinlock_t *lock,
- unsigned long *flags, struct compact_control *cc)
-{
- return compact_checklock_irqsave(lock, flags, false, cc);
-}
-
/* Returns true if the page is within a block suitable for migration to */
static bool suitable_migration_target(struct page *page)
{
@@ -293,14 +301,14 @@ static unsigned long isolate_freepages_block(struct compact_control *cc,
/* Found a free page, break it into order-0 pages */
isolated = split_free_page(page);
- total_isolated += isolated;
- for (i = 0; i < isolated; i++) {
- list_add(&page->lru, freelist);
- page++;
- }
-
- /* If a page was split, advance to the end of it */
if (isolated) {
+ total_isolated += isolated;
+ for (i = 0; i < isolated; i++) {
+ list_add(&page->lru, freelist);
+ page++;
+ }
+
+ /* If a page was split, advance to the end of it */
blockpfn += isolated - 1;
cursor += isolated - 1;
continue;
@@ -309,9 +317,6 @@ static unsigned long isolate_freepages_block(struct compact_control *cc,
isolate_fail:
if (strict)
break;
- else
- continue;
-
}
trace_mm_compaction_isolate_freepages(nr_scanned, total_isolated);
@@ -329,7 +334,8 @@ isolate_fail:
/* Update the pageblock-skip if the whole pageblock was scanned */
if (blockpfn == end_pfn)
- update_pageblock_skip(cc, valid_page, total_isolated, false);
+ update_pageblock_skip(cc, valid_page, total_isolated, true,
+ false);
count_compact_events(COMPACTFREE_SCANNED, nr_scanned);
if (total_isolated)
@@ -464,8 +470,9 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc,
unsigned long flags;
bool locked = false;
struct page *page = NULL, *valid_page = NULL;
- bool skipped_async_unsuitable = false;
- const isolate_mode_t mode = (!cc->sync ? ISOLATE_ASYNC_MIGRATE : 0) |
+ bool set_unsuitable = true;
+ const isolate_mode_t mode = (cc->mode == MIGRATE_ASYNC ?
+ ISOLATE_ASYNC_MIGRATE : 0) |
(unevictable ? ISOLATE_UNEVICTABLE : 0);
/*
@@ -475,7 +482,7 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc,
*/
while (unlikely(too_many_isolated(zone))) {
/* async migration should just abort */
- if (!cc->sync)
+ if (cc->mode == MIGRATE_ASYNC)
return 0;
congestion_wait(BLK_RW_ASYNC, HZ/10);
@@ -484,8 +491,13 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc,
return 0;
}
+ if (cond_resched()) {
+ /* Async terminates prematurely on need_resched() */
+ if (cc->mode == MIGRATE_ASYNC)
+ return 0;
+ }
+
/* Time to isolate some pages for migration */
- cond_resched();
for (; low_pfn < end_pfn; low_pfn++) {
/* give a chance to irqs before checking need_resched() */
if (locked && !(low_pfn % SWAP_CLUSTER_MAX)) {
@@ -540,9 +552,9 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc,
* the minimum amount of work satisfies the allocation
*/
mt = get_pageblock_migratetype(page);
- if (!cc->sync && !migrate_async_suitable(mt)) {
- cc->finished_update_migrate = true;
- skipped_async_unsuitable = true;
+ if (cc->mode == MIGRATE_ASYNC &&
+ !migrate_async_suitable(mt)) {
+ set_unsuitable = false;
goto next_pageblock;
}
}
@@ -646,11 +658,10 @@ next_pageblock:
/*
* Update the pageblock-skip information and cached scanner pfn,
* if the whole pageblock was scanned without isolating any page.
- * This is not done when pageblock was skipped due to being unsuitable
- * for async compaction, so that eventual sync compaction can try.
*/
- if (low_pfn == end_pfn && !skipped_async_unsuitable)
- update_pageblock_skip(cc, valid_page, nr_isolated, true);
+ if (low_pfn == end_pfn)
+ update_pageblock_skip(cc, valid_page, nr_isolated,
+ set_unsuitable, true);
trace_mm_compaction_isolate_migratepages(nr_scanned, nr_isolated);
@@ -671,7 +682,10 @@ static void isolate_freepages(struct zone *zone,
struct compact_control *cc)
{
struct page *page;
- unsigned long high_pfn, low_pfn, pfn, z_end_pfn;
+ unsigned long block_start_pfn; /* start of current pageblock */
+ unsigned long block_end_pfn; /* end of current pageblock */
+ unsigned long low_pfn; /* lowest pfn scanner is able to scan */
+ unsigned long next_free_pfn; /* start pfn for scanning at next round */
int nr_freepages = cc->nr_freepages;
struct list_head *freelist = &cc->freepages;
@@ -679,32 +693,27 @@ static void isolate_freepages(struct zone *zone,
* Initialise the free scanner. The starting point is where we last
* successfully isolated from, zone-cached value, or the end of the
* zone when isolating for the first time. We need this aligned to
- * the pageblock boundary, because we do pfn -= pageblock_nr_pages
- * in the for loop.
+ * the pageblock boundary, because we do
+ * block_start_pfn -= pageblock_nr_pages in the for loop.
+ * For the ending point, take care when isolating in the last pageblock
+ * of a zone which ends in the middle of a pageblock.
* The low boundary is the end of the pageblock the migration scanner
* is using.
*/
- pfn = cc->free_pfn & ~(pageblock_nr_pages-1);
+ block_start_pfn = cc->free_pfn & ~(pageblock_nr_pages-1);
+ block_end_pfn = min(block_start_pfn + pageblock_nr_pages,
+ zone_end_pfn(zone));
low_pfn = ALIGN(cc->migrate_pfn + 1, pageblock_nr_pages);
/*
- * Take care that if the migration scanner is at the end of the zone
- * that the free scanner does not accidentally move to the next zone
- * in the next isolation cycle.
- */
- high_pfn = min(low_pfn, pfn);
-
- z_end_pfn = zone_end_pfn(zone);
-
- /*
* Isolate free pages until enough are available to migrate the
* pages on cc->migratepages. We stop searching if the migrate
* and free page scanners meet or enough free pages are isolated.
*/
- for (; pfn >= low_pfn && cc->nr_migratepages > nr_freepages;
- pfn -= pageblock_nr_pages) {
+ for (; block_start_pfn >= low_pfn && cc->nr_migratepages > nr_freepages;
+ block_end_pfn = block_start_pfn,
+ block_start_pfn -= pageblock_nr_pages) {
unsigned long isolated;
- unsigned long end_pfn;
/*
* This can iterate a massively long zone without finding any
@@ -713,7 +722,7 @@ static void isolate_freepages(struct zone *zone,
*/
cond_resched();
- if (!pfn_valid(pfn))
+ if (!pfn_valid(block_start_pfn))
continue;
/*
@@ -723,7 +732,7 @@ static void isolate_freepages(struct zone *zone,
* i.e. it's possible that all pages within a zones range of
* pages do not belong to a single zone.
*/
- page = pfn_to_page(pfn);
+ page = pfn_to_page(block_start_pfn);
if (page_zone(page) != zone)
continue;
@@ -736,26 +745,19 @@ static void isolate_freepages(struct zone *zone,
continue;
/* Found a block suitable for isolating free pages from */
- isolated = 0;
-
- /*
- * Take care when isolating in last pageblock of a zone which
- * ends in the middle of a pageblock.
- */
- end_pfn = min(pfn + pageblock_nr_pages, z_end_pfn);
- isolated = isolate_freepages_block(cc, pfn, end_pfn,
- freelist, false);
+ next_free_pfn = block_start_pfn;
+ isolated = isolate_freepages_block(cc, block_start_pfn,
+ block_end_pfn, freelist, false);
nr_freepages += isolated;
/*
- * Record the highest PFN we isolated pages from. When next
- * looking for free pages, the search will restart here as
- * page migration may have returned some pages to the allocator
+ * Set a flag that we successfully isolated in this pageblock.
+ * In the next loop iteration, zone->compact_cached_free_pfn
+ * will not be updated and thus it will effectively contain the
+ * highest pageblock we isolated pages from.
*/
- if (isolated) {
+ if (isolated)
cc->finished_update_free = true;
- high_pfn = max(high_pfn, pfn);
- }
}
/* split_free_page does not map the pages */
@@ -765,10 +767,10 @@ static void isolate_freepages(struct zone *zone,
* If we crossed the migrate scanner, we want to keep it that way
* so that compact_finished() may detect this
*/
- if (pfn < low_pfn)
- cc->free_pfn = max(pfn, zone->zone_start_pfn);
- else
- cc->free_pfn = high_pfn;
+ if (block_start_pfn < low_pfn)
+ next_free_pfn = cc->migrate_pfn;
+
+ cc->free_pfn = next_free_pfn;
cc->nr_freepages = nr_freepages;
}
@@ -799,23 +801,16 @@ static struct page *compaction_alloc(struct page *migratepage,
}
/*
- * We cannot control nr_migratepages and nr_freepages fully when migration is
- * running as migrate_pages() has no knowledge of compact_control. When
- * migration is complete, we count the number of pages on the lists by hand.
+ * This is a migrate-callback that "frees" freepages back to the isolated
+ * freelist. All pages on the freelist are from the same zone, so there is no
+ * special handling needed for NUMA.
*/
-static void update_nr_listpages(struct compact_control *cc)
+static void compaction_free(struct page *page, unsigned long data)
{
- int nr_migratepages = 0;
- int nr_freepages = 0;
- struct page *page;
-
- list_for_each_entry(page, &cc->migratepages, lru)
- nr_migratepages++;
- list_for_each_entry(page, &cc->freepages, lru)
- nr_freepages++;
+ struct compact_control *cc = (struct compact_control *)data;
- cc->nr_migratepages = nr_migratepages;
- cc->nr_freepages = nr_freepages;
+ list_add(&page->lru, &cc->freepages);
+ cc->nr_freepages++;
}
/* possible outcome of isolate_migratepages */
@@ -868,7 +863,8 @@ static int compact_finished(struct zone *zone,
/* Compaction run completes if the migrate and free scanner meet */
if (cc->free_pfn <= cc->migrate_pfn) {
/* Let the next compaction start anew. */
- zone->compact_cached_migrate_pfn = zone->zone_start_pfn;
+ zone->compact_cached_migrate_pfn[0] = zone->zone_start_pfn;
+ zone->compact_cached_migrate_pfn[1] = zone->zone_start_pfn;
zone->compact_cached_free_pfn = zone_end_pfn(zone);
/*
@@ -968,6 +964,7 @@ static int compact_zone(struct zone *zone, struct compact_control *cc)
int ret;
unsigned long start_pfn = zone->zone_start_pfn;
unsigned long end_pfn = zone_end_pfn(zone);
+ const bool sync = cc->mode != MIGRATE_ASYNC;
ret = compaction_suitable(zone, cc->order);
switch (ret) {
@@ -993,7 +990,7 @@ static int compact_zone(struct zone *zone, struct compact_control *cc)
* information on where the scanners should start but check that it
* is initialised by ensuring the values are within zone boundaries.
*/
- cc->migrate_pfn = zone->compact_cached_migrate_pfn;
+ cc->migrate_pfn = zone->compact_cached_migrate_pfn[sync];
cc->free_pfn = zone->compact_cached_free_pfn;
if (cc->free_pfn < start_pfn || cc->free_pfn > end_pfn) {
cc->free_pfn = end_pfn & ~(pageblock_nr_pages-1);
@@ -1001,7 +998,8 @@ static int compact_zone(struct zone *zone, struct compact_control *cc)
}
if (cc->migrate_pfn < start_pfn || cc->migrate_pfn > end_pfn) {
cc->migrate_pfn = start_pfn;
- zone->compact_cached_migrate_pfn = cc->migrate_pfn;
+ zone->compact_cached_migrate_pfn[0] = cc->migrate_pfn;
+ zone->compact_cached_migrate_pfn[1] = cc->migrate_pfn;
}
trace_mm_compaction_begin(start_pfn, cc->migrate_pfn, cc->free_pfn, end_pfn);
@@ -1009,7 +1007,6 @@ static int compact_zone(struct zone *zone, struct compact_control *cc)
migrate_prep_local();
while ((ret = compact_finished(zone, cc)) == COMPACT_CONTINUE) {
- unsigned long nr_migrate, nr_remaining;
int err;
switch (isolate_migratepages(zone, cc)) {
@@ -1024,21 +1021,20 @@ static int compact_zone(struct zone *zone, struct compact_control *cc)
;
}
- nr_migrate = cc->nr_migratepages;
+ if (!cc->nr_migratepages)
+ continue;
+
err = migrate_pages(&cc->migratepages, compaction_alloc,
- (unsigned long)cc,
- cc->sync ? MIGRATE_SYNC_LIGHT : MIGRATE_ASYNC,
+ compaction_free, (unsigned long)cc, cc->mode,
MR_COMPACTION);
- update_nr_listpages(cc);
- nr_remaining = cc->nr_migratepages;
- trace_mm_compaction_migratepages(nr_migrate - nr_remaining,
- nr_remaining);
+ trace_mm_compaction_migratepages(cc->nr_migratepages, err,
+ &cc->migratepages);
- /* Release isolated pages not migrated */
+ /* All pages were either migrated or will be released */
+ cc->nr_migratepages = 0;
if (err) {
putback_movable_pages(&cc->migratepages);
- cc->nr_migratepages = 0;
/*
* migrate_pages() may return -ENOMEM when scanners meet
* and we want compact_finished() to detect it
@@ -1060,9 +1056,8 @@ out:
return ret;
}
-static unsigned long compact_zone_order(struct zone *zone,
- int order, gfp_t gfp_mask,
- bool sync, bool *contended)
+static unsigned long compact_zone_order(struct zone *zone, int order,
+ gfp_t gfp_mask, enum migrate_mode mode, bool *contended)
{
unsigned long ret;
struct compact_control cc = {
@@ -1071,7 +1066,7 @@ static unsigned long compact_zone_order(struct zone *zone,
.order = order,
.migratetype = allocflags_to_migratetype(gfp_mask),
.zone = zone,
- .sync = sync,
+ .mode = mode,
};
INIT_LIST_HEAD(&cc.freepages);
INIT_LIST_HEAD(&cc.migratepages);
@@ -1093,7 +1088,7 @@ int sysctl_extfrag_threshold = 500;
* @order: The order of the current allocation
* @gfp_mask: The GFP mask of the current allocation
* @nodemask: The allowed nodes to allocate from
- * @sync: Whether migration is synchronous or not
+ * @mode: The migration mode for async, sync light, or sync migration
* @contended: Return value that is true if compaction was aborted due to lock contention
* @page: Optionally capture a free page of the requested order during compaction
*
@@ -1101,7 +1096,7 @@ int sysctl_extfrag_threshold = 500;
*/
unsigned long try_to_compact_pages(struct zonelist *zonelist,
int order, gfp_t gfp_mask, nodemask_t *nodemask,
- bool sync, bool *contended)
+ enum migrate_mode mode, bool *contended)
{
enum zone_type high_zoneidx = gfp_zone(gfp_mask);
int may_enter_fs = gfp_mask & __GFP_FS;
@@ -1126,7 +1121,7 @@ unsigned long try_to_compact_pages(struct zonelist *zonelist,
nodemask) {
int status;
- status = compact_zone_order(zone, order, gfp_mask, sync,
+ status = compact_zone_order(zone, order, gfp_mask, mode,
contended);
rc = max(status, rc);
@@ -1165,9 +1160,6 @@ static void __compact_pgdat(pg_data_t *pgdat, struct compact_control *cc)
if (zone_watermark_ok(zone, cc->order,
low_wmark_pages(zone), 0, 0))
compaction_defer_reset(zone, cc->order, false);
- /* Currently async compaction is never deferred. */
- else if (cc->sync)
- defer_compaction(zone, cc->order);
}
VM_BUG_ON(!list_empty(&cc->freepages));
@@ -1179,7 +1171,7 @@ void compact_pgdat(pg_data_t *pgdat, int order)
{
struct compact_control cc = {
.order = order,
- .sync = false,
+ .mode = MIGRATE_ASYNC,
};
if (!order)
@@ -1192,7 +1184,7 @@ static void compact_node(int nid)
{
struct compact_control cc = {
.order = -1,
- .sync = true,
+ .mode = MIGRATE_SYNC,
.ignore_skip_hint = true,
};
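
Because the bool sync flag is gone, external callers now pass an explicit enum migrate_mode; a hedged sketch of the updated try_to_compact_pages() call, with all arguments hypothetical:

	rc = try_to_compact_pages(zonelist, order, gfp_mask, nodemask,
				  MIGRATE_ASYNC, &contended);
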
diff --git a/mm/dmapool.c b/mm/dmapool.c
index 8058fcd7ae91..306baa594f95 100644
--- a/mm/dmapool.c
+++ b/mm/dmapool.c
@@ -170,24 +170,16 @@ struct dma_pool *dma_pool_create(const char *name, struct device *dev,
retval->boundary = boundary;
retval->allocation = allocation;
- if (dev) {
- int ret;
+ INIT_LIST_HEAD(&retval->pools);
- mutex_lock(&pools_lock);
- if (list_empty(&dev->dma_pools))
- ret = device_create_file(dev, &dev_attr_pools);
- else
- ret = 0;
- /* note: not currently insisting "name" be unique */
- if (!ret)
- list_add(&retval->pools, &dev->dma_pools);
- else {
- kfree(retval);
- retval = NULL;
- }
- mutex_unlock(&pools_lock);
+ mutex_lock(&pools_lock);
+ if (list_empty(&dev->dma_pools) &&
+ device_create_file(dev, &dev_attr_pools)) {
+ mutex_unlock(&pools_lock);
+ kfree(retval);
+ return NULL;
} else
- INIT_LIST_HEAD(&retval->pools);
+ list_add(&retval->pools, &dev->dma_pools);
+ mutex_unlock(&pools_lock);
return retval;
}
@@ -508,7 +500,6 @@ void dmam_pool_destroy(struct dma_pool *pool)
{
struct device *dev = pool->dev;
- WARN_ON(devres_destroy(dev, dmam_pool_release, dmam_pool_match, pool));
- dma_pool_destroy(pool);
+ WARN_ON(devres_release(dev, dmam_pool_release, dmam_pool_match, pool));
}
EXPORT_SYMBOL(dmam_pool_destroy);
diff --git a/mm/filemap.c b/mm/filemap.c
index 8fb66b2a78ec..bec4b9be3525 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -257,9 +257,11 @@ static int filemap_check_errors(struct address_space *mapping)
{
int ret = 0;
/* Check for outstanding write errors */
- if (test_and_clear_bit(AS_ENOSPC, &mapping->flags))
+ if (test_bit(AS_ENOSPC, &mapping->flags) &&
+ test_and_clear_bit(AS_ENOSPC, &mapping->flags))
ret = -ENOSPC;
- if (test_and_clear_bit(AS_EIO, &mapping->flags))
+ if (test_bit(AS_EIO, &mapping->flags) &&
+ test_and_clear_bit(AS_EIO, &mapping->flags))
ret = -EIO;
return ret;
}
@@ -762,6 +764,31 @@ void end_page_writeback(struct page *page)
}
EXPORT_SYMBOL(end_page_writeback);
+/*
+ * After completing I/O on a page, call this routine to update the page
+ * flags appropriately.
+ */
+void page_endio(struct page *page, int rw, int err)
+{
+ if (rw == READ) {
+ if (!err) {
+ SetPageUptodate(page);
+ } else {
+ ClearPageUptodate(page);
+ SetPageError(page);
+ }
+ unlock_page(page);
+ } else { /* rw == WRITE */
+ if (err) {
+ SetPageError(page);
+ if (page->mapping)
+ mapping_set_error(page->mapping, err);
+ }
+ end_page_writeback(page);
+ }
+}
+EXPORT_SYMBOL_GPL(page_endio);
+
/**
* __lock_page - get a lock on the page, assuming we need to sleep to get it
* @page: the page to lock
@@ -955,26 +982,6 @@ out:
EXPORT_SYMBOL(find_get_entry);
/**
- * find_get_page - find and get a page reference
- * @mapping: the address_space to search
- * @offset: the page index
- *
- * Looks up the page cache slot at @mapping & @offset. If there is a
- * page cache page, it is returned with an increased refcount.
- *
- * Otherwise, %NULL is returned.
- */
-struct page *find_get_page(struct address_space *mapping, pgoff_t offset)
-{
- struct page *page = find_get_entry(mapping, offset);
-
- if (radix_tree_exceptional_entry(page))
- page = NULL;
- return page;
-}
-EXPORT_SYMBOL(find_get_page);
-
-/**
* find_lock_entry - locate, pin and lock a page cache entry
* @mapping: the address_space to search
* @offset: the page cache index
@@ -1011,66 +1018,84 @@ repeat:
EXPORT_SYMBOL(find_lock_entry);
/**
- * find_lock_page - locate, pin and lock a pagecache page
+ * pagecache_get_page - find and get a page reference
* @mapping: the address_space to search
* @offset: the page index
+ * @fgp_flags: FGP flags
+ * @cache_gfp_mask: gfp mask to use for a page cache allocation
+ * @radix_gfp_mask: gfp mask to use for a radix tree node allocation
*
- * Looks up the page cache slot at @mapping & @offset. If there is a
- * page cache page, it is returned locked and with an increased
- * refcount.
+ * Looks up the page cache slot at @mapping & @offset.
*
- * Otherwise, %NULL is returned.
+ * FGP flags modify how the page is returned:
*
- * find_lock_page() may sleep.
- */
-struct page *find_lock_page(struct address_space *mapping, pgoff_t offset)
-{
- struct page *page = find_lock_entry(mapping, offset);
-
- if (radix_tree_exceptional_entry(page))
- page = NULL;
- return page;
-}
-EXPORT_SYMBOL(find_lock_page);
-
-/**
- * find_or_create_page - locate or add a pagecache page
- * @mapping: the page's address_space
- * @index: the page's index into the mapping
- * @gfp_mask: page allocation mode
+ * FGP_ACCESSED: the page will be marked accessed
+ * FGP_LOCK: the page is returned locked
+ * FGP_CREAT: If page is not present then a new page is allocated using
+ * @cache_gfp_mask and added to the page cache and the VM's LRU
+ * list. The page is returned locked and with an increased
+ * refcount. Otherwise, %NULL is returned.
*
- * Looks up the page cache slot at @mapping & @offset. If there is a
- * page cache page, it is returned locked and with an increased
- * refcount.
+ * If FGP_LOCK or FGP_CREAT is specified then the function may sleep even
+ * if the GFP flags specified for FGP_CREAT are atomic.
*
- * If the page is not present, a new page is allocated using @gfp_mask
- * and added to the page cache and the VM's LRU list. The page is
- * returned locked and with an increased refcount.
- *
- * On memory exhaustion, %NULL is returned.
- *
- * find_or_create_page() may sleep, even if @gfp_flags specifies an
- * atomic allocation!
+ * If there is a page cache page, it is returned with an increased refcount.
*/
-struct page *find_or_create_page(struct address_space *mapping,
- pgoff_t index, gfp_t gfp_mask)
+struct page *pagecache_get_page(struct address_space *mapping, pgoff_t offset,
+ int fgp_flags, gfp_t cache_gfp_mask, gfp_t radix_gfp_mask)
{
struct page *page;
- int err;
+
repeat:
- page = find_lock_page(mapping, index);
- if (!page) {
- page = __page_cache_alloc(gfp_mask);
+ page = find_get_entry(mapping, offset);
+ if (radix_tree_exceptional_entry(page))
+ page = NULL;
+ if (!page)
+ goto no_page;
+
+ if (fgp_flags & FGP_LOCK) {
+ if (fgp_flags & FGP_NOWAIT) {
+ if (!trylock_page(page)) {
+ page_cache_release(page);
+ return NULL;
+ }
+ } else {
+ lock_page(page);
+ }
+
+ /* Has the page been truncated? */
+ if (unlikely(page->mapping != mapping)) {
+ unlock_page(page);
+ page_cache_release(page);
+ goto repeat;
+ }
+ VM_BUG_ON_PAGE(page->index != offset, page);
+ }
+
+ if (page && (fgp_flags & FGP_ACCESSED))
+ mark_page_accessed(page);
+
+no_page:
+ if (!page && (fgp_flags & FGP_CREAT)) {
+ int err;
+ if ((fgp_flags & FGP_WRITE) && mapping_cap_account_dirty(mapping))
+ cache_gfp_mask |= __GFP_WRITE;
+ if (fgp_flags & FGP_NOFS) {
+ cache_gfp_mask &= ~__GFP_FS;
+ radix_gfp_mask &= ~__GFP_FS;
+ }
+
+ page = __page_cache_alloc(cache_gfp_mask);
if (!page)
return NULL;
- /*
- * We want a regular kernel memory (not highmem or DMA etc)
- * allocation for the radix tree nodes, but we need to honour
- * the context-specific requirements the caller has asked for.
- * GFP_RECLAIM_MASK collects those requirements.
- */
- err = add_to_page_cache_lru(page, mapping, index,
- (gfp_mask & GFP_RECLAIM_MASK));
+
+ if (WARN_ON_ONCE(!(fgp_flags & FGP_LOCK)))
+ fgp_flags |= FGP_LOCK;
+
+ /* Init accessed so we avoid the atomic mark_page_accessed() later */
+ if (fgp_flags & FGP_ACCESSED)
+ init_page_accessed(page);
+
+ err = add_to_page_cache_lru(page, mapping, offset, radix_gfp_mask);
if (unlikely(err)) {
page_cache_release(page);
page = NULL;
@@ -1078,9 +1103,10 @@ repeat:
goto repeat;
}
}
+
return page;
}
-EXPORT_SYMBOL(find_or_create_page);
+EXPORT_SYMBOL(pagecache_get_page);
/**
* find_get_entries - gang pagecache lookup
@@ -1377,39 +1403,6 @@ repeat:
}
EXPORT_SYMBOL(find_get_pages_tag);
-/**
- * grab_cache_page_nowait - returns locked page at given index in given cache
- * @mapping: target address_space
- * @index: the page index
- *
- * Same as grab_cache_page(), but do not wait if the page is unavailable.
- * This is intended for speculative data generators, where the data can
- * be regenerated if the page couldn't be grabbed. This routine should
- * be safe to call while holding the lock for another page.
- *
- * Clear __GFP_FS when allocating the page to avoid recursion into the fs
- * and deadlock against the caller's locked page.
- */
-struct page *
-grab_cache_page_nowait(struct address_space *mapping, pgoff_t index)
-{
- struct page *page = find_get_page(mapping, index);
-
- if (page) {
- if (trylock_page(page))
- return page;
- page_cache_release(page);
- return NULL;
- }
- page = __page_cache_alloc(mapping_gfp_mask(mapping) & ~__GFP_FS);
- if (page && add_to_page_cache_lru(page, mapping, index, GFP_NOFS)) {
- page_cache_release(page);
- page = NULL;
- }
- return page;
-}
-EXPORT_SYMBOL(grab_cache_page_nowait);
-
/*
* CD/DVDs are error prone. When a medium error occurs, the driver may fail
* a _large_ part of the i/o request. Imagine the worst scenario:
@@ -2325,7 +2318,6 @@ int pagecache_write_end(struct file *file, struct address_space *mapping,
{
const struct address_space_operations *aops = mapping->a_ops;
- mark_page_accessed(page);
return aops->write_end(file, mapping, pos, len, copied, page, fsdata);
}
EXPORT_SYMBOL(pagecache_write_end);
@@ -2405,34 +2397,18 @@ EXPORT_SYMBOL(generic_file_direct_write);
struct page *grab_cache_page_write_begin(struct address_space *mapping,
pgoff_t index, unsigned flags)
{
- int status;
- gfp_t gfp_mask;
struct page *page;
- gfp_t gfp_notmask = 0;
+ int fgp_flags = FGP_LOCK|FGP_ACCESSED|FGP_WRITE|FGP_CREAT;
- gfp_mask = mapping_gfp_mask(mapping);
- if (mapping_cap_account_dirty(mapping))
- gfp_mask |= __GFP_WRITE;
if (flags & AOP_FLAG_NOFS)
- gfp_notmask = __GFP_FS;
-repeat:
- page = find_lock_page(mapping, index);
+ fgp_flags |= FGP_NOFS;
+
+ page = pagecache_get_page(mapping, index, fgp_flags,
+ mapping_gfp_mask(mapping),
+ GFP_KERNEL);
if (page)
- goto found;
+ wait_for_stable_page(page);
- page = __page_cache_alloc(gfp_mask & ~gfp_notmask);
- if (!page)
- return NULL;
- status = add_to_page_cache_lru(page, mapping, index,
- GFP_KERNEL & ~gfp_notmask);
- if (unlikely(status)) {
- page_cache_release(page);
- if (status == -EEXIST)
- goto repeat;
- return NULL;
- }
-found:
- wait_for_stable_page(page);
return page;
}
EXPORT_SYMBOL(grab_cache_page_write_begin);
@@ -2481,7 +2457,7 @@ again:
status = a_ops->write_begin(file, mapping, pos, bytes, flags,
&page, &fsdata);
- if (unlikely(status))
+ if (unlikely(status < 0))
break;
if (mapping_writably_mapped(mapping))
@@ -2490,7 +2466,6 @@ again:
copied = iov_iter_copy_from_user_atomic(page, i, offset, bytes);
flush_dcache_page(page);
- mark_page_accessed(page);
status = a_ops->write_end(file, mapping, pos, bytes, copied,
page, fsdata);
if (unlikely(status < 0))
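
In terms of the old helpers, the FGP flags compose roughly as follows; a sketch based on the signature introduced above, not on wrappers defined elsewhere in this patch:

	/* find_get_page() */
	page = pagecache_get_page(mapping, index, 0, 0, 0);

	/* find_lock_page() */
	page = pagecache_get_page(mapping, index, FGP_LOCK, 0, 0);

	/* find_or_create_page() */
	page = pagecache_get_page(mapping, index,
			FGP_LOCK | FGP_ACCESSED | FGP_CREAT,
			gfp_mask, gfp_mask & GFP_RECLAIM_MASK);
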
diff --git a/mm/fremap.c b/mm/fremap.c
deleted file mode 100644
index 34feba60a17e..000000000000
--- a/mm/fremap.c
+++ /dev/null
@@ -1,282 +0,0 @@
-/*
- * linux/mm/fremap.c
- *
- * Explicit pagetable population and nonlinear (random) mappings support.
- *
- * started by Ingo Molnar, Copyright (C) 2002, 2003
- */
-#include <linux/export.h>
-#include <linux/backing-dev.h>
-#include <linux/mm.h>
-#include <linux/swap.h>
-#include <linux/file.h>
-#include <linux/mman.h>
-#include <linux/pagemap.h>
-#include <linux/swapops.h>
-#include <linux/rmap.h>
-#include <linux/syscalls.h>
-#include <linux/mmu_notifier.h>
-
-#include <asm/mmu_context.h>
-#include <asm/cacheflush.h>
-#include <asm/tlbflush.h>
-
-#include "internal.h"
-
-static int mm_counter(struct page *page)
-{
- return PageAnon(page) ? MM_ANONPAGES : MM_FILEPAGES;
-}
-
-static void zap_pte(struct mm_struct *mm, struct vm_area_struct *vma,
- unsigned long addr, pte_t *ptep)
-{
- pte_t pte = *ptep;
- struct page *page;
- swp_entry_t entry;
-
- if (pte_present(pte)) {
- flush_cache_page(vma, addr, pte_pfn(pte));
- pte = ptep_clear_flush(vma, addr, ptep);
- page = vm_normal_page(vma, addr, pte);
- if (page) {
- if (pte_dirty(pte))
- set_page_dirty(page);
- update_hiwater_rss(mm);
- dec_mm_counter(mm, mm_counter(page));
- page_remove_rmap(page);
- page_cache_release(page);
- }
- } else { /* zap_pte() is not called when pte_none() */
- if (!pte_file(pte)) {
- update_hiwater_rss(mm);
- entry = pte_to_swp_entry(pte);
- if (non_swap_entry(entry)) {
- if (is_migration_entry(entry)) {
- page = migration_entry_to_page(entry);
- dec_mm_counter(mm, mm_counter(page));
- }
- } else {
- free_swap_and_cache(entry);
- dec_mm_counter(mm, MM_SWAPENTS);
- }
- }
- pte_clear_not_present_full(mm, addr, ptep, 0);
- }
-}
-
-/*
- * Install a file pte to a given virtual memory address, release any
- * previously existing mapping.
- */
-static int install_file_pte(struct mm_struct *mm, struct vm_area_struct *vma,
- unsigned long addr, unsigned long pgoff, pgprot_t prot)
-{
- int err = -ENOMEM;
- pte_t *pte, ptfile;
- spinlock_t *ptl;
-
- pte = get_locked_pte(mm, addr, &ptl);
- if (!pte)
- goto out;
-
- ptfile = pgoff_to_pte(pgoff);
-
- if (!pte_none(*pte)) {
- if (pte_present(*pte) && pte_soft_dirty(*pte))
- pte_file_mksoft_dirty(ptfile);
- zap_pte(mm, vma, addr, pte);
- }
-
- set_pte_at(mm, addr, pte, ptfile);
- /*
- * We don't need to run update_mmu_cache() here because the "file pte"
- * being installed by install_file_pte() is not a real pte - it's a
- * non-present entry (like a swap entry), noting what file offset should
- * be mapped there when there's a fault (in a non-linear vma where
- * that's not obvious).
- */
- pte_unmap_unlock(pte, ptl);
- err = 0;
-out:
- return err;
-}
-
-int generic_file_remap_pages(struct vm_area_struct *vma, unsigned long addr,
- unsigned long size, pgoff_t pgoff)
-{
- struct mm_struct *mm = vma->vm_mm;
- int err;
-
- do {
- err = install_file_pte(mm, vma, addr, pgoff, vma->vm_page_prot);
- if (err)
- return err;
-
- size -= PAGE_SIZE;
- addr += PAGE_SIZE;
- pgoff++;
- } while (size);
-
- return 0;
-}
-EXPORT_SYMBOL(generic_file_remap_pages);
-
-/**
- * sys_remap_file_pages - remap arbitrary pages of an existing VM_SHARED vma
- * @start: start of the remapped virtual memory range
- * @size: size of the remapped virtual memory range
- * @prot: new protection bits of the range (see NOTE)
- * @pgoff: to-be-mapped page of the backing store file
- * @flags: 0 or MAP_NONBLOCKED - the later will cause no IO.
- *
- * sys_remap_file_pages remaps arbitrary pages of an existing VM_SHARED vma
- * (shared backing store file).
- *
- * This syscall works purely via pagetables, so it's the most efficient
- * way to map the same (large) file into a given virtual window. Unlike
- * mmap()/mremap() it does not create any new vmas. The new mappings are
- * also safe across swapout.
- *
- * NOTE: the @prot parameter right now is ignored (but must be zero),
- * and the vma's default protection is used. Arbitrary protections
- * might be implemented in the future.
- */
-SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size,
- unsigned long, prot, unsigned long, pgoff, unsigned long, flags)
-{
- struct mm_struct *mm = current->mm;
- struct address_space *mapping;
- struct vm_area_struct *vma;
- int err = -EINVAL;
- int has_write_lock = 0;
- vm_flags_t vm_flags = 0;
-
- if (prot)
- return err;
- /*
- * Sanitize the syscall parameters:
- */
- start = start & PAGE_MASK;
- size = size & PAGE_MASK;
-
- /* Does the address range wrap, or is the span zero-sized? */
- if (start + size <= start)
- return err;
-
- /* Does pgoff wrap? */
- if (pgoff + (size >> PAGE_SHIFT) < pgoff)
- return err;
-
- /* Can we represent this offset inside this architecture's pte's? */
-#if PTE_FILE_MAX_BITS < BITS_PER_LONG
- if (pgoff + (size >> PAGE_SHIFT) >= (1UL << PTE_FILE_MAX_BITS))
- return err;
-#endif
-
- /* We need down_write() to change vma->vm_flags. */
- down_read(&mm->mmap_sem);
- retry:
- vma = find_vma(mm, start);
-
- /*
- * Make sure the vma is shared, that it supports prefaulting,
- * and that the remapped range is valid and fully within
- * the single existing vma.
- */
- if (!vma || !(vma->vm_flags & VM_SHARED))
- goto out;
-
- if (!vma->vm_ops || !vma->vm_ops->remap_pages)
- goto out;
-
- if (start < vma->vm_start || start + size > vma->vm_end)
- goto out;
-
- /* Must set VM_NONLINEAR before any pages are populated. */
- if (!(vma->vm_flags & VM_NONLINEAR)) {
- /*
- * vm_private_data is used as a swapout cursor
- * in a VM_NONLINEAR vma.
- */
- if (vma->vm_private_data)
- goto out;
-
- /* Don't need a nonlinear mapping, exit success */
- if (pgoff == linear_page_index(vma, start)) {
- err = 0;
- goto out;
- }
-
- if (!has_write_lock) {
-get_write_lock:
- up_read(&mm->mmap_sem);
- down_write(&mm->mmap_sem);
- has_write_lock = 1;
- goto retry;
- }
- mapping = vma->vm_file->f_mapping;
- /*
- * page_mkclean doesn't work on nonlinear vmas, so if
- * dirty pages need to be accounted, emulate with linear
- * vmas.
- */
- if (mapping_cap_account_dirty(mapping)) {
- unsigned long addr;
- struct file *file = get_file(vma->vm_file);
- /* mmap_region may free vma; grab the info now */
- vm_flags = vma->vm_flags;
-
- addr = mmap_region(file, start, size, vm_flags, pgoff);
- fput(file);
- if (IS_ERR_VALUE(addr)) {
- err = addr;
- } else {
- BUG_ON(addr != start);
- err = 0;
- }
- goto out_freed;
- }
- mutex_lock(&mapping->i_mmap_mutex);
- flush_dcache_mmap_lock(mapping);
- vma->vm_flags |= VM_NONLINEAR;
- vma_interval_tree_remove(vma, &mapping->i_mmap);
- vma_nonlinear_insert(vma, &mapping->i_mmap_nonlinear);
- flush_dcache_mmap_unlock(mapping);
- mutex_unlock(&mapping->i_mmap_mutex);
- }
-
- if (vma->vm_flags & VM_LOCKED) {
- /*
- * drop PG_Mlocked flag for over-mapped range
- */
- if (!has_write_lock)
- goto get_write_lock;
- vm_flags = vma->vm_flags;
- munlock_vma_pages_range(vma, start, start + size);
- vma->vm_flags = vm_flags;
- }
-
- mmu_notifier_invalidate_range_start(mm, start, start + size);
- err = vma->vm_ops->remap_pages(vma, start, size, pgoff);
- mmu_notifier_invalidate_range_end(mm, start, start + size);
-
- /*
- * We can't clear VM_NONLINEAR because we'd have to do
- * it after ->populate completes, and that would prevent
- * downgrading the lock. (Locks can't be upgraded).
- */
-
-out:
- if (vma)
- vm_flags = vma->vm_flags;
-out_freed:
- if (likely(!has_write_lock))
- up_read(&mm->mmap_sem);
- else
- up_write(&mm->mmap_sem);
- if (!err && ((vm_flags & VM_LOCKED) || !(flags & MAP_NONBLOCK)))
- mm_populate(start, size);
-
- return err;
-}
diff --git a/mm/frontswap.c b/mm/frontswap.c
index 1b24bdcb3197..c30eec536f03 100644
--- a/mm/frontswap.c
+++ b/mm/frontswap.c
@@ -327,15 +327,12 @@ EXPORT_SYMBOL(__frontswap_invalidate_area);
static unsigned long __frontswap_curr_pages(void)
{
- int type;
unsigned long totalpages = 0;
struct swap_info_struct *si = NULL;
assert_spin_locked(&swap_lock);
- for (type = swap_list.head; type >= 0; type = si->next) {
- si = swap_info[type];
+ plist_for_each_entry(si, &swap_active_head, list)
totalpages += atomic_read(&si->frontswap_pages);
- }
return totalpages;
}
@@ -347,11 +344,9 @@ static int __frontswap_unuse_pages(unsigned long total, unsigned long *unused,
int si_frontswap_pages;
unsigned long total_pages_to_unuse = total;
unsigned long pages = 0, pages_to_unuse = 0;
- int type;
assert_spin_locked(&swap_lock);
- for (type = swap_list.head; type >= 0; type = si->next) {
- si = swap_info[type];
+ plist_for_each_entry(si, &swap_active_head, list) {
si_frontswap_pages = atomic_read(&si->frontswap_pages);
if (total_pages_to_unuse < si_frontswap_pages) {
pages = pages_to_unuse = total_pages_to_unuse;
@@ -366,7 +361,7 @@ static int __frontswap_unuse_pages(unsigned long total, unsigned long *unused,
}
vm_unacct_memory(pages);
*unused = pages_to_unuse;
- *swapid = type;
+ *swapid = si->type;
ret = 0;
break;
}
@@ -413,7 +408,7 @@ void frontswap_shrink(unsigned long target_pages)
/*
* we don't want to hold swap_lock while doing a very
* lengthy try_to_unuse, but swap_list may change
- * so restart scan from swap_list.head each time
+ * so restart scan from swap_active_head each time
*/
spin_lock(&swap_lock);
ret = __frontswap_shrink(target_pages, &pages_to_unuse, &type);
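For context, a hedged sketch of the frontswap_shrink() caller this comment describes, simplified from the surrounding code:

	spin_lock(&swap_lock);
	ret = __frontswap_shrink(target_pages, &pages_to_unuse, &type);
	spin_unlock(&swap_lock);
	if (ret == 0)
		/* may sleep; the active list can change meanwhile, hence
		 * the restart-from-swap_active_head rule noted above */
		try_to_unuse(type, true, pages_to_unuse);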
diff --git a/mm/gup.c b/mm/gup.c
new file mode 100644
index 000000000000..cc5a9e7adea7
--- /dev/null
+++ b/mm/gup.c
@@ -0,0 +1,662 @@
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/err.h>
+#include <linux/spinlock.h>
+
+#include <linux/hugetlb.h>
+#include <linux/mm.h>
+#include <linux/pagemap.h>
+#include <linux/rmap.h>
+#include <linux/swap.h>
+#include <linux/swapops.h>
+
+#include "internal.h"
+
+static struct page *no_page_table(struct vm_area_struct *vma,
+ unsigned int flags)
+{
+ /*
+ * When core dumping an enormous anonymous area that nobody
+ * has touched so far, we don't want to allocate unnecessary pages or
+ * page tables. Return error instead of NULL to skip handle_mm_fault,
+ * then get_dump_page() will return NULL to leave a hole in the dump.
+ * But we can only make this optimization where a hole would surely
+ * be zero-filled if handle_mm_fault() actually did handle it.
+ */
+ if ((flags & FOLL_DUMP) && (!vma->vm_ops || !vma->vm_ops->fault))
+ return ERR_PTR(-EFAULT);
+ return NULL;
+}
+
+static struct page *follow_page_pte(struct vm_area_struct *vma,
+ unsigned long address, pmd_t *pmd, unsigned int flags)
+{
+ struct mm_struct *mm = vma->vm_mm;
+ struct page *page;
+ spinlock_t *ptl;
+ pte_t *ptep, pte;
+
+retry:
+ if (unlikely(pmd_bad(*pmd)))
+ return no_page_table(vma, flags);
+
+ ptep = pte_offset_map_lock(mm, pmd, address, &ptl);
+ pte = *ptep;
+ if (!pte_present(pte)) {
+ swp_entry_t entry;
+ /*
+ * KSM's break_ksm() relies upon recognizing a ksm page
+ * even while it is being migrated, so for that case we
+ * need migration_entry_wait().
+ */
+ if (likely(!(flags & FOLL_MIGRATION)))
+ goto no_page;
+ if (pte_none(pte) || pte_file(pte))
+ goto no_page;
+ entry = pte_to_swp_entry(pte);
+ if (!is_migration_entry(entry))
+ goto no_page;
+ pte_unmap_unlock(ptep, ptl);
+ migration_entry_wait(mm, pmd, address);
+ goto retry;
+ }
+ if ((flags & FOLL_NUMA) && pte_numa(pte))
+ goto no_page;
+ if ((flags & FOLL_WRITE) && !pte_write(pte)) {
+ pte_unmap_unlock(ptep, ptl);
+ return NULL;
+ }
+
+ page = vm_normal_page(vma, address, pte);
+ if (unlikely(!page)) {
+ if ((flags & FOLL_DUMP) ||
+ !is_zero_pfn(pte_pfn(pte)))
+ goto bad_page;
+ page = pte_page(pte);
+ }
+
+ if (flags & FOLL_GET)
+ get_page_foll(page);
+ if (flags & FOLL_TOUCH) {
+ if ((flags & FOLL_WRITE) &&
+ !pte_dirty(pte) && !PageDirty(page))
+ set_page_dirty(page);
+ /*
+ * pte_mkyoung() would be more correct here, but atomic care
+ * is needed to avoid losing the dirty bit: it is easier to use
+ * mark_page_accessed().
+ */
+ mark_page_accessed(page);
+ }
+ if ((flags & FOLL_MLOCK) && (vma->vm_flags & VM_LOCKED)) {
+ /*
+ * The preliminary mapping check is mainly to avoid the
+ * pointless overhead of lock_page on the ZERO_PAGE
+ * which might bounce very badly if there is contention.
+ *
+ * If the page is already locked, we don't need to
+ * handle it now - vmscan will handle it later if and
+ * when it attempts to reclaim the page.
+ */
+ if (page->mapping && trylock_page(page)) {
+ lru_add_drain(); /* push cached pages to LRU */
+ /*
+ * Because we lock page here, and migration is
+ * blocked by the pte's page reference, and we
+ * know the page is still mapped, we don't even
+ * need to check for file-cache page truncation.
+ */
+ mlock_vma_page(page);
+ unlock_page(page);
+ }
+ }
+ pte_unmap_unlock(ptep, ptl);
+ return page;
+bad_page:
+ pte_unmap_unlock(ptep, ptl);
+ return ERR_PTR(-EFAULT);
+
+no_page:
+ pte_unmap_unlock(ptep, ptl);
+ if (!pte_none(pte))
+ return NULL;
+ return no_page_table(vma, flags);
+}
+
+/**
+ * follow_page_mask - look up a page descriptor from a user-virtual address
+ * @vma: vm_area_struct mapping @address
+ * @address: virtual address to look up
+ * @flags: flags modifying lookup behaviour
+ * @page_mask: on output, *page_mask is set according to the size of the page
+ *
+ * @flags can have FOLL_ flags set, defined in <linux/mm.h>
+ *
+ * Returns the mapped (struct page *), %NULL if no mapping exists, or
+ * an error pointer if there is a mapping to something not represented
+ * by a page descriptor (see also vm_normal_page()).
+ */
+struct page *follow_page_mask(struct vm_area_struct *vma,
+ unsigned long address, unsigned int flags,
+ unsigned int *page_mask)
+{
+ pgd_t *pgd;
+ pud_t *pud;
+ pmd_t *pmd;
+ spinlock_t *ptl;
+ struct page *page;
+ struct mm_struct *mm = vma->vm_mm;
+
+ *page_mask = 0;
+
+ page = follow_huge_addr(mm, address, flags & FOLL_WRITE);
+ if (!IS_ERR(page)) {
+ BUG_ON(flags & FOLL_GET);
+ return page;
+ }
+
+ pgd = pgd_offset(mm, address);
+ if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
+ return no_page_table(vma, flags);
+
+ pud = pud_offset(pgd, address);
+ if (pud_none(*pud))
+ return no_page_table(vma, flags);
+ if (pud_huge(*pud) && vma->vm_flags & VM_HUGETLB) {
+ if (flags & FOLL_GET)
+ return NULL;
+ page = follow_huge_pud(mm, address, pud, flags & FOLL_WRITE);
+ return page;
+ }
+ if (unlikely(pud_bad(*pud)))
+ return no_page_table(vma, flags);
+
+ pmd = pmd_offset(pud, address);
+ if (pmd_none(*pmd))
+ return no_page_table(vma, flags);
+ if (pmd_huge(*pmd) && vma->vm_flags & VM_HUGETLB) {
+ page = follow_huge_pmd(mm, address, pmd, flags & FOLL_WRITE);
+ if (flags & FOLL_GET) {
+ /*
+ * Refcount on tail pages are not well-defined and
+ * shouldn't be taken. The caller should handle a NULL
+ * return when trying to follow tail pages.
+ */
+ if (PageHead(page))
+ get_page(page);
+ else
+ page = NULL;
+ }
+ return page;
+ }
+ if ((flags & FOLL_NUMA) && pmd_numa(*pmd))
+ return no_page_table(vma, flags);
+ if (pmd_trans_huge(*pmd)) {
+ if (flags & FOLL_SPLIT) {
+ split_huge_page_pmd(vma, address, pmd);
+ return follow_page_pte(vma, address, pmd, flags);
+ }
+ ptl = pmd_lock(mm, pmd);
+ if (likely(pmd_trans_huge(*pmd))) {
+ if (unlikely(pmd_trans_splitting(*pmd))) {
+ spin_unlock(ptl);
+ wait_split_huge_page(vma->anon_vma, pmd);
+ } else {
+ page = follow_trans_huge_pmd(vma, address,
+ pmd, flags);
+ spin_unlock(ptl);
+ *page_mask = HPAGE_PMD_NR - 1;
+ return page;
+ }
+ } else
+ spin_unlock(ptl);
+ }
+ return follow_page_pte(vma, address, pmd, flags);
+}
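A minimal usage sketch for the lookup above, assuming the caller holds mm->mmap_sem for read and pairs FOLL_GET with put_page():

	unsigned int page_mask;
	struct page *page;

	page = follow_page_mask(vma, address, FOLL_GET, &page_mask);
	if (!page)
		return -EFAULT;		/* no mapping present */
	if (IS_ERR(page))
		return PTR_ERR(page);	/* mapping with no page descriptor */
	/* ... inspect the page ... */
	put_page(page);			/* drop the FOLL_GET reference */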
+
+static int get_gate_page(struct mm_struct *mm, unsigned long address,
+ unsigned int gup_flags, struct vm_area_struct **vma,
+ struct page **page)
+{
+ pgd_t *pgd;
+ pud_t *pud;
+ pmd_t *pmd;
+ pte_t *pte;
+ int ret = -EFAULT;
+
+ /* user gate pages are read-only */
+ if (gup_flags & FOLL_WRITE)
+ return -EFAULT;
+ if (address > TASK_SIZE)
+ pgd = pgd_offset_k(address);
+ else
+ pgd = pgd_offset_gate(mm, address);
+ BUG_ON(pgd_none(*pgd));
+ pud = pud_offset(pgd, address);
+ BUG_ON(pud_none(*pud));
+ pmd = pmd_offset(pud, address);
+ if (pmd_none(*pmd))
+ return -EFAULT;
+ VM_BUG_ON(pmd_trans_huge(*pmd));
+ pte = pte_offset_map(pmd, address);
+ if (pte_none(*pte))
+ goto unmap;
+ *vma = get_gate_vma(mm);
+ if (!page)
+ goto out;
+ *page = vm_normal_page(*vma, address, *pte);
+ if (!*page) {
+ if ((gup_flags & FOLL_DUMP) || !is_zero_pfn(pte_pfn(*pte)))
+ goto unmap;
+ *page = pte_page(*pte);
+ }
+ get_page(*page);
+out:
+ ret = 0;
+unmap:
+ pte_unmap(pte);
+ return ret;
+}
+
+static int faultin_page(struct task_struct *tsk, struct vm_area_struct *vma,
+ unsigned long address, unsigned int *flags, int *nonblocking)
+{
+ struct mm_struct *mm = vma->vm_mm;
+ unsigned int fault_flags = 0;
+ int ret;
+
+ /* For mlock, just skip the stack guard page. */
+ if ((*flags & FOLL_MLOCK) &&
+ (stack_guard_page_start(vma, address) ||
+ stack_guard_page_end(vma, address + PAGE_SIZE)))
+ return -ENOENT;
+ if (*flags & FOLL_WRITE)
+ fault_flags |= FAULT_FLAG_WRITE;
+ if (nonblocking)
+ fault_flags |= FAULT_FLAG_ALLOW_RETRY;
+ if (*flags & FOLL_NOWAIT)
+ fault_flags |= FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_RETRY_NOWAIT;
+
+ ret = handle_mm_fault(mm, vma, address, fault_flags);
+ if (ret & VM_FAULT_ERROR) {
+ if (ret & VM_FAULT_OOM)
+ return -ENOMEM;
+ if (ret & (VM_FAULT_HWPOISON | VM_FAULT_HWPOISON_LARGE))
+ return *flags & FOLL_HWPOISON ? -EHWPOISON : -EFAULT;
+ if (ret & VM_FAULT_SIGBUS)
+ return -EFAULT;
+ BUG();
+ }
+
+ if (tsk) {
+ if (ret & VM_FAULT_MAJOR)
+ tsk->maj_flt++;
+ else
+ tsk->min_flt++;
+ }
+
+ if (ret & VM_FAULT_RETRY) {
+ if (nonblocking)
+ *nonblocking = 0;
+ return -EBUSY;
+ }
+
+ /*
+ * The VM_FAULT_WRITE bit tells us that do_wp_page has broken COW when
+ * necessary, even if maybe_mkwrite decided not to set pte_write. We
+ * can thus safely do subsequent page lookups as if they were reads.
+ * But only do so when looping for pte_write is futile: in some cases
+ * userspace may also want to write to the user page just obtained,
+ * which a read fault here might prevent (a readonly page might get
+ * reCOWed by userspace write).
+ */
+ if ((ret & VM_FAULT_WRITE) && !(vma->vm_flags & VM_WRITE))
+ *flags &= ~FOLL_WRITE;
+ return 0;
+}
+
+static int check_vma_flags(struct vm_area_struct *vma, unsigned long gup_flags)
+{
+ vm_flags_t vm_flags = vma->vm_flags;
+
+ if (vm_flags & (VM_IO | VM_PFNMAP))
+ return -EFAULT;
+
+ if (gup_flags & FOLL_WRITE) {
+ if (!(vm_flags & VM_WRITE)) {
+ if (!(gup_flags & FOLL_FORCE))
+ return -EFAULT;
+ /*
+ * We used to let the write,force case do COW in a
+ * VM_MAYWRITE VM_SHARED !VM_WRITE vma, so ptrace could
+ * set a breakpoint in a read-only mapping of an
+ * executable, without corrupting the file (yet only
+ * when that file had been opened for writing!).
+ * Anon pages in shared mappings are surprising: now
+ * just reject it.
+ */
+ if (!is_cow_mapping(vm_flags)) {
+ WARN_ON_ONCE(vm_flags & VM_MAYWRITE);
+ return -EFAULT;
+ }
+ }
+ } else if (!(vm_flags & VM_READ)) {
+ if (!(gup_flags & FOLL_FORCE))
+ return -EFAULT;
+ /*
+ * Is there actually any vma we can reach here which does not
+ * have VM_MAYREAD set?
+ */
+ if (!(vm_flags & VM_MAYREAD))
+ return -EFAULT;
+ }
+ return 0;
+}
+
+/**
+ * __get_user_pages() - pin user pages in memory
+ * @tsk: task_struct of target task
+ * @mm: mm_struct of target mm
+ * @start: starting user address
+ * @nr_pages: number of pages from start to pin
+ * @gup_flags: flags modifying pin behaviour
+ * @pages: array that receives pointers to the pages pinned.
+ * Should be at least nr_pages long. Or NULL, if caller
+ * only intends to ensure the pages are faulted in.
+ * @vmas: array of pointers to vmas corresponding to each page.
+ * Or NULL if the caller does not require them.
+ * @nonblocking: whether waiting for disk IO or mmap_sem contention
+ *
+ * Returns number of pages pinned. This may be fewer than the number
+ * requested. If nr_pages is 0 or negative, returns 0. If no pages
+ * were pinned, returns -errno. Each page returned must be released
+ * with a put_page() call when it is finished with. vmas will only
+ * remain valid while mmap_sem is held.
+ *
+ * Must be called with mmap_sem held for read or write.
+ *
+ * __get_user_pages walks a process's page tables and takes a reference to
+ * each struct page that each user address corresponds to at a given
+ * instant. That is, it takes the page that would be accessed if a user
+ * thread accesses the given user virtual address at that instant.
+ *
+ * This does not guarantee that the page exists in the user mappings when
+ * __get_user_pages returns, and there may even be a completely different
+ * page there in some cases (e.g. if mmapped pagecache has been invalidated
+ * and subsequently refaulted). However, it does guarantee that the page
+ * won't be freed completely. And mostly callers simply care that the page
+ * contains data that was valid *at some point in time*. Typically, an IO
+ * or similar operation cannot guarantee anything stronger anyway because
+ * locks can't be held over the syscall boundary.
+ *
+ * If @gup_flags & FOLL_WRITE == 0, the page must not be written to. If
+ * the page is written to, set_page_dirty (or set_page_dirty_lock, as
+ * appropriate) must be called after the page is finished with, and
+ * before put_page is called.
+ *
+ * If @nonblocking != NULL, __get_user_pages will not wait for disk IO
+ * or mmap_sem contention, and if waiting is needed to pin all pages,
+ * *@nonblocking will be set to 0.
+ *
+ * In most cases, get_user_pages or get_user_pages_fast should be used
+ * instead of __get_user_pages. __get_user_pages should be used only if
+ * you need some special @gup_flags.
+ */
+long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
+ unsigned long start, unsigned long nr_pages,
+ unsigned int gup_flags, struct page **pages,
+ struct vm_area_struct **vmas, int *nonblocking)
+{
+ long i = 0;
+ unsigned int page_mask;
+ struct vm_area_struct *vma = NULL;
+
+ if (!nr_pages)
+ return 0;
+
+ VM_BUG_ON(!!pages != !!(gup_flags & FOLL_GET));
+
+ /*
+ * If FOLL_FORCE is set then do not force a full fault as the hinting
+ * fault information is unrelated to the reference behaviour of a task
+ * using the address space
+ */
+ if (!(gup_flags & FOLL_FORCE))
+ gup_flags |= FOLL_NUMA;
+
+ do {
+ struct page *page;
+ unsigned int foll_flags = gup_flags;
+ unsigned int page_increm;
+
+ /* first iteration or crossing a vma boundary */
+ if (!vma || start >= vma->vm_end) {
+ vma = find_extend_vma(mm, start);
+ if (!vma && in_gate_area(mm, start)) {
+ int ret;
+ ret = get_gate_page(mm, start & PAGE_MASK,
+ gup_flags, &vma,
+ pages ? &pages[i] : NULL);
+ if (ret)
+ return i ? : ret;
+ page_mask = 0;
+ goto next_page;
+ }
+
+ if (!vma || check_vma_flags(vma, gup_flags))
+ return i ? : -EFAULT;
+ if (is_vm_hugetlb_page(vma)) {
+ i = follow_hugetlb_page(mm, vma, pages, vmas,
+ &start, &nr_pages, i,
+ gup_flags);
+ continue;
+ }
+ }
+retry:
+ /*
+ * If we have a pending SIGKILL, don't keep faulting pages and
+ * potentially allocating memory.
+ */
+ if (unlikely(fatal_signal_pending(current)))
+ return i ? i : -ERESTARTSYS;
+ cond_resched();
+ page = follow_page_mask(vma, start, foll_flags, &page_mask);
+ if (!page) {
+ int ret;
+ ret = faultin_page(tsk, vma, start, &foll_flags,
+ nonblocking);
+ switch (ret) {
+ case 0:
+ goto retry;
+ case -EFAULT:
+ case -ENOMEM:
+ case -EHWPOISON:
+ return i ? i : ret;
+ case -EBUSY:
+ return i;
+ case -ENOENT:
+ goto next_page;
+ }
+ BUG();
+ }
+ if (IS_ERR(page))
+ return i ? i : PTR_ERR(page);
+ if (pages) {
+ pages[i] = page;
+ flush_anon_page(vma, page, start);
+ flush_dcache_page(page);
+ page_mask = 0;
+ }
+next_page:
+ if (vmas) {
+ vmas[i] = vma;
+ page_mask = 0;
+ }
+ page_increm = 1 + (~(start >> PAGE_SHIFT) & page_mask);
+ if (page_increm > nr_pages)
+ page_increm = nr_pages;
+ i += page_increm;
+ start += page_increm * PAGE_SIZE;
+ nr_pages -= page_increm;
+ } while (nr_pages);
+ return i;
+}
+EXPORT_SYMBOL(__get_user_pages);
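A hedged sketch of how a populate-style caller can use @nonblocking to let faults drop mmap_sem; the retake-and-exit sequence is illustrative:

	int nonblocking = 1;
	long ret;

	down_read(&mm->mmap_sem);
	/* pages == NULL, so no FOLL_GET: only fault the range in */
	ret = __get_user_pages(tsk, mm, start, nr_pages, FOLL_TOUCH,
			       NULL, NULL, &nonblocking);
	if (!nonblocking)
		down_read(&mm->mmap_sem);	/* a fault released the lock */
	up_read(&mm->mmap_sem);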
+
+/*
+ * fixup_user_fault() - manually resolve a user page fault
+ * @tsk: the task_struct to use for page fault accounting, or
+ * NULL if faults are not to be recorded.
+ * @mm: mm_struct of target mm
+ * @address: user address
+ * @fault_flags: flags to pass down to handle_mm_fault()
+ *
+ * This is meant to be called in the specific scenario where, for locking
+ * reasons, we try to access user memory in atomic context (within a
+ * pagefault_disable() section); that access returns -EFAULT, and we want to
+ * resolve the user fault before trying again.
+ *
+ * Typically this is meant to be used by the futex code.
+ *
+ * The main difference with get_user_pages() is that this function will
+ * unconditionally call handle_mm_fault(), which will in turn perform all the
+ * necessary SW fixup of the dirty and young bits in the PTE, while
+ * get_user_pages() only guarantees to update these in the struct page.
+ *
+ * This is important for some architectures where those bits also gate the
+ * access permission to the page because they are maintained in software. On
+ * such architectures, gup() will not be enough to make a subsequent access
+ * succeed.
+ *
+ * This should be called with the mmap_sem held for read.
+ */
+int fixup_user_fault(struct task_struct *tsk, struct mm_struct *mm,
+ unsigned long address, unsigned int fault_flags)
+{
+ struct vm_area_struct *vma;
+ vm_flags_t vm_flags;
+ int ret;
+
+ vma = find_extend_vma(mm, address);
+ if (!vma || address < vma->vm_start)
+ return -EFAULT;
+
+ vm_flags = (fault_flags & FAULT_FLAG_WRITE) ? VM_WRITE : VM_READ;
+ if (!(vm_flags & vma->vm_flags))
+ return -EFAULT;
+
+ ret = handle_mm_fault(mm, vma, address, fault_flags);
+ if (ret & VM_FAULT_ERROR) {
+ if (ret & VM_FAULT_OOM)
+ return -ENOMEM;
+ if (ret & (VM_FAULT_HWPOISON | VM_FAULT_HWPOISON_LARGE))
+ return -EHWPOISON;
+ if (ret & VM_FAULT_SIGBUS)
+ return -EFAULT;
+ BUG();
+ }
+ if (tsk) {
+ if (ret & VM_FAULT_MAJOR)
+ tsk->maj_flt++;
+ else
+ tsk->min_flt++;
+ }
+ return 0;
+}
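A hedged sketch of the futex-style retry loop this helper serves (variable names and error handling are illustrative):

	while (1) {
		pagefault_disable();
		ret = __copy_from_user_inatomic(&val, uaddr, sizeof(val));
		pagefault_enable();
		if (!ret)
			break;			/* atomic access succeeded */
		down_read(&mm->mmap_sem);
		ret = fixup_user_fault(current, mm, (unsigned long)uaddr,
				       0 /* read fault */);
		up_read(&mm->mmap_sem);
		if (ret)
			return ret;		/* fault cannot be resolved */
	}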
+
+/*
+ * get_user_pages() - pin user pages in memory
+ * @tsk: the task_struct to use for page fault accounting, or
+ * NULL if faults are not to be recorded.
+ * @mm: mm_struct of target mm
+ * @start: starting user address
+ * @nr_pages: number of pages from start to pin
+ * @write: whether pages will be written to by the caller
+ * @force: whether to force access even when user mapping is currently
+ * protected (but never forces write access to shared mapping).
+ * @pages: array that receives pointers to the pages pinned.
+ * Should be at least nr_pages long. Or NULL, if caller
+ * only intends to ensure the pages are faulted in.
+ * @vmas: array of pointers to vmas corresponding to each page.
+ * Or NULL if the caller does not require them.
+ *
+ * Returns number of pages pinned. This may be fewer than the number
+ * requested. If nr_pages is 0 or negative, returns 0. If no pages
+ * were pinned, returns -errno. Each page returned must be released
+ * with a put_page() call when it is finished with. vmas will only
+ * remain valid while mmap_sem is held.
+ *
+ * Must be called with mmap_sem held for read or write.
+ *
+ * get_user_pages walks a process's page tables and takes a reference to
+ * each struct page that each user address corresponds to at a given
+ * instant. That is, it takes the page that would be accessed if a user
+ * thread accesses the given user virtual address at that instant.
+ *
+ * This does not guarantee that the page exists in the user mappings when
+ * get_user_pages returns, and there may even be a completely different
+ * page there in some cases (e.g. if mmapped pagecache has been invalidated
+ * and subsequently refaulted). However, it does guarantee that the page
+ * won't be freed completely. And mostly callers simply care that the page
+ * contains data that was valid *at some point in time*. Typically, an IO
+ * or similar operation cannot guarantee anything stronger anyway because
+ * locks can't be held over the syscall boundary.
+ *
+ * If write=0, the page must not be written to. If the page is written to,
+ * set_page_dirty (or set_page_dirty_lock, as appropriate) must be called
+ * after the page is finished with, and before put_page is called.
+ *
+ * get_user_pages is typically used for fewer-copy IO operations, to get a
+ * handle on the memory by some means other than accesses via the user virtual
+ * addresses. The pages may be submitted for DMA to devices or accessed via
+ * their kernel linear mapping (via the kmap APIs). Care should be taken to
+ * use the correct cache flushing APIs.
+ *
+ * See also get_user_pages_fast, for performance critical applications.
+ */
+long get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
+ unsigned long start, unsigned long nr_pages, int write,
+ int force, struct page **pages, struct vm_area_struct **vmas)
+{
+ int flags = FOLL_TOUCH;
+
+ if (pages)
+ flags |= FOLL_GET;
+ if (write)
+ flags |= FOLL_WRITE;
+ if (force)
+ flags |= FOLL_FORCE;
+
+ return __get_user_pages(tsk, mm, start, nr_pages, flags, pages, vmas,
+ NULL);
+}
+EXPORT_SYMBOL(get_user_pages);
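A hedged sketch of the typical pin-for-IO pattern (partial-pin cleanup is abbreviated; NPAGES and uaddr are illustrative):

	struct page *pages[NPAGES];
	long got;
	int i;

	down_read(&current->mm->mmap_sem);
	got = get_user_pages(current, current->mm, uaddr & PAGE_MASK,
			     NPAGES, 1 /* write */, 0 /* force */,
			     pages, NULL);
	up_read(&current->mm->mmap_sem);
	if (got < 0)
		return got;
	/* ... DMA into the pages, or access them via kmap() ... */
	for (i = 0; i < got; i++) {
		set_page_dirty_lock(pages[i]);	/* the pages were written */
		put_page(pages[i]);
	}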
+
+/**
+ * get_dump_page() - pin user page in memory while writing it to core dump
+ * @addr: user address
+ *
+ * Returns struct page pointer of user page pinned for dump,
+ * to be freed afterwards by page_cache_release() or put_page().
+ *
+ * Returns NULL on any kind of failure - a hole must then be inserted into
+ * the corefile, to preserve alignment with its headers; and also returns
+ * NULL wherever the ZERO_PAGE, or an anonymous pte_none, has been found -
+ * allowing a hole to be left in the corefile to save diskspace.
+ *
+ * Called without mmap_sem, but after all other threads have been killed.
+ */
+#ifdef CONFIG_ELF_CORE
+struct page *get_dump_page(unsigned long addr)
+{
+ struct vm_area_struct *vma;
+ struct page *page;
+
+ if (__get_user_pages(current, current->mm, addr, 1,
+ FOLL_FORCE | FOLL_DUMP | FOLL_GET, &page, &vma,
+ NULL) < 1)
+ return NULL;
+ flush_cache_page(vma, addr, page_to_pfn(page));
+ return page;
+}
+#endif /* CONFIG_ELF_CORE */
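A hedged sketch of how a core dumper consumes get_dump_page(), emitting a hole for each NULL return (simplified; the dump helpers are illustrative, not the exact binfmt_elf API):

	for (addr = vma->vm_start; addr < vma->vm_end; addr += PAGE_SIZE) {
		struct page *page = get_dump_page(addr);
		int stop;

		if (page) {
			void *kaddr = kmap(page);
			stop = !dump_write(file, kaddr, PAGE_SIZE);
			kunmap(page);
			page_cache_release(page);
		} else
			stop = !dump_skip(file, PAGE_SIZE);	/* hole */
		if (stop)
			break;
	}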
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index d199d2d91946..c5ff461e0253 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -5,6 +5,8 @@
* the COPYING file in the top-level directory.
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/mm.h>
#include <linux/sched.h>
#include <linux/highmem.h>
@@ -151,8 +153,7 @@ static int start_khugepaged(void)
khugepaged_thread = kthread_run(khugepaged, NULL,
"khugepaged");
if (unlikely(IS_ERR(khugepaged_thread))) {
- printk(KERN_ERR
- "khugepaged: kthread_run(khugepaged) failed\n");
+ pr_err("khugepaged: kthread_run(khugepaged) failed\n");
err = PTR_ERR(khugepaged_thread);
khugepaged_thread = NULL;
}
@@ -584,19 +585,19 @@ static int __init hugepage_init_sysfs(struct kobject **hugepage_kobj)
*hugepage_kobj = kobject_create_and_add("transparent_hugepage", mm_kobj);
if (unlikely(!*hugepage_kobj)) {
- printk(KERN_ERR "hugepage: failed to create transparent hugepage kobject\n");
+ pr_err("failed to create transparent hugepage kobject\n");
return -ENOMEM;
}
err = sysfs_create_group(*hugepage_kobj, &hugepage_attr_group);
if (err) {
- printk(KERN_ERR "hugepage: failed to register transparent hugepage group\n");
+ pr_err("failed to register transparent hugepage group\n");
goto delete_obj;
}
err = sysfs_create_group(*hugepage_kobj, &khugepaged_attr_group);
if (err) {
- printk(KERN_ERR "hugepage: failed to register transparent hugepage group\n");
+ pr_err("failed to register transparent hugepage group\n");
goto remove_hp_group;
}
@@ -689,8 +690,7 @@ static int __init setup_transparent_hugepage(char *str)
}
out:
if (!ret)
- printk(KERN_WARNING
- "transparent_hugepage= cannot parse, ignored\n");
+ pr_warn("transparent_hugepage= cannot parse, ignored\n");
return ret;
}
__setup("transparent_hugepage=", setup_transparent_hugepage);
@@ -1807,7 +1807,7 @@ static void __split_huge_page(struct page *page,
struct list_head *list)
{
int mapcount, mapcount2;
- pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
+ pgoff_t pgoff = page_pgoff(page);
struct anon_vma_chain *avc;
BUG_ON(!PageHead(page));
@@ -1830,10 +1830,11 @@ static void __split_huge_page(struct page *page,
* the newly established pmd of the child later during the
* walk, to be able to set it as pmd_trans_splitting too.
*/
- if (mapcount != page_mapcount(page))
- printk(KERN_ERR "mapcount %d page_mapcount %d\n",
- mapcount, page_mapcount(page));
- BUG_ON(mapcount != page_mapcount(page));
+ if (mapcount != page_mapcount(page)) {
+ pr_err("mapcount %d page_mapcount %d\n",
+ mapcount, page_mapcount(page));
+ BUG();
+ }
__split_huge_page_refcount(page, list);
@@ -1844,10 +1845,11 @@ static void __split_huge_page(struct page *page,
BUG_ON(is_vma_temporary_stack(vma));
mapcount2 += __split_huge_page_map(page, vma, addr);
}
- if (mapcount != mapcount2)
- printk(KERN_ERR "mapcount %d mapcount2 %d page_mapcount %d\n",
- mapcount, mapcount2, page_mapcount(page));
- BUG_ON(mapcount != mapcount2);
+ if (mapcount != mapcount2) {
+ pr_err("mapcount %d mapcount2 %d page_mapcount %d\n",
+ mapcount, mapcount2, page_mapcount(page));
+ BUG();
+ }
}
/*
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index c82290b9c1fc..e73f7bccd10c 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -31,6 +31,7 @@
#include <linux/io.h>
#include <linux/hugetlb.h>
+#include <linux/hugetlb_inline.h>
#include <linux/hugetlb_cgroup.h>
#include <linux/node.h>
#include "internal.h"
@@ -607,25 +608,242 @@ err:
return NULL;
}
+/*
+ * common helper functions for hstate_next_node_to_{alloc|free}.
+ * We may have allocated or freed a huge page based on a different
+ * nodes_allowed previously, so h->next_node_to_{alloc|free} might
+ * be outside of *nodes_allowed. Ensure that we use an allowed
+ * node for alloc or free.
+ */
+static int next_node_allowed(int nid, nodemask_t *nodes_allowed)
+{
+ nid = next_node(nid, *nodes_allowed);
+ if (nid == MAX_NUMNODES)
+ nid = first_node(*nodes_allowed);
+ VM_BUG_ON(nid >= MAX_NUMNODES);
+
+ return nid;
+}
+
+static int get_valid_node_allowed(int nid, nodemask_t *nodes_allowed)
+{
+ if (!node_isset(nid, *nodes_allowed))
+ nid = next_node_allowed(nid, nodes_allowed);
+ return nid;
+}
+
+/*
+ * returns the previously saved node ["this node"] from which to
+ * allocate a persistent huge page for the pool and advance the
+ * next node from which to allocate, handling wrap at end of node
+ * mask.
+ */
+static int hstate_next_node_to_alloc(struct hstate *h,
+ nodemask_t *nodes_allowed)
+{
+ int nid;
+
+ VM_BUG_ON(!nodes_allowed);
+
+ nid = get_valid_node_allowed(h->next_nid_to_alloc, nodes_allowed);
+ h->next_nid_to_alloc = next_node_allowed(nid, nodes_allowed);
+
+ return nid;
+}
+
+/*
+ * helper for free_pool_huge_page() - return the previously saved
+ * node ["this node"] from which to free a huge page. Advance the
+ * next node id whether or not we find a free huge page to free so
+ * that the next attempt to free addresses the next node.
+ */
+static int hstate_next_node_to_free(struct hstate *h, nodemask_t *nodes_allowed)
+{
+ int nid;
+
+ VM_BUG_ON(!nodes_allowed);
+
+ nid = get_valid_node_allowed(h->next_nid_to_free, nodes_allowed);
+ h->next_nid_to_free = next_node_allowed(nid, nodes_allowed);
+
+ return nid;
+}
+
+#define for_each_node_mask_to_alloc(hs, nr_nodes, node, mask) \
+ for (nr_nodes = nodes_weight(*mask); \
+ nr_nodes > 0 && \
+ ((node = hstate_next_node_to_alloc(hs, mask)) || 1); \
+ nr_nodes--)
+
+#define for_each_node_mask_to_free(hs, nr_nodes, node, mask) \
+ for (nr_nodes = nodes_weight(*mask); \
+ nr_nodes > 0 && \
+ ((node = hstate_next_node_to_free(hs, mask)) || 1); \
+ nr_nodes--)
+
+#if defined(CONFIG_CMA) && defined(CONFIG_X86_64)
+static void destroy_compound_gigantic_page(struct page *page,
+ unsigned long order)
+{
+ int i;
+ int nr_pages = 1 << order;
+ struct page *p = page + 1;
+
+ for (i = 1; i < nr_pages; i++, p = mem_map_next(p, page, i)) {
+ __ClearPageTail(p);
+ set_page_refcounted(p);
+ p->first_page = NULL;
+ }
+
+ set_compound_order(page, 0);
+ __ClearPageHead(page);
+}
+
+static void free_gigantic_page(struct page *page, unsigned order)
+{
+ free_contig_range(page_to_pfn(page), 1 << order);
+}
+
+static int __alloc_gigantic_page(unsigned long start_pfn,
+ unsigned long nr_pages)
+{
+ unsigned long end_pfn = start_pfn + nr_pages;
+ return alloc_contig_range(start_pfn, end_pfn, MIGRATE_MOVABLE);
+}
+
+static bool pfn_range_valid_gigantic(unsigned long start_pfn,
+ unsigned long nr_pages)
+{
+ unsigned long i, end_pfn = start_pfn + nr_pages;
+ struct page *page;
+
+ for (i = start_pfn; i < end_pfn; i++) {
+ if (!pfn_valid(i))
+ return false;
+
+ page = pfn_to_page(i);
+
+ if (PageReserved(page))
+ return false;
+
+ if (page_count(page) > 0)
+ return false;
+
+ if (PageHuge(page))
+ return false;
+ }
+
+ return true;
+}
+
+static bool zone_spans_last_pfn(const struct zone *zone,
+ unsigned long start_pfn, unsigned long nr_pages)
+{
+ unsigned long last_pfn = start_pfn + nr_pages - 1;
+ return zone_spans_pfn(zone, last_pfn);
+}
+
+static struct page *alloc_gigantic_page(int nid, unsigned order)
+{
+ unsigned long nr_pages = 1 << order;
+ unsigned long ret, pfn, flags;
+ struct zone *z;
+
+ z = NODE_DATA(nid)->node_zones;
+ for (; z - NODE_DATA(nid)->node_zones < MAX_NR_ZONES; z++) {
+ spin_lock_irqsave(&z->lock, flags);
+
+ pfn = ALIGN(z->zone_start_pfn, nr_pages);
+ while (zone_spans_last_pfn(z, pfn, nr_pages)) {
+ if (pfn_range_valid_gigantic(pfn, nr_pages)) {
+ /*
+ * We release the zone lock here because
+ * alloc_contig_range() will also lock the zone
+ * at some point. If there's an allocation
+ * spinning on this lock, it may win the race
+ * and cause alloc_contig_range() to fail...
+ */
+ spin_unlock_irqrestore(&z->lock, flags);
+ ret = __alloc_gigantic_page(pfn, nr_pages);
+ if (!ret)
+ return pfn_to_page(pfn);
+ spin_lock_irqsave(&z->lock, flags);
+ }
+ pfn += nr_pages;
+ }
+
+ spin_unlock_irqrestore(&z->lock, flags);
+ }
+
+ return NULL;
+}
+
+static void prep_new_huge_page(struct hstate *h, struct page *page, int nid);
+static void prep_compound_gigantic_page(struct page *page, unsigned long order);
+
+static struct page *alloc_fresh_gigantic_page_node(struct hstate *h, int nid)
+{
+ struct page *page;
+
+ page = alloc_gigantic_page(nid, huge_page_order(h));
+ if (page) {
+ prep_compound_gigantic_page(page, huge_page_order(h));
+ prep_new_huge_page(h, page, nid);
+ }
+
+ return page;
+}
+
+static int alloc_fresh_gigantic_page(struct hstate *h,
+ nodemask_t *nodes_allowed)
+{
+ struct page *page = NULL;
+ int nr_nodes, node;
+
+ for_each_node_mask_to_alloc(h, nr_nodes, node, nodes_allowed) {
+ page = alloc_fresh_gigantic_page_node(h, node);
+ if (page)
+ return 1;
+ }
+
+ return 0;
+}
+
+static inline bool gigantic_page_supported(void) { return true; }
+#else
+static inline bool gigantic_page_supported(void) { return false; }
+static inline void free_gigantic_page(struct page *page, unsigned order) { }
+static inline void destroy_compound_gigantic_page(struct page *page,
+ unsigned long order) { }
+static inline int alloc_fresh_gigantic_page(struct hstate *h,
+ nodemask_t *nodes_allowed) { return 0; }
+#endif
+
static void update_and_free_page(struct hstate *h, struct page *page)
{
int i;
- VM_BUG_ON(h->order >= MAX_ORDER);
+ if (hstate_is_gigantic(h) && !gigantic_page_supported())
+ return;
h->nr_huge_pages--;
h->nr_huge_pages_node[page_to_nid(page)]--;
for (i = 0; i < pages_per_huge_page(h); i++) {
page[i].flags &= ~(1 << PG_locked | 1 << PG_error |
1 << PG_referenced | 1 << PG_dirty |
- 1 << PG_active | 1 << PG_reserved |
- 1 << PG_private | 1 << PG_writeback);
+ 1 << PG_active | 1 << PG_private |
+ 1 << PG_writeback);
}
VM_BUG_ON_PAGE(hugetlb_cgroup_from_page(page), page);
set_compound_page_dtor(page, NULL);
set_page_refcounted(page);
- arch_release_hugepage(page);
- __free_pages(page, huge_page_order(h));
+ if (hstate_is_gigantic(h)) {
+ destroy_compound_gigantic_page(page, huge_page_order(h));
+ free_gigantic_page(page, huge_page_order(h));
+ } else {
+ arch_release_hugepage(page);
+ __free_pages(page, huge_page_order(h));
+ }
}
struct hstate *size_to_hstate(unsigned long size)
@@ -664,7 +882,7 @@ static void free_huge_page(struct page *page)
if (restore_reserve)
h->resv_huge_pages++;
- if (h->surplus_huge_pages_node[nid] && huge_page_order(h) < MAX_ORDER) {
+ if (h->surplus_huge_pages_node[nid]) {
/* remove the page from active list */
list_del(&page->lru);
update_and_free_page(h, page);
@@ -690,8 +908,7 @@ static void prep_new_huge_page(struct hstate *h, struct page *page, int nid)
put_page(page); /* free it into the hugepage allocator */
}
-static void __init prep_compound_gigantic_page(struct page *page,
- unsigned long order)
+static void prep_compound_gigantic_page(struct page *page, unsigned long order)
{
int i;
int nr_pages = 1 << order;
@@ -769,9 +986,6 @@ static struct page *alloc_fresh_huge_page_node(struct hstate *h, int nid)
{
struct page *page;
- if (h->order >= MAX_ORDER)
- return NULL;
-
page = alloc_pages_exact_node(nid,
htlb_alloc_mask(h)|__GFP_COMP|__GFP_THISNODE|
__GFP_REPEAT|__GFP_NOWARN,
@@ -787,79 +1001,6 @@ static struct page *alloc_fresh_huge_page_node(struct hstate *h, int nid)
return page;
}
-/*
- * common helper functions for hstate_next_node_to_{alloc|free}.
- * We may have allocated or freed a huge page based on a different
- * nodes_allowed previously, so h->next_node_to_{alloc|free} might
- * be outside of *nodes_allowed. Ensure that we use an allowed
- * node for alloc or free.
- */
-static int next_node_allowed(int nid, nodemask_t *nodes_allowed)
-{
- nid = next_node(nid, *nodes_allowed);
- if (nid == MAX_NUMNODES)
- nid = first_node(*nodes_allowed);
- VM_BUG_ON(nid >= MAX_NUMNODES);
-
- return nid;
-}
-
-static int get_valid_node_allowed(int nid, nodemask_t *nodes_allowed)
-{
- if (!node_isset(nid, *nodes_allowed))
- nid = next_node_allowed(nid, nodes_allowed);
- return nid;
-}
-
-/*
- * returns the previously saved node ["this node"] from which to
- * allocate a persistent huge page for the pool and advance the
- * next node from which to allocate, handling wrap at end of node
- * mask.
- */
-static int hstate_next_node_to_alloc(struct hstate *h,
- nodemask_t *nodes_allowed)
-{
- int nid;
-
- VM_BUG_ON(!nodes_allowed);
-
- nid = get_valid_node_allowed(h->next_nid_to_alloc, nodes_allowed);
- h->next_nid_to_alloc = next_node_allowed(nid, nodes_allowed);
-
- return nid;
-}
-
-/*
- * helper for free_pool_huge_page() - return the previously saved
- * node ["this node"] from which to free a huge page. Advance the
- * next node id whether or not we find a free huge page to free so
- * that the next attempt to free addresses the next node.
- */
-static int hstate_next_node_to_free(struct hstate *h, nodemask_t *nodes_allowed)
-{
- int nid;
-
- VM_BUG_ON(!nodes_allowed);
-
- nid = get_valid_node_allowed(h->next_nid_to_free, nodes_allowed);
- h->next_nid_to_free = next_node_allowed(nid, nodes_allowed);
-
- return nid;
-}
-
-#define for_each_node_mask_to_alloc(hs, nr_nodes, node, mask) \
- for (nr_nodes = nodes_weight(*mask); \
- nr_nodes > 0 && \
- ((node = hstate_next_node_to_alloc(hs, mask)) || 1); \
- nr_nodes--)
-
-#define for_each_node_mask_to_free(hs, nr_nodes, node, mask) \
- for (nr_nodes = nodes_weight(*mask); \
- nr_nodes > 0 && \
- ((node = hstate_next_node_to_free(hs, mask)) || 1); \
- nr_nodes--)
-
static int alloc_fresh_huge_page(struct hstate *h, nodemask_t *nodes_allowed)
{
struct page *page;
@@ -963,7 +1104,7 @@ static struct page *alloc_buddy_huge_page(struct hstate *h, int nid)
struct page *page;
unsigned int r_nid;
- if (h->order >= MAX_ORDER)
+ if (hstate_is_gigantic(h))
return NULL;
/*
@@ -1156,7 +1297,7 @@ static void return_unused_surplus_pages(struct hstate *h,
h->resv_huge_pages -= unused_resv_pages;
/* Cannot return gigantic pages currently */
- if (h->order >= MAX_ORDER)
+ if (hstate_is_gigantic(h))
return;
nr_pages = min(unused_resv_pages, h->surplus_huge_pages);
@@ -1356,7 +1497,7 @@ static void __init gather_bootmem_prealloc(void)
* fix confusing memory reports from free(1) and another
* side-effects, like CommitLimit going negative.
*/
- if (h->order > (MAX_ORDER - 1))
+ if (hstate_is_gigantic(h))
adjust_managed_page_count(page, 1 << h->order);
}
}
@@ -1366,7 +1507,7 @@ static void __init hugetlb_hstate_alloc_pages(struct hstate *h)
unsigned long i;
for (i = 0; i < h->max_huge_pages; ++i) {
- if (h->order >= MAX_ORDER) {
+ if (hstate_is_gigantic(h)) {
if (!alloc_bootmem_huge_page(h))
break;
} else if (!alloc_fresh_huge_page(h,
@@ -1382,7 +1523,7 @@ static void __init hugetlb_init_hstates(void)
for_each_hstate(h) {
/* oversize hugepages were init'ed in early boot */
- if (h->order < MAX_ORDER)
+ if (!hstate_is_gigantic(h))
hugetlb_hstate_alloc_pages(h);
}
}
@@ -1416,7 +1557,7 @@ static void try_to_free_low(struct hstate *h, unsigned long count,
{
int i;
- if (h->order >= MAX_ORDER)
+ if (hstate_is_gigantic(h))
return;
for_each_node_mask(i, *nodes_allowed) {
@@ -1479,7 +1620,7 @@ static unsigned long set_max_huge_pages(struct hstate *h, unsigned long count,
{
unsigned long min_count, ret;
- if (h->order >= MAX_ORDER)
+ if (hstate_is_gigantic(h) && !gigantic_page_supported())
return h->max_huge_pages;
/*
@@ -1506,7 +1647,10 @@ static unsigned long set_max_huge_pages(struct hstate *h, unsigned long count,
* and reducing the surplus.
*/
spin_unlock(&hugetlb_lock);
- ret = alloc_fresh_huge_page(h, nodes_allowed);
+ if (hstate_is_gigantic(h))
+ ret = alloc_fresh_gigantic_page(h, nodes_allowed);
+ else
+ ret = alloc_fresh_huge_page(h, nodes_allowed);
spin_lock(&hugetlb_lock);
if (!ret)
goto out;
@@ -1606,7 +1750,7 @@ static ssize_t nr_hugepages_store_common(bool obey_mempolicy,
goto out;
h = kobj_to_hstate(kobj, &nid);
- if (h->order >= MAX_ORDER) {
+ if (hstate_is_gigantic(h) && !gigantic_page_supported()) {
err = -EINVAL;
goto out;
}
@@ -1689,7 +1833,7 @@ static ssize_t nr_overcommit_hugepages_store(struct kobject *kobj,
unsigned long input;
struct hstate *h = kobj_to_hstate(kobj, NULL);
- if (h->order >= MAX_ORDER)
+ if (hstate_is_gigantic(h))
return -EINVAL;
err = kstrtoul(buf, 10, &input);
@@ -2113,7 +2257,7 @@ static int hugetlb_sysctl_handler_common(bool obey_mempolicy,
tmp = h->max_huge_pages;
- if (write && h->order >= MAX_ORDER)
+ if (write && hstate_is_gigantic(h) && !gigantic_page_supported())
return -EINVAL;
table->data = &tmp;
@@ -2169,7 +2313,7 @@ int hugetlb_overcommit_handler(struct ctl_table *table, int write,
tmp = h->nr_overcommit_huge_pages;
- if (write && h->order >= MAX_ORDER)
+ if (write && hstate_is_gigantic(h))
return -EINVAL;
table->data = &tmp;
diff --git a/mm/internal.h b/mm/internal.h
index 07b67361a40a..802c3a4fc03a 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -134,7 +134,7 @@ struct compact_control {
unsigned long nr_migratepages; /* Number of pages to migrate */
unsigned long free_pfn; /* isolate_freepages search base */
unsigned long migrate_pfn; /* isolate_migratepages search base */
- bool sync; /* Synchronous migration */
+ enum migrate_mode mode; /* Async or sync migration mode */
bool ignore_skip_hint; /* Scan blocks even if marked skip */
bool finished_update_free; /* True when the zone cached pfns are
* no longer being updated
@@ -169,6 +169,11 @@ static inline unsigned long page_order(struct page *page)
return page_private(page);
}
+static inline bool is_cow_mapping(vm_flags_t flags)
+{
+ return (flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE;
+}
+
/* mm/util.c */
void __vma_link_list(struct mm_struct *mm, struct vm_area_struct *vma,
struct vm_area_struct *prev, struct rb_node *rb_parent);
@@ -184,26 +189,6 @@ static inline void munlock_vma_pages_all(struct vm_area_struct *vma)
}
/*
- * Called only in fault path, to determine if a new page is being
- * mapped into a LOCKED vma. If it is, mark page as mlocked.
- */
-static inline int mlocked_vma_newpage(struct vm_area_struct *vma,
- struct page *page)
-{
- VM_BUG_ON_PAGE(PageLRU(page), page);
-
- if (likely((vma->vm_flags & (VM_LOCKED | VM_SPECIAL)) != VM_LOCKED))
- return 0;
-
- if (!TestSetPageMlocked(page)) {
- mod_zone_page_state(page_zone(page), NR_MLOCK,
- hpage_nr_pages(page));
- count_vm_event(UNEVICTABLE_PGMLOCKED);
- }
- return 1;
-}
-
-/*
* must be called with vma's mmap_sem held for read or write, and page locked.
*/
extern void mlock_vma_page(struct page *page);
@@ -245,10 +230,6 @@ extern unsigned long vma_address(struct page *page,
struct vm_area_struct *vma);
#endif
#else /* !CONFIG_MMU */
-static inline int mlocked_vma_newpage(struct vm_area_struct *v, struct page *p)
-{
- return 0;
-}
static inline void clear_page_mlock(struct page *page) { }
static inline void mlock_vma_page(struct page *page) { }
static inline void mlock_migrate_page(struct page *new, struct page *old) { }
diff --git a/mm/kmemleak.c b/mm/kmemleak.c
index 8d2fcdfeff7f..3cda50c1e394 100644
--- a/mm/kmemleak.c
+++ b/mm/kmemleak.c
@@ -387,7 +387,7 @@ static void dump_object_info(struct kmemleak_object *object)
pr_notice(" min_count = %d\n", object->min_count);
pr_notice(" count = %d\n", object->count);
pr_notice(" flags = 0x%lx\n", object->flags);
- pr_notice(" checksum = %d\n", object->checksum);
+ pr_notice(" checksum = %u\n", object->checksum);
pr_notice(" backtrace:\n");
print_stack_trace(&trace, 4);
}
@@ -990,6 +990,40 @@ void __ref kmemleak_free_percpu(const void __percpu *ptr)
EXPORT_SYMBOL_GPL(kmemleak_free_percpu);
/**
+ * kmemleak_update_trace - update object allocation stack trace
+ * @ptr: pointer to beginning of the object
+ *
+ * Override the object allocation stack trace for cases where the actual
+ * allocation place is not always useful.
+ */
+void __ref kmemleak_update_trace(const void *ptr)
+{
+ struct kmemleak_object *object;
+ unsigned long flags;
+
+ pr_debug("%s(0x%p)\n", __func__, ptr);
+
+ if (!kmemleak_enabled || IS_ERR_OR_NULL(ptr))
+ return;
+
+ object = find_and_get_object((unsigned long)ptr, 1);
+ if (!object) {
+#ifdef DEBUG
+ kmemleak_warn("Updating stack trace for unknown object at %p\n",
+ ptr);
+#endif
+ return;
+ }
+
+ spin_lock_irqsave(&object->lock, flags);
+ object->trace_len = __save_stack_trace(object->trace);
+ spin_unlock_irqrestore(&object->lock, flags);
+
+ put_object(object);
+}
+EXPORT_SYMBOL(kmemleak_update_trace);
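A minimal usage sketch, assuming a pool-style allocation wrapper whose internal call site is uninteresting for leak reports (names are illustrative):

	obj = pool_style_alloc(pool, size);
	/* re-point the recorded allocation trace at this caller */
	kmemleak_update_trace(obj);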
+
+/**
* kmemleak_not_leak - mark an allocated object as false positive
* @ptr: pointer to beginning of the object
*
@@ -1300,7 +1334,7 @@ static void kmemleak_scan(void)
/*
* Struct page scanning for each node.
*/
- lock_memory_hotplug();
+ get_online_mems();
for_each_online_node(i) {
unsigned long start_pfn = node_start_pfn(i);
unsigned long end_pfn = node_end_pfn(i);
@@ -1318,7 +1352,7 @@ static void kmemleak_scan(void)
scan_block(page, page + 1, NULL, 1);
}
}
- unlock_memory_hotplug();
+ put_online_mems();
/*
* Scanning the task stacks (may introduce false negatives).
diff --git a/mm/madvise.c b/mm/madvise.c
index 539eeb96b323..a402f8fdc68e 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -195,7 +195,7 @@ static void force_shm_swapin_readahead(struct vm_area_struct *vma,
for (; start < end; start += PAGE_SIZE) {
index = ((start - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
- page = find_get_page(mapping, index);
+ page = find_get_entry(mapping, index);
if (!radix_tree_exceptional_entry(page)) {
if (page)
page_cache_release(page);
diff --git a/mm/memblock.c b/mm/memblock.c
index a810ba923cdd..6d2f219a48b0 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -691,6 +691,7 @@ int __init_memblock memblock_free(phys_addr_t base, phys_addr_t size)
(unsigned long long)base + size - 1,
(void *)_RET_IP_);
+ kmemleak_free_part(__va(base), size);
return memblock_remove_range(&memblock.reserved, base, size);
}
@@ -1033,22 +1034,40 @@ int __init_memblock memblock_set_node(phys_addr_t base, phys_addr_t size,
}
#endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
-static phys_addr_t __init memblock_alloc_base_nid(phys_addr_t size,
- phys_addr_t align, phys_addr_t max_addr,
- int nid)
+static phys_addr_t __init memblock_alloc_range_nid(phys_addr_t size,
+ phys_addr_t align, phys_addr_t start,
+ phys_addr_t end, int nid)
{
phys_addr_t found;
if (!align)
align = SMP_CACHE_BYTES;
- found = memblock_find_in_range_node(size, align, 0, max_addr, nid);
- if (found && !memblock_reserve(found, size))
+ found = memblock_find_in_range_node(size, align, start, end, nid);
+ if (found && !memblock_reserve(found, size)) {
+ /*
+ * The min_count is set to 0 so that memblock allocations are
+ * never reported as leaks.
+ */
+ kmemleak_alloc(__va(found), size, 0, 0);
return found;
-
+ }
return 0;
}
+phys_addr_t __init memblock_alloc_range(phys_addr_t size, phys_addr_t align,
+ phys_addr_t start, phys_addr_t end)
+{
+ return memblock_alloc_range_nid(size, align, start, end, NUMA_NO_NODE);
+}
+
+static phys_addr_t __init memblock_alloc_base_nid(phys_addr_t size,
+ phys_addr_t align, phys_addr_t max_addr,
+ int nid)
+{
+ return memblock_alloc_range_nid(size, align, 0, max_addr, nid);
+}
+
phys_addr_t __init memblock_alloc_nid(phys_addr_t size, phys_addr_t align, int nid)
{
return memblock_alloc_base_nid(size, align, MEMBLOCK_ALLOC_ACCESSIBLE, nid);
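A hedged usage sketch for the new bounded variant, e.g. an early reservation that must land inside a given physical window (the bounds are illustrative):

	phys_addr_t base;

	/* 16 MiB, 4 MiB aligned, anywhere in [low, high) */
	base = memblock_alloc_range(SZ_16M, SZ_4M, low, high);
	if (!base)
		return -ENOMEM;	/* returns 0 on failure, not an errno */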
@@ -1389,9 +1408,8 @@ int __init_memblock memblock_search_pfn_nid(unsigned long pfn,
if (mid == -1)
return -1;
- *start_pfn = type->regions[mid].base >> PAGE_SHIFT;
- *end_pfn = (type->regions[mid].base + type->regions[mid].size)
- >> PAGE_SHIFT;
+ *start_pfn = PFN_DOWN(type->regions[mid].base);
+ *end_pfn = PFN_DOWN(type->regions[mid].base + type->regions[mid].size);
return type->regions[mid].nid;
}
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 66447745d0c0..98e38b50cc82 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -80,7 +80,7 @@ int do_swap_account __read_mostly;
#ifdef CONFIG_MEMCG_SWAP_ENABLED
static int really_do_swap_account __initdata = 1;
#else
-static int really_do_swap_account __initdata = 0;
+static int really_do_swap_account __initdata;
#endif
#else
@@ -357,10 +357,9 @@ struct mem_cgroup {
struct cg_proto tcp_mem;
#endif
#if defined(CONFIG_MEMCG_KMEM)
- /* analogous to slab_common's slab_caches list. per-memcg */
+ /* analogous to slab_common's slab_caches list, but per-memcg;
+ * protected by memcg_slab_mutex */
struct list_head memcg_slab_caches;
- /* Not a spinlock, we can take a lot of time walking the list */
- struct mutex slab_caches_mutex;
/* Index in the kmem_cache->memcg_params->memcg_caches array */
int kmemcg_id;
#endif
@@ -674,9 +673,11 @@ static void disarm_static_keys(struct mem_cgroup *memcg)
static void drain_all_stock_async(struct mem_cgroup *memcg);
static struct mem_cgroup_per_zone *
-mem_cgroup_zoneinfo(struct mem_cgroup *memcg, int nid, int zid)
+mem_cgroup_zone_zoneinfo(struct mem_cgroup *memcg, struct zone *zone)
{
- VM_BUG_ON((unsigned)nid >= nr_node_ids);
+ int nid = zone_to_nid(zone);
+ int zid = zone_idx(zone);
+
return &memcg->nodeinfo[nid]->zoneinfo[zid];
}
@@ -686,12 +687,12 @@ struct cgroup_subsys_state *mem_cgroup_css(struct mem_cgroup *memcg)
}
static struct mem_cgroup_per_zone *
-page_cgroup_zoneinfo(struct mem_cgroup *memcg, struct page *page)
+mem_cgroup_page_zoneinfo(struct mem_cgroup *memcg, struct page *page)
{
int nid = page_to_nid(page);
int zid = page_zonenum(page);
- return mem_cgroup_zoneinfo(memcg, nid, zid);
+ return &memcg->nodeinfo[nid]->zoneinfo[zid];
}
static struct mem_cgroup_tree_per_zone *
@@ -709,11 +710,9 @@ soft_limit_tree_from_page(struct page *page)
return &soft_limit_tree.rb_tree_per_node[nid]->rb_tree_per_zone[zid];
}
-static void
-__mem_cgroup_insert_exceeded(struct mem_cgroup *memcg,
- struct mem_cgroup_per_zone *mz,
- struct mem_cgroup_tree_per_zone *mctz,
- unsigned long long new_usage_in_excess)
+static void __mem_cgroup_insert_exceeded(struct mem_cgroup_per_zone *mz,
+ struct mem_cgroup_tree_per_zone *mctz,
+ unsigned long long new_usage_in_excess)
{
struct rb_node **p = &mctz->rb_root.rb_node;
struct rb_node *parent = NULL;
@@ -743,10 +742,8 @@ __mem_cgroup_insert_exceeded(struct mem_cgroup *memcg,
mz->on_tree = true;
}
-static void
-__mem_cgroup_remove_exceeded(struct mem_cgroup *memcg,
- struct mem_cgroup_per_zone *mz,
- struct mem_cgroup_tree_per_zone *mctz)
+static void __mem_cgroup_remove_exceeded(struct mem_cgroup_per_zone *mz,
+ struct mem_cgroup_tree_per_zone *mctz)
{
if (!mz->on_tree)
return;
@@ -754,13 +751,11 @@ __mem_cgroup_remove_exceeded(struct mem_cgroup *memcg,
mz->on_tree = false;
}
-static void
-mem_cgroup_remove_exceeded(struct mem_cgroup *memcg,
- struct mem_cgroup_per_zone *mz,
- struct mem_cgroup_tree_per_zone *mctz)
+static void mem_cgroup_remove_exceeded(struct mem_cgroup_per_zone *mz,
+ struct mem_cgroup_tree_per_zone *mctz)
{
spin_lock(&mctz->lock);
- __mem_cgroup_remove_exceeded(memcg, mz, mctz);
+ __mem_cgroup_remove_exceeded(mz, mctz);
spin_unlock(&mctz->lock);
}
@@ -770,16 +765,14 @@ static void mem_cgroup_update_tree(struct mem_cgroup *memcg, struct page *page)
unsigned long long excess;
struct mem_cgroup_per_zone *mz;
struct mem_cgroup_tree_per_zone *mctz;
- int nid = page_to_nid(page);
- int zid = page_zonenum(page);
- mctz = soft_limit_tree_from_page(page);
+ mctz = soft_limit_tree_from_page(page);
/*
* Necessary to update all ancestors when hierarchy is used.
* because their event counter is not touched.
*/
for (; memcg; memcg = parent_mem_cgroup(memcg)) {
- mz = mem_cgroup_zoneinfo(memcg, nid, zid);
+ mz = mem_cgroup_page_zoneinfo(memcg, page);
excess = res_counter_soft_limit_excess(&memcg->res);
/*
* We have to update the tree if mz is on RB-tree or
@@ -789,12 +782,12 @@ static void mem_cgroup_update_tree(struct mem_cgroup *memcg, struct page *page)
spin_lock(&mctz->lock);
/* if on-tree, remove it */
if (mz->on_tree)
- __mem_cgroup_remove_exceeded(memcg, mz, mctz);
+ __mem_cgroup_remove_exceeded(mz, mctz);
/*
* Insert again. mz->usage_in_excess will be updated.
* If excess is 0, no tree ops.
*/
- __mem_cgroup_insert_exceeded(memcg, mz, mctz, excess);
+ __mem_cgroup_insert_exceeded(mz, mctz, excess);
spin_unlock(&mctz->lock);
}
}
@@ -802,15 +795,15 @@ static void mem_cgroup_update_tree(struct mem_cgroup *memcg, struct page *page)
static void mem_cgroup_remove_from_trees(struct mem_cgroup *memcg)
{
- int node, zone;
- struct mem_cgroup_per_zone *mz;
struct mem_cgroup_tree_per_zone *mctz;
+ struct mem_cgroup_per_zone *mz;
+ int nid, zid;
- for_each_node(node) {
- for (zone = 0; zone < MAX_NR_ZONES; zone++) {
- mz = mem_cgroup_zoneinfo(memcg, node, zone);
- mctz = soft_limit_tree_node_zone(node, zone);
- mem_cgroup_remove_exceeded(memcg, mz, mctz);
+ for_each_node(nid) {
+ for (zid = 0; zid < MAX_NR_ZONES; zid++) {
+ mz = &memcg->nodeinfo[nid]->zoneinfo[zid];
+ mctz = soft_limit_tree_node_zone(nid, zid);
+ mem_cgroup_remove_exceeded(mz, mctz);
}
}
}
@@ -833,7 +826,7 @@ retry:
* we will to add it back at the end of reclaim to its correct
* position in the tree.
*/
- __mem_cgroup_remove_exceeded(mz->memcg, mz, mctz);
+ __mem_cgroup_remove_exceeded(mz, mctz);
if (!res_counter_soft_limit_excess(&mz->memcg->res) ||
!css_tryget_online(&mz->memcg->css))
goto retry;
@@ -944,8 +937,7 @@ static void mem_cgroup_charge_statistics(struct mem_cgroup *memcg,
__this_cpu_add(memcg->stat->nr_page_events, nr_pages);
}
-unsigned long
-mem_cgroup_get_lru_size(struct lruvec *lruvec, enum lru_list lru)
+unsigned long mem_cgroup_get_lru_size(struct lruvec *lruvec, enum lru_list lru)
{
struct mem_cgroup_per_zone *mz;
@@ -953,46 +945,38 @@ mem_cgroup_get_lru_size(struct lruvec *lruvec, enum lru_list lru)
return mz->lru_size[lru];
}
-static unsigned long
-mem_cgroup_zone_nr_lru_pages(struct mem_cgroup *memcg, int nid, int zid,
- unsigned int lru_mask)
-{
- struct mem_cgroup_per_zone *mz;
- enum lru_list lru;
- unsigned long ret = 0;
-
- mz = mem_cgroup_zoneinfo(memcg, nid, zid);
-
- for_each_lru(lru) {
- if (BIT(lru) & lru_mask)
- ret += mz->lru_size[lru];
- }
- return ret;
-}
-
-static unsigned long
-mem_cgroup_node_nr_lru_pages(struct mem_cgroup *memcg,
- int nid, unsigned int lru_mask)
+static unsigned long mem_cgroup_node_nr_lru_pages(struct mem_cgroup *memcg,
+ int nid,
+ unsigned int lru_mask)
{
- u64 total = 0;
+ unsigned long nr = 0;
int zid;
- for (zid = 0; zid < MAX_NR_ZONES; zid++)
- total += mem_cgroup_zone_nr_lru_pages(memcg,
- nid, zid, lru_mask);
+ VM_BUG_ON((unsigned)nid >= nr_node_ids);
- return total;
+ for (zid = 0; zid < MAX_NR_ZONES; zid++) {
+ struct mem_cgroup_per_zone *mz;
+ enum lru_list lru;
+
+ for_each_lru(lru) {
+ if (!(BIT(lru) & lru_mask))
+ continue;
+ mz = &memcg->nodeinfo[nid]->zoneinfo[zid];
+ nr += mz->lru_size[lru];
+ }
+ }
+ return nr;
}
static unsigned long mem_cgroup_nr_lru_pages(struct mem_cgroup *memcg,
unsigned int lru_mask)
{
+ unsigned long nr = 0;
int nid;
- u64 total = 0;
for_each_node_state(nid, N_MEMORY)
- total += mem_cgroup_node_nr_lru_pages(memcg, nid, lru_mask);
- return total;
+ nr += mem_cgroup_node_nr_lru_pages(memcg, nid, lru_mask);
+ return nr;
}
static bool mem_cgroup_event_ratelimit(struct mem_cgroup *memcg,
@@ -1232,11 +1216,9 @@ struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root,
int uninitialized_var(seq);
if (reclaim) {
- int nid = zone_to_nid(reclaim->zone);
- int zid = zone_idx(reclaim->zone);
struct mem_cgroup_per_zone *mz;
- mz = mem_cgroup_zoneinfo(root, nid, zid);
+ mz = mem_cgroup_zone_zoneinfo(root, reclaim->zone);
iter = &mz->reclaim_iter[reclaim->priority];
if (prev && reclaim->generation != iter->generation) {
iter->last_visited = NULL;
@@ -1343,7 +1325,7 @@ struct lruvec *mem_cgroup_zone_lruvec(struct zone *zone,
goto out;
}
- mz = mem_cgroup_zoneinfo(memcg, zone_to_nid(zone), zone_idx(zone));
+ mz = mem_cgroup_zone_zoneinfo(memcg, zone);
lruvec = &mz->lruvec;
out:
/*
@@ -1402,7 +1384,7 @@ struct lruvec *mem_cgroup_page_lruvec(struct page *page, struct zone *zone)
if (!PageLRU(page) && !PageCgroupUsed(pc) && memcg != root_mem_cgroup)
pc->mem_cgroup = memcg = root_mem_cgroup;
- mz = page_cgroup_zoneinfo(memcg, page);
+ mz = mem_cgroup_page_zoneinfo(memcg, page);
lruvec = &mz->lruvec;
out:
/*
@@ -1584,23 +1566,12 @@ static void mem_cgroup_end_move(struct mem_cgroup *memcg)
}
/*
- * 2 routines for checking "mem" is under move_account() or not.
- *
- * mem_cgroup_stolen() - checking whether a cgroup is mc.from or not. This
- * is used for avoiding races in accounting. If true,
- * pc->mem_cgroup may be overwritten.
+ * A routine for checking "mem" is under move_account() or not.
*
- * mem_cgroup_under_move() - checking a cgroup is mc.from or mc.to or
- * under hierarchy of moving cgroups. This is for
- * waiting at hith-memory prressure caused by "move".
+ * Checking a cgroup is mc.from or mc.to or under hierarchy of
+ * moving cgroups. This is for waiting at high-memory pressure
+ * caused by "move".
*/
-
-static bool mem_cgroup_stolen(struct mem_cgroup *memcg)
-{
- VM_BUG_ON(!rcu_read_lock_held());
- return atomic_read(&memcg->moving_account) > 0;
-}
-
static bool mem_cgroup_under_move(struct mem_cgroup *memcg)
{
struct mem_cgroup *from;
@@ -1643,7 +1614,6 @@ static bool mem_cgroup_wait_acct_move(struct mem_cgroup *memcg)
* Take this lock when
* - a code tries to modify page's memcg while it's USED.
* - a code tries to modify page state accounting in a memcg.
- * see mem_cgroup_stolen(), too.
*/
static void move_lock_mem_cgroup(struct mem_cgroup *memcg,
unsigned long *flags)
@@ -1687,8 +1657,9 @@ void mem_cgroup_print_oom_info(struct mem_cgroup *memcg, struct task_struct *p)
rcu_read_unlock();
- pr_info("memory: usage %llukB, limit %llukB, failcnt %llu\n",
+ pr_info("memory: usage %llukB, low_limit %llukB limit %llukB, failcnt %llu\n",
res_counter_read_u64(&memcg->res, RES_USAGE) >> 10,
+ res_counter_read_u64(&memcg->res, RES_LOW_LIMIT) >> 10,
res_counter_read_u64(&memcg->res, RES_LIMIT) >> 10,
res_counter_read_u64(&memcg->res, RES_FAILCNT));
pr_info("memory+swap: usage %llukB, limit %llukB, failcnt %llu\n",
@@ -2278,12 +2249,11 @@ cleanup:
}
/*
- * Currently used to update mapped file statistics, but the routine can be
- * generalized to update other statistics as well.
+ * Used to update mapped file, writeback, or other statistics.
*
* Notes: Race condition
*
- * We usually use page_cgroup_lock() for accessing page_cgroup member but
+ * We usually use lock_page_cgroup() for accessing page_cgroup member but
 * it tends to be costly. But considering some conditions, we don't need
* to do so _always_.
*
@@ -2297,8 +2267,8 @@ cleanup:
* by flags.
*
* Considering "move", this is an only case we see a race. To make the race
- * small, we check mm->moving_account and detect there are possibility of race
- * If there is, we take a lock.
+ * small, we check memcg->moving_account and detect whether there is any
+ * possibility of a race. If there is, we take a lock.
*/
void __mem_cgroup_begin_update_page_stat(struct page *page,
@@ -2316,9 +2286,10 @@ again:
* If this memory cgroup is not under account moving, we don't
* need to take move_lock_mem_cgroup(). Because we already hold
* rcu_read_lock(), any calls to move_account will be delayed until
- * rcu_read_unlock() if mem_cgroup_stolen() == true.
+ * rcu_read_unlock().
*/
- if (!mem_cgroup_stolen(memcg))
+ VM_BUG_ON(!rcu_read_lock_held());
+ if (atomic_read(&memcg->moving_account) <= 0)
return;
move_lock_mem_cgroup(memcg, flags);
@@ -2426,7 +2397,7 @@ static void drain_stock(struct memcg_stock_pcp *stock)
*/
static void drain_local_stock(struct work_struct *dummy)
{
- struct memcg_stock_pcp *stock = &__get_cpu_var(memcg_stock);
+ struct memcg_stock_pcp *stock = this_cpu_ptr(&memcg_stock);
drain_stock(stock);
clear_bit(FLUSHING_CACHED_CHARGE, &stock->flags);
}
@@ -2673,7 +2644,8 @@ static int mem_cgroup_try_charge(struct mem_cgroup *memcg,
* free their memory.
*/
if (unlikely(test_thread_flag(TIF_MEMDIE) ||
- fatal_signal_pending(current)))
+ fatal_signal_pending(current) ||
+ current->flags & PF_EXITING))
goto bypass;
if (unlikely(task_in_memcg_oom(current)))
@@ -2799,6 +2771,30 @@ static struct mem_cgroup *mem_cgroup_lookup(unsigned short id)
return mem_cgroup_from_id(id);
}
+/**
+ * mem_cgroup_within_guarantee - checks whether the given memcg is within its
+ * memory guarantee
+ * @memcg: target memcg for the reclaim
+ * @root: root of the reclaim hierarchy (null for the global reclaim)
+ *
+ * The given group is within its reclaim guarantee if it is below its low limit
+ * or if the same holds for any parent up the hierarchy to root (inclusive).
+ * Such a group might be excluded from the reclaim.
+ */
+bool mem_cgroup_within_guarantee(struct mem_cgroup *memcg,
+ struct mem_cgroup *root)
+{
+ do {
+ if (!res_counter_low_limit_excess(&memcg->res))
+ return true;
+ if (memcg == root)
+ break;
+
+ } while ((memcg = parent_mem_cgroup(memcg)));
+
+ return false;
+}
+
struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page)
{
struct mem_cgroup *memcg = NULL;
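
Since mem_cgroup_within_guarantee() is meant to be consulted by reclaim, a
hedged sketch of a caller may help; the iteration below uses
mem_cgroup_iter(), but this exact placement is an assumption, not code from
this series:

/* Hypothetical reclaim-side use (illustration only). */
static void scan_hierarchy(struct mem_cgroup *root)
{
        struct mem_cgroup *iter;

        for (iter = mem_cgroup_iter(root, NULL, NULL); iter;
             iter = mem_cgroup_iter(root, iter, NULL)) {
                if (mem_cgroup_within_guarantee(iter, root))
                        continue;       /* below its low limit: leave it be */
                /* ... scan the LRUs of iter ... */
        }
}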
@@ -2901,6 +2897,12 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg,
static DEFINE_MUTEX(set_limit_mutex);
#ifdef CONFIG_MEMCG_KMEM
+/*
+ * The memcg_slab_mutex is held whenever a per memcg kmem cache is created or
+ * destroyed. It protects memcg_caches arrays and memcg_slab_caches lists.
+ */
+static DEFINE_MUTEX(memcg_slab_mutex);
+
static DEFINE_MUTEX(activate_kmem_mutex);
static inline bool memcg_can_account_kmem(struct mem_cgroup *memcg)
@@ -2933,10 +2935,10 @@ static int mem_cgroup_slabinfo_read(struct seq_file *m, void *v)
print_slabinfo_header(m);
- mutex_lock(&memcg->slab_caches_mutex);
+ mutex_lock(&memcg_slab_mutex);
list_for_each_entry(params, &memcg->memcg_slab_caches, list)
cache_show(memcg_params_to_cache(params), m);
- mutex_unlock(&memcg->slab_caches_mutex);
+ mutex_unlock(&memcg_slab_mutex);
return 0;
}
@@ -3038,8 +3040,6 @@ void memcg_update_array_size(int num)
memcg_limited_groups_array_size = memcg_caches_array_size(num);
}
-static void kmem_cache_destroy_work_func(struct work_struct *w);
-
int memcg_update_cache_size(struct kmem_cache *s, int num_groups)
{
struct memcg_cache_params *cur_params = s->memcg_params;
@@ -3092,29 +3092,6 @@ int memcg_update_cache_size(struct kmem_cache *s, int num_groups)
return 0;
}
-char *memcg_create_cache_name(struct mem_cgroup *memcg,
- struct kmem_cache *root_cache)
-{
- static char *buf = NULL;
-
- /*
- * We need a mutex here to protect the shared buffer. Since this is
- * expected to be called only on cache creation, we can employ the
- * slab_mutex for that purpose.
- */
- lockdep_assert_held(&slab_mutex);
-
- if (!buf) {
- buf = kmalloc(NAME_MAX + 1, GFP_KERNEL);
- if (!buf)
- return NULL;
- }
-
- cgroup_name(memcg->css.cgroup, buf, NAME_MAX + 1);
- return kasprintf(GFP_KERNEL, "%s(%d:%s)", root_cache->name,
- memcg_cache_id(memcg), buf);
-}
-
int memcg_alloc_cache_params(struct mem_cgroup *memcg, struct kmem_cache *s,
struct kmem_cache *root_cache)
{
@@ -3136,8 +3113,6 @@ int memcg_alloc_cache_params(struct mem_cgroup *memcg, struct kmem_cache *s,
if (memcg) {
s->memcg_params->memcg = memcg;
s->memcg_params->root_cache = root_cache;
- INIT_WORK(&s->memcg_params->destroy,
- kmem_cache_destroy_work_func);
css_get(&memcg->css);
} else
s->memcg_params->is_root_cache = true;
@@ -3154,24 +3129,37 @@ void memcg_free_cache_params(struct kmem_cache *s)
kfree(s->memcg_params);
}
-void memcg_register_cache(struct kmem_cache *s)
+static void memcg_register_cache(struct mem_cgroup *memcg,
+ struct kmem_cache *root_cache)
{
- struct kmem_cache *root;
- struct mem_cgroup *memcg;
+ static char memcg_name_buf[NAME_MAX + 1]; /* protected by
+ memcg_slab_mutex */
+ struct kmem_cache *cachep;
int id;
- if (is_root_cache(s))
+ lockdep_assert_held(&memcg_slab_mutex);
+
+ id = memcg_cache_id(memcg);
+
+ /*
+ * Since per-memcg caches are created asynchronously on first
+ * allocation (see memcg_kmem_get_cache()), several threads can try to
+ * create the same cache, but only one of them may succeed.
+ */
+ if (cache_from_memcg_idx(root_cache, id))
return;
+ cgroup_name(memcg->css.cgroup, memcg_name_buf, NAME_MAX + 1);
+ cachep = memcg_create_kmem_cache(memcg, root_cache, memcg_name_buf);
/*
- * Holding the slab_mutex assures nobody will touch the memcg_caches
- * array while we are modifying it.
+ * If we could not create a memcg cache, do not complain, because
+ * that's not critical at all as we can always proceed with the root
+ * cache.
*/
- lockdep_assert_held(&slab_mutex);
+ if (!cachep)
+ return;
- root = s->memcg_params->root_cache;
- memcg = s->memcg_params->memcg;
- id = memcg_cache_id(memcg);
+ list_add(&cachep->memcg_params->list, &memcg->memcg_slab_caches);
/*
* Since readers won't lock (see cache_from_memcg_idx()), we need a
@@ -3180,49 +3168,30 @@ void memcg_register_cache(struct kmem_cache *s)
*/
smp_wmb();
- /*
- * Initialize the pointer to this cache in its parent's memcg_params
- * before adding it to the memcg_slab_caches list, otherwise we can
- * fail to convert memcg_params_to_cache() while traversing the list.
- */
- VM_BUG_ON(root->memcg_params->memcg_caches[id]);
- root->memcg_params->memcg_caches[id] = s;
-
- mutex_lock(&memcg->slab_caches_mutex);
- list_add(&s->memcg_params->list, &memcg->memcg_slab_caches);
- mutex_unlock(&memcg->slab_caches_mutex);
+ BUG_ON(root_cache->memcg_params->memcg_caches[id]);
+ root_cache->memcg_params->memcg_caches[id] = cachep;
}
-void memcg_unregister_cache(struct kmem_cache *s)
+static void memcg_unregister_cache(struct kmem_cache *cachep)
{
- struct kmem_cache *root;
+ struct kmem_cache *root_cache;
struct mem_cgroup *memcg;
int id;
- if (is_root_cache(s))
- return;
+ lockdep_assert_held(&memcg_slab_mutex);
- /*
- * Holding the slab_mutex assures nobody will touch the memcg_caches
- * array while we are modifying it.
- */
- lockdep_assert_held(&slab_mutex);
+ BUG_ON(is_root_cache(cachep));
- root = s->memcg_params->root_cache;
- memcg = s->memcg_params->memcg;
+ root_cache = cachep->memcg_params->root_cache;
+ memcg = cachep->memcg_params->memcg;
id = memcg_cache_id(memcg);
- mutex_lock(&memcg->slab_caches_mutex);
- list_del(&s->memcg_params->list);
- mutex_unlock(&memcg->slab_caches_mutex);
+ BUG_ON(root_cache->memcg_params->memcg_caches[id] != cachep);
+ root_cache->memcg_params->memcg_caches[id] = NULL;
- /*
- * Clear the pointer to this cache in its parent's memcg_params only
- * after removing it from the memcg_slab_caches list, otherwise we can
- * fail to convert memcg_params_to_cache() while traversing the list.
- */
- VM_BUG_ON(root->memcg_params->memcg_caches[id] != s);
- root->memcg_params->memcg_caches[id] = NULL;
+ list_del(&cachep->memcg_params->list);
+
+ kmem_cache_destroy(cachep);
}
/*
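
The registration path above leans on a standard publish pattern: fully
initialize the object, issue smp_wmb(), then store the pointer that lockless
readers (cache_from_memcg_idx()) dereference. A condensed, self-contained
sketch with invented types:

/* Ordering illustration only. */
struct slot {
        int ready;
};

static struct slot *published;

static void publish(struct slot *s)
{
        s->ready = 1;           /* complete all initialization first */
        smp_wmb();              /* order the init before the store below */
        published = s;          /* readers never see a half-built slot */
}

static int peek(void)
{
        struct slot *s = ACCESS_ONCE(published);

        /* the reader side pairs via the address dependency on s */
        return s ? s->ready : 0;
}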
@@ -3256,144 +3225,61 @@ static inline void memcg_resume_kmem_account(void)
current->memcg_kmem_skip_account--;
}
-static void kmem_cache_destroy_work_func(struct work_struct *w)
-{
- struct kmem_cache *cachep;
- struct memcg_cache_params *p;
-
- p = container_of(w, struct memcg_cache_params, destroy);
-
- cachep = memcg_params_to_cache(p);
-
- /*
- * If we get down to 0 after shrink, we could delete right away.
- * However, memcg_release_pages() already puts us back in the workqueue
- * in that case. If we proceed deleting, we'll get a dangling
- * reference, and removing the object from the workqueue in that case
- * is unnecessary complication. We are not a fast path.
- *
- * Note that this case is fundamentally different from racing with
- * shrink_slab(): if memcg_cgroup_destroy_cache() is called in
- * kmem_cache_shrink, not only we would be reinserting a dead cache
- * into the queue, but doing so from inside the worker racing to
- * destroy it.
- *
- * So if we aren't down to zero, we'll just schedule a worker and try
- * again
- */
- if (atomic_read(&cachep->memcg_params->nr_pages) != 0)
- kmem_cache_shrink(cachep);
- else
- kmem_cache_destroy(cachep);
-}
-
-void mem_cgroup_destroy_cache(struct kmem_cache *cachep)
-{
- if (!cachep->memcg_params->dead)
- return;
-
- /*
- * There are many ways in which we can get here.
- *
- * We can get to a memory-pressure situation while the delayed work is
- * still pending to run. The vmscan shrinkers can then release all
- * cache memory and get us to destruction. If this is the case, we'll
- * be executed twice, which is a bug (the second time will execute over
- * bogus data). In this case, cancelling the work should be fine.
- *
- * But we can also get here from the worker itself, if
- * kmem_cache_shrink is enough to shake all the remaining objects and
- * get the page count to 0. In this case, we'll deadlock if we try to
- * cancel the work (the worker runs with an internal lock held, which
- * is the same lock we would hold for cancel_work_sync().)
- *
- * Since we can't possibly know who got us here, just refrain from
- * running if there is already work pending
- */
- if (work_pending(&cachep->memcg_params->destroy))
- return;
- /*
- * We have to defer the actual destroying to a workqueue, because
- * we might currently be in a context that cannot sleep.
- */
- schedule_work(&cachep->memcg_params->destroy);
-}
-
-int __kmem_cache_destroy_memcg_children(struct kmem_cache *s)
+int __memcg_cleanup_cache_params(struct kmem_cache *s)
{
struct kmem_cache *c;
int i, failed = 0;
- /*
- * If the cache is being destroyed, we trust that there is no one else
- * requesting objects from it. Even if there are, the sanity checks in
- * kmem_cache_destroy should caught this ill-case.
- *
- * Still, we don't want anyone else freeing memcg_caches under our
- * noses, which can happen if a new memcg comes to life. As usual,
- * we'll take the activate_kmem_mutex to protect ourselves against
- * this.
- */
- mutex_lock(&activate_kmem_mutex);
+ mutex_lock(&memcg_slab_mutex);
for_each_memcg_cache_index(i) {
c = cache_from_memcg_idx(s, i);
if (!c)
continue;
- /*
- * We will now manually delete the caches, so to avoid races
- * we need to cancel all pending destruction workers and
- * proceed with destruction ourselves.
- *
- * kmem_cache_destroy() will call kmem_cache_shrink internally,
- * and that could spawn the workers again: it is likely that
- * the cache still have active pages until this very moment.
- * This would lead us back to mem_cgroup_destroy_cache.
- *
- * But that will not execute at all if the "dead" flag is not
- * set, so flip it down to guarantee we are in control.
- */
- c->memcg_params->dead = false;
- cancel_work_sync(&c->memcg_params->destroy);
- kmem_cache_destroy(c);
+ memcg_unregister_cache(c);
if (cache_from_memcg_idx(s, i))
failed++;
}
- mutex_unlock(&activate_kmem_mutex);
+ mutex_unlock(&memcg_slab_mutex);
return failed;
}
-static void mem_cgroup_destroy_all_caches(struct mem_cgroup *memcg)
+static void memcg_unregister_all_caches(struct mem_cgroup *memcg)
{
struct kmem_cache *cachep;
- struct memcg_cache_params *params;
+ struct memcg_cache_params *params, *tmp;
if (!memcg_kmem_is_active(memcg))
return;
- mutex_lock(&memcg->slab_caches_mutex);
- list_for_each_entry(params, &memcg->memcg_slab_caches, list) {
+ mutex_lock(&memcg_slab_mutex);
+ list_for_each_entry_safe(params, tmp, &memcg->memcg_slab_caches, list) {
cachep = memcg_params_to_cache(params);
- cachep->memcg_params->dead = true;
- schedule_work(&cachep->memcg_params->destroy);
+ kmem_cache_shrink(cachep);
+ if (atomic_read(&cachep->memcg_params->nr_pages) == 0)
+ memcg_unregister_cache(cachep);
}
- mutex_unlock(&memcg->slab_caches_mutex);
+ mutex_unlock(&memcg_slab_mutex);
}
-struct create_work {
+struct memcg_register_cache_work {
struct mem_cgroup *memcg;
struct kmem_cache *cachep;
struct work_struct work;
};
-static void memcg_create_cache_work_func(struct work_struct *w)
+static void memcg_register_cache_func(struct work_struct *w)
{
- struct create_work *cw = container_of(w, struct create_work, work);
+ struct memcg_register_cache_work *cw =
+ container_of(w, struct memcg_register_cache_work, work);
struct mem_cgroup *memcg = cw->memcg;
struct kmem_cache *cachep = cw->cachep;
- kmem_cache_create_memcg(memcg, cachep);
+ mutex_lock(&memcg_slab_mutex);
+ memcg_register_cache(memcg, cachep);
+ mutex_unlock(&memcg_slab_mutex);
+
css_put(&memcg->css);
kfree(cw);
}
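
memcg_register_cache_func() follows a generic deferral shape: allocate a
small carrier, stash the arguments, and run the sleepable part from a
workqueue, because the charging context may be atomic. A hedged standalone
sketch, names invented:

struct deferred_call {
        struct work_struct work;
        void *arg;
};

static void deferred_fn(struct work_struct *w)
{
        struct deferred_call *dc =
                container_of(w, struct deferred_call, work);

        /* ... the part that may sleep, using dc->arg ... */
        kfree(dc);
}

static void defer(void *arg)
{
        struct deferred_call *dc = kmalloc(sizeof(*dc), GFP_NOWAIT);

        if (!dc)
                return;         /* best effort, as in the patch */
        dc->arg = arg;
        INIT_WORK(&dc->work, deferred_fn);
        schedule_work(&dc->work);
}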
@@ -3401,12 +3287,12 @@ static void memcg_create_cache_work_func(struct work_struct *w)
/*
* Enqueue the creation of a per-memcg kmem_cache.
*/
-static void __memcg_create_cache_enqueue(struct mem_cgroup *memcg,
- struct kmem_cache *cachep)
+static void __memcg_schedule_register_cache(struct mem_cgroup *memcg,
+ struct kmem_cache *cachep)
{
- struct create_work *cw;
+ struct memcg_register_cache_work *cw;
- cw = kmalloc(sizeof(struct create_work), GFP_NOWAIT);
+ cw = kmalloc(sizeof(*cw), GFP_NOWAIT);
if (cw == NULL) {
css_put(&memcg->css);
return;
@@ -3415,17 +3301,17 @@ static void __memcg_create_cache_enqueue(struct mem_cgroup *memcg,
cw->memcg = memcg;
cw->cachep = cachep;
- INIT_WORK(&cw->work, memcg_create_cache_work_func);
+ INIT_WORK(&cw->work, memcg_register_cache_func);
schedule_work(&cw->work);
}
-static void memcg_create_cache_enqueue(struct mem_cgroup *memcg,
- struct kmem_cache *cachep)
+static void memcg_schedule_register_cache(struct mem_cgroup *memcg,
+ struct kmem_cache *cachep)
{
/*
* We need to stop accounting when we kmalloc, because if the
* corresponding kmalloc cache is not yet created, the first allocation
- * in __memcg_create_cache_enqueue will recurse.
+ * in __memcg_schedule_register_cache will recurse.
*
* However, it is better to enclose the whole function. Depending on
* the debugging options enabled, INIT_WORK(), for instance, can
@@ -3434,9 +3320,27 @@ static void memcg_create_cache_enqueue(struct mem_cgroup *memcg,
* the safest choice is to do it like this, wrapping the whole function.
*/
memcg_stop_kmem_account();
- __memcg_create_cache_enqueue(memcg, cachep);
+ __memcg_schedule_register_cache(memcg, cachep);
memcg_resume_kmem_account();
}
+
+int __memcg_charge_slab(struct kmem_cache *cachep, gfp_t gfp, int order)
+{
+ int res;
+
+ res = memcg_charge_kmem(cachep->memcg_params->memcg, gfp,
+ PAGE_SIZE << order);
+ if (!res)
+ atomic_add(1 << order, &cachep->memcg_params->nr_pages);
+ return res;
+}
+
+void __memcg_uncharge_slab(struct kmem_cache *cachep, int order)
+{
+ memcg_uncharge_kmem(cachep->memcg_params->memcg, PAGE_SIZE << order);
+ atomic_sub(1 << order, &cachep->memcg_params->nr_pages);
+}
+
/*
* Return the kmem_cache we're supposed to use for a slab allocation.
* We try to use the current memcg's version of the cache.
@@ -3487,22 +3391,16 @@ struct kmem_cache *__memcg_kmem_get_cache(struct kmem_cache *cachep,
*
* However, there are some clashes that can arrive from locking.
* For instance, because we acquire the slab_mutex while doing
- * kmem_cache_dup, this means no further allocation could happen
- * with the slab_mutex held.
- *
- * Also, because cache creation issue get_online_cpus(), this
- * creates a lock chain: memcg_slab_mutex -> cpu_hotplug_mutex,
- * that ends up reversed during cpu hotplug. (cpuset allocates
- * a bunch of GFP_KERNEL memory during cpuup). Due to all that,
- * better to defer everything.
+ * memcg_create_kmem_cache, this means no further allocation
+ * could happen with the slab_mutex held. So it's better to
+ * defer everything.
*/
- memcg_create_cache_enqueue(memcg, cachep);
+ memcg_schedule_register_cache(memcg, cachep);
return cachep;
out:
rcu_read_unlock();
return cachep;
}
-EXPORT_SYMBOL(__memcg_kmem_get_cache);
/*
* We need to verify if the allocation against current->mm->owner's memcg is
@@ -3529,11 +3427,12 @@ __memcg_kmem_newpage_charge(gfp_t gfp, struct mem_cgroup **_memcg, int order)
/*
* Disabling accounting is only relevant for some specific memcg
* internal allocations. Therefore we would initially not have such
- * check here, since direct calls to the page allocator that are marked
- * with GFP_KMEMCG only happen outside memcg core. We are mostly
- * concerned with cache allocations, and by having this test at
- * memcg_kmem_get_cache, we are already able to relay the allocation to
- * the root cache and bypass the memcg cache altogether.
+ * check here, since direct calls to the page allocator that are
+ * accounted to kmemcg (alloc_kmem_pages and friends) only happen
+ * outside memcg core. We are mostly concerned with cache allocations,
+ * and by having this test at memcg_kmem_get_cache, we are already able
+ * to relay the allocation to the root cache and bypass the memcg cache
+ * altogether.
*
* There is one exception, though: the SLUB allocator does not create
* large order caches, but rather service large kmallocs directly from
@@ -3620,7 +3519,7 @@ void __memcg_kmem_uncharge_pages(struct page *page, int order)
memcg_uncharge_kmem(memcg, PAGE_SIZE << order);
}
#else
-static inline void mem_cgroup_destroy_all_caches(struct mem_cgroup *memcg)
+static inline void memcg_unregister_all_caches(struct mem_cgroup *memcg)
{
}
#endif /* CONFIG_MEMCG_KMEM */
@@ -4703,7 +4602,7 @@ unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
break;
} while (1);
}
- __mem_cgroup_remove_exceeded(mz->memcg, mz, mctz);
+ __mem_cgroup_remove_exceeded(mz, mctz);
excess = res_counter_soft_limit_excess(&mz->memcg->res);
/*
* One school of thought says that we should not add
@@ -4714,7 +4613,7 @@ unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
* term TODO.
*/
/* If excess == 0, no tree ops */
- __mem_cgroup_insert_exceeded(mz->memcg, mz, mctz, excess);
+ __mem_cgroup_insert_exceeded(mz, mctz, excess);
spin_unlock(&mctz->lock);
css_put(&mz->memcg->css);
loop++;
@@ -4895,6 +4794,10 @@ static ssize_t mem_cgroup_force_empty_write(struct kernfs_open_file *of,
if (mem_cgroup_is_root(memcg))
return -EINVAL;
+ pr_info_once("%s (%d): memory.force_empty is deprecated and will be "
+ "removed. Let us know if it is needed in your usecase at "
+ "linux-mm@kvack.org\n",
+ current->comm, task_pid_nr(current));
return mem_cgroup_force_empty(memcg) ?: nbytes;
}
@@ -5060,13 +4963,14 @@ static int __memcg_activate_kmem(struct mem_cgroup *memcg,
* Make sure we have enough space for this cgroup in each root cache's
* memcg_params.
*/
+ mutex_lock(&memcg_slab_mutex);
err = memcg_update_all_caches(memcg_id + 1);
+ mutex_unlock(&memcg_slab_mutex);
if (err)
goto out_rmid;
memcg->kmemcg_id = memcg_id;
INIT_LIST_HEAD(&memcg->memcg_slab_caches);
- mutex_init(&memcg->slab_caches_mutex);
/*
* We couldn't have accounted to this cgroup, because it hasn't got the
@@ -5176,6 +5080,24 @@ static ssize_t mem_cgroup_write(struct kernfs_open_file *of,
else
return -EINVAL;
break;
+ case RES_LOW_LIMIT:
+ if (mem_cgroup_is_root(memcg)) { /* Can't set limit on root */
+ ret = -EINVAL;
+ break;
+ }
+ ret = res_counter_memparse_write_strategy(buf, &val);
+ if (ret)
+ break;
+ if (type == _MEM) {
+ ret = res_counter_set_low_limit(&memcg->res, val);
+ break;
+ }
+ /*
+ * A memsw low limit doesn't make any sense, and kmem is not
+ * implemented yet - if ever.
+ */
+ return -EINVAL;
+
case RES_SOFT_LIMIT:
ret = res_counter_memparse_write_strategy(buf, &val);
if (ret)
@@ -5413,7 +5335,7 @@ static int memcg_stat_show(struct seq_file *m, void *v)
for_each_online_node(nid)
for (zid = 0; zid < MAX_NR_ZONES; zid++) {
- mz = mem_cgroup_zoneinfo(memcg, nid, zid);
+ mz = &memcg->nodeinfo[nid]->zoneinfo[zid];
rstat = &mz->lruvec.reclaim_stat;
recent_rotated[0] += rstat->recent_rotated[0];
@@ -5443,22 +5365,14 @@ static int mem_cgroup_swappiness_write(struct cgroup_subsys_state *css,
struct cftype *cft, u64 val)
{
struct mem_cgroup *memcg = mem_cgroup_from_css(css);
- struct mem_cgroup *parent = mem_cgroup_from_css(css_parent(&memcg->css));
-
- if (val > 100 || !parent)
- return -EINVAL;
-
- mutex_lock(&memcg_create_mutex);
- /* If under hierarchy, only empty-root can set this value */
- if ((parent->use_hierarchy) || memcg_has_children(memcg)) {
- mutex_unlock(&memcg_create_mutex);
+ if (val > 100)
return -EINVAL;
- }
-
- memcg->swappiness = val;
- mutex_unlock(&memcg_create_mutex);
+ if (css_parent(css))
+ memcg->swappiness = val;
+ else
+ vm_swappiness = val;
return 0;
}
@@ -5790,22 +5704,15 @@ static int mem_cgroup_oom_control_write(struct cgroup_subsys_state *css,
struct cftype *cft, u64 val)
{
struct mem_cgroup *memcg = mem_cgroup_from_css(css);
- struct mem_cgroup *parent = mem_cgroup_from_css(css_parent(&memcg->css));
/* cannot set to root cgroup and only 0 and 1 are allowed */
- if (!parent || !((val == 0) || (val == 1)))
+ if (!css_parent(css) || !((val == 0) || (val == 1)))
return -EINVAL;
- mutex_lock(&memcg_create_mutex);
- /* oom-kill-disable is a flag for subhierarchy. */
- if ((parent->use_hierarchy) || memcg_has_children(memcg)) {
- mutex_unlock(&memcg_create_mutex);
- return -EINVAL;
- }
memcg->oom_kill_disable = val;
if (!val)
memcg_oom_recover(memcg);
- mutex_unlock(&memcg_create_mutex);
+
return 0;
}
@@ -6117,6 +6024,12 @@ static struct cftype mem_cgroup_files[] = {
.read_u64 = mem_cgroup_read_u64,
},
{
+ .name = "low_limit_in_bytes",
+ .private = MEMFILE_PRIVATE(_MEM, RES_LOW_LIMIT),
+ .write = mem_cgroup_write,
+ .read_u64 = mem_cgroup_read_u64,
+ },
+ {
.name = "soft_limit_in_bytes",
.private = MEMFILE_PRIVATE(_MEM, RES_SOFT_LIMIT),
.write = mem_cgroup_write,
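
With the new memory.low_limit_in_bytes file, the guarantee would be set from
userspace like the other res_counter knobs. A hedged userspace sketch; the
mount point and group name are assumptions:

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
        const char *path =
                "/sys/fs/cgroup/memory/grp/memory.low_limit_in_bytes";
        const char *val = "67108864";   /* guarantee ~64M to this group */
        int fd = open(path, O_WRONLY);

        if (fd < 0) {
                perror("open");
                return 1;
        }
        if (write(fd, val, strlen(val)) < 0)
                perror("write");
        close(fd);
        return 0;
}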
@@ -6134,6 +6047,7 @@ static struct cftype mem_cgroup_files[] = {
},
{
.name = "force_empty",
+ .flags = CFTYPE_INSANE,
.write = mem_cgroup_force_empty_write,
},
{
@@ -6494,7 +6408,7 @@ static void mem_cgroup_css_offline(struct cgroup_subsys_state *css)
css_for_each_descendant_post(iter, css)
mem_cgroup_reparent_charges(mem_cgroup_from_css(iter));
- mem_cgroup_destroy_all_caches(memcg);
+ memcg_unregister_all_caches(memcg);
vmpressure_cleanup(&memcg->vmpressure);
}
@@ -6785,30 +6699,29 @@ static inline enum mc_target_type get_mctgt_type_thp(struct vm_area_struct *vma,
}
#endif
-static int mem_cgroup_count_precharge_pte_range(pmd_t *pmd,
+static int mem_cgroup_count_precharge_pte(pte_t *pte,
unsigned long addr, unsigned long end,
struct mm_walk *walk)
{
- struct vm_area_struct *vma = walk->private;
- pte_t *pte;
+ if (get_mctgt_type(walk->vma, addr, *pte, NULL))
+ mc.precharge++; /* increment precharge temporarily */
+ return 0;
+}
+
+static int mem_cgroup_count_precharge_pmd(pmd_t *pmd,
+ unsigned long addr, unsigned long end,
+ struct mm_walk *walk)
+{
+ struct vm_area_struct *vma = walk->vma;
spinlock_t *ptl;
if (pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
if (get_mctgt_type_thp(vma, addr, *pmd, NULL) == MC_TARGET_PAGE)
mc.precharge += HPAGE_PMD_NR;
spin_unlock(ptl);
- return 0;
+ /* don't call mem_cgroup_count_precharge_pte() */
+ walk->skip = 1;
}
-
- if (pmd_trans_unstable(pmd))
- return 0;
- pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
- for (; addr != end; pte++, addr += PAGE_SIZE)
- if (get_mctgt_type(vma, addr, *pte, NULL))
- mc.precharge++; /* increment precharge temporarily */
- pte_unmap_unlock(pte - 1, ptl);
- cond_resched();
-
return 0;
}
@@ -6817,18 +6730,14 @@ static unsigned long mem_cgroup_count_precharge(struct mm_struct *mm)
unsigned long precharge;
struct vm_area_struct *vma;
+ struct mm_walk mem_cgroup_count_precharge_walk = {
+ .pmd_entry = mem_cgroup_count_precharge_pmd,
+ .pte_entry = mem_cgroup_count_precharge_pte,
+ .mm = mm,
+ };
down_read(&mm->mmap_sem);
- for (vma = mm->mmap; vma; vma = vma->vm_next) {
- struct mm_walk mem_cgroup_count_precharge_walk = {
- .pmd_entry = mem_cgroup_count_precharge_pte_range,
- .mm = mm,
- .private = vma,
- };
- if (is_vm_hugetlb_page(vma))
- continue;
- walk_page_range(vma->vm_start, vma->vm_end,
- &mem_cgroup_count_precharge_walk);
- }
+ for (vma = mm->mmap; vma; vma = vma->vm_next)
+ walk_page_vma(vma, &mem_cgroup_count_precharge_walk);
up_read(&mm->mmap_sem);
precharge = mc.precharge;
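
The conversion rests on the reworked page-walk API from earlier in this
series: walk->vma replaces the private pointer, walk->skip suppresses the
pte handler once a huge pmd was handled, and walk_page_vma() takes over the
hugetlb filtering the old loop did by hand. A hedged walker in the same
style (API assumed), counting present ptes:

static int count_pte(pte_t *pte, unsigned long addr,
                     unsigned long next, struct mm_walk *walk)
{
        if (pte_present(*pte))
                (*(unsigned long *)walk->private)++;
        return 0;
}

static unsigned long count_present_ptes(struct mm_struct *mm)
{
        unsigned long nr = 0;
        struct vm_area_struct *vma;
        struct mm_walk walk = {
                .pte_entry = count_pte,
                .mm = mm,
                .private = &nr,
        };

        down_read(&mm->mmap_sem);
        for (vma = mm->mmap; vma; vma = vma->vm_next)
                walk_page_vma(vma, &walk);
        up_read(&mm->mmap_sem);
        return nr;
}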
@@ -6967,7 +6876,7 @@ static int mem_cgroup_move_charge_pte_range(pmd_t *pmd,
struct mm_walk *walk)
{
int ret = 0;
- struct vm_area_struct *vma = walk->private;
+ struct vm_area_struct *vma = walk->vma;
pte_t *pte;
spinlock_t *ptl;
enum mc_target_type target_type;
@@ -7068,6 +6977,10 @@ put: /* get_mctgt_type() gets the page */
static void mem_cgroup_move_charge(struct mm_struct *mm)
{
struct vm_area_struct *vma;
+ struct mm_walk mem_cgroup_move_charge_walk = {
+ .pmd_entry = mem_cgroup_move_charge_pte_range,
+ .mm = mm,
+ };
lru_add_drain_all();
retry:
@@ -7083,24 +6996,8 @@ retry:
cond_resched();
goto retry;
}
- for (vma = mm->mmap; vma; vma = vma->vm_next) {
- int ret;
- struct mm_walk mem_cgroup_move_charge_walk = {
- .pmd_entry = mem_cgroup_move_charge_pte_range,
- .mm = mm,
- .private = vma,
- };
- if (is_vm_hugetlb_page(vma))
- continue;
- ret = walk_page_range(vma->vm_start, vma->vm_end,
- &mem_cgroup_move_charge_walk);
- if (ret)
- /*
- * means we have consumed all precharges and failed in
- * doing additional charge. Just abandon here.
- */
- break;
- }
+ for (vma = mm->mmap; vma; vma = vma->vm_next)
+ walk_page_vma(vma, &mem_cgroup_move_charge_walk);
up_read(&mm->mmap_sem);
}
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 35ef28acf137..9872af1b1e9d 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -202,7 +202,7 @@ static int kill_proc(struct task_struct *t, unsigned long addr, int trapno,
#ifdef __ARCH_SI_TRAPNO
si.si_trapno = trapno;
#endif
- si.si_addr_lsb = compound_order(compound_head(page)) + PAGE_SHIFT;
+ si.si_addr_lsb = page_size_order(page) + PAGE_SHIFT;
if ((flags & MF_ACTION_REQUIRED) && t == current) {
si.si_code = BUS_MCEERR_AR;
@@ -404,7 +404,7 @@ static void collect_procs_anon(struct page *page, struct list_head *to_kill,
if (av == NULL) /* Not actually mapped anymore */
return;
- pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
+ pgoff = page_pgoff(page);
read_lock(&tasklist_lock);
for_each_process (tsk) {
struct anon_vma_chain *vmac;
@@ -437,7 +437,7 @@ static void collect_procs_file(struct page *page, struct list_head *to_kill,
mutex_lock(&mapping->i_mmap_mutex);
read_lock(&tasklist_lock);
for_each_process(tsk) {
- pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
+ pgoff_t pgoff = page_pgoff(page);
if (!task_early_kill(tsk))
continue;
@@ -1081,15 +1081,16 @@ int memory_failure(unsigned long pfn, int trapno, int flags)
return 0;
} else if (PageHuge(hpage)) {
/*
- * Check "just unpoisoned", "filter hit", and
- * "race with other subpage."
+ * Check "filter hit" and "race with other subpage."
*/
lock_page(hpage);
- if (!PageHWPoison(hpage)
- || (hwpoison_filter(p) && TestClearPageHWPoison(p))
- || (p != hpage && TestSetPageHWPoison(hpage))) {
- atomic_long_sub(nr_pages, &num_poisoned_pages);
- return 0;
+ if (PageHWPoison(hpage)) {
+ if ((hwpoison_filter(p) && TestClearPageHWPoison(p))
+ || (p != hpage && TestSetPageHWPoison(hpage))) {
+ atomic_long_sub(nr_pages, &num_poisoned_pages);
+ unlock_page(hpage);
+ return 0;
+ }
}
set_page_hwpoison_huge_page(hpage);
res = dequeue_hwpoisoned_huge_page(hpage);
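
Worth noting: besides narrowing the condition, the rewrite fixes a lock
imbalance, since the old early return left hpage locked. The shape being
removed, reduced to its essence (some_condition is an invented name):

/* Anti-pattern the hunk eliminates (illustration only). */
static int bad_example(struct page *hpage)
{
        lock_page(hpage);
        if (some_condition(hpage))
                return 0;       /* bug: returns with hpage still locked */
        unlock_page(hpage);
        return 1;
}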
@@ -1295,7 +1296,7 @@ static void memory_failure_work_func(struct work_struct *work)
unsigned long proc_flags;
int gotten;
- mf_cpu = &__get_cpu_var(memory_failure_cpu);
+ mf_cpu = this_cpu_ptr(&memory_failure_cpu);
for (;;) {
spin_lock_irqsave(&mf_cpu->lock, proc_flags);
gotten = kfifo_get(&mf_cpu->fifo, &entry);
@@ -1500,7 +1501,7 @@ static int soft_offline_huge_page(struct page *page, int flags)
/* Keep page count to indicate a given hugepage is isolated. */
list_move(&hpage->lru, &pagelist);
- ret = migrate_pages(&pagelist, new_page, MPOL_MF_MOVE_ALL,
+ ret = migrate_pages(&pagelist, new_page, NULL, MPOL_MF_MOVE_ALL,
MIGRATE_SYNC, MR_MEMORY_FAILURE);
if (ret) {
pr_info("soft offline: %#lx: migration failed %d, type %lx\n",
@@ -1581,7 +1582,7 @@ static int __soft_offline_page(struct page *page, int flags)
inc_zone_page_state(page, NR_ISOLATED_ANON +
page_is_file_cache(page));
list_add(&page->lru, &pagelist);
- ret = migrate_pages(&pagelist, new_page, MPOL_MF_MOVE_ALL,
+ ret = migrate_pages(&pagelist, new_page, NULL, MPOL_MF_MOVE_ALL,
MIGRATE_SYNC, MR_MEMORY_FAILURE);
if (ret) {
if (!list_empty(&pagelist)) {
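
The NULL threaded into these migrate_pages() calls matches a signature that
grows a destination-freeing callback (free_page_t) elsewhere in this series;
NULL keeps the old behaviour of returning unused targets through putback. A
hedged caller sketch with invented helpers:

static struct page *new_target(struct page *p, unsigned long private,
                               int **result)
{
        return alloc_page(GFP_HIGHUSER_MOVABLE);
}

static void put_target(struct page *p, unsigned long private)
{
        __free_page(p);         /* release targets migration never consumed */
}

static int migrate_list(struct list_head *pagelist)
{
        return migrate_pages(pagelist, new_target, put_target, 0,
                             MIGRATE_SYNC, MR_MEMORY_FAILURE);
}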
@@ -1661,11 +1662,7 @@ int soft_offline_page(struct page *page, int flags)
}
}
- /*
- * The lock_memory_hotplug prevents a race with memory hotplug.
- * This is a big hammer, a better would be nicer.
- */
- lock_memory_hotplug();
+ get_online_mems();
/*
* Isolate the page, so that it doesn't get reallocated if it
@@ -1676,7 +1673,7 @@ int soft_offline_page(struct page *page, int flags)
set_migratetype_isolate(page, true);
ret = get_any_page(page, pfn, flags);
- unlock_memory_hotplug();
+ put_online_mems();
if (ret > 0) { /* for in-use pages */
if (PageHuge(page))
ret = soft_offline_huge_page(page, flags);
diff --git a/mm/memory.c b/mm/memory.c
index e302ae1dcce0..b256439cd237 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -698,11 +698,6 @@ static void print_bad_pte(struct vm_area_struct *vma, unsigned long addr,
add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
}
-static inline bool is_cow_mapping(vm_flags_t flags)
-{
- return (flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE;
-}
-
/*
* vm_normal_page -- This function gets the "struct page" associated with a pte.
*
@@ -756,7 +751,7 @@ struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr,
unsigned long pfn = pte_pfn(pte);
if (HAVE_PTE_SPECIAL) {
- if (likely(!pte_special(pte)))
+ if (likely(!pte_special(pte) || pte_numa(pte)))
goto check_pfn;
if (vma->vm_flags & (VM_PFNMAP | VM_MIXEDMAP))
return NULL;
@@ -782,14 +777,15 @@ struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr,
}
}
- if (is_zero_pfn(pfn))
- return NULL;
check_pfn:
if (unlikely(pfn > highest_memmap_pfn)) {
print_bad_pte(vma, addr, pte, NULL);
return NULL;
}
+ if (is_zero_pfn(pfn))
+ return NULL;
+
/*
* NOTE! We still have PageReserved() pages in the page tables.
* eg. VDSO mappings can cause them to exist.
@@ -1457,646 +1453,6 @@ int zap_vma_ptes(struct vm_area_struct *vma, unsigned long address,
}
EXPORT_SYMBOL_GPL(zap_vma_ptes);
-/**
- * follow_page_mask - look up a page descriptor from a user-virtual address
- * @vma: vm_area_struct mapping @address
- * @address: virtual address to look up
- * @flags: flags modifying lookup behaviour
- * @page_mask: on output, *page_mask is set according to the size of the page
- *
- * @flags can have FOLL_ flags set, defined in <linux/mm.h>
- *
- * Returns the mapped (struct page *), %NULL if no mapping exists, or
- * an error pointer if there is a mapping to something not represented
- * by a page descriptor (see also vm_normal_page()).
- */
-struct page *follow_page_mask(struct vm_area_struct *vma,
- unsigned long address, unsigned int flags,
- unsigned int *page_mask)
-{
- pgd_t *pgd;
- pud_t *pud;
- pmd_t *pmd;
- pte_t *ptep, pte;
- spinlock_t *ptl;
- struct page *page;
- struct mm_struct *mm = vma->vm_mm;
-
- *page_mask = 0;
-
- page = follow_huge_addr(mm, address, flags & FOLL_WRITE);
- if (!IS_ERR(page)) {
- BUG_ON(flags & FOLL_GET);
- goto out;
- }
-
- page = NULL;
- pgd = pgd_offset(mm, address);
- if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
- goto no_page_table;
-
- pud = pud_offset(pgd, address);
- if (pud_none(*pud))
- goto no_page_table;
- if (pud_huge(*pud) && vma->vm_flags & VM_HUGETLB) {
- if (flags & FOLL_GET)
- goto out;
- page = follow_huge_pud(mm, address, pud, flags & FOLL_WRITE);
- goto out;
- }
- if (unlikely(pud_bad(*pud)))
- goto no_page_table;
-
- pmd = pmd_offset(pud, address);
- if (pmd_none(*pmd))
- goto no_page_table;
- if (pmd_huge(*pmd) && vma->vm_flags & VM_HUGETLB) {
- page = follow_huge_pmd(mm, address, pmd, flags & FOLL_WRITE);
- if (flags & FOLL_GET) {
- /*
- * Refcount on tail pages are not well-defined and
- * shouldn't be taken. The caller should handle a NULL
- * return when trying to follow tail pages.
- */
- if (PageHead(page))
- get_page(page);
- else {
- page = NULL;
- goto out;
- }
- }
- goto out;
- }
- if ((flags & FOLL_NUMA) && pmd_numa(*pmd))
- goto no_page_table;
- if (pmd_trans_huge(*pmd)) {
- if (flags & FOLL_SPLIT) {
- split_huge_page_pmd(vma, address, pmd);
- goto split_fallthrough;
- }
- ptl = pmd_lock(mm, pmd);
- if (likely(pmd_trans_huge(*pmd))) {
- if (unlikely(pmd_trans_splitting(*pmd))) {
- spin_unlock(ptl);
- wait_split_huge_page(vma->anon_vma, pmd);
- } else {
- page = follow_trans_huge_pmd(vma, address,
- pmd, flags);
- spin_unlock(ptl);
- *page_mask = HPAGE_PMD_NR - 1;
- goto out;
- }
- } else
- spin_unlock(ptl);
- /* fall through */
- }
-split_fallthrough:
- if (unlikely(pmd_bad(*pmd)))
- goto no_page_table;
-
- ptep = pte_offset_map_lock(mm, pmd, address, &ptl);
-
- pte = *ptep;
- if (!pte_present(pte)) {
- swp_entry_t entry;
- /*
- * KSM's break_ksm() relies upon recognizing a ksm page
- * even while it is being migrated, so for that case we
- * need migration_entry_wait().
- */
- if (likely(!(flags & FOLL_MIGRATION)))
- goto no_page;
- if (pte_none(pte) || pte_file(pte))
- goto no_page;
- entry = pte_to_swp_entry(pte);
- if (!is_migration_entry(entry))
- goto no_page;
- pte_unmap_unlock(ptep, ptl);
- migration_entry_wait(mm, pmd, address);
- goto split_fallthrough;
- }
- if ((flags & FOLL_NUMA) && pte_numa(pte))
- goto no_page;
- if ((flags & FOLL_WRITE) && !pte_write(pte))
- goto unlock;
-
- page = vm_normal_page(vma, address, pte);
- if (unlikely(!page)) {
- if ((flags & FOLL_DUMP) ||
- !is_zero_pfn(pte_pfn(pte)))
- goto bad_page;
- page = pte_page(pte);
- }
-
- if (flags & FOLL_GET)
- get_page_foll(page);
- if (flags & FOLL_TOUCH) {
- if ((flags & FOLL_WRITE) &&
- !pte_dirty(pte) && !PageDirty(page))
- set_page_dirty(page);
- /*
- * pte_mkyoung() would be more correct here, but atomic care
- * is needed to avoid losing the dirty bit: it is easier to use
- * mark_page_accessed().
- */
- mark_page_accessed(page);
- }
- if ((flags & FOLL_MLOCK) && (vma->vm_flags & VM_LOCKED)) {
- /*
- * The preliminary mapping check is mainly to avoid the
- * pointless overhead of lock_page on the ZERO_PAGE
- * which might bounce very badly if there is contention.
- *
- * If the page is already locked, we don't need to
- * handle it now - vmscan will handle it later if and
- * when it attempts to reclaim the page.
- */
- if (page->mapping && trylock_page(page)) {
- lru_add_drain(); /* push cached pages to LRU */
- /*
- * Because we lock page here, and migration is
- * blocked by the pte's page reference, and we
- * know the page is still mapped, we don't even
- * need to check for file-cache page truncation.
- */
- mlock_vma_page(page);
- unlock_page(page);
- }
- }
-unlock:
- pte_unmap_unlock(ptep, ptl);
-out:
- return page;
-
-bad_page:
- pte_unmap_unlock(ptep, ptl);
- return ERR_PTR(-EFAULT);
-
-no_page:
- pte_unmap_unlock(ptep, ptl);
- if (!pte_none(pte))
- return page;
-
-no_page_table:
- /*
- * When core dumping an enormous anonymous area that nobody
- * has touched so far, we don't want to allocate unnecessary pages or
- * page tables. Return error instead of NULL to skip handle_mm_fault,
- * then get_dump_page() will return NULL to leave a hole in the dump.
- * But we can only make this optimization where a hole would surely
- * be zero-filled if handle_mm_fault() actually did handle it.
- */
- if ((flags & FOLL_DUMP) &&
- (!vma->vm_ops || !vma->vm_ops->fault))
- return ERR_PTR(-EFAULT);
- return page;
-}
-
-static inline int stack_guard_page(struct vm_area_struct *vma, unsigned long addr)
-{
- return stack_guard_page_start(vma, addr) ||
- stack_guard_page_end(vma, addr+PAGE_SIZE);
-}
-
-/**
- * __get_user_pages() - pin user pages in memory
- * @tsk: task_struct of target task
- * @mm: mm_struct of target mm
- * @start: starting user address
- * @nr_pages: number of pages from start to pin
- * @gup_flags: flags modifying pin behaviour
- * @pages: array that receives pointers to the pages pinned.
- * Should be at least nr_pages long. Or NULL, if caller
- * only intends to ensure the pages are faulted in.
- * @vmas: array of pointers to vmas corresponding to each page.
- * Or NULL if the caller does not require them.
- * @nonblocking: whether waiting for disk IO or mmap_sem contention
- *
- * Returns number of pages pinned. This may be fewer than the number
- * requested. If nr_pages is 0 or negative, returns 0. If no pages
- * were pinned, returns -errno. Each page returned must be released
- * with a put_page() call when it is finished with. vmas will only
- * remain valid while mmap_sem is held.
- *
- * Must be called with mmap_sem held for read or write.
- *
- * __get_user_pages walks a process's page tables and takes a reference to
- * each struct page that each user address corresponds to at a given
- * instant. That is, it takes the page that would be accessed if a user
- * thread accesses the given user virtual address at that instant.
- *
- * This does not guarantee that the page exists in the user mappings when
- * __get_user_pages returns, and there may even be a completely different
- * page there in some cases (eg. if mmapped pagecache has been invalidated
- * and subsequently re faulted). However it does guarantee that the page
- * won't be freed completely. And mostly callers simply care that the page
- * contains data that was valid *at some point in time*. Typically, an IO
- * or similar operation cannot guarantee anything stronger anyway because
- * locks can't be held over the syscall boundary.
- *
- * If @gup_flags & FOLL_WRITE == 0, the page must not be written to. If
- * the page is written to, set_page_dirty (or set_page_dirty_lock, as
- * appropriate) must be called after the page is finished with, and
- * before put_page is called.
- *
- * If @nonblocking != NULL, __get_user_pages will not wait for disk IO
- * or mmap_sem contention, and if waiting is needed to pin all pages,
- * *@nonblocking will be set to 0.
- *
- * In most cases, get_user_pages or get_user_pages_fast should be used
- * instead of __get_user_pages. __get_user_pages should be used only if
- * you need some special @gup_flags.
- */
-long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
- unsigned long start, unsigned long nr_pages,
- unsigned int gup_flags, struct page **pages,
- struct vm_area_struct **vmas, int *nonblocking)
-{
- long i;
- unsigned long vm_flags;
- unsigned int page_mask;
-
- if (!nr_pages)
- return 0;
-
- VM_BUG_ON(!!pages != !!(gup_flags & FOLL_GET));
-
- /*
- * If FOLL_FORCE and FOLL_NUMA are both set, handle_mm_fault
- * would be called on PROT_NONE ranges. We must never invoke
- * handle_mm_fault on PROT_NONE ranges or the NUMA hinting
- * page faults would unprotect the PROT_NONE ranges if
- * _PAGE_NUMA and _PAGE_PROTNONE are sharing the same pte/pmd
- * bitflag. So to avoid that, don't set FOLL_NUMA if
- * FOLL_FORCE is set.
- */
- if (!(gup_flags & FOLL_FORCE))
- gup_flags |= FOLL_NUMA;
-
- i = 0;
-
- do {
- struct vm_area_struct *vma;
-
- vma = find_extend_vma(mm, start);
- if (!vma && in_gate_area(mm, start)) {
- unsigned long pg = start & PAGE_MASK;
- pgd_t *pgd;
- pud_t *pud;
- pmd_t *pmd;
- pte_t *pte;
-
- /* user gate pages are read-only */
- if (gup_flags & FOLL_WRITE)
- goto efault;
- if (pg > TASK_SIZE)
- pgd = pgd_offset_k(pg);
- else
- pgd = pgd_offset_gate(mm, pg);
- BUG_ON(pgd_none(*pgd));
- pud = pud_offset(pgd, pg);
- BUG_ON(pud_none(*pud));
- pmd = pmd_offset(pud, pg);
- if (pmd_none(*pmd))
- goto efault;
- VM_BUG_ON(pmd_trans_huge(*pmd));
- pte = pte_offset_map(pmd, pg);
- if (pte_none(*pte)) {
- pte_unmap(pte);
- goto efault;
- }
- vma = get_gate_vma(mm);
- if (pages) {
- struct page *page;
-
- page = vm_normal_page(vma, start, *pte);
- if (!page) {
- if (!(gup_flags & FOLL_DUMP) &&
- is_zero_pfn(pte_pfn(*pte)))
- page = pte_page(*pte);
- else {
- pte_unmap(pte);
- goto efault;
- }
- }
- pages[i] = page;
- get_page(page);
- }
- pte_unmap(pte);
- page_mask = 0;
- goto next_page;
- }
-
- if (!vma)
- goto efault;
- vm_flags = vma->vm_flags;
- if (vm_flags & (VM_IO | VM_PFNMAP))
- goto efault;
-
- if (gup_flags & FOLL_WRITE) {
- if (!(vm_flags & VM_WRITE)) {
- if (!(gup_flags & FOLL_FORCE))
- goto efault;
- /*
- * We used to let the write,force case do COW
- * in a VM_MAYWRITE VM_SHARED !VM_WRITE vma, so
- * ptrace could set a breakpoint in a read-only
- * mapping of an executable, without corrupting
- * the file (yet only when that file had been
- * opened for writing!). Anon pages in shared
- * mappings are surprising: now just reject it.
- */
- if (!is_cow_mapping(vm_flags)) {
- WARN_ON_ONCE(vm_flags & VM_MAYWRITE);
- goto efault;
- }
- }
- } else {
- if (!(vm_flags & VM_READ)) {
- if (!(gup_flags & FOLL_FORCE))
- goto efault;
- /*
- * Is there actually any vma we can reach here
- * which does not have VM_MAYREAD set?
- */
- if (!(vm_flags & VM_MAYREAD))
- goto efault;
- }
- }
-
- if (is_vm_hugetlb_page(vma)) {
- i = follow_hugetlb_page(mm, vma, pages, vmas,
- &start, &nr_pages, i, gup_flags);
- continue;
- }
-
- do {
- struct page *page;
- unsigned int foll_flags = gup_flags;
- unsigned int page_increm;
-
- /*
- * If we have a pending SIGKILL, don't keep faulting
- * pages and potentially allocating memory.
- */
- if (unlikely(fatal_signal_pending(current)))
- return i ? i : -ERESTARTSYS;
-
- cond_resched();
- while (!(page = follow_page_mask(vma, start,
- foll_flags, &page_mask))) {
- int ret;
- unsigned int fault_flags = 0;
-
- /* For mlock, just skip the stack guard page. */
- if (foll_flags & FOLL_MLOCK) {
- if (stack_guard_page(vma, start))
- goto next_page;
- }
- if (foll_flags & FOLL_WRITE)
- fault_flags |= FAULT_FLAG_WRITE;
- if (nonblocking)
- fault_flags |= FAULT_FLAG_ALLOW_RETRY;
- if (foll_flags & FOLL_NOWAIT)
- fault_flags |= (FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_RETRY_NOWAIT);
-
- ret = handle_mm_fault(mm, vma, start,
- fault_flags);
-
- if (ret & VM_FAULT_ERROR) {
- if (ret & VM_FAULT_OOM)
- return i ? i : -ENOMEM;
- if (ret & (VM_FAULT_HWPOISON |
- VM_FAULT_HWPOISON_LARGE)) {
- if (i)
- return i;
- else if (gup_flags & FOLL_HWPOISON)
- return -EHWPOISON;
- else
- return -EFAULT;
- }
- if (ret & VM_FAULT_SIGBUS)
- goto efault;
- BUG();
- }
-
- if (tsk) {
- if (ret & VM_FAULT_MAJOR)
- tsk->maj_flt++;
- else
- tsk->min_flt++;
- }
-
- if (ret & VM_FAULT_RETRY) {
- if (nonblocking)
- *nonblocking = 0;
- return i;
- }
-
- /*
- * The VM_FAULT_WRITE bit tells us that
- * do_wp_page has broken COW when necessary,
- * even if maybe_mkwrite decided not to set
- * pte_write. We can thus safely do subsequent
- * page lookups as if they were reads. But only
- * do so when looping for pte_write is futile:
- * in some cases userspace may also be wanting
- * to write to the gotten user page, which a
- * read fault here might prevent (a readonly
- * page might get reCOWed by userspace write).
- */
- if ((ret & VM_FAULT_WRITE) &&
- !(vma->vm_flags & VM_WRITE))
- foll_flags &= ~FOLL_WRITE;
-
- cond_resched();
- }
- if (IS_ERR(page))
- return i ? i : PTR_ERR(page);
- if (pages) {
- pages[i] = page;
-
- flush_anon_page(vma, page, start);
- flush_dcache_page(page);
- page_mask = 0;
- }
-next_page:
- if (vmas) {
- vmas[i] = vma;
- page_mask = 0;
- }
- page_increm = 1 + (~(start >> PAGE_SHIFT) & page_mask);
- if (page_increm > nr_pages)
- page_increm = nr_pages;
- i += page_increm;
- start += page_increm * PAGE_SIZE;
- nr_pages -= page_increm;
- } while (nr_pages && start < vma->vm_end);
- } while (nr_pages);
- return i;
-efault:
- return i ? : -EFAULT;
-}
-EXPORT_SYMBOL(__get_user_pages);
-
-/*
- * fixup_user_fault() - manually resolve a user page fault
- * @tsk: the task_struct to use for page fault accounting, or
- * NULL if faults are not to be recorded.
- * @mm: mm_struct of target mm
- * @address: user address
- * @fault_flags:flags to pass down to handle_mm_fault()
- *
- * This is meant to be called in the specific scenario where for locking reasons
- * we try to access user memory in atomic context (within a pagefault_disable()
- * section), this returns -EFAULT, and we want to resolve the user fault before
- * trying again.
- *
- * Typically this is meant to be used by the futex code.
- *
- * The main difference with get_user_pages() is that this function will
- * unconditionally call handle_mm_fault() which will in turn perform all the
- * necessary SW fixup of the dirty and young bits in the PTE, while
- * handle_mm_fault() only guarantees to update these in the struct page.
- *
- * This is important for some architectures where those bits also gate the
- * access permission to the page because they are maintained in software. On
- * such architectures, gup() will not be enough to make a subsequent access
- * succeed.
- *
- * This should be called with the mm_sem held for read.
- */
-int fixup_user_fault(struct task_struct *tsk, struct mm_struct *mm,
- unsigned long address, unsigned int fault_flags)
-{
- struct vm_area_struct *vma;
- vm_flags_t vm_flags;
- int ret;
-
- vma = find_extend_vma(mm, address);
- if (!vma || address < vma->vm_start)
- return -EFAULT;
-
- vm_flags = (fault_flags & FAULT_FLAG_WRITE) ? VM_WRITE : VM_READ;
- if (!(vm_flags & vma->vm_flags))
- return -EFAULT;
-
- ret = handle_mm_fault(mm, vma, address, fault_flags);
- if (ret & VM_FAULT_ERROR) {
- if (ret & VM_FAULT_OOM)
- return -ENOMEM;
- if (ret & (VM_FAULT_HWPOISON | VM_FAULT_HWPOISON_LARGE))
- return -EHWPOISON;
- if (ret & VM_FAULT_SIGBUS)
- return -EFAULT;
- BUG();
- }
- if (tsk) {
- if (ret & VM_FAULT_MAJOR)
- tsk->maj_flt++;
- else
- tsk->min_flt++;
- }
- return 0;
-}
-
-/*
- * get_user_pages() - pin user pages in memory
- * @tsk: the task_struct to use for page fault accounting, or
- * NULL if faults are not to be recorded.
- * @mm: mm_struct of target mm
- * @start: starting user address
- * @nr_pages: number of pages from start to pin
- * @write: whether pages will be written to by the caller
- * @force: whether to force access even when user mapping is currently
- * protected (but never forces write access to shared mapping).
- * @pages: array that receives pointers to the pages pinned.
- * Should be at least nr_pages long. Or NULL, if caller
- * only intends to ensure the pages are faulted in.
- * @vmas: array of pointers to vmas corresponding to each page.
- * Or NULL if the caller does not require them.
- *
- * Returns number of pages pinned. This may be fewer than the number
- * requested. If nr_pages is 0 or negative, returns 0. If no pages
- * were pinned, returns -errno. Each page returned must be released
- * with a put_page() call when it is finished with. vmas will only
- * remain valid while mmap_sem is held.
- *
- * Must be called with mmap_sem held for read or write.
- *
- * get_user_pages walks a process's page tables and takes a reference to
- * each struct page that each user address corresponds to at a given
- * instant. That is, it takes the page that would be accessed if a user
- * thread accesses the given user virtual address at that instant.
- *
- * This does not guarantee that the page exists in the user mappings when
- * get_user_pages returns, and there may even be a completely different
- * page there in some cases (eg. if mmapped pagecache has been invalidated
- * and subsequently re faulted). However it does guarantee that the page
- * won't be freed completely. And mostly callers simply care that the page
- * contains data that was valid *at some point in time*. Typically, an IO
- * or similar operation cannot guarantee anything stronger anyway because
- * locks can't be held over the syscall boundary.
- *
- * If write=0, the page must not be written to. If the page is written to,
- * set_page_dirty (or set_page_dirty_lock, as appropriate) must be called
- * after the page is finished with, and before put_page is called.
- *
- * get_user_pages is typically used for fewer-copy IO operations, to get a
- * handle on the memory by some means other than accesses via the user virtual
- * addresses. The pages may be submitted for DMA to devices or accessed via
- * their kernel linear mapping (via the kmap APIs). Care should be taken to
- * use the correct cache flushing APIs.
- *
- * See also get_user_pages_fast, for performance critical applications.
- */
-long get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
- unsigned long start, unsigned long nr_pages, int write,
- int force, struct page **pages, struct vm_area_struct **vmas)
-{
- int flags = FOLL_TOUCH;
-
- if (pages)
- flags |= FOLL_GET;
- if (write)
- flags |= FOLL_WRITE;
- if (force)
- flags |= FOLL_FORCE;
-
- return __get_user_pages(tsk, mm, start, nr_pages, flags, pages, vmas,
- NULL);
-}
-EXPORT_SYMBOL(get_user_pages);
-
-/**
- * get_dump_page() - pin user page in memory while writing it to core dump
- * @addr: user address
- *
- * Returns struct page pointer of user page pinned for dump,
- * to be freed afterwards by page_cache_release() or put_page().
- *
- * Returns NULL on any kind of failure - a hole must then be inserted into
- * the corefile, to preserve alignment with its headers; and also returns
- * NULL wherever the ZERO_PAGE, or an anonymous pte_none, has been found -
- * allowing a hole to be left in the corefile to save diskspace.
- *
- * Called without mmap_sem, but after all other threads have been killed.
- */
-#ifdef CONFIG_ELF_CORE
-struct page *get_dump_page(unsigned long addr)
-{
- struct vm_area_struct *vma;
- struct page *page;
-
- if (__get_user_pages(current, current->mm, addr, 1,
- FOLL_FORCE | FOLL_DUMP | FOLL_GET, &page, &vma,
- NULL) < 1)
- return NULL;
- flush_cache_page(vma, addr, page_to_pfn(page));
- return page;
-}
-#endif /* CONFIG_ELF_CORE */
-
pte_t *__get_locked_pte(struct mm_struct *mm, unsigned long addr,
spinlock_t **ptl)
{
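
The block deleted above reads as code motion rather than loss: the
follow_page_mask()/get_user_pages() family presumably moves to its own file
elsewhere in this series (an assumption; the destination is not in this
diff). The caller contract is unchanged; a hedged reference sketch:

static int pin_one_page(struct mm_struct *mm, unsigned long addr)
{
        struct page *page;
        long ret;

        down_read(&mm->mmap_sem);
        ret = get_user_pages(current, mm, addr, 1, 0 /* write */,
                             0 /* force */, &page, NULL);
        up_read(&mm->mmap_sem);
        if (ret != 1)
                return ret < 0 ? ret : -EFAULT;
        /* ... use the pinned page ... */
        put_page(page);         /* every returned page must be released */
        return 0;
}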
@@ -3598,6 +2954,8 @@ static int do_shared_fault(struct mm_struct *mm, struct vm_area_struct *vma,
int dirtied = 0;
int ret, tmp;
+ WARN_ON_ONCE(!rwsem_is_locked(&mm->mmap_sem));
+
ret = __do_fault(vma, address, pgoff, flags, &fault_page);
if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY)))
return ret;
@@ -3628,6 +2986,12 @@ static int do_shared_fault(struct mm_struct *mm, struct vm_area_struct *vma,
if (set_page_dirty(fault_page))
dirtied = 1;
+ /*
+ * Take a local copy of the address_space - page.mapping may be zeroed
+ * by truncate after unlock_page(). The address_space itself remains
+ * pinned by vma->vm_file's reference. We rely on unlock_page()'s
+ * release semantics to prevent the compiler from undoing this copying.
+ */
mapping = fault_page->mapping;
unlock_page(fault_page);
if ((dirtied || vma->vm_ops->page_mkwrite) && mapping) {
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index a650db29606f..469bbf505f85 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -46,19 +46,84 @@
static void generic_online_page(struct page *page);
static online_page_callback_t online_page_callback = generic_online_page;
+static DEFINE_MUTEX(online_page_callback_lock);
-DEFINE_MUTEX(mem_hotplug_mutex);
+/* The same as the cpu_hotplug lock, but for memory hotplug. */
+static struct {
+ struct task_struct *active_writer;
+ struct mutex lock; /* Synchronizes accesses to refcount, */
+ /*
+ * Also blocks the new readers during
+ * an ongoing mem hotplug operation.
+ */
+ int refcount;
+
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+ struct lockdep_map dep_map;
+#endif
+} mem_hotplug = {
+ .active_writer = NULL,
+ .lock = __MUTEX_INITIALIZER(mem_hotplug.lock),
+ .refcount = 0,
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+ .dep_map = {.name = "mem_hotplug.lock" },
+#endif
+};
+
+/* Lockdep annotations for get/put_online_mems() and mem_hotplug_begin/end() */
+#define memhp_lock_acquire_read() lock_map_acquire_read(&mem_hotplug.dep_map)
+#define memhp_lock_acquire() lock_map_acquire(&mem_hotplug.dep_map)
+#define memhp_lock_release() lock_map_release(&mem_hotplug.dep_map)
+
+void get_online_mems(void)
+{
+ might_sleep();
+ if (mem_hotplug.active_writer == current)
+ return;
+ memhp_lock_acquire_read();
+ mutex_lock(&mem_hotplug.lock);
+ mem_hotplug.refcount++;
+ mutex_unlock(&mem_hotplug.lock);
+
+}
-void lock_memory_hotplug(void)
+void put_online_mems(void)
{
- mutex_lock(&mem_hotplug_mutex);
+ if (mem_hotplug.active_writer == current)
+ return;
+ mutex_lock(&mem_hotplug.lock);
+
+ if (WARN_ON(!mem_hotplug.refcount))
+ mem_hotplug.refcount++; /* try to fix things up */
+
+ if (!--mem_hotplug.refcount && unlikely(mem_hotplug.active_writer))
+ wake_up_process(mem_hotplug.active_writer);
+ mutex_unlock(&mem_hotplug.lock);
+ memhp_lock_release();
+
}
-void unlock_memory_hotplug(void)
+static void mem_hotplug_begin(void)
{
- mutex_unlock(&mem_hotplug_mutex);
+ mem_hotplug.active_writer = current;
+
+ memhp_lock_acquire();
+ for (;;) {
+ mutex_lock(&mem_hotplug.lock);
+ if (likely(!mem_hotplug.refcount))
+ break;
+ __set_current_state(TASK_UNINTERRUPTIBLE);
+ mutex_unlock(&mem_hotplug.lock);
+ schedule();
+ }
}
+static void mem_hotplug_done(void)
+{
+ mem_hotplug.active_writer = NULL;
+ mutex_unlock(&mem_hotplug.lock);
+ memhp_lock_release();
+}
/* add this memory to iomem resource */
static struct resource *register_memory_resource(u64 start, u64 size)
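
The new mem_hotplug structure mirrors the cpu_hotplug refcount lock: readers
bump a count under a mutex, while the writer parks until the count drains,
re-checking under the same mutex, and may re-enter as a reader. A condensed
sketch of the scheme with invented names (lockdep annotations omitted):

struct gate {
        struct task_struct *writer;
        struct mutex lock;
        int readers;
};

static void gate_read_lock(struct gate *g)
{
        if (g->writer == current)       /* writer recursing as a reader */
                return;
        mutex_lock(&g->lock);
        g->readers++;
        mutex_unlock(&g->lock);
}

static void gate_read_unlock(struct gate *g)
{
        if (g->writer == current)
                return;
        mutex_lock(&g->lock);
        if (!--g->readers && g->writer)
                wake_up_process(g->writer);     /* last reader out */
        mutex_unlock(&g->lock);
}

static void gate_write_lock(struct gate *g)
{
        g->writer = current;
        for (;;) {
                mutex_lock(&g->lock);
                if (!g->readers)
                        break;          /* keep holding: blocks new readers */
                __set_current_state(TASK_UNINTERRUPTIBLE);
                mutex_unlock(&g->lock);
                schedule();
        }
}

static void gate_write_unlock(struct gate *g)
{
        g->writer = NULL;
        mutex_unlock(&g->lock);
}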
@@ -727,14 +792,16 @@ int set_online_page_callback(online_page_callback_t callback)
{
int rc = -EINVAL;
- lock_memory_hotplug();
+ get_online_mems();
+ mutex_lock(&online_page_callback_lock);
if (online_page_callback == generic_online_page) {
online_page_callback = callback;
rc = 0;
}
- unlock_memory_hotplug();
+ mutex_unlock(&online_page_callback_lock);
+ put_online_mems();
return rc;
}
@@ -744,14 +811,16 @@ int restore_online_page_callback(online_page_callback_t callback)
{
int rc = -EINVAL;
- lock_memory_hotplug();
+ get_online_mems();
+ mutex_lock(&online_page_callback_lock);
if (online_page_callback == callback) {
online_page_callback = generic_online_page;
rc = 0;
}
- unlock_memory_hotplug();
+ mutex_unlock(&online_page_callback_lock);
+ put_online_mems();
return rc;
}
@@ -899,7 +968,7 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages, int online_typ
int ret;
struct memory_notify arg;
- lock_memory_hotplug();
+ mem_hotplug_begin();
/*
* This doesn't need a lock to do pfn_to_page().
* The section can't be removed here because of the
@@ -907,23 +976,18 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages, int online_typ
*/
zone = page_zone(pfn_to_page(pfn));
+ ret = -EINVAL;
if ((zone_idx(zone) > ZONE_NORMAL || online_type == ONLINE_MOVABLE) &&
- !can_online_high_movable(zone)) {
- unlock_memory_hotplug();
- return -EINVAL;
- }
+ !can_online_high_movable(zone))
+ goto out;
if (online_type == ONLINE_KERNEL && zone_idx(zone) == ZONE_MOVABLE) {
- if (move_pfn_range_left(zone - 1, zone, pfn, pfn + nr_pages)) {
- unlock_memory_hotplug();
- return -EINVAL;
- }
+ if (move_pfn_range_left(zone - 1, zone, pfn, pfn + nr_pages))
+ goto out;
}
if (online_type == ONLINE_MOVABLE && zone_idx(zone) == ZONE_MOVABLE - 1) {
- if (move_pfn_range_right(zone, zone + 1, pfn, pfn + nr_pages)) {
- unlock_memory_hotplug();
- return -EINVAL;
- }
+ if (move_pfn_range_right(zone, zone + 1, pfn, pfn + nr_pages))
+ goto out;
}
/* Previous code may changed the zone of the pfn range */
@@ -939,8 +1003,7 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages, int online_typ
ret = notifier_to_errno(ret);
if (ret) {
memory_notify(MEM_CANCEL_ONLINE, &arg);
- unlock_memory_hotplug();
- return ret;
+ goto out;
}
/*
* If this zone is not populated, then it is not in zonelist.
@@ -964,8 +1027,7 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages, int online_typ
(((unsigned long long) pfn + nr_pages)
<< PAGE_SHIFT) - 1);
memory_notify(MEM_CANCEL_ONLINE, &arg);
- unlock_memory_hotplug();
- return ret;
+ goto out;
}
zone->present_pages += onlined_pages;
@@ -995,9 +1057,9 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages, int online_typ
if (onlined_pages)
memory_notify(MEM_ONLINE, &arg);
- unlock_memory_hotplug();
-
- return 0;
+out:
+ mem_hotplug_done();
+ return ret;
}
#endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */
@@ -1007,7 +1069,7 @@ static pg_data_t __ref *hotadd_new_pgdat(int nid, u64 start)
struct pglist_data *pgdat;
unsigned long zones_size[MAX_NR_ZONES] = {0};
unsigned long zholes_size[MAX_NR_ZONES] = {0};
- unsigned long start_pfn = start >> PAGE_SHIFT;
+ unsigned long start_pfn = PFN_DOWN(start);
pgdat = NODE_DATA(nid);
if (!pgdat) {
@@ -1055,7 +1117,7 @@ int try_online_node(int nid)
if (node_online(nid))
return 0;
- lock_memory_hotplug();
+ mem_hotplug_begin();
pgdat = hotadd_new_pgdat(nid, 0);
if (!pgdat) {
pr_err("Cannot online node %d due to NULL pgdat\n", nid);
@@ -1073,13 +1135,13 @@ int try_online_node(int nid)
}
out:
- unlock_memory_hotplug();
+ mem_hotplug_done();
return ret;
}
static int check_hotplug_memory_range(u64 start, u64 size)
{
- u64 start_pfn = start >> PAGE_SHIFT;
+ u64 start_pfn = PFN_DOWN(start);
u64 nr_pages = size >> PAGE_SHIFT;
/* Memory range must be aligned with section */
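
PFN_DOWN()/PFN_UP() are the rounding shifts spelled with intent, so readers
need not re-derive the direction of the conversion; for reference, equivalent
definitions (the real ones live in include/linux/pfn.h):

#define EXAMPLE_PFN_DOWN(x)     ((x) >> PAGE_SHIFT)
#define EXAMPLE_PFN_UP(x)       (((x) + PAGE_SIZE - 1) >> PAGE_SHIFT)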
@@ -1117,7 +1179,7 @@ int __ref add_memory(int nid, u64 start, u64 size)
new_pgdat = !p;
}
- lock_memory_hotplug();
+ mem_hotplug_begin();
new_node = !node_online(nid);
if (new_node) {
@@ -1158,7 +1220,7 @@ error:
release_memory_resource(res);
out:
- unlock_memory_hotplug();
+ mem_hotplug_done();
return ret;
}
EXPORT_SYMBOL_GPL(add_memory);
@@ -1332,7 +1394,7 @@ do_migrate_range(unsigned long start_pfn, unsigned long end_pfn)
* alloc_migrate_target should be improooooved!!
* migrate_pages returns # of failed pages.
*/
- ret = migrate_pages(&source, alloc_migrate_target, 0,
+ ret = migrate_pages(&source, alloc_migrate_target, NULL, 0,
MIGRATE_SYNC, MR_MEMORY_HOTPLUG);
if (ret)
putback_movable_pages(&source);
@@ -1565,7 +1627,7 @@ static int __ref __offline_pages(unsigned long start_pfn,
if (!test_pages_in_a_zone(start_pfn, end_pfn))
return -EINVAL;
- lock_memory_hotplug();
+ mem_hotplug_begin();
zone = page_zone(pfn_to_page(start_pfn));
node = zone_to_nid(zone);
@@ -1672,7 +1734,7 @@ repeat:
writeback_set_ratelimit();
memory_notify(MEM_OFFLINE, &arg);
- unlock_memory_hotplug();
+ mem_hotplug_done();
return 0;
failed_removal:
@@ -1684,7 +1746,7 @@ failed_removal:
undo_isolate_page_range(start_pfn, end_pfn, MIGRATE_MOVABLE);
out:
- unlock_memory_hotplug();
+ mem_hotplug_done();
return ret;
}
@@ -1888,7 +1950,7 @@ void __ref remove_memory(int nid, u64 start, u64 size)
BUG_ON(check_hotplug_memory_range(start, size));
- lock_memory_hotplug();
+ mem_hotplug_begin();
/*
* All memory blocks must be offlined before removing memory. Check
@@ -1897,10 +1959,8 @@ void __ref remove_memory(int nid, u64 start, u64 size)
*/
ret = walk_memory_range(PFN_DOWN(start), PFN_UP(start + size - 1), NULL,
check_memblock_offlined_cb);
- if (ret) {
- unlock_memory_hotplug();
+ if (ret)
BUG();
- }
/* remove memmap entry */
firmware_map_remove(start, start + size, "System RAM");
@@ -1909,7 +1969,7 @@ void __ref remove_memory(int nid, u64 start, u64 size)
try_offline_node(nid);
- unlock_memory_hotplug();
+ mem_hotplug_done();
}
EXPORT_SYMBOL_GPL(remove_memory);
#endif /* CONFIG_MEMORY_HOTREMOVE */
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 78e1472933ea..8bc119909e1c 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -65,6 +65,8 @@
kernel is not always grateful with that.
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/mempolicy.h>
#include <linux/mm.h>
#include <linux/highmem.h>
@@ -91,6 +93,7 @@
#include <linux/ctype.h>
#include <linux/mm_inline.h>
#include <linux/mmu_notifier.h>
+#include <linux/printk.h>
#include <asm/tlbflush.h>
#include <asm/uaccess.h>
@@ -476,140 +479,70 @@ static const struct mempolicy_operations mpol_ops[MPOL_MAX] = {
static void migrate_page_add(struct page *page, struct list_head *pagelist,
unsigned long flags);
+struct queue_pages {
+ struct list_head *pagelist;
+ unsigned long flags;
+ nodemask_t *nmask;
+ struct vm_area_struct *prev;
+};
+
/*
* Scan through pages checking if pages follow certain conditions,
* and move them to the pagelist if they do.
*/
-static int queue_pages_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
- unsigned long addr, unsigned long end,
- const nodemask_t *nodes, unsigned long flags,
- void *private)
+static int queue_pages_pte(pte_t *pte, unsigned long addr,
+ unsigned long next, struct mm_walk *walk)
{
- pte_t *orig_pte;
- pte_t *pte;
- spinlock_t *ptl;
-
- orig_pte = pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
- do {
- struct page *page;
- int nid;
+ struct vm_area_struct *vma = walk->vma;
+ struct page *page;
+ struct queue_pages *qp = walk->private;
+ unsigned long flags = qp->flags;
+ int nid;
- if (!pte_present(*pte))
- continue;
- page = vm_normal_page(vma, addr, *pte);
- if (!page)
- continue;
- /*
- * vm_normal_page() filters out zero pages, but there might
- * still be PageReserved pages to skip, perhaps in a VDSO.
- */
- if (PageReserved(page))
- continue;
- nid = page_to_nid(page);
- if (node_isset(nid, *nodes) == !!(flags & MPOL_MF_INVERT))
- continue;
+ if (!pte_present(*pte))
+ return 0;
+ page = vm_normal_page(vma, addr, *pte);
+ if (!page)
+ return 0;
+ /*
+ * vm_normal_page() filters out zero pages, but there might
+ * still be PageReserved pages to skip, perhaps in a VDSO.
+ */
+ if (PageReserved(page))
+ return 0;
+ nid = page_to_nid(page);
+ if (node_isset(nid, *qp->nmask) == !!(flags & MPOL_MF_INVERT))
+ return 0;
- if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL))
- migrate_page_add(page, private, flags);
- else
- break;
- } while (pte++, addr += PAGE_SIZE, addr != end);
- pte_unmap_unlock(orig_pte, ptl);
- return addr != end;
+ if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL))
+ migrate_page_add(page, qp->pagelist, flags);
+ return 0;
}
-static void queue_pages_hugetlb_pmd_range(struct vm_area_struct *vma,
- pmd_t *pmd, const nodemask_t *nodes, unsigned long flags,
- void *private)
+static int queue_pages_hugetlb(pte_t *pte, unsigned long addr,
+ unsigned long next, struct mm_walk *walk)
{
#ifdef CONFIG_HUGETLB_PAGE
+ struct queue_pages *qp = walk->private;
+ unsigned long flags = qp->flags;
int nid;
struct page *page;
- spinlock_t *ptl;
+ pte_t entry;
- ptl = huge_pte_lock(hstate_vma(vma), vma->vm_mm, (pte_t *)pmd);
- page = pte_page(huge_ptep_get((pte_t *)pmd));
+ entry = huge_ptep_get(pte);
+ if (!pte_present(entry))
+ return 0;
+ page = pte_page(entry);
nid = page_to_nid(page);
- if (node_isset(nid, *nodes) == !!(flags & MPOL_MF_INVERT))
- goto unlock;
+ if (node_isset(nid, *qp->nmask) == !!(flags & MPOL_MF_INVERT))
+ return 0;
/* With MPOL_MF_MOVE, we migrate only unshared hugepage. */
if (flags & (MPOL_MF_MOVE_ALL) ||
(flags & MPOL_MF_MOVE && page_mapcount(page) == 1))
- isolate_huge_page(page, private);
-unlock:
- spin_unlock(ptl);
+ isolate_huge_page(page, qp->pagelist);
#else
BUG();
#endif
-}
-
-static inline int queue_pages_pmd_range(struct vm_area_struct *vma, pud_t *pud,
- unsigned long addr, unsigned long end,
- const nodemask_t *nodes, unsigned long flags,
- void *private)
-{
- pmd_t *pmd;
- unsigned long next;
-
- pmd = pmd_offset(pud, addr);
- do {
- next = pmd_addr_end(addr, end);
- if (!pmd_present(*pmd))
- continue;
- if (pmd_huge(*pmd) && is_vm_hugetlb_page(vma)) {
- queue_pages_hugetlb_pmd_range(vma, pmd, nodes,
- flags, private);
- continue;
- }
- split_huge_page_pmd(vma, addr, pmd);
- if (pmd_none_or_trans_huge_or_clear_bad(pmd))
- continue;
- if (queue_pages_pte_range(vma, pmd, addr, next, nodes,
- flags, private))
- return -EIO;
- } while (pmd++, addr = next, addr != end);
- return 0;
-}
-
-static inline int queue_pages_pud_range(struct vm_area_struct *vma, pgd_t *pgd,
- unsigned long addr, unsigned long end,
- const nodemask_t *nodes, unsigned long flags,
- void *private)
-{
- pud_t *pud;
- unsigned long next;
-
- pud = pud_offset(pgd, addr);
- do {
- next = pud_addr_end(addr, end);
- if (pud_huge(*pud) && is_vm_hugetlb_page(vma))
- continue;
- if (pud_none_or_clear_bad(pud))
- continue;
- if (queue_pages_pmd_range(vma, pud, addr, next, nodes,
- flags, private))
- return -EIO;
- } while (pud++, addr = next, addr != end);
- return 0;
-}
-
-static inline int queue_pages_pgd_range(struct vm_area_struct *vma,
- unsigned long addr, unsigned long end,
- const nodemask_t *nodes, unsigned long flags,
- void *private)
-{
- pgd_t *pgd;
- unsigned long next;
-
- pgd = pgd_offset(vma->vm_mm, addr);
- do {
- next = pgd_addr_end(addr, end);
- if (pgd_none_or_clear_bad(pgd))
- continue;
- if (queue_pages_pud_range(vma, pgd, addr, next, nodes,
- flags, private))
- return -EIO;
- } while (pgd++, addr = next, addr != end);
return 0;
}
@@ -642,6 +575,45 @@ static unsigned long change_prot_numa(struct vm_area_struct *vma,
}
#endif /* CONFIG_NUMA_BALANCING */
+static int queue_pages_test_walk(unsigned long start, unsigned long end,
+ struct mm_walk *walk)
+{
+ struct vm_area_struct *vma = walk->vma;
+ struct queue_pages *qp = walk->private;
+ unsigned long endvma = vma->vm_end;
+ unsigned long flags = qp->flags;
+
+ if (endvma > end)
+ endvma = end;
+ if (vma->vm_start > start)
+ start = vma->vm_start;
+
+ if (!(flags & MPOL_MF_DISCONTIG_OK)) {
+ if (!vma->vm_next && vma->vm_end < end)
+ return -EFAULT;
+ if (qp->prev && qp->prev->vm_end < vma->vm_start)
+ return -EFAULT;
+ }
+
+ qp->prev = vma;
+ walk->skip = 1;
+
+ if (vma->vm_flags & VM_PFNMAP)
+ return 0;
+
+ if (flags & MPOL_MF_LAZY) {
+ change_prot_numa(vma, start, endvma);
+ return 0;
+ }
+
+ if ((flags & MPOL_MF_STRICT) ||
+ ((flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) &&
+ vma_migratable(vma)))
+ /* queue pages from current vma */
+ walk->skip = 0;
+ return 0;
+}
+
/*
* Walk through page tables and collect pages to be migrated.
*
@@ -651,51 +623,29 @@ static unsigned long change_prot_numa(struct vm_area_struct *vma,
*/
static struct vm_area_struct *
queue_pages_range(struct mm_struct *mm, unsigned long start, unsigned long end,
- const nodemask_t *nodes, unsigned long flags, void *private)
+ nodemask_t *nodes, unsigned long flags,
+ struct list_head *pagelist)
{
int err;
- struct vm_area_struct *first, *vma, *prev;
-
-
- first = find_vma(mm, start);
- if (!first)
- return ERR_PTR(-EFAULT);
- prev = NULL;
- for (vma = first; vma && vma->vm_start < end; vma = vma->vm_next) {
- unsigned long endvma = vma->vm_end;
-
- if (endvma > end)
- endvma = end;
- if (vma->vm_start > start)
- start = vma->vm_start;
-
- if (!(flags & MPOL_MF_DISCONTIG_OK)) {
- if (!vma->vm_next && vma->vm_end < end)
- return ERR_PTR(-EFAULT);
- if (prev && prev->vm_end < vma->vm_start)
- return ERR_PTR(-EFAULT);
- }
-
- if (flags & MPOL_MF_LAZY) {
- change_prot_numa(vma, start, endvma);
- goto next;
- }
-
- if ((flags & MPOL_MF_STRICT) ||
- ((flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) &&
- vma_migratable(vma))) {
-
- err = queue_pages_pgd_range(vma, start, endvma, nodes,
- flags, private);
- if (err) {
- first = ERR_PTR(err);
- break;
- }
- }
-next:
- prev = vma;
- }
- return first;
+ struct queue_pages qp = {
+ .pagelist = pagelist,
+ .flags = flags,
+ .nmask = nodes,
+ .prev = NULL,
+ };
+ struct mm_walk queue_pages_walk = {
+ .hugetlb_entry = queue_pages_hugetlb,
+ .pte_entry = queue_pages_pte,
+ .test_walk = queue_pages_test_walk,
+ .mm = mm,
+ .private = &qp,
+ };
+
+ err = walk_page_range(start, end, &queue_pages_walk);
+ if (err < 0)
+ return ERR_PTR(err);
+ else
+ return find_vma(mm, start);
}
/*
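The rewrite above drops four hand-rolled page-table loops and instead threads one struct queue_pages through struct mm_walk callbacks via walk->private. A compact userspace sketch of that callback-plus-private-state pattern; the table walk, the callback, and all names here are illustrative, not the kernel's mm_walk API:

#include <stdio.h>

struct walk;	/* forward declaration for the callback type */

typedef int (*entry_fn)(int value, struct walk *walk);

struct walk {
	entry_fn	entry;		/* called for every entry */
	void		*private;	/* caller state, like walk->private */
};

struct queue_ctx {			/* stands in for struct queue_pages */
	int	matched;
	int	threshold;
};

static int queue_entry(int value, struct walk *walk)
{
	struct queue_ctx *ctx = walk->private;

	if (value >= ctx->threshold)
		ctx->matched++;		/* "queue" the matching entry */
	return 0;			/* nonzero would abort the walk */
}

static int walk_range(const int *table, int n, struct walk *walk)
{
	for (int i = 0; i < n; i++) {
		int err = walk->entry(table[i], walk);
		if (err)
			return err;	/* propagate callback errors */
	}
	return 0;
}

int main(void)
{
	int table[] = { 1, 5, 9, 3, 7 };
	struct queue_ctx ctx = { .matched = 0, .threshold = 5 };
	struct walk walk = { .entry = queue_entry, .private = &ctx };

	walk_range(table, 5, &walk);
	printf("queued %d entries\n", ctx.matched);
	return 0;
}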
@@ -1028,7 +978,7 @@ static int migrate_to_node(struct mm_struct *mm, int source, int dest,
flags | MPOL_MF_DISCONTIG_OK, &pagelist);
if (!list_empty(&pagelist)) {
- err = migrate_pages(&pagelist, new_node_page, dest,
+ err = migrate_pages(&pagelist, new_node_page, NULL, dest,
MIGRATE_SYNC, MR_SYSCALL);
if (err)
putback_movable_pages(&pagelist);
@@ -1277,7 +1227,7 @@ static long do_mbind(unsigned long start, unsigned long len,
if (!list_empty(&pagelist)) {
WARN_ON_ONCE(flags & MPOL_MF_LAZY);
nr_failed = migrate_pages(&pagelist, new_vma_page,
- (unsigned long)vma,
+ NULL, (unsigned long)vma,
MIGRATE_SYNC, MR_MEMPOLICY_MBIND);
if (nr_failed)
putback_movable_pages(&pagelist);
@@ -1362,7 +1312,7 @@ static int copy_nodes_to_user(unsigned long __user *mask, unsigned long maxnode,
}
SYSCALL_DEFINE6(mbind, unsigned long, start, unsigned long, len,
- unsigned long, mode, unsigned long __user *, nmask,
+ unsigned long, mode, const unsigned long __user *, nmask,
unsigned long, maxnode, unsigned, flags)
{
nodemask_t nodes;
@@ -1383,7 +1333,7 @@ SYSCALL_DEFINE6(mbind, unsigned long, start, unsigned long, len,
}
/* Set the process memory policy */
-SYSCALL_DEFINE3(set_mempolicy, int, mode, unsigned long __user *, nmask,
+SYSCALL_DEFINE3(set_mempolicy, int, mode, const unsigned long __user *, nmask,
unsigned long, maxnode)
{
int err;
@@ -1606,9 +1556,9 @@ COMPAT_SYSCALL_DEFINE6(mbind, compat_ulong_t, start, compat_ulong_t, len,
/*
* get_vma_policy(@task, @vma, @addr)
- * @task - task for fallback if vma policy == default
- * @vma - virtual memory area whose policy is sought
- * @addr - address in @vma for shared policy lookup
+ * @task: task for fallback if vma policy == default
+ * @vma: virtual memory area whose policy is sought
+ * @addr: address in @vma for shared policy lookup
*
* Returns effective policy for a VMA at specified address.
* Falls back to @task or system default policy, as necessary.
@@ -1854,11 +1804,11 @@ int node_random(const nodemask_t *maskp)
#ifdef CONFIG_HUGETLBFS
/*
* huge_zonelist(@vma, @addr, @gfp_flags, @mpol)
- * @vma = virtual memory area whose policy is sought
- * @addr = address in @vma for shared policy lookup and interleave policy
- * @gfp_flags = for requested zone
- * @mpol = pointer to mempolicy pointer for reference counted mempolicy
- * @nodemask = pointer to nodemask pointer for MPOL_BIND nodemask
+ * @vma: virtual memory area whose policy is sought
+ * @addr: address in @vma for shared policy lookup and interleave policy
+ * @gfp_flags: for requested zone
+ * @mpol: pointer to mempolicy pointer for reference counted mempolicy
+ * @nodemask: pointer to nodemask pointer for MPOL_BIND nodemask
*
* Returns a zonelist suitable for a huge page allocation and a pointer
* to the struct mempolicy for conditional unref after allocation.
@@ -2270,9 +2220,9 @@ static void sp_free(struct sp_node *n)
/**
* mpol_misplaced - check whether current page node is valid in policy
*
- * @page - page to be checked
- * @vma - vm area where page mapped
- * @addr - virtual address where page mapped
+ * @page: page to be checked
+ * @vma: vm area where page mapped
+ * @addr: virtual address where page mapped
*
* Lookup current policy node id for vma,addr and "compare to" page's
* node id.
@@ -2645,7 +2595,7 @@ void __init numa_policy_init(void)
node_set(prefer, interleave_nodes);
if (do_set_mempolicy(MPOL_INTERLEAVE, 0, &interleave_nodes))
- printk("numa_policy_init: interleaving failed\n");
+ pr_err("%s: interleaving failed\n", __func__);
check_numabalancing_enable();
}
diff --git a/mm/mempool.c b/mm/mempool.c
index 905434f18c97..e209c98c7203 100644
--- a/mm/mempool.c
+++ b/mm/mempool.c
@@ -10,6 +10,7 @@
#include <linux/mm.h>
#include <linux/slab.h>
+#include <linux/kmemleak.h>
#include <linux/export.h>
#include <linux/mempool.h>
#include <linux/blkdev.h>
@@ -192,6 +193,7 @@ EXPORT_SYMBOL(mempool_resize);
* returns NULL. Note that due to preallocation, this function
* *never* fails when called from process contexts. (it might
* fail if called from an IRQ context.)
+ * Note: using __GFP_ZERO is not supported.
*/
void * mempool_alloc(mempool_t *pool, gfp_t gfp_mask)
{
@@ -200,6 +202,7 @@ void * mempool_alloc(mempool_t *pool, gfp_t gfp_mask)
wait_queue_t wait;
gfp_t gfp_temp;
+ VM_WARN_ON_ONCE(gfp_mask & __GFP_ZERO);
might_sleep_if(gfp_mask & __GFP_WAIT);
gfp_mask |= __GFP_NOMEMALLOC; /* don't allocate emergency reserves */
@@ -220,6 +223,11 @@ repeat_alloc:
spin_unlock_irqrestore(&pool->lock, flags);
/* paired with rmb in mempool_free(), read comment there */
smp_wmb();
+ /*
+ * Update the allocation stack trace as this is more useful
+ * for debugging.
+ */
+ kmemleak_update_trace(element);
return element;
}
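The new VM_WARN_ON_ONCE makes the limitation explicit: elements served from the preallocated reserve never pass through the underlying allocator again, so a __GFP_ZERO request would be silently ignored on that path. A userspace sketch of the failure mode, assuming a trivial two-slot pool with illustrative names:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define POOL_SIZE 2

static void *pool[POOL_SIZE];
static int pool_count;

/* Refill path: only here could a "zero" request be honoured. */
static void *pool_alloc(int zero)
{
	if (pool_count > 0)
		return pool[--pool_count];	/* preallocated: never zeroed! */
	return zero ? calloc(1, 64) : malloc(64);
}

static void pool_free(void *p)
{
	if (pool_count < POOL_SIZE)
		pool[pool_count++] = p;		/* returned dirty, as-is */
	else
		free(p);
}

int main(void)
{
	char *a = pool_alloc(1);
	memset(a, 0xab, 64);	/* caller dirties the element */
	pool_free(a);		/* element goes back to the pool unzeroed */

	char *b = pool_alloc(1);	/* "zeroed" request served from pool */
	printf("b[0] = 0x%02x (expected 0x00 for a zeroed allocation)\n",
	       (unsigned char)b[0]);
	return 0;
}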
diff --git a/mm/migrate.c b/mm/migrate.c
index bed48809e5d0..2a459675eeab 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -938,8 +938,9 @@ out:
* Obtain the lock on page, remove all ptes and migrate the page
* to the newly allocated page in newpage.
*/
-static int unmap_and_move(new_page_t get_new_page, unsigned long private,
- struct page *page, int force, enum migrate_mode mode)
+static int unmap_and_move(new_page_t get_new_page, free_page_t put_new_page,
+ unsigned long private, struct page *page, int force,
+ enum migrate_mode mode)
{
int rc = 0;
int *result = NULL;
@@ -983,11 +984,17 @@ out:
page_is_file_cache(page));
putback_lru_page(page);
}
+
/*
- * Move the new page to the LRU. If migration was not successful
- * then this will free the page.
+ * If migration was not successful and there's a freeing callback, use
+ * it. Otherwise, putback_lru_page() will drop the reference grabbed
+ * during isolation.
*/
- putback_lru_page(newpage);
+ if (rc != MIGRATEPAGE_SUCCESS && put_new_page)
+ put_new_page(newpage, private);
+ else
+ putback_lru_page(newpage);
+
if (result) {
if (rc)
*result = rc;
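With the new free_page_t argument, whoever allocated the target page also decides how a failed target is disposed of, instead of always taking the putback_lru_page() route. A sketch of the paired allocate/free callback convention; the typedef shapes follow the patch, but the bodies and the migrate_one() driver are illustrative:

#include <stdio.h>
#include <stdlib.h>

typedef void *(*new_page_t)(unsigned long private);
typedef void  (*free_page_t)(void *page, unsigned long private);

static void *my_alloc(unsigned long private)
{
	(void)private;
	return malloc(64);
}

static void my_free(void *page, unsigned long private)
{
	(void)private;
	free(page);	/* owner-specific disposal, e.g. back to a CMA area */
}

/* Failure-aware migration step: free via callback if one was given. */
static int migrate_one(new_page_t get_new_page, free_page_t put_new_page,
		       unsigned long private, int simulate_failure)
{
	void *newpage = get_new_page(private);
	int rc = simulate_failure ? -1 : 0;

	/* On success the new page stays in use; only failures free it. */
	if (rc != 0 && put_new_page)
		put_new_page(newpage, private);	/* caller reclaims the page */
	else if (rc != 0)
		free(newpage);			/* generic fallback path */

	return rc;
}

int main(void)
{
	printf("rc = %d\n", migrate_one(my_alloc, my_free, 0, 1));
	return 0;
}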
@@ -1016,8 +1023,9 @@ out:
* will wait in the page fault for migration to complete.
*/
static int unmap_and_move_huge_page(new_page_t get_new_page,
- unsigned long private, struct page *hpage,
- int force, enum migrate_mode mode)
+ free_page_t put_new_page, unsigned long private,
+ struct page *hpage, int force,
+ enum migrate_mode mode)
{
int rc = 0;
int *result = NULL;
@@ -1056,20 +1064,30 @@ static int unmap_and_move_huge_page(new_page_t get_new_page,
if (!page_mapped(hpage))
rc = move_to_new_page(new_hpage, hpage, 1, mode);
- if (rc)
+ if (rc != MIGRATEPAGE_SUCCESS)
remove_migration_ptes(hpage, hpage);
if (anon_vma)
put_anon_vma(anon_vma);
- if (!rc)
+ if (rc == MIGRATEPAGE_SUCCESS)
hugetlb_cgroup_migrate(hpage, new_hpage);
unlock_page(hpage);
out:
if (rc != -EAGAIN)
putback_active_hugepage(hpage);
- put_page(new_hpage);
+
+ /*
+ * If migration was not successful and there's a freeing callback, use
+ * it. Otherwise, put_page() will drop the reference grabbed during
+ * isolation.
+ */
+ if (rc != MIGRATEPAGE_SUCCESS && put_new_page)
+ put_new_page(new_hpage, private);
+ else
+ put_page(new_hpage);
+
if (result) {
if (rc)
*result = rc;
@@ -1086,6 +1104,8 @@ out:
* @from: The list of pages to be migrated.
* @get_new_page: The function used to allocate free pages to be used
* as the target of the page migration.
+ * @put_new_page: The function used to free target pages if migration
+ * fails, or NULL if no special handling is necessary.
* @private: Private data to be passed on to get_new_page()
* @mode: The migration mode that specifies the constraints for
* page migration, if any.
@@ -1099,7 +1119,8 @@ out:
* Returns the number of pages that were not migrated, or an error code.
*/
int migrate_pages(struct list_head *from, new_page_t get_new_page,
- unsigned long private, enum migrate_mode mode, int reason)
+ free_page_t put_new_page, unsigned long private,
+ enum migrate_mode mode, int reason)
{
int retry = 1;
int nr_failed = 0;
@@ -1121,10 +1142,11 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page,
if (PageHuge(page))
rc = unmap_and_move_huge_page(get_new_page,
- private, page, pass > 2, mode);
+ put_new_page, private, page,
+ pass > 2, mode);
else
- rc = unmap_and_move(get_new_page, private,
- page, pass > 2, mode);
+ rc = unmap_and_move(get_new_page, put_new_page,
+ private, page, pass > 2, mode);
switch(rc) {
case -ENOMEM:
@@ -1273,7 +1295,7 @@ set_status:
err = 0;
if (!list_empty(&pagelist)) {
- err = migrate_pages(&pagelist, new_page_node,
+ err = migrate_pages(&pagelist, new_page_node, NULL,
(unsigned long)pm, MIGRATE_SYNC, MR_SYSCALL);
if (err)
putback_movable_pages(&pagelist);
@@ -1729,7 +1751,8 @@ int migrate_misplaced_page(struct page *page, struct vm_area_struct *vma,
list_add(&page->lru, &migratepages);
nr_remaining = migrate_pages(&migratepages, alloc_misplaced_dst_page,
- node, MIGRATE_ASYNC, MR_NUMA_MISPLACED);
+ NULL, node, MIGRATE_ASYNC,
+ MR_NUMA_MISPLACED);
if (nr_remaining) {
if (!list_empty(&migratepages)) {
list_del(&page->lru);
@@ -1852,7 +1875,7 @@ fail_putback:
* guarantee the copy is visible before the pagetable update.
*/
flush_cache_range(vma, mmun_start, mmun_end);
- page_add_new_anon_rmap(new_page, vma, mmun_start);
+ page_add_anon_rmap(new_page, vma, mmun_start);
pmdp_clear_flush(vma, mmun_start, pmd);
set_pmd_at(mm, mmun_start, pmd, entry);
flush_tlb_range(vma, mmun_start, mmun_end);
@@ -1877,6 +1900,10 @@ fail_putback:
spin_unlock(ptl);
mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
+ /* Take an "isolate" reference and put new page on the LRU. */
+ get_page(new_page);
+ putback_lru_page(new_page);
+
unlock_page(new_page);
unlock_page(page);
put_page(page); /* Drop the rmap reference */
diff --git a/mm/mmap.c b/mm/mmap.c
index b1202cf81f4b..2a0e0a8337b6 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -6,6 +6,8 @@
* Address space accounting code <alan@lxorguk.ukuu.org.uk>
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/backing-dev.h>
@@ -37,6 +39,7 @@
#include <linux/sched/sysctl.h>
#include <linux/notifier.h>
#include <linux/memory.h>
+#include <linux/printk.h>
#include <asm/uaccess.h>
#include <asm/cacheflush.h>
@@ -361,20 +364,20 @@ static int browse_rb(struct rb_root *root)
struct vm_area_struct *vma;
vma = rb_entry(nd, struct vm_area_struct, vm_rb);
if (vma->vm_start < prev) {
- printk("vm_start %lx prev %lx\n", vma->vm_start, prev);
+ pr_info("vm_start %lx prev %lx\n", vma->vm_start, prev);
bug = 1;
}
if (vma->vm_start < pend) {
- printk("vm_start %lx pend %lx\n", vma->vm_start, pend);
+ pr_info("vm_start %lx pend %lx\n", vma->vm_start, pend);
bug = 1;
}
if (vma->vm_start > vma->vm_end) {
- printk("vm_end %lx < vm_start %lx\n",
+ pr_info("vm_end %lx < vm_start %lx\n",
vma->vm_end, vma->vm_start);
bug = 1;
}
if (vma->rb_subtree_gap != vma_compute_subtree_gap(vma)) {
- printk("free gap %lx, correct %lx\n",
+ pr_info("free gap %lx, correct %lx\n",
vma->rb_subtree_gap,
vma_compute_subtree_gap(vma));
bug = 1;
@@ -388,7 +391,7 @@ static int browse_rb(struct rb_root *root)
for (nd = pn; nd; nd = rb_prev(nd))
j++;
if (i != j) {
- printk("backwards %d, forwards %d\n", j, i);
+ pr_info("backwards %d, forwards %d\n", j, i);
bug = 1;
}
return bug ? -1 : i;
@@ -423,17 +426,17 @@ static void validate_mm(struct mm_struct *mm)
i++;
}
if (i != mm->map_count) {
- printk("map_count %d vm_next %d\n", mm->map_count, i);
+ pr_info("map_count %d vm_next %d\n", mm->map_count, i);
bug = 1;
}
if (highest_address != mm->highest_vm_end) {
- printk("mm->highest_vm_end %lx, found %lx\n",
+ pr_info("mm->highest_vm_end %lx, found %lx\n",
mm->highest_vm_end, highest_address);
bug = 1;
}
i = browse_rb(&mm->mm_rb);
if (i != mm->map_count) {
- printk("map_count %d rb %d\n", mm->map_count, i);
+ pr_info("map_count %d rb %d\n", mm->map_count, i);
bug = 1;
}
BUG_ON(bug);
@@ -640,11 +643,10 @@ static void vma_link(struct mm_struct *mm, struct vm_area_struct *vma,
{
struct address_space *mapping = NULL;
- if (vma->vm_file)
+ if (vma->vm_file) {
mapping = vma->vm_file->f_mapping;
-
- if (mapping)
mutex_lock(&mapping->i_mmap_mutex);
+ }
__vma_link(mm, vma, prev, rb_link, rb_parent);
__vma_link_file(vma);
@@ -2579,6 +2581,72 @@ SYSCALL_DEFINE2(munmap, unsigned long, addr, size_t, len)
return vm_munmap(addr, len);
}
+
+/*
+ * Emulation of deprecated remap_file_pages() syscall.
+ */
+SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size,
+ unsigned long, prot, unsigned long, pgoff, unsigned long, flags)
+{
+
+ struct mm_struct *mm = current->mm;
+ struct vm_area_struct *vma;
+ unsigned long populate = 0;
+ unsigned long ret = -EINVAL;
+
+ pr_warn_once("%s (%d) uses deprecated remap_file_pages() syscall. "
+ "See Documentation/vm/remap_file_pages.txt.\n",
+ current->comm, current->pid);
+
+ if (prot)
+ return ret;
+ start = start & PAGE_MASK;
+ size = size & PAGE_MASK;
+
+ if (start + size <= start)
+ return ret;
+
+ /* Does pgoff wrap? */
+ if (pgoff + (size >> PAGE_SHIFT) < pgoff)
+ return ret;
+
+ down_write(&mm->mmap_sem);
+ vma = find_vma(mm, start);
+
+ if (!vma || !(vma->vm_flags & VM_SHARED))
+ goto out;
+
+ if (start < vma->vm_start || start + size > vma->vm_end)
+ goto out;
+
+ if (pgoff == linear_page_index(vma, start)) {
+ ret = 0;
+ goto out;
+ }
+
+ prot |= vma->vm_flags & VM_READ ? PROT_READ : 0;
+ prot |= vma->vm_flags & VM_WRITE ? PROT_WRITE : 0;
+ prot |= vma->vm_flags & VM_EXEC ? PROT_EXEC : 0;
+
+ flags &= MAP_NONBLOCK;
+ flags |= MAP_SHARED | MAP_FIXED | MAP_POPULATE;
+ if (vma->vm_flags & VM_LOCKED) {
+ flags |= MAP_LOCKED;
+ /* drop PG_Mlocked flag for over-mapped range */
+ munlock_vma_pages_range(vma, start, start + size);
+ }
+
+ ret = do_mmap_pgoff(vma->vm_file, start, size,
+ prot, flags, pgoff, &populate);
+out:
+ up_write(&mm->mmap_sem);
+ if (populate)
+ mm_populate(ret, populate);
+ if (!IS_ERR_VALUE(ret))
+ ret = 0;
+ return ret;
+}
+
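Userspace callers are unaffected by the emulation apart from the one-time warning and the cost of a full mmap. A minimal caller for reference, assuming glibc's remap_file_pages() wrapper; error handling is trimmed and the /tmp path is arbitrary:

#define _GNU_SOURCE
#include <stdio.h>
#include <sys/mman.h>
#include <fcntl.h>
#include <unistd.h>

int main(void)
{
	int fd = open("/tmp/rfp-demo", O_RDWR | O_CREAT, 0600);
	size_t len = 2 * 4096;

	if (ftruncate(fd, len))
		return 1;
	char *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
		       MAP_SHARED, fd, 0);

	/* Map file page 1 at window offset 0: pgoff is in pages, prot is 0. */
	if (remap_file_pages(p, 4096, 0, 1, 0))
		perror("remap_file_pages");	/* emulated since this patch */

	munmap(p, len);
	close(fd);
	return 0;
}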
static inline void verify_mm_writelocked(struct mm_struct *mm)
{
#ifdef CONFIG_DEBUG_VM
@@ -2965,9 +3033,7 @@ int install_special_mapping(struct mm_struct *mm,
struct vm_area_struct *vma = _install_special_mapping(mm,
addr, len, vm_flags, pages);
- if (IS_ERR(vma))
- return PTR_ERR(vma);
- return 0;
+ return PTR_ERR_OR_ZERO(vma);
}
static DEFINE_MUTEX(mm_all_locks_mutex);
@@ -3252,7 +3318,7 @@ static struct notifier_block reserve_mem_nb = {
static int __meminit init_reserve_notifier(void)
{
if (register_hotmemory_notifier(&reserve_mem_nb))
- printk("Failed registering memory add/remove notifier for admin reserve");
+ pr_err("Failed registering memory add/remove notifier for admin reserve\n");
return 0;
}
diff --git a/mm/nobootmem.c b/mm/nobootmem.c
index 04a9d94333a5..7ed58602e71b 100644
--- a/mm/nobootmem.c
+++ b/mm/nobootmem.c
@@ -197,7 +197,6 @@ unsigned long __init free_all_bootmem(void)
void __init free_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
unsigned long size)
{
- kmemleak_free_part(__va(physaddr), size);
memblock_free(physaddr, size);
}
@@ -212,7 +211,6 @@ void __init free_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
*/
void __init free_bootmem(unsigned long addr, unsigned long size)
{
- kmemleak_free_part(__va(addr), size);
memblock_free(addr, size);
}
diff --git a/mm/nommu.c b/mm/nommu.c
index 85f8d6698d48..e6ced9d836dd 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -13,6 +13,8 @@
* Copyright (c) 2007-2010 Paul Mundt <lethal@linux-sh.org>
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/export.h>
#include <linux/mm.h>
#include <linux/vmacache.h>
@@ -32,6 +34,7 @@
#include <linux/syscalls.h>
#include <linux/audit.h>
#include <linux/sched/sysctl.h>
+#include <linux/printk.h>
#include <asm/uaccess.h>
#include <asm/tlb.h>
@@ -1246,7 +1249,7 @@ error_free:
return ret;
enomem:
- printk("Allocation of length %lu from process %d (%s) failed\n",
+ pr_err("Allocation of length %lu from process %d (%s) failed\n",
len, current->pid, current->comm);
show_free_areas(0);
return -ENOMEM;
@@ -1996,14 +1999,6 @@ void filemap_map_pages(struct vm_area_struct *vma, struct vm_fault *vmf)
}
EXPORT_SYMBOL(filemap_map_pages);
-int generic_file_remap_pages(struct vm_area_struct *vma, unsigned long addr,
- unsigned long size, pgoff_t pgoff)
-{
- BUG();
- return 0;
-}
-EXPORT_SYMBOL(generic_file_remap_pages);
-
static int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm,
unsigned long addr, void *buf, int len, int write)
{
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 154af210178b..537d91cd3d28 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -1623,7 +1623,7 @@ void balance_dirty_pages_ratelimited(struct address_space *mapping)
* 1000+ tasks, all of them start dirtying pages at exactly the same
* time, hence all honoured too large initial task->nr_dirtied_pause.
*/
- p = &__get_cpu_var(bdp_ratelimits);
+ p = this_cpu_ptr(&bdp_ratelimits);
if (unlikely(current->nr_dirtied >= ratelimit))
*p = 0;
else if (unlikely(*p >= ratelimit_pages)) {
@@ -1635,7 +1635,7 @@ void balance_dirty_pages_ratelimited(struct address_space *mapping)
* short-lived tasks (eg. gcc invocations in a kernel build) escaping
* the dirty throttling and livelock other long-run dirtiers.
*/
- p = &__get_cpu_var(dirty_throttle_leaks);
+ p = this_cpu_ptr(&dirty_throttle_leaks);
if (*p > 0 && current->nr_dirtied < ratelimit) {
unsigned long nr_pages_dirtied;
nr_pages_dirtied = min(*p, ratelimit - current->nr_dirtied);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 5dba2933c9c0..068922845917 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -261,8 +261,9 @@ static int page_outside_zone_boundaries(struct zone *zone, struct page *page)
} while (zone_span_seqretry(zone, seq));
if (ret)
- pr_err("page %lu outside zone [ %lu - %lu ]\n",
- pfn, start_pfn, start_pfn + sp);
+ pr_err("page 0x%lx outside node %d zone %s [ 0x%lx - 0x%lx ]\n",
+ pfn, zone_to_nid(zone), zone->name,
+ start_pfn, start_pfn + sp);
return ret;
}
@@ -408,7 +409,8 @@ static int destroy_compound_page(struct page *page, unsigned long order)
return bad;
}
-static inline void prep_zero_page(struct page *page, int order, gfp_t gfp_flags)
+static inline void prep_zero_page(struct page *page, unsigned int order,
+ gfp_t gfp_flags)
{
int i;
@@ -452,7 +454,7 @@ static inline void set_page_guard_flag(struct page *page) { }
static inline void clear_page_guard_flag(struct page *page) { }
#endif
-static inline void set_page_order(struct page *page, int order)
+static inline void set_page_order(struct page *page, unsigned int order)
{
set_page_private(page, order);
__SetPageBuddy(page);
@@ -503,21 +505,31 @@ __find_buddy_index(unsigned long page_idx, unsigned int order)
* For recording page's order, we use page_private(page).
*/
static inline int page_is_buddy(struct page *page, struct page *buddy,
- int order)
+ unsigned int order)
{
if (!pfn_valid_within(page_to_pfn(buddy)))
return 0;
- if (page_zone_id(page) != page_zone_id(buddy))
- return 0;
-
if (page_is_guard(buddy) && page_order(buddy) == order) {
VM_BUG_ON_PAGE(page_count(buddy) != 0, buddy);
+
+ if (page_zone_id(page) != page_zone_id(buddy))
+ return 0;
+
return 1;
}
if (PageBuddy(buddy) && page_order(buddy) == order) {
VM_BUG_ON_PAGE(page_count(buddy) != 0, buddy);
+
+ /*
+ * zone check is done late to avoid uselessly
+ * calculating zone/node ids for pages that could
+ * never merge.
+ */
+ if (page_zone_id(page) != page_zone_id(buddy))
+ return 0;
+
return 1;
}
return 0;
@@ -549,6 +561,7 @@ static inline int page_is_buddy(struct page *page, struct page *buddy,
*/
static inline void __free_one_page(struct page *page,
+ unsigned long pfn,
struct zone *zone, unsigned int order,
int migratetype)
{
@@ -565,7 +578,7 @@ static inline void __free_one_page(struct page *page,
VM_BUG_ON(migratetype == -1);
- page_idx = page_to_pfn(page) & ((1 << MAX_ORDER) - 1);
+ page_idx = pfn & ((1 << MAX_ORDER) - 1);
VM_BUG_ON_PAGE(page_idx & ((1 << order) - 1), page);
VM_BUG_ON_PAGE(bad_range(zone, page), page);
@@ -650,6 +663,25 @@ static inline int free_pages_check(struct page *page)
}
/*
+ * Check that a freepage cannot end up on a wrong free_list for "sensitive"
+ * migratetypes. Return false if it could. Useful for VM_BUG_ON checks.
+ */
+static bool check_freepage_migratetype(struct page *page)
+{
+ int pageblock_mt = get_pageblock_migratetype(page);
+ int freepage_mt = get_freepage_migratetype(page);
+
+ /*
+ * For RESERVE and CMA pageblocks, the freepage_migratetype must
+ * match their migratetype. For other pageblocks, we don't care.
+ */
+ if (pageblock_mt != MIGRATE_RESERVE && !is_migrate_cma(pageblock_mt))
+ return true;
+
+ return (freepage_mt == pageblock_mt);
+}
+
+/*
* Frees a number of pages from the PCP lists
* Assumes all pages on list are in same zone, and of same order.
* count is the number of pages to free.
@@ -698,9 +730,11 @@ static void free_pcppages_bulk(struct zone *zone, int count,
page = list_entry(list->prev, struct page, lru);
/* must delete as __free_one_page list manipulates */
list_del(&page->lru);
+
+ VM_BUG_ON_PAGE(!check_freepage_migratetype(page), page);
mt = get_freepage_migratetype(page);
/* MIGRATE_MOVABLE list may include MIGRATE_RESERVEs */
- __free_one_page(page, zone, 0, mt);
+ __free_one_page(page, page_to_pfn(page), zone, 0, mt);
trace_mm_page_pcpu_drain(page, 0, mt);
if (likely(!is_migrate_isolate_page(page))) {
__mod_zone_page_state(zone, NR_FREE_PAGES, 1);
@@ -712,13 +746,15 @@ static void free_pcppages_bulk(struct zone *zone, int count,
spin_unlock(&zone->lock);
}
-static void free_one_page(struct zone *zone, struct page *page, int order,
+static void free_one_page(struct zone *zone,
+ struct page *page, unsigned long pfn,
+ unsigned int order,
int migratetype)
{
spin_lock(&zone->lock);
zone->pages_scanned = 0;
- __free_one_page(page, zone, order, migratetype);
+ __free_one_page(page, pfn, zone, order, migratetype);
if (unlikely(!is_migrate_isolate(migratetype)))
__mod_zone_freepage_state(zone, 1 << order, migratetype);
spin_unlock(&zone->lock);
@@ -755,15 +791,16 @@ static void __free_pages_ok(struct page *page, unsigned int order)
{
unsigned long flags;
int migratetype;
+ unsigned long pfn = page_to_pfn(page);
if (!free_pages_prepare(page, order))
return;
+ migratetype = get_pfnblock_migratetype(page, pfn);
local_irq_save(flags);
__count_vm_events(PGFREE, 1 << order);
- migratetype = get_pageblock_migratetype(page);
set_freepage_migratetype(page, migratetype);
- free_one_page(page_zone(page), page, order, migratetype);
+ free_one_page(page_zone(page), page, pfn, order, migratetype);
local_irq_restore(flags);
}
@@ -882,7 +919,7 @@ static inline int check_new_page(struct page *page)
return 0;
}
-static int prep_new_page(struct page *page, int order, gfp_t gfp_flags)
+static int prep_new_page(struct page *page, unsigned int order, gfp_t gfp_flags)
{
int i;
@@ -931,6 +968,7 @@ struct page *__rmqueue_smallest(struct zone *zone, unsigned int order,
rmv_page_order(page);
area->nr_free--;
expand(zone, page, order, current_order, area, migratetype);
+ set_freepage_migratetype(page, migratetype);
return page;
}
@@ -1057,7 +1095,9 @@ static int try_to_steal_freepages(struct zone *zone, struct page *page,
/*
* When borrowing from MIGRATE_CMA, we need to release the excess
- * buddy pages to CMA itself.
+ * buddy pages to CMA itself. We also ensure the freepage_migratetype
+ * is set to CMA so it is returned to the correct freelist in case
+ * the page ends up not being allocated from the pcp lists.
*/
if (is_migrate_cma(fallback_type))
return fallback_type;
@@ -1090,16 +1130,17 @@ static int try_to_steal_freepages(struct zone *zone, struct page *page,
/* Remove an element from the buddy allocator from the fallback list */
static inline struct page *
-__rmqueue_fallback(struct zone *zone, int order, int start_migratetype)
+__rmqueue_fallback(struct zone *zone, unsigned int order, int start_migratetype)
{
struct free_area *area;
- int current_order;
+ unsigned int current_order;
struct page *page;
int migratetype, new_type, i;
/* Find the largest possible block of pages in the other list */
- for (current_order = MAX_ORDER-1; current_order >= order;
- --current_order) {
+ for (current_order = MAX_ORDER-1;
+ current_order >= order && current_order <= MAX_ORDER-1;
+ --current_order) {
for (i = 0;; i++) {
migratetype = fallbacks[start_migratetype][i];
@@ -1125,6 +1166,12 @@ __rmqueue_fallback(struct zone *zone, int order, int start_migratetype)
expand(zone, page, order, current_order, area,
new_type);
+ /* The freepage_migratetype may differ from pageblock's
+ * migratetype depending on the decisions in
+ * try_to_steal_freepages. This is OK as long as it does
+ * not differ for MIGRATE_CMA type.
+ */
+ set_freepage_migratetype(page, new_type);
trace_mm_page_alloc_extfrag(page, order, current_order,
start_migratetype, migratetype, new_type);
@@ -1173,15 +1220,16 @@ retry_reserve:
*/
static int rmqueue_bulk(struct zone *zone, unsigned int order,
unsigned long count, struct list_head *list,
- int migratetype, int cold)
+ int migratetype, bool cold)
{
- int mt = migratetype, i;
+ int i;
spin_lock(&zone->lock);
for (i = 0; i < count; ++i) {
struct page *page = __rmqueue(zone, order, migratetype);
if (unlikely(page == NULL))
break;
+ VM_BUG_ON_PAGE(!check_freepage_migratetype(page), page);
/*
* Split buddy pages returned by expand() are received here
@@ -1192,18 +1240,12 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
* merge IO requests if the physical pages are ordered
* properly.
*/
- if (likely(cold == 0))
+ if (likely(!cold))
list_add(&page->lru, list);
else
list_add_tail(&page->lru, list);
- if (IS_ENABLED(CONFIG_CMA)) {
- mt = get_pageblock_migratetype(page);
- if (!is_migrate_cma(mt) && !is_migrate_isolate(mt))
- mt = migratetype;
- }
- set_freepage_migratetype(page, mt);
list = &page->lru;
- if (is_migrate_cma(mt))
+ if (is_migrate_cma(get_freepage_migratetype(page)))
__mod_zone_page_state(zone, NR_FREE_CMA_PAGES,
-(1 << order));
}
@@ -1327,7 +1369,7 @@ void mark_free_pages(struct zone *zone)
{
unsigned long pfn, max_zone_pfn;
unsigned long flags;
- int order, t;
+ unsigned int order, t;
struct list_head *curr;
if (zone_is_empty(zone))
@@ -1359,19 +1401,20 @@ void mark_free_pages(struct zone *zone)
/*
* Free a 0-order page
- * cold == 1 ? free a cold page : free a hot page
+ * cold == true ? free a cold page : free a hot page
*/
-void free_hot_cold_page(struct page *page, int cold)
+void free_hot_cold_page(struct page *page, bool cold)
{
struct zone *zone = page_zone(page);
struct per_cpu_pages *pcp;
unsigned long flags;
+ unsigned long pfn = page_to_pfn(page);
int migratetype;
if (!free_pages_prepare(page, 0))
return;
- migratetype = get_pageblock_migratetype(page);
+ migratetype = get_pfnblock_migratetype(page, pfn);
set_freepage_migratetype(page, migratetype);
local_irq_save(flags);
__count_vm_event(PGFREE);
@@ -1385,17 +1428,17 @@ void free_hot_cold_page(struct page *page, int cold)
*/
if (migratetype >= MIGRATE_PCPTYPES) {
if (unlikely(is_migrate_isolate(migratetype))) {
- free_one_page(zone, page, 0, migratetype);
+ free_one_page(zone, page, pfn, 0, migratetype);
goto out;
}
migratetype = MIGRATE_MOVABLE;
}
pcp = &this_cpu_ptr(zone->pageset)->pcp;
- if (cold)
- list_add_tail(&page->lru, &pcp->lists[migratetype]);
- else
+ if (!cold)
list_add(&page->lru, &pcp->lists[migratetype]);
+ else
+ list_add_tail(&page->lru, &pcp->lists[migratetype]);
pcp->count++;
if (pcp->count >= pcp->high) {
unsigned long batch = ACCESS_ONCE(pcp->batch);
@@ -1410,7 +1453,7 @@ out:
/*
* Free a list of 0-order pages
*/
-void free_hot_cold_page_list(struct list_head *list, int cold)
+void free_hot_cold_page_list(struct list_head *list, bool cold)
{
struct page *page, *next;
@@ -1522,12 +1565,12 @@ int split_free_page(struct page *page)
*/
static inline
struct page *buffered_rmqueue(struct zone *preferred_zone,
- struct zone *zone, int order, gfp_t gfp_flags,
- int migratetype)
+ struct zone *zone, unsigned int order,
+ gfp_t gfp_flags, int migratetype)
{
unsigned long flags;
struct page *page;
- int cold = !!(gfp_flags & __GFP_COLD);
+ bool cold = ((gfp_flags & __GFP_COLD) != 0);
again:
if (likely(order == 0)) {
@@ -1572,7 +1615,7 @@ again:
if (!page)
goto failed;
__mod_zone_freepage_state(zone, -(1 << order),
- get_pageblock_migratetype(page));
+ get_freepage_migratetype(page));
}
__mod_zone_page_state(zone, NR_ALLOC_BATCH, -(1 << order));
@@ -1672,8 +1715,9 @@ static inline bool should_fail_alloc_page(gfp_t gfp_mask, unsigned int order)
* Return true if free pages are above 'mark'. This takes into account the order
* of the allocation.
*/
-static bool __zone_watermark_ok(struct zone *z, int order, unsigned long mark,
- int classzone_idx, int alloc_flags, long free_pages)
+static bool __zone_watermark_ok(struct zone *z, unsigned int order,
+ unsigned long mark, int classzone_idx, int alloc_flags,
+ long free_pages)
{
/* free_pages may go negative - that's OK */
long min = mark;
@@ -1707,15 +1751,15 @@ static bool __zone_watermark_ok(struct zone *z, int order, unsigned long mark,
return true;
}
-bool zone_watermark_ok(struct zone *z, int order, unsigned long mark,
+bool zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark,
int classzone_idx, int alloc_flags)
{
return __zone_watermark_ok(z, order, mark, classzone_idx, alloc_flags,
zone_page_state(z, NR_FREE_PAGES));
}
-bool zone_watermark_ok_safe(struct zone *z, int order, unsigned long mark,
- int classzone_idx, int alloc_flags)
+bool zone_watermark_ok_safe(struct zone *z, unsigned int order,
+ unsigned long mark, int classzone_idx, int alloc_flags)
{
long free_pages = zone_page_state(z, NR_FREE_PAGES);
@@ -1850,18 +1894,8 @@ static bool zone_local(struct zone *local_zone, struct zone *zone)
static bool zone_allows_reclaim(struct zone *local_zone, struct zone *zone)
{
- return node_isset(local_zone->node, zone->zone_pgdat->reclaim_nodes);
-}
-
-static void __paginginit init_zone_allows_reclaim(int nid)
-{
- int i;
-
- for_each_node_state(i, N_MEMORY)
- if (node_distance(nid, i) <= RECLAIM_DISTANCE)
- node_set(i, NODE_DATA(nid)->reclaim_nodes);
- else
- zone_reclaim_mode = 1;
+ return node_distance(zone_to_nid(local_zone), zone_to_nid(zone)) <
+ RECLAIM_DISTANCE;
}
#else /* CONFIG_NUMA */
@@ -1895,9 +1929,6 @@ static bool zone_allows_reclaim(struct zone *local_zone, struct zone *zone)
return true;
}
-static inline void init_zone_allows_reclaim(int nid)
-{
-}
#endif /* CONFIG_NUMA */
/*
@@ -1916,6 +1947,8 @@ get_page_from_freelist(gfp_t gfp_mask, nodemask_t *nodemask, unsigned int order,
nodemask_t *allowednodes = NULL;/* zonelist_cache approximation */
int zlc_active = 0; /* set if using zonelist_cache */
int did_zlc_setup = 0; /* just call zlc_setup() one time */
+ bool consider_zone_dirty = (alloc_flags & ALLOC_WMARK_LOW) &&
+ (gfp_mask & __GFP_WRITE);
classzone_idx = zone_idx(preferred_zone);
zonelist_scan:
@@ -1930,12 +1963,10 @@ zonelist_scan:
if (IS_ENABLED(CONFIG_NUMA) && zlc_active &&
!zlc_zone_worth_trying(zonelist, z, allowednodes))
continue;
- if ((alloc_flags & ALLOC_CPUSET) &&
+ if (cpusets_enabled() &&
+ (alloc_flags & ALLOC_CPUSET) &&
!cpuset_zone_allowed_softwall(zone, gfp_mask))
continue;
- BUILD_BUG_ON(ALLOC_NO_WATERMARKS < NR_WMARK);
- if (unlikely(alloc_flags & ALLOC_NO_WATERMARKS))
- goto try_this_zone;
/*
* Distribute pages in proportion to the individual
* zone size to ensure fair page aging. The zone a
@@ -1974,15 +2005,19 @@ zonelist_scan:
* will require awareness of zones in the
* dirty-throttling and the flusher threads.
*/
- if ((alloc_flags & ALLOC_WMARK_LOW) &&
- (gfp_mask & __GFP_WRITE) && !zone_dirty_ok(zone))
- goto this_zone_full;
+ if (consider_zone_dirty && !zone_dirty_ok(zone))
+ continue;
mark = zone->watermark[alloc_flags & ALLOC_WMARK_MASK];
if (!zone_watermark_ok(zone, order, mark,
classzone_idx, alloc_flags)) {
int ret;
+ /* Checked here to keep the fast path fast */
+ BUILD_BUG_ON(ALLOC_NO_WATERMARKS < NR_WMARK);
+ if (alloc_flags & ALLOC_NO_WATERMARKS)
+ goto try_this_zone;
+
if (IS_ENABLED(CONFIG_NUMA) &&
!did_zlc_setup && nr_online_nodes > 1) {
/*
@@ -2044,7 +2079,7 @@ try_this_zone:
if (page)
break;
this_zone_full:
- if (IS_ENABLED(CONFIG_NUMA))
+ if (IS_ENABLED(CONFIG_NUMA) && zlc_active)
zlc_mark_zone_full(zonelist, z);
}
@@ -2226,7 +2261,7 @@ static struct page *
__alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
struct zonelist *zonelist, enum zone_type high_zoneidx,
nodemask_t *nodemask, int alloc_flags, struct zone *preferred_zone,
- int migratetype, bool sync_migration,
+ int migratetype, enum migrate_mode mode,
bool *contended_compaction, bool *deferred_compaction,
unsigned long *did_some_progress)
{
@@ -2240,7 +2275,7 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
current->flags |= PF_MEMALLOC;
*did_some_progress = try_to_compact_pages(zonelist, order, gfp_mask,
- nodemask, sync_migration,
+ nodemask, mode,
contended_compaction);
current->flags &= ~PF_MEMALLOC;
@@ -2273,7 +2308,7 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
* As async compaction considers a subset of pageblocks, only
* defer if the failure was a sync compaction failure.
*/
- if (sync_migration)
+ if (mode != MIGRATE_ASYNC)
defer_compaction(preferred_zone, order);
cond_resched();
@@ -2286,9 +2321,8 @@ static inline struct page *
__alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
struct zonelist *zonelist, enum zone_type high_zoneidx,
nodemask_t *nodemask, int alloc_flags, struct zone *preferred_zone,
- int migratetype, bool sync_migration,
- bool *contended_compaction, bool *deferred_compaction,
- unsigned long *did_some_progress)
+ int migratetype, enum migrate_mode mode, bool *contended_compaction,
+ bool *deferred_compaction, unsigned long *did_some_progress)
{
return NULL;
}
@@ -2483,7 +2517,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
int alloc_flags;
unsigned long pages_reclaimed = 0;
unsigned long did_some_progress;
- bool sync_migration = false;
+ enum migrate_mode migration_mode = MIGRATE_ASYNC;
bool deferred_compaction = false;
bool contended_compaction = false;
@@ -2577,17 +2611,22 @@ rebalance:
* Try direct compaction. The first pass is asynchronous. Subsequent
* attempts after direct reclaim are synchronous
*/
- page = __alloc_pages_direct_compact(gfp_mask, order,
- zonelist, high_zoneidx,
- nodemask,
- alloc_flags, preferred_zone,
- migratetype, sync_migration,
- &contended_compaction,
+ page = __alloc_pages_direct_compact(gfp_mask, order, zonelist,
+ high_zoneidx, nodemask, alloc_flags,
+ preferred_zone, migratetype,
+ migration_mode, &contended_compaction,
&deferred_compaction,
&did_some_progress);
if (page)
goto got_pg;
- sync_migration = true;
+
+ /*
+ * It can become very expensive to allocate transparent hugepages at
+ * fault, so use asynchronous memory compaction for THP unless it is
+ * khugepaged trying to collapse.
+ */
+ if (!(gfp_mask & __GFP_NO_KSWAPD) || (current->flags & PF_KTHREAD))
+ migration_mode = MIGRATE_SYNC_LIGHT;
/*
* If compaction is deferred for high-order allocations, it is because
@@ -2662,12 +2701,10 @@ rebalance:
* direct reclaim and reclaim/compaction depends on compaction
* being called after reclaim so call directly if necessary
*/
- page = __alloc_pages_direct_compact(gfp_mask, order,
- zonelist, high_zoneidx,
- nodemask,
- alloc_flags, preferred_zone,
- migratetype, sync_migration,
- &contended_compaction,
+ page = __alloc_pages_direct_compact(gfp_mask, order, zonelist,
+ high_zoneidx, nodemask, alloc_flags,
+ preferred_zone, migratetype,
+ migration_mode, &contended_compaction,
&deferred_compaction,
&did_some_progress);
if (page)
@@ -2697,7 +2734,6 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
int migratetype = allocflags_to_migratetype(gfp_mask);
unsigned int cpuset_mems_cookie;
int alloc_flags = ALLOC_WMARK_LOW|ALLOC_CPUSET|ALLOC_FAIR;
- struct mem_cgroup *memcg = NULL;
gfp_mask &= gfp_allowed_mask;
@@ -2716,13 +2752,6 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
if (unlikely(!zonelist->_zonerefs->zone))
return NULL;
- /*
- * Will only have any effect when __GFP_KMEMCG is set. This is
- * verified in the (always inline) callee
- */
- if (!memcg_kmem_newpage_charge(gfp_mask, &memcg, order))
- return NULL;
-
retry_cpuset:
cpuset_mems_cookie = read_mems_allowed_begin();
@@ -2782,8 +2811,6 @@ out:
if (unlikely(!page && read_mems_allowed_retry(cpuset_mems_cookie)))
goto retry_cpuset;
- memcg_kmem_commit_charge(page, memcg, order);
-
return page;
}
EXPORT_SYMBOL(__alloc_pages_nodemask);
@@ -2818,7 +2845,7 @@ void __free_pages(struct page *page, unsigned int order)
{
if (put_page_testzero(page)) {
if (order == 0)
- free_hot_cold_page(page, 0);
+ free_hot_cold_page(page, false);
else
__free_pages_ok(page, order);
}
@@ -2837,27 +2864,51 @@ void free_pages(unsigned long addr, unsigned int order)
EXPORT_SYMBOL(free_pages);
/*
- * __free_memcg_kmem_pages and free_memcg_kmem_pages will free
- * pages allocated with __GFP_KMEMCG.
- *
- * Those pages are accounted to a particular memcg, embedded in the
- * corresponding page_cgroup. To avoid adding a hit in the allocator to search
- * for that information only to find out that it is NULL for users who have no
- * interest in that whatsoever, we provide these functions.
+ * alloc_kmem_pages charges newly allocated pages to the kmem resource counter
+ * of the current memory cgroup.
*
- * The caller knows better which flags it relies on.
+ * It should be used when the caller would like to use kmalloc, but since the
+ * allocation is large, it has to fall back to the page allocator.
*/
-void __free_memcg_kmem_pages(struct page *page, unsigned int order)
+struct page *alloc_kmem_pages(gfp_t gfp_mask, unsigned int order)
+{
+ struct page *page;
+ struct mem_cgroup *memcg = NULL;
+
+ if (!memcg_kmem_newpage_charge(gfp_mask, &memcg, order))
+ return NULL;
+ page = alloc_pages(gfp_mask, order);
+ memcg_kmem_commit_charge(page, memcg, order);
+ return page;
+}
+
+struct page *alloc_kmem_pages_node(int nid, gfp_t gfp_mask, unsigned int order)
+{
+ struct page *page;
+ struct mem_cgroup *memcg = NULL;
+
+ if (!memcg_kmem_newpage_charge(gfp_mask, &memcg, order))
+ return NULL;
+ page = alloc_pages_node(nid, gfp_mask, order);
+ memcg_kmem_commit_charge(page, memcg, order);
+ return page;
+}
+
+/*
+ * __free_kmem_pages and free_kmem_pages will free pages allocated with
+ * alloc_kmem_pages.
+ */
+void __free_kmem_pages(struct page *page, unsigned int order)
{
memcg_kmem_uncharge_pages(page, order);
__free_pages(page, order);
}
-void free_memcg_kmem_pages(unsigned long addr, unsigned int order)
+void free_kmem_pages(unsigned long addr, unsigned int order)
{
if (addr != 0) {
VM_BUG_ON(!virt_addr_valid((void *)addr));
- __free_memcg_kmem_pages(virt_to_page((void *)addr), order);
+ __free_kmem_pages(virt_to_page((void *)addr), order);
}
}
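alloc_kmem_pages() above is a charge-first, commit-after wrapper: accounting is reserved before the allocation and then finalized, or rolled back, once the result is known. A generic userspace sketch of that two-phase pattern; the quota counter and function names are illustrative, not the memcg API:

#include <stdio.h>
#include <stdlib.h>

static long quota = 2;	/* pages this "cgroup" may still take */

/* Phase 1: reserve accounting before touching the allocator. */
static int charge(unsigned int pages)
{
	if (quota < pages)
		return 0;	/* over quota: fail before allocating */
	quota -= pages;
	return 1;
}

/* Phase 2: finalize or roll back once the allocation result is known. */
static void commit(void *page, unsigned int pages)
{
	if (!page)
		quota += pages;	/* allocation failed: uncharge */
}

static void *alloc_accounted(unsigned int pages)
{
	void *page;

	if (!charge(pages))
		return NULL;
	page = malloc(pages * 4096UL);
	commit(page, pages);
	return page;
}

int main(void)
{
	printf("first  = %p\n", alloc_accounted(2));
	printf("second = %p (quota exhausted)\n", alloc_accounted(1));
	return 0;
}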
@@ -4095,7 +4146,7 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
static void __meminit zone_init_free_lists(struct zone *zone)
{
- int order, t;
+ unsigned int order, t;
for_each_migratetype_order(order, t) {
INIT_LIST_HEAD(&zone->free_area[order].free_list[t]);
zone->free_area[order].nr_free = 0;
@@ -4921,8 +4972,6 @@ void __paginginit free_area_init_node(int nid, unsigned long *zones_size,
pgdat->node_id = nid;
pgdat->node_start_pfn = node_start_pfn;
- if (node_state(nid, N_MEMORY))
- init_zone_allows_reclaim(nid);
#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
get_pfn_range_for_nid(nid, &start_pfn, &end_pfn);
#endif
@@ -6009,53 +6058,64 @@ static inline int pfn_to_bitidx(struct zone *zone, unsigned long pfn)
* @end_bitidx: The last bit of interest
* returns pageblock_bits flags
*/
-unsigned long get_pageblock_flags_group(struct page *page,
- int start_bitidx, int end_bitidx)
+unsigned long get_pfnblock_flags_mask(struct page *page, unsigned long pfn,
+ unsigned long end_bitidx,
+ unsigned long mask)
{
struct zone *zone;
unsigned long *bitmap;
- unsigned long pfn, bitidx;
- unsigned long flags = 0;
- unsigned long value = 1;
+ unsigned long bitidx, word_bitidx;
+ unsigned long word;
zone = page_zone(page);
- pfn = page_to_pfn(page);
bitmap = get_pageblock_bitmap(zone, pfn);
bitidx = pfn_to_bitidx(zone, pfn);
+ word_bitidx = bitidx / BITS_PER_LONG;
+ bitidx &= (BITS_PER_LONG-1);
- for (; start_bitidx <= end_bitidx; start_bitidx++, value <<= 1)
- if (test_bit(bitidx + start_bitidx, bitmap))
- flags |= value;
-
- return flags;
+ word = bitmap[word_bitidx];
+ bitidx += end_bitidx;
+ return (word >> (BITS_PER_LONG - bitidx - 1)) & mask;
}
/**
- * set_pageblock_flags_group - Set the requested group of flags for a pageblock_nr_pages block of pages
+ * set_pfnblock_flags_mask - Set the requested group of flags for a pageblock_nr_pages block of pages
* @page: The page within the block of interest
* @start_bitidx: The first bit of interest
* @end_bitidx: The last bit of interest
* @flags: The flags to set
*/
-void set_pageblock_flags_group(struct page *page, unsigned long flags,
- int start_bitidx, int end_bitidx)
+void set_pfnblock_flags_mask(struct page *page, unsigned long flags,
+ unsigned long pfn,
+ unsigned long end_bitidx,
+ unsigned long mask)
{
struct zone *zone;
unsigned long *bitmap;
- unsigned long pfn, bitidx;
- unsigned long value = 1;
+ unsigned long bitidx, word_bitidx;
+ unsigned long old_word, word;
+
+ BUILD_BUG_ON(NR_PAGEBLOCK_BITS != 4);
zone = page_zone(page);
- pfn = page_to_pfn(page);
bitmap = get_pageblock_bitmap(zone, pfn);
bitidx = pfn_to_bitidx(zone, pfn);
+ word_bitidx = bitidx / BITS_PER_LONG;
+ bitidx &= (BITS_PER_LONG-1);
+
VM_BUG_ON_PAGE(!zone_spans_pfn(zone, pfn), page);
- for (; start_bitidx <= end_bitidx; start_bitidx++, value <<= 1)
- if (flags & value)
- __set_bit(bitidx + start_bitidx, bitmap);
- else
- __clear_bit(bitidx + start_bitidx, bitmap);
+ bitidx += end_bitidx;
+ mask <<= (BITS_PER_LONG - bitidx - 1);
+ flags <<= (BITS_PER_LONG - bitidx - 1);
+
+ word = ACCESS_ONCE(bitmap[word_bitidx]);
+ for (;;) {
+ old_word = cmpxchg(&bitmap[word_bitidx], word, (word & ~mask) | flags);
+ if (word == old_word)
+ break;
+ word = old_word;
+ }
}
/*
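The get/set_pfnblock_flags_mask() rewrite treats the pageblock bits as one word: reads become a shift and mask, and writes become a compare-and-swap retry loop so concurrent updates to neighbouring fields in the same word are not lost. A userspace sketch of the same loop, using GCC's __sync_val_compare_and_swap in place of the kernel's cmpxchg; the field layout is illustrative:

#include <stdio.h>

static unsigned long bitmap_word;	/* one word holds many small fields */

/* Read a field: shift it down and mask, as the new getter does. */
static unsigned long get_flags(unsigned long shift, unsigned long mask)
{
	return (bitmap_word >> shift) & mask;
}

/* Atomically replace a field: retry until no concurrent writer raced us. */
static void set_flags(unsigned long flags, unsigned long shift,
		      unsigned long mask)
{
	unsigned long old_word, word;

	mask  <<= shift;
	flags <<= shift;

	word = bitmap_word;
	for (;;) {
		old_word = __sync_val_compare_and_swap(&bitmap_word, word,
						(word & ~mask) | flags);
		if (word == old_word)
			break;		/* our update won */
		word = old_word;	/* lost the race: retry with new value */
	}
}

int main(void)
{
	set_flags(0x5, 4, 0xf);		/* write 0b0101 into bits 4..7 */
	printf("field = 0x%lx\n", get_flags(4, 0xf));
	return 0;
}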
@@ -6215,7 +6275,7 @@ static int __alloc_contig_migrate_range(struct compact_control *cc,
cc->nr_migratepages -= nr_reclaimed;
ret = migrate_pages(&cc->migratepages, alloc_migrate_target,
- 0, MIGRATE_SYNC, MR_CMA);
+ NULL, 0, MIGRATE_SYNC, MR_CMA);
}
if (ret < 0) {
putback_movable_pages(&cc->migratepages);
@@ -6254,7 +6314,7 @@ int alloc_contig_range(unsigned long start, unsigned long end,
.nr_migratepages = 0,
.order = -1,
.zone = page_zone(pfn_to_page(start)),
- .sync = true,
+ .mode = MIGRATE_SYNC_LIGHT,
.ignore_skip_hint = true,
};
INIT_LIST_HEAD(&cc.migratepages);
@@ -6409,7 +6469,7 @@ __offline_isolated_pages(unsigned long start_pfn, unsigned long end_pfn)
{
struct page *page;
struct zone *zone;
- int order, i;
+ unsigned int order, i;
unsigned long pfn;
unsigned long flags;
/* find the first valid pfn */
@@ -6461,7 +6521,7 @@ bool is_free_buddy_page(struct page *page)
struct zone *zone = page_zone(page);
unsigned long pfn = page_to_pfn(page);
unsigned long flags;
- int order;
+ unsigned int order;
spin_lock_irqsave(&zone->lock, flags);
for (order = 0; order < MAX_ORDER; order++) {
diff --git a/mm/page_io.c b/mm/page_io.c
index 33bb38c4aad7..243a9b76e5ce 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -248,11 +248,16 @@ out:
return ret;
}
+static sector_t swap_page_sector(struct page *page)
+{
+ return (sector_t)__page_file_index(page) << (PAGE_CACHE_SHIFT - 9);
+}
+
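swap_page_sector() turns a page index into a 512-byte sector number with a single shift: PAGE_CACHE_SHIFT - 9 is log2 of the sectors-per-page ratio, so each 4 KiB page covers eight sectors. A worked example, assuming 4 KiB pages:

#include <stdio.h>

#define PAGE_CACHE_SHIFT 12	/* 4 KiB pages assumed */
#define SECTOR_SHIFT	 9	/* 512-byte sectors */

int main(void)
{
	unsigned long long page_index = 3;

	/* page 3 starts at byte 12288, which is sector 24 */
	unsigned long long sector =
		page_index << (PAGE_CACHE_SHIFT - SECTOR_SHIFT);
	printf("page %llu -> sector %llu\n", page_index, sector);
	return 0;
}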
int __swap_writepage(struct page *page, struct writeback_control *wbc,
void (*end_write_func)(struct bio *, int))
{
struct bio *bio;
- int ret = 0, rw = WRITE;
+ int ret, rw = WRITE;
struct swap_info_struct *sis = page_swap_info(page);
if (sis->flags & SWP_FILE) {
@@ -304,6 +309,13 @@ int __swap_writepage(struct page *page, struct writeback_control *wbc,
return ret;
}
+ ret = bdev_write_page(sis->bdev, swap_page_sector(page), page, wbc);
+ if (!ret) {
+ count_vm_event(PSWPOUT);
+ return 0;
+ }
+
+ ret = 0;
bio = get_swap_bio(GFP_NOIO, page, end_write_func);
if (bio == NULL) {
set_page_dirty(page);
@@ -345,6 +357,13 @@ int swap_readpage(struct page *page)
return ret;
}
+ ret = bdev_read_page(sis->bdev, swap_page_sector(page), page);
+ if (!ret) {
+ count_vm_event(PSWPIN);
+ return 0;
+ }
+
+ ret = 0;
bio = get_swap_bio(GFP_KERNEL, page, end_swap_bio_read);
if (bio == NULL) {
unlock_page(page);
diff --git a/mm/pagewalk.c b/mm/pagewalk.c
index 2beeabf502c5..b2a075ffb96e 100644
--- a/mm/pagewalk.c
+++ b/mm/pagewalk.c
@@ -3,29 +3,58 @@
#include <linux/sched.h>
#include <linux/hugetlb.h>
-static int walk_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
- struct mm_walk *walk)
+/*
+ * Check the current skip status of page table walker.
+ *
+ * Here "skip" means skipping the lower-level walk, and it is
+ * determined for each entry independently. For example, when walk_pmd_range
+ * handles a pmd_trans_huge we don't have to walk over ptes under that pmd,
+ * and the skipping does not affect the walking over ptes under other pmds.
+ * That's why we reset @walk->skip after testing it.
+ */
+static bool skip_lower_level_walking(struct mm_walk *walk)
{
+ if (walk->skip) {
+ walk->skip = 0;
+ return true;
+ }
+ return false;
+}
+
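skip_lower_level_walking() is deliberately test-and-clear: a callback's request to skip applies to one entry only and must not leak into the next iteration. A small sketch of that consumed-once flag; the loop and callback are illustrative:

#include <stdio.h>

struct walk {
	int skip;	/* set by a callback to skip the lower level once */
};

/* Test-and-clear, like skip_lower_level_walking() above. */
static int skip_lower_level(struct walk *walk)
{
	if (walk->skip) {
		walk->skip = 0;	/* reset so the skip cannot leak into
				 * the next entry's walk */
		return 1;
	}
	return 0;
}

static int entry_cb(int i, struct walk *walk)
{
	if (i == 1)
		walk->skip = 1;	/* e.g. a huge pmd: no ptes to visit */
	return 0;
}

int main(void)
{
	struct walk walk = { 0 };

	for (int i = 0; i < 3; i++) {
		entry_cb(i, &walk);
		if (skip_lower_level(&walk)) {
			printf("entry %d: lower level skipped\n", i);
			continue;
		}
		printf("entry %d: walking lower level\n", i);
	}
	return 0;
}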
+static int walk_pte_range(pmd_t *pmd, unsigned long addr,
+ unsigned long end, struct mm_walk *walk)
+{
+ struct mm_struct *mm = walk->mm;
pte_t *pte;
+ pte_t *orig_pte;
+ spinlock_t *ptl;
int err = 0;
- pte = pte_offset_map(pmd, addr);
- for (;;) {
+ orig_pte = pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
+ do {
+ if (pte_none(*pte)) {
+ if (walk->pte_hole)
+ err = walk->pte_hole(addr, addr + PAGE_SIZE,
+ walk);
+ if (err)
+ break;
+ continue;
+ }
+ /*
+ * Callers should have their own way to handle swap entries
+ * in walk->pte_entry().
+ */
err = walk->pte_entry(pte, addr, addr + PAGE_SIZE, walk);
if (err)
break;
- addr += PAGE_SIZE;
- if (addr == end)
- break;
- pte++;
- }
-
- pte_unmap(pte);
- return err;
+ } while (pte++, addr += PAGE_SIZE, addr < end);
+ pte_unmap_unlock(orig_pte, ptl);
+ cond_resched();
+ return addr == end ? 0 : err;
}
-static int walk_pmd_range(pud_t *pud, unsigned long addr, unsigned long end,
- struct mm_walk *walk)
+static int walk_pmd_range(pud_t *pud, unsigned long addr,
+ unsigned long end, struct mm_walk *walk)
{
pmd_t *pmd;
unsigned long next;
@@ -35,6 +64,7 @@ static int walk_pmd_range(pud_t *pud, unsigned long addr, unsigned long end,
do {
again:
next = pmd_addr_end(addr, end);
+
if (pmd_none(*pmd)) {
if (walk->pte_hole)
err = walk->pte_hole(addr, next, walk);
@@ -42,35 +72,32 @@ again:
break;
continue;
}
- /*
- * This implies that each ->pmd_entry() handler
- * needs to know about pmd_trans_huge() pmds
- */
- if (walk->pmd_entry)
- err = walk->pmd_entry(pmd, addr, next, walk);
- if (err)
- break;
- /*
- * Check this here so we only break down trans_huge
- * pages when we _need_ to
- */
- if (!walk->pte_entry)
- continue;
+ if (walk->pmd_entry) {
+ err = walk->pmd_entry(pmd, addr, next, walk);
+ if (skip_lower_level_walking(walk))
+ continue;
+ if (err)
+ break;
+ }
- split_huge_page_pmd_mm(walk->mm, addr, pmd);
- if (pmd_none_or_trans_huge_or_clear_bad(pmd))
- goto again;
- err = walk_pte_range(pmd, addr, next, walk);
- if (err)
- break;
- } while (pmd++, addr = next, addr != end);
+ if (walk->pte_entry) {
+ if (walk->vma) {
+ split_huge_page_pmd(walk->vma, addr, pmd);
+ if (pmd_trans_unstable(pmd))
+ goto again;
+ }
+ err = walk_pte_range(pmd, addr, next, walk);
+ if (err)
+ break;
+ }
+ } while (pmd++, addr = next, addr < end);
return err;
}
-static int walk_pud_range(pgd_t *pgd, unsigned long addr, unsigned long end,
- struct mm_walk *walk)
+static int walk_pud_range(pgd_t *pgd, unsigned long addr,
+ unsigned long end, struct mm_walk *walk)
{
pud_t *pud;
unsigned long next;
@@ -79,6 +106,7 @@ static int walk_pud_range(pgd_t *pgd, unsigned long addr, unsigned long end,
pud = pud_offset(pgd, addr);
do {
next = pud_addr_end(addr, end);
+
if (pud_none_or_clear_bad(pud)) {
if (walk->pte_hole)
err = walk->pte_hole(addr, next, walk);
@@ -86,13 +114,58 @@ static int walk_pud_range(pgd_t *pgd, unsigned long addr, unsigned long end,
break;
continue;
}
- if (walk->pud_entry)
+
+ if (walk->pud_entry) {
err = walk->pud_entry(pud, addr, next, walk);
- if (!err && (walk->pmd_entry || walk->pte_entry))
+ if (skip_lower_level_walking(walk))
+ continue;
+ if (err)
+ break;
+ }
+
+ if (walk->pmd_entry || walk->pte_entry) {
err = walk_pmd_range(pud, addr, next, walk);
- if (err)
- break;
- } while (pud++, addr = next, addr != end);
+ if (err)
+ break;
+ }
+ } while (pud++, addr = next, addr < end);
+
+ return err;
+}
+
+static int walk_pgd_range(unsigned long addr, unsigned long end,
+ struct mm_walk *walk)
+{
+ pgd_t *pgd;
+ unsigned long next;
+ int err = 0;
+
+ pgd = pgd_offset(walk->mm, addr);
+ do {
+ next = pgd_addr_end(addr, end);
+
+ if (pgd_none_or_clear_bad(pgd)) {
+ if (walk->pte_hole)
+ err = walk->pte_hole(addr, next, walk);
+ if (err)
+ break;
+ continue;
+ }
+
+ if (walk->pgd_entry) {
+ err = walk->pgd_entry(pgd, addr, next, walk);
+ if (skip_lower_level_walking(walk))
+ continue;
+ if (err)
+ break;
+ }
+
+ if (walk->pud_entry || walk->pmd_entry || walk->pte_entry) {
+ err = walk_pud_range(pgd, addr, next, walk);
+ if (err)
+ break;
+ }
+ } while (pgd++, addr = next, addr < end);
return err;
}
@@ -105,144 +178,180 @@ static unsigned long hugetlb_entry_end(struct hstate *h, unsigned long addr,
return boundary < end ? boundary : end;
}
-static int walk_hugetlb_range(struct vm_area_struct *vma,
- unsigned long addr, unsigned long end,
- struct mm_walk *walk)
+static int walk_hugetlb_range(unsigned long addr, unsigned long end,
+ struct mm_walk *walk)
{
+ struct mm_struct *mm = walk->mm;
+ struct vm_area_struct *vma = walk->vma;
struct hstate *h = hstate_vma(vma);
unsigned long next;
unsigned long hmask = huge_page_mask(h);
pte_t *pte;
int err = 0;
+ spinlock_t *ptl;
do {
next = hugetlb_entry_end(h, addr, end);
pte = huge_pte_offset(walk->mm, addr & hmask);
- if (pte && walk->hugetlb_entry)
- err = walk->hugetlb_entry(pte, hmask, addr, next, walk);
+ if (!pte)
+ continue;
+ ptl = huge_pte_lock(h, mm, pte);
+ /*
+ * Callers should have their own way to handle swap entries
+ * in walk->hugetlb_entry().
+ */
+ if (walk->hugetlb_entry)
+ err = walk->hugetlb_entry(pte, addr, next, walk);
+ spin_unlock(ptl);
if (err)
- return err;
+ break;
} while (addr = next, addr != end);
-
- return 0;
+ cond_resched();
+ return err;
}
#else /* CONFIG_HUGETLB_PAGE */
-static int walk_hugetlb_range(struct vm_area_struct *vma,
- unsigned long addr, unsigned long end,
- struct mm_walk *walk)
+static inline int walk_hugetlb_range(unsigned long addr, unsigned long end,
+ struct mm_walk *walk)
{
return 0;
}
#endif /* CONFIG_HUGETLB_PAGE */
+/*
+ * Decide whether we really walk over the current vma on [@start, @end)
+ * or skip it. When we skip it, we set @walk->skip to 1.
+ * The return value controls whether the page table walk continues
+ * (zero) or is aborted (non-zero).
+ *
+ * The default check (only the VM_PFNMAP check for now) is used when the
+ * caller doesn't define a test_walk() callback.
+ */
+static int walk_page_test(unsigned long start, unsigned long end,
+ struct mm_walk *walk)
+{
+ struct vm_area_struct *vma = walk->vma;
+
+ if (walk->test_walk)
+ return walk->test_walk(start, end, walk);
+ /*
+ * Do not walk over a VM_PFNMAP vma, because there is no valid struct
+ * page backing a VM_PFNMAP range. See also commit a9ff785e4437.
+ */
+ if (vma->vm_flags & VM_PFNMAP)
+ walk->skip = 1;
+ return 0;
+}
+
+static int __walk_page_range(unsigned long start, unsigned long end,
+ struct mm_walk *walk)
+{
+ int err = 0;
+ struct vm_area_struct *vma = walk->vma;
+
+ if (vma && is_vm_hugetlb_page(vma)) {
+ if (walk->hugetlb_entry)
+ err = walk_hugetlb_range(start, end, walk);
+ } else
+ err = walk_pgd_range(start, end, walk);
+
+ return err;
+}
/**
- * walk_page_range - walk a memory map's page tables with a callback
- * @addr: starting address
- * @end: ending address
- * @walk: set of callbacks to invoke for each level of the tree
+ * walk_page_range - walk page table with caller-specific callbacks
+ *
+ * Recursively walk the page table tree of the process represented by
+ * @walk->mm within the virtual address range [@start, @end). During the
+ * walk, caller-specific callback functions are invoked for each entry.
*
- * Recursively walk the page table for the memory area in a VMA,
- * calling supplied callbacks. Callbacks are called in-order (first
- * PGD, first PUD, first PMD, first PTE, second PTE... second PMD,
- * etc.). If lower-level callbacks are omitted, walking depth is reduced.
+ * Before starting to walk the page tables, some callers want to check
+ * whether they really want to walk over the vma (for example by checking
+ * vm_flags). walk_page_test() and @walk->test_walk() do that check.
*
- * Each callback receives an entry pointer and the start and end of the
- * associated range, and a copy of the original mm_walk for access to
- * the ->private or ->mm fields.
+ * If any callback returns a non-zero value, the page table walk is aborted
+ * immediately and the return value is propagated back to the caller.
+ * Note that the meaning of a positive return value can be defined by
+ * the caller for its own purposes.
*
- * Usually no locks are taken, but splitting transparent huge page may
- * take page table lock. And the bottom level iterator will map PTE
- * directories from highmem if necessary.
+ * If the caller defines multiple callbacks at different levels, the
+ * callbacks are called in a depth-first manner, so multiple callbacks
+ * may be called on the same address. For example, if a caller defines
+ * test_walk(), pmd_entry(), and pte_entry(), the callbacks are called
+ * in the order test_walk(), pmd_entry(), pte_entry().
+ * If at some point you don't want to go down to a lower level and want
+ * to move to the next entry at the same level, set @walk->skip to 1.
+ * For example, if you succeed in handling a pmd entry as a trans_huge
+ * entry, you need not call walk_pte_range() any more, so set @walk->skip
+ * to avoid that. We can't determine whether to go down to a lower level
+ * from the return value of the callback, because the whole range of
+ * return values (0, >0, and <0) is used up for other meanings.
*
- * If any callback returns a non-zero value, the walk is aborted and
- * the return value is propagated back to the caller. Otherwise 0 is returned.
+ * Each callback can access the vma over which it is currently doing the
+ * page table walk via @walk->vma. @walk->vma is set to NULL when walking
+ * outside a vma. If you want to access some caller-specific data from
+ * the callbacks, @walk->private should be helpful.
*
- * walk->mm->mmap_sem must be held for at least read if walk->hugetlb_entry
- * is !NULL.
+ * Callers must hold @walk->mm->mmap_sem. Note that the lower-level
+ * iterators can take the page table lock in the lowest-level iteration
+ * and/or in split_huge_page_pmd().
*/
-int walk_page_range(unsigned long addr, unsigned long end,
+int walk_page_range(unsigned long start, unsigned long end,
struct mm_walk *walk)
{
- pgd_t *pgd;
- unsigned long next;
int err = 0;
+ struct vm_area_struct *vma;
+ unsigned long next;
- if (addr >= end)
- return err;
+ if (start >= end)
+ return -EINVAL;
if (!walk->mm)
return -EINVAL;
VM_BUG_ON(!rwsem_is_locked(&walk->mm->mmap_sem));
- pgd = pgd_offset(walk->mm, addr);
do {
- struct vm_area_struct *vma = NULL;
+ vma = find_vma(walk->mm, start);
+ if (!vma) { /* after the last vma */
+ walk->vma = NULL;
+ next = end;
+ } else if (start < vma->vm_start) { /* outside the found vma */
+ walk->vma = NULL;
+ next = vma->vm_start;
+ } else { /* inside the found vma */
+ walk->vma = vma;
+ next = min(end, vma->vm_end);
- next = pgd_addr_end(addr, end);
-
- /*
- * This function was not intended to be vma based.
- * But there are vma special cases to be handled:
- * - hugetlb vma's
- * - VM_PFNMAP vma's
- */
- vma = find_vma(walk->mm, addr);
- if (vma) {
- /*
- * There are no page structures backing a VM_PFNMAP
- * range, so do not allow split_huge_page_pmd().
- */
- if ((vma->vm_start <= addr) &&
- (vma->vm_flags & VM_PFNMAP)) {
- next = vma->vm_end;
- pgd = pgd_offset(walk->mm, next);
- continue;
- }
- /*
- * Handle hugetlb vma individually because pagetable
- * walk for the hugetlb page is dependent on the
- * architecture and we can't handled it in the same
- * manner as non-huge pages.
- */
- if (walk->hugetlb_entry && (vma->vm_start <= addr) &&
- is_vm_hugetlb_page(vma)) {
- if (vma->vm_end < next)
- next = vma->vm_end;
- /*
- * Hugepage is very tightly coupled with vma,
- * so walk through hugetlb entries within a
- * given vma.
- */
- err = walk_hugetlb_range(vma, addr, next, walk);
- if (err)
- break;
- pgd = pgd_offset(walk->mm, next);
+ err = walk_page_test(start, next, walk);
+ if (skip_lower_level_walking(walk))
continue;
- }
- }
-
- if (pgd_none_or_clear_bad(pgd)) {
- if (walk->pte_hole)
- err = walk->pte_hole(addr, next, walk);
if (err)
break;
- pgd++;
- continue;
}
- if (walk->pgd_entry)
- err = walk->pgd_entry(pgd, addr, next, walk);
- if (!err &&
- (walk->pud_entry || walk->pmd_entry || walk->pte_entry))
- err = walk_pud_range(pgd, addr, next, walk);
+ err = __walk_page_range(start, next, walk);
if (err)
break;
- pgd++;
- } while (addr = next, addr < end);
-
+ } while (start = next, start < end);
return err;
}
+
+int walk_page_vma(struct vm_area_struct *vma, struct mm_walk *walk)
+{
+ int err;
+
+ if (!walk->mm)
+ return -EINVAL;
+
+ VM_BUG_ON(!rwsem_is_locked(&walk->mm->mmap_sem));
+ VM_BUG_ON(!vma);
+ walk->vma = vma;
+ err = walk_page_test(vma->vm_start, vma->vm_end, walk);
+ if (skip_lower_level_walking(walk))
+ return 0;
+ if (err)
+ return err;
+ return __walk_page_range(vma->vm_start, vma->vm_end, walk);
+}
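
To make the reworked control flow concrete, here is a hedged sketch of a
caller driving the new walker: test_walk() vetoes whole vmas via
@walk->skip, and pte_entry() counts present ptes. The field names match
those used in the code above; the policy inside the callbacks is purely
illustrative, and a local mm and count are assumed to be in scope:

        static int count_test_walk(unsigned long start, unsigned long end,
                                   struct mm_walk *walk)
        {
                if (walk->vma->vm_flags & VM_DONTDUMP)
                        walk->skip = 1;         /* skip this vma entirely */
                return 0;                       /* zero keeps the walk alive */
        }

        static int count_pte_entry(pte_t *pte, unsigned long addr,
                                   unsigned long end, struct mm_walk *walk)
        {
                (*(unsigned long *)walk->private)++;    /* non-none ptes only */
                return 0;
        }

        /* caller holds mm->mmap_sem, per the comment above walk_page_range() */
        struct mm_walk walk = {
                .test_walk = count_test_walk,
                .pte_entry = count_pte_entry,
                .mm        = mm,
                .private   = &count,
        };
        walk_page_range(start, end, &walk);
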
diff --git a/mm/rmap.c b/mm/rmap.c
index 9c3e77396d1a..8c102ce18184 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -515,11 +515,7 @@ void page_unlock_anon_vma_read(struct anon_vma *anon_vma)
static inline unsigned long
__vma_address(struct page *page, struct vm_area_struct *vma)
{
- pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
-
- if (unlikely(is_vm_hugetlb_page(vma)))
- pgoff = page->index << huge_page_order(page_hstate(page));
-
+ pgoff_t pgoff = page_pgoff(page);
return vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
}
@@ -669,7 +665,7 @@ struct page_referenced_arg {
/*
* arg: page_referenced_arg will be passed
*/
-int page_referenced_one(struct page *page, struct vm_area_struct *vma,
+static int page_referenced_one(struct page *page, struct vm_area_struct *vma,
unsigned long address, void *arg)
{
struct mm_struct *mm = vma->vm_mm;
@@ -986,6 +982,12 @@ void do_page_add_anon_rmap(struct page *page,
{
int first = atomic_inc_and_test(&page->_mapcount);
if (first) {
+ /*
+ * We use the irq-unsafe __{inc|mod}_zone_page_stat because
+ * these counters are not modified from interrupt context, and
+ * pte lock(a spinlock) is held, which implies preemption
+ * disabled.
+ */
if (PageTransHuge(page))
__inc_zone_page_state(page,
NR_ANON_TRANSPARENT_HUGEPAGES);
@@ -1024,11 +1026,25 @@ void page_add_new_anon_rmap(struct page *page,
__mod_zone_page_state(page_zone(page), NR_ANON_PAGES,
hpage_nr_pages(page));
__page_set_anon_rmap(page, vma, address, 1);
- if (!mlocked_vma_newpage(vma, page)) {
+
+ VM_BUG_ON_PAGE(PageLRU(page), page);
+ if (likely((vma->vm_flags & (VM_LOCKED | VM_SPECIAL)) != VM_LOCKED)) {
SetPageActive(page);
lru_cache_add(page);
- } else
- add_page_to_unevictable_list(page);
+ return;
+ }
+
+ if (!TestSetPageMlocked(page)) {
+ /*
+ * We use the irq-unsafe __mod_zone_page_stat because this
+ * counter is not modified from interrupt context, and the pte
+ * lock is held(spinlock), which implies preemption disabled.
+ */
+ __mod_zone_page_state(page_zone(page), NR_MLOCK,
+ hpage_nr_pages(page));
+ count_vm_event(UNEVICTABLE_PGMLOCKED);
+ }
+ add_page_to_unevictable_list(page);
}
/**
@@ -1077,6 +1093,10 @@ void page_remove_rmap(struct page *page)
/*
* Hugepages are not counted in NR_ANON_PAGES nor NR_FILE_MAPPED
* and not charged by memcg for now.
+ *
+ * We use the irq-unsafe __{inc|mod}_zone_page_stat because
+ * these counters are not modified from interrupt context, and
+ * pte lock(a spinlock) is held, which implies preemption disabled.
*/
if (unlikely(PageHuge(page)))
goto out;
@@ -1112,7 +1132,7 @@ out:
/*
* @arg: enum ttu_flags will be passed to this argument
*/
-int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
+static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
unsigned long address, void *arg)
{
struct mm_struct *mm = vma->vm_mm;
@@ -1359,7 +1379,7 @@ static int try_to_unmap_cluster(unsigned long cursor, unsigned int *mapcount,
if (page->index != linear_page_index(vma, address)) {
pte_t ptfile = pgoff_to_pte(page->index);
if (pte_soft_dirty(pteval))
- pte_file_mksoft_dirty(ptfile);
+ ptfile = pte_file_mksoft_dirty(ptfile);
set_pte_at(mm, address, pte, ptfile);
}
@@ -1609,7 +1629,7 @@ static struct anon_vma *rmap_walk_anon_lock(struct page *page,
static int rmap_walk_anon(struct page *page, struct rmap_walk_control *rwc)
{
struct anon_vma *anon_vma;
- pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
+ pgoff_t pgoff = page_pgoff(page);
struct anon_vma_chain *avc;
int ret = SWAP_AGAIN;
@@ -1650,7 +1670,7 @@ static int rmap_walk_anon(struct page *page, struct rmap_walk_control *rwc)
static int rmap_walk_file(struct page *page, struct rmap_walk_control *rwc)
{
struct address_space *mapping = page->mapping;
- pgoff_t pgoff = page->index << compound_order(page);
+ pgoff_t pgoff = page_pgoff(page);
struct vm_area_struct *vma;
int ret = SWAP_AGAIN;
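
The rmap hunks above collapse three open-coded (and mutually inconsistent)
pgoff computations into the new page_pgoff() helper. The address
arithmetic it feeds is unchanged: given a page's offset in its mapping,
the virtual address inside a vma is recovered as

        /* the __vma_address() arithmetic, as rewritten above */
        unsigned long addr = vma->vm_start +
                ((page_pgoff(page) - vma->vm_pgoff) << PAGE_SHIFT);
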
diff --git a/mm/shmem.c b/mm/shmem.c
index de834ab8b6b9..ad858c5a3529 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1132,7 +1132,7 @@ repeat:
goto decused;
}
- SetPageSwapBacked(page);
+ __SetPageSwapBacked(page);
__set_page_locked(page);
error = mem_cgroup_charge_file(page, current->mm,
gfp & GFP_RECLAIM_MASK);
@@ -1372,9 +1372,13 @@ shmem_write_begin(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned flags,
struct page **pagep, void **fsdata)
{
+ int ret;
struct inode *inode = mapping->host;
pgoff_t index = pos >> PAGE_CACHE_SHIFT;
- return shmem_getpage(inode, index, pagep, SGP_WRITE, NULL);
+ ret = shmem_getpage(inode, index, pagep, SGP_WRITE, NULL);
+ if (*pagep)
+ init_page_accessed(*pagep);
+ return ret;
}
static int
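
shmem_write_begin() now primes the referenced bit on the page it hands
back, so the generic write path's later mark_page_accessed() finds it set
already. As the mm/swap.c hunk further below explains, the non-atomic
__SetPageReferenced() inside init_page_accessed() is safe only because the
page is not yet visible to other users. The calling pattern, roughly
(get_new_page() is a stand-in for whatever produces the locked page):

        ret = get_new_page(inode, index, &page);  /* page not on LRU yet */
        if (page)
                init_page_accessed(page);  /* non-atomic SetPageReferenced */
        return ret;
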
diff --git a/mm/slab.c b/mm/slab.c
index 19d92181ce24..9ca3b87edabc 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -1621,10 +1621,16 @@ __initcall(cpucache_init);
static noinline void
slab_out_of_memory(struct kmem_cache *cachep, gfp_t gfpflags, int nodeid)
{
+#if DEBUG
struct kmem_cache_node *n;
struct page *page;
unsigned long flags;
int node;
+ static DEFINE_RATELIMIT_STATE(slab_oom_rs, DEFAULT_RATELIMIT_INTERVAL,
+ DEFAULT_RATELIMIT_BURST);
+
+ if ((gfpflags & __GFP_NOWARN) || !__ratelimit(&slab_oom_rs))
+ return;
printk(KERN_WARNING
"SLAB: Unable to allocate memory on node %d (gfp=0x%x)\n",
@@ -1662,6 +1668,7 @@ slab_out_of_memory(struct kmem_cache *cachep, gfp_t gfpflags, int nodeid)
node, active_slabs, num_slabs, active_objs, num_objs,
free_objects);
}
+#endif
}
/*
@@ -1681,10 +1688,13 @@ static struct page *kmem_getpages(struct kmem_cache *cachep, gfp_t flags,
if (cachep->flags & SLAB_RECLAIM_ACCOUNT)
flags |= __GFP_RECLAIMABLE;
+ if (memcg_charge_slab(cachep, flags, cachep->gfporder))
+ return NULL;
+
page = alloc_pages_exact_node(nodeid, flags | __GFP_NOTRACK, cachep->gfporder);
if (!page) {
- if (!(flags & __GFP_NOWARN) && printk_ratelimit())
- slab_out_of_memory(cachep, flags, nodeid);
+ memcg_uncharge_slab(cachep, cachep->gfporder);
+ slab_out_of_memory(cachep, flags, nodeid);
return NULL;
}
@@ -1702,7 +1712,6 @@ static struct page *kmem_getpages(struct kmem_cache *cachep, gfp_t flags,
__SetPageSlab(page);
if (page->pfmemalloc)
SetPageSlabPfmemalloc(page);
- memcg_bind_pages(cachep, cachep->gfporder);
if (kmemcheck_enabled && !(cachep->flags & SLAB_NOTRACK)) {
kmemcheck_alloc_shadow(page, cachep->gfporder, flags, nodeid);
@@ -1738,10 +1747,10 @@ static void kmem_freepages(struct kmem_cache *cachep, struct page *page)
page_mapcount_reset(page);
page->mapping = NULL;
- memcg_release_pages(cachep, cachep->gfporder);
if (current->reclaim_state)
current->reclaim_state->reclaimed_slab += nr_freed;
- __free_memcg_kmem_pages(page, cachep->gfporder);
+ __free_pages(page, cachep->gfporder);
+ memcg_uncharge_slab(cachep, cachep->gfporder);
}
static void kmem_rcu_free(struct rcu_head *head)
@@ -2469,8 +2478,7 @@ out:
return nr_freed;
}
-/* Called with slab_mutex held to protect against cpu hotplug */
-static int __cache_shrink(struct kmem_cache *cachep)
+int __kmem_cache_shrink(struct kmem_cache *cachep)
{
int ret = 0, i = 0;
struct kmem_cache_node *n;
@@ -2491,32 +2499,11 @@ static int __cache_shrink(struct kmem_cache *cachep)
return (ret ? 1 : 0);
}
-/**
- * kmem_cache_shrink - Shrink a cache.
- * @cachep: The cache to shrink.
- *
- * Releases as many slabs as possible for a cache.
- * To help debugging, a zero exit status indicates all slabs were released.
- */
-int kmem_cache_shrink(struct kmem_cache *cachep)
-{
- int ret;
- BUG_ON(!cachep || in_interrupt());
-
- get_online_cpus();
- mutex_lock(&slab_mutex);
- ret = __cache_shrink(cachep);
- mutex_unlock(&slab_mutex);
- put_online_cpus();
- return ret;
-}
-EXPORT_SYMBOL(kmem_cache_shrink);
-
int __kmem_cache_shutdown(struct kmem_cache *cachep)
{
int i;
struct kmem_cache_node *n;
- int rc = __cache_shrink(cachep);
+ int rc = __kmem_cache_shrink(cachep);
if (rc)
return rc;
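
The slab OOM report is now compiled out entirely for !DEBUG builds, and
when built it rate-limits itself instead of relying on printk_ratelimit()
at the call site. The guard idiom, as a standalone hedged sketch:

        static void my_oom_warn(gfp_t gfpflags)
        {
                static DEFINE_RATELIMIT_STATE(my_oom_rs,
                                              DEFAULT_RATELIMIT_INTERVAL,
                                              DEFAULT_RATELIMIT_BURST);

                /* honour __GFP_NOWARN and drop messages beyond the burst */
                if ((gfpflags & __GFP_NOWARN) || !__ratelimit(&my_oom_rs))
                        return;
                pr_warn("allocation failed (gfp=0x%x)\n", gfpflags);
        }
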
diff --git a/mm/slab.h b/mm/slab.h
index 6bd4c353704f..961a3fb1f5a2 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -91,6 +91,7 @@ __kmem_cache_alias(const char *name, size_t size, size_t align,
#define CACHE_CREATE_MASK (SLAB_CORE_FLAGS | SLAB_DEBUG_FLAGS | SLAB_CACHE_FLAGS)
int __kmem_cache_shutdown(struct kmem_cache *);
+int __kmem_cache_shrink(struct kmem_cache *);
void slab_kmem_cache_release(struct kmem_cache *);
struct seq_file;
@@ -120,21 +121,6 @@ static inline bool is_root_cache(struct kmem_cache *s)
return !s->memcg_params || s->memcg_params->is_root_cache;
}
-static inline void memcg_bind_pages(struct kmem_cache *s, int order)
-{
- if (!is_root_cache(s))
- atomic_add(1 << order, &s->memcg_params->nr_pages);
-}
-
-static inline void memcg_release_pages(struct kmem_cache *s, int order)
-{
- if (is_root_cache(s))
- return;
-
- if (atomic_sub_and_test((1 << order), &s->memcg_params->nr_pages))
- mem_cgroup_destroy_cache(s);
-}
-
static inline bool slab_equal_or_root(struct kmem_cache *s,
struct kmem_cache *p)
{
@@ -192,18 +178,29 @@ static inline struct kmem_cache *memcg_root_cache(struct kmem_cache *s)
return s;
return s->memcg_params->root_cache;
}
-#else
-static inline bool is_root_cache(struct kmem_cache *s)
+
+static __always_inline int memcg_charge_slab(struct kmem_cache *s,
+ gfp_t gfp, int order)
{
- return true;
+ if (!memcg_kmem_enabled())
+ return 0;
+ if (is_root_cache(s))
+ return 0;
+ return __memcg_charge_slab(s, gfp, order);
}
-static inline void memcg_bind_pages(struct kmem_cache *s, int order)
+static __always_inline void memcg_uncharge_slab(struct kmem_cache *s, int order)
{
+ if (!memcg_kmem_enabled())
+ return;
+ if (is_root_cache(s))
+ return;
+ __memcg_uncharge_slab(s, order);
}
-
-static inline void memcg_release_pages(struct kmem_cache *s, int order)
+#else
+static inline bool is_root_cache(struct kmem_cache *s)
{
+ return true;
}
static inline bool slab_equal_or_root(struct kmem_cache *s,
@@ -227,6 +224,15 @@ static inline struct kmem_cache *memcg_root_cache(struct kmem_cache *s)
{
return s;
}
+
+static inline int memcg_charge_slab(struct kmem_cache *s, gfp_t gfp, int order)
+{
+ return 0;
+}
+
+static inline void memcg_uncharge_slab(struct kmem_cache *s, int order)
+{
+}
#endif
static inline struct kmem_cache *cache_from_obj(struct kmem_cache *s, void *x)
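
The new memcg_charge_slab()/memcg_uncharge_slab() pair replaces the old
bind/release page counting: the cache is charged before any pages are
allocated, and uncharged on the failure path or when the pages are freed,
so a slab page is always accounted exactly once. The pairing, sketched
against the kmem_getpages()/alloc_slab_page() changes in this series:

        static struct page *alloc_accounted(struct kmem_cache *s, gfp_t gfp,
                                            int order)
        {
                struct page *page;

                if (memcg_charge_slab(s, gfp, order))   /* charge first */
                        return NULL;
                page = alloc_pages(gfp, order);
                if (!page)
                        memcg_uncharge_slab(s, order);  /* roll back on failure */
                return page;
        }
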
diff --git a/mm/slab_common.c b/mm/slab_common.c
index 102cc6fca3d3..48fafb61f35e 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -160,7 +160,6 @@ do_kmem_cache_create(char *name, size_t object_size, size_t size, size_t align,
s->refcount = 1;
list_add(&s->list, &slab_caches);
- memcg_register_cache(s);
out:
if (err)
return ERR_PTR(err);
@@ -205,6 +204,8 @@ kmem_cache_create(const char *name, size_t size, size_t align,
int err;
get_online_cpus();
+ get_online_mems();
+
mutex_lock(&slab_mutex);
err = kmem_cache_sanity_check(name, size);
@@ -239,6 +240,8 @@ kmem_cache_create(const char *name, size_t size, size_t align,
out_unlock:
mutex_unlock(&slab_mutex);
+
+ put_online_mems();
put_online_cpus();
if (err) {
@@ -258,31 +261,29 @@ EXPORT_SYMBOL(kmem_cache_create);
#ifdef CONFIG_MEMCG_KMEM
/*
- * kmem_cache_create_memcg - Create a cache for a memory cgroup.
+ * memcg_create_kmem_cache - Create a cache for a memory cgroup.
* @memcg: The memory cgroup the new cache is for.
* @root_cache: The parent of the new cache.
+ * @memcg_name: The name of the memory cgroup (used for naming the new cache).
*
* This function attempts to create a kmem cache that will serve allocation
* requests going from @memcg to @root_cache. The new cache inherits properties
* from its parent.
*/
-void kmem_cache_create_memcg(struct mem_cgroup *memcg, struct kmem_cache *root_cache)
+struct kmem_cache *memcg_create_kmem_cache(struct mem_cgroup *memcg,
+ struct kmem_cache *root_cache,
+ const char *memcg_name)
{
- struct kmem_cache *s;
+ struct kmem_cache *s = NULL;
char *cache_name;
get_online_cpus();
- mutex_lock(&slab_mutex);
+ get_online_mems();
- /*
- * Since per-memcg caches are created asynchronously on first
- * allocation (see memcg_kmem_get_cache()), several threads can try to
- * create the same cache, but only one of them may succeed.
- */
- if (cache_from_memcg_idx(root_cache, memcg_cache_id(memcg)))
- goto out_unlock;
+ mutex_lock(&slab_mutex);
- cache_name = memcg_create_cache_name(memcg, root_cache);
+ cache_name = kasprintf(GFP_KERNEL, "%s(%d:%s)", root_cache->name,
+ memcg_cache_id(memcg), memcg_name);
if (!cache_name)
goto out_unlock;
@@ -292,17 +293,19 @@ void kmem_cache_create_memcg(struct mem_cgroup *memcg, struct kmem_cache *root_c
memcg, root_cache);
if (IS_ERR(s)) {
kfree(cache_name);
- goto out_unlock;
+ s = NULL;
}
- s->allocflags |= __GFP_KMEMCG;
-
out_unlock:
mutex_unlock(&slab_mutex);
+
+ put_online_mems();
put_online_cpus();
+
+ return s;
}
-static int kmem_cache_destroy_memcg_children(struct kmem_cache *s)
+static int memcg_cleanup_cache_params(struct kmem_cache *s)
{
int rc;
@@ -311,13 +314,13 @@ static int kmem_cache_destroy_memcg_children(struct kmem_cache *s)
return 0;
mutex_unlock(&slab_mutex);
- rc = __kmem_cache_destroy_memcg_children(s);
+ rc = __memcg_cleanup_cache_params(s);
mutex_lock(&slab_mutex);
return rc;
}
#else
-static int kmem_cache_destroy_memcg_children(struct kmem_cache *s)
+static int memcg_cleanup_cache_params(struct kmem_cache *s)
{
return 0;
}
@@ -332,21 +335,20 @@ void slab_kmem_cache_release(struct kmem_cache *s)
void kmem_cache_destroy(struct kmem_cache *s)
{
get_online_cpus();
+ get_online_mems();
+
mutex_lock(&slab_mutex);
s->refcount--;
if (s->refcount)
goto out_unlock;
- if (kmem_cache_destroy_memcg_children(s) != 0)
+ if (memcg_cleanup_cache_params(s) != 0)
goto out_unlock;
list_del(&s->list);
- memcg_unregister_cache(s);
-
if (__kmem_cache_shutdown(s) != 0) {
list_add(&s->list, &slab_caches);
- memcg_register_cache(s);
printk(KERN_ERR "kmem_cache_destroy %s: "
"Slab cache still has objects\n", s->name);
dump_stack();
@@ -363,15 +365,36 @@ void kmem_cache_destroy(struct kmem_cache *s)
#else
slab_kmem_cache_release(s);
#endif
- goto out_put_cpus;
+ goto out;
out_unlock:
mutex_unlock(&slab_mutex);
-out_put_cpus:
+out:
+ put_online_mems();
put_online_cpus();
}
EXPORT_SYMBOL(kmem_cache_destroy);
+/**
+ * kmem_cache_shrink - Shrink a cache.
+ * @cachep: The cache to shrink.
+ *
+ * Releases as many slabs as possible for a cache.
+ * To help debugging, a zero exit status indicates all slabs were released.
+ */
+int kmem_cache_shrink(struct kmem_cache *cachep)
+{
+ int ret;
+
+ get_online_cpus();
+ get_online_mems();
+ ret = __kmem_cache_shrink(cachep);
+ put_online_mems();
+ put_online_cpus();
+ return ret;
+}
+EXPORT_SYMBOL(kmem_cache_shrink);
+
int slab_is_available(void)
{
return slab_state >= UP;
@@ -586,6 +609,24 @@ void __init create_kmalloc_caches(unsigned long flags)
}
#endif /* !CONFIG_SLOB */
+/*
+ * To avoid unnecessary overhead, we pass through large allocation requests
+ * directly to the page allocator. We use __GFP_COMP, because we will need to
+ * know the allocation order to free the pages properly in kfree.
+ */
+void *kmalloc_order(size_t size, gfp_t flags, unsigned int order)
+{
+ void *ret;
+ struct page *page;
+
+ flags |= __GFP_COMP;
+ page = alloc_kmem_pages(flags, order);
+ ret = page ? page_address(page) : NULL;
+ kmemleak_alloc(ret, size, 1, flags);
+ return ret;
+}
+EXPORT_SYMBOL(kmalloc_order);
+
#ifdef CONFIG_TRACING
void *kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order)
{
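
kmalloc_order() becomes the common pass-through for large allocations:
anything too big for a kmalloc cache goes straight to the page allocator
with __GFP_COMP set, so that the free side can recover the order from the
compound page. The matching free path appears in the mm/slub.c kfree()
hunk below and boils down to:

        if (unlikely(!PageSlab(page))) {
                /* large kmalloc: no slab cache backs this allocation */
                __free_kmem_pages(page, compound_order(page));
                return;
        }
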
diff --git a/mm/slob.c b/mm/slob.c
index 730cad45d4be..21980e0f39a8 100644
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -620,11 +620,10 @@ int __kmem_cache_shutdown(struct kmem_cache *c)
return 0;
}
-int kmem_cache_shrink(struct kmem_cache *d)
+int __kmem_cache_shrink(struct kmem_cache *d)
{
return 0;
}
-EXPORT_SYMBOL(kmem_cache_shrink);
struct kmem_cache kmem_cache_boot = {
.name = "kmem_cache",
diff --git a/mm/slub.c b/mm/slub.c
index 2b1ce697fc4b..fdf0fe4da9a9 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -403,7 +403,7 @@ static inline bool __cmpxchg_double_slab(struct kmem_cache *s, struct page *page
stat(s, CMPXCHG_DOUBLE_FAIL);
#ifdef SLUB_DEBUG_CMPXCHG
- printk(KERN_INFO "%s %s: cmpxchg double redo ", n, s->name);
+ pr_info("%s %s: cmpxchg double redo ", n, s->name);
#endif
return 0;
@@ -444,7 +444,7 @@ static inline bool cmpxchg_double_slab(struct kmem_cache *s, struct page *page,
stat(s, CMPXCHG_DOUBLE_FAIL);
#ifdef SLUB_DEBUG_CMPXCHG
- printk(KERN_INFO "%s %s: cmpxchg double redo ", n, s->name);
+ pr_info("%s %s: cmpxchg double redo ", n, s->name);
#endif
return 0;
@@ -546,14 +546,14 @@ static void print_track(const char *s, struct track *t)
if (!t->addr)
return;
- printk(KERN_ERR "INFO: %s in %pS age=%lu cpu=%u pid=%d\n",
- s, (void *)t->addr, jiffies - t->when, t->cpu, t->pid);
+ pr_err("INFO: %s in %pS age=%lu cpu=%u pid=%d\n",
+ s, (void *)t->addr, jiffies - t->when, t->cpu, t->pid);
#ifdef CONFIG_STACKTRACE
{
int i;
for (i = 0; i < TRACK_ADDRS_COUNT; i++)
if (t->addrs[i])
- printk(KERN_ERR "\t%pS\n", (void *)t->addrs[i]);
+ pr_err("\t%pS\n", (void *)t->addrs[i]);
else
break;
}
@@ -571,38 +571,37 @@ static void print_tracking(struct kmem_cache *s, void *object)
static void print_page_info(struct page *page)
{
- printk(KERN_ERR
- "INFO: Slab 0x%p objects=%u used=%u fp=0x%p flags=0x%04lx\n",
+ pr_err("INFO: Slab 0x%p objects=%u used=%u fp=0x%p flags=0x%04lx\n",
page, page->objects, page->inuse, page->freelist, page->flags);
}
static void slab_bug(struct kmem_cache *s, char *fmt, ...)
{
+ struct va_format vaf;
va_list args;
- char buf[100];
va_start(args, fmt);
- vsnprintf(buf, sizeof(buf), fmt, args);
- va_end(args);
- printk(KERN_ERR "========================================"
- "=====================================\n");
- printk(KERN_ERR "BUG %s (%s): %s\n", s->name, print_tainted(), buf);
- printk(KERN_ERR "----------------------------------------"
- "-------------------------------------\n\n");
+ vaf.fmt = fmt;
+ vaf.va = &args;
+ pr_err("=============================================================================\n");
+ pr_err("BUG %s (%s): %pV\n", s->name, print_tainted(), &vaf);
+ pr_err("-----------------------------------------------------------------------------\n\n");
add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
+ va_end(args);
}
static void slab_fix(struct kmem_cache *s, char *fmt, ...)
{
+ struct va_format vaf;
va_list args;
- char buf[100];
va_start(args, fmt);
- vsnprintf(buf, sizeof(buf), fmt, args);
+ vaf.fmt = fmt;
+ vaf.va = &args;
+ pr_err("FIX %s: %pV\n", s->name, &vaf);
va_end(args);
- printk(KERN_ERR "FIX %s: %s\n", s->name, buf);
}
static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p)
@@ -614,8 +613,8 @@ static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p)
print_page_info(page);
- printk(KERN_ERR "INFO: Object 0x%p @offset=%tu fp=0x%p\n\n",
- p, p - addr, get_freepointer(s, p));
+ pr_err("INFO: Object 0x%p @offset=%tu fp=0x%p\n\n",
+ p, p - addr, get_freepointer(s, p));
if (p > addr + 16)
print_section("Bytes b4 ", p - 16, 16);
@@ -698,7 +697,7 @@ static int check_bytes_and_report(struct kmem_cache *s, struct page *page,
end--;
slab_bug(s, "%s overwritten", what);
- printk(KERN_ERR "INFO: 0x%p-0x%p. First byte 0x%x instead of 0x%x\n",
+ pr_err("INFO: 0x%p-0x%p. First byte 0x%x instead of 0x%x\n",
fault, end - 1, fault[0], value);
print_trailer(s, page, object);
@@ -931,7 +930,7 @@ static void trace(struct kmem_cache *s, struct page *page, void *object,
int alloc)
{
if (s->flags & SLAB_TRACE) {
- printk(KERN_INFO "TRACE %s %s 0x%p inuse=%d fp=0x%p\n",
+ pr_info("TRACE %s %s 0x%p inuse=%d fp=0x%p\n",
s->name,
alloc ? "alloc" : "free",
object, page->inuse,
@@ -1134,9 +1133,8 @@ static noinline struct kmem_cache_node *free_debug_processing(
slab_err(s, page, "Attempt to free object(0x%p) "
"outside of slab", object);
} else if (!page->slab_cache) {
- printk(KERN_ERR
- "SLUB <none>: no slab for object 0x%p.\n",
- object);
+ pr_err("SLUB <none>: no slab for object 0x%p.\n",
+ object);
dump_stack();
} else
object_err(s, page, object,
@@ -1219,8 +1217,8 @@ static int __init setup_slub_debug(char *str)
slub_debug |= SLAB_FAILSLAB;
break;
default:
- printk(KERN_ERR "slub_debug option '%c' "
- "unknown. skipped\n", *str);
+ pr_err("slub_debug option '%c' unknown. skipped\n",
+ *str);
}
}
@@ -1314,17 +1312,26 @@ static inline void slab_free_hook(struct kmem_cache *s, void *x)
/*
* Slab allocation and freeing
*/
-static inline struct page *alloc_slab_page(gfp_t flags, int node,
- struct kmem_cache_order_objects oo)
+static inline struct page *alloc_slab_page(struct kmem_cache *s,
+ gfp_t flags, int node, struct kmem_cache_order_objects oo)
{
+ struct page *page;
int order = oo_order(oo);
flags |= __GFP_NOTRACK;
+ if (memcg_charge_slab(s, flags, order))
+ return NULL;
+
if (node == NUMA_NO_NODE)
- return alloc_pages(flags, order);
+ page = alloc_pages(flags, order);
else
- return alloc_pages_exact_node(node, flags, order);
+ page = alloc_pages_exact_node(node, flags, order);
+
+ if (!page)
+ memcg_uncharge_slab(s, order);
+
+ return page;
}
static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
@@ -1346,7 +1353,7 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
*/
alloc_gfp = (flags | __GFP_NOWARN | __GFP_NORETRY) & ~__GFP_NOFAIL;
- page = alloc_slab_page(alloc_gfp, node, oo);
+ page = alloc_slab_page(s, alloc_gfp, node, oo);
if (unlikely(!page)) {
oo = s->min;
alloc_gfp = flags;
@@ -1354,7 +1361,7 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
* Allocation may have failed due to fragmentation.
* Try a lower order alloc if possible
*/
- page = alloc_slab_page(alloc_gfp, node, oo);
+ page = alloc_slab_page(s, alloc_gfp, node, oo);
if (page)
stat(s, ORDER_FALLBACK);
@@ -1415,7 +1422,6 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
order = compound_order(page);
inc_slabs_node(s, page_to_nid(page), page->objects);
- memcg_bind_pages(s, order);
page->slab_cache = s;
__SetPageSlab(page);
if (page->pfmemalloc)
@@ -1466,11 +1472,11 @@ static void __free_slab(struct kmem_cache *s, struct page *page)
__ClearPageSlabPfmemalloc(page);
__ClearPageSlab(page);
- memcg_release_pages(s, order);
page_mapcount_reset(page);
if (current->reclaim_state)
current->reclaim_state->reclaimed_slab += pages;
- __free_memcg_kmem_pages(page, order);
+ __free_pages(page, order);
+ memcg_uncharge_slab(s, order);
}
#define need_reserve_slab_rcu \
@@ -1770,19 +1776,19 @@ static inline void note_cmpxchg_failure(const char *n,
#ifdef SLUB_DEBUG_CMPXCHG
unsigned long actual_tid = __this_cpu_read(s->cpu_slab->tid);
- printk(KERN_INFO "%s %s: cmpxchg redo ", n, s->name);
+ pr_info("%s %s: cmpxchg redo ", n, s->name);
#ifdef CONFIG_PREEMPT
if (tid_to_cpu(tid) != tid_to_cpu(actual_tid))
- printk("due to cpu change %d -> %d\n",
+ pr_warn("due to cpu change %d -> %d\n",
tid_to_cpu(tid), tid_to_cpu(actual_tid));
else
#endif
if (tid_to_event(tid) != tid_to_event(actual_tid))
- printk("due to cpu running other code. Event %ld->%ld\n",
+ pr_warn("due to cpu running other code. Event %ld->%ld\n",
tid_to_event(tid), tid_to_event(actual_tid));
else
- printk("for unknown reason: actual=%lx was=%lx target=%lx\n",
+ pr_warn("for unknown reason: actual=%lx was=%lx target=%lx\n",
actual_tid, tid, next_tid(tid));
#endif
stat(s, CMPXCHG_DOUBLE_CPU_FAIL);
@@ -2121,11 +2127,19 @@ static inline int node_match(struct page *page, int node)
return 1;
}
+#ifdef CONFIG_SLUB_DEBUG
static int count_free(struct page *page)
{
return page->objects - page->inuse;
}
+static inline unsigned long node_nr_objs(struct kmem_cache_node *n)
+{
+ return atomic_long_read(&n->total_objects);
+}
+#endif /* CONFIG_SLUB_DEBUG */
+
+#if defined(CONFIG_SLUB_DEBUG) || defined(CONFIG_SYSFS)
static unsigned long count_partial(struct kmem_cache_node *n,
int (*get_count)(struct page *))
{
@@ -2139,31 +2153,28 @@ static unsigned long count_partial(struct kmem_cache_node *n,
spin_unlock_irqrestore(&n->list_lock, flags);
return x;
}
-
-static inline unsigned long node_nr_objs(struct kmem_cache_node *n)
-{
-#ifdef CONFIG_SLUB_DEBUG
- return atomic_long_read(&n->total_objects);
-#else
- return 0;
-#endif
-}
+#endif /* CONFIG_SLUB_DEBUG || CONFIG_SYSFS */
static noinline void
slab_out_of_memory(struct kmem_cache *s, gfp_t gfpflags, int nid)
{
+#ifdef CONFIG_SLUB_DEBUG
+ static DEFINE_RATELIMIT_STATE(slub_oom_rs, DEFAULT_RATELIMIT_INTERVAL,
+ DEFAULT_RATELIMIT_BURST);
int node;
- printk(KERN_WARNING
- "SLUB: Unable to allocate memory on node %d (gfp=0x%x)\n",
+ if ((gfpflags & __GFP_NOWARN) || !__ratelimit(&slub_oom_rs))
+ return;
+
+ pr_warn("SLUB: Unable to allocate memory on node %d (gfp=0x%x)\n",
nid, gfpflags);
- printk(KERN_WARNING " cache: %s, object size: %d, buffer size: %d, "
- "default order: %d, min order: %d\n", s->name, s->object_size,
- s->size, oo_order(s->oo), oo_order(s->min));
+ pr_warn(" cache: %s, object size: %d, buffer size: %d, default order: %d, min order: %d\n",
+ s->name, s->object_size, s->size, oo_order(s->oo),
+ oo_order(s->min));
if (oo_order(s->min) > get_order(s->object_size))
- printk(KERN_WARNING " %s debugging increased min order, use "
- "slub_debug=O to disable.\n", s->name);
+ pr_warn(" %s debugging increased min order, use slub_debug=O to disable.\n",
+ s->name);
for_each_online_node(node) {
struct kmem_cache_node *n = get_node(s, node);
@@ -2178,10 +2189,10 @@ slab_out_of_memory(struct kmem_cache *s, gfp_t gfpflags, int nid)
nr_slabs = node_nr_slabs(n);
nr_objs = node_nr_objs(n);
- printk(KERN_WARNING
- " node %d: slabs: %ld, objs: %ld, free: %ld\n",
+ pr_warn(" node %d: slabs: %ld, objs: %ld, free: %ld\n",
node, nr_slabs, nr_objs, nr_free);
}
+#endif
}
static inline void *new_slab_objects(struct kmem_cache *s, gfp_t flags,
@@ -2198,7 +2209,7 @@ static inline void *new_slab_objects(struct kmem_cache *s, gfp_t flags,
page = new_slab(s, flags, node);
if (page) {
- c = __this_cpu_ptr(s->cpu_slab);
+ c = raw_cpu_ptr(s->cpu_slab);
if (c->page)
flush_slab(s, c);
@@ -2323,8 +2334,6 @@ redo:
if (freelist)
goto load_freelist;
- stat(s, ALLOC_SLOWPATH);
-
freelist = get_freelist(s, page);
if (!freelist) {
@@ -2360,9 +2369,7 @@ new_slab:
freelist = new_slab_objects(s, gfpflags, node, &c);
if (unlikely(!freelist)) {
- if (!(gfpflags & __GFP_NOWARN) && printk_ratelimit())
- slab_out_of_memory(s, gfpflags, node);
-
+ slab_out_of_memory(s, gfpflags, node);
local_irq_restore(flags);
return NULL;
}
@@ -2418,7 +2425,7 @@ redo:
* and the retrieval of the tid.
*/
preempt_disable();
- c = __this_cpu_ptr(s->cpu_slab);
+ c = this_cpu_ptr(s->cpu_slab);
/*
* The transaction ids are globally unique per cpu and per operation on
@@ -2431,10 +2438,10 @@ redo:
object = c->freelist;
page = c->page;
- if (unlikely(!object || !node_match(page, node)))
+ if (unlikely(!object || !node_match(page, node))) {
object = __slab_alloc(s, gfpflags, node, addr, c);
-
- else {
+ stat(s, ALLOC_SLOWPATH);
+ } else {
void *next_object = get_freepointer_safe(s, object);
/*
@@ -2674,7 +2681,7 @@ redo:
* during the cmpxchg then the free will succeed.
*/
preempt_disable();
- c = __this_cpu_ptr(s->cpu_slab);
+ c = this_cpu_ptr(s->cpu_slab);
tid = c->tid;
preempt_enable();
@@ -2894,10 +2901,8 @@ static void early_kmem_cache_node_alloc(int node)
BUG_ON(!page);
if (page_to_nid(page) != node) {
- printk(KERN_ERR "SLUB: Unable to allocate memory from "
- "node %d\n", node);
- printk(KERN_ERR "SLUB: Allocating a useless per node structure "
- "in order to be able to continue\n");
+ pr_err("SLUB: Unable to allocate memory from node %d\n", node);
+ pr_err("SLUB: Allocating a useless per node structure in order to be able to continue\n");
}
n = page->freelist;
@@ -3182,8 +3187,7 @@ static void list_slab_objects(struct kmem_cache *s, struct page *page,
for_each_object(p, s, addr, page->objects) {
if (!test_bit(slab_index(p, s, addr), map)) {
- printk(KERN_ERR "INFO: Object 0x%p @offset=%tu\n",
- p, p - addr);
+ pr_err("INFO: Object 0x%p @offset=%tu\n", p, p - addr);
print_tracking(s, p);
}
}
@@ -3305,8 +3309,8 @@ static void *kmalloc_large_node(size_t size, gfp_t flags, int node)
struct page *page;
void *ptr = NULL;
- flags |= __GFP_COMP | __GFP_NOTRACK | __GFP_KMEMCG;
- page = alloc_pages_node(node, flags, get_order(size));
+ flags |= __GFP_COMP | __GFP_NOTRACK;
+ page = alloc_kmem_pages_node(node, flags, get_order(size));
if (page)
ptr = page_address(page);
@@ -3375,7 +3379,7 @@ void kfree(const void *x)
if (unlikely(!PageSlab(page))) {
BUG_ON(!PageCompound(page));
kfree_hook(x);
- __free_memcg_kmem_pages(page, compound_order(page));
+ __free_kmem_pages(page, compound_order(page));
return;
}
slab_free(page->slab_cache, page, object, _RET_IP_);
@@ -3392,7 +3396,7 @@ EXPORT_SYMBOL(kfree);
* being allocated from last increasing the chance that the last objects
* are freed in them.
*/
-int kmem_cache_shrink(struct kmem_cache *s)
+int __kmem_cache_shrink(struct kmem_cache *s)
{
int node;
int i;
@@ -3448,7 +3452,6 @@ int kmem_cache_shrink(struct kmem_cache *s)
kfree(slabs_by_inuse);
return 0;
}
-EXPORT_SYMBOL(kmem_cache_shrink);
static int slab_mem_going_offline_callback(void *arg)
{
@@ -3456,7 +3459,7 @@ static int slab_mem_going_offline_callback(void *arg)
mutex_lock(&slab_mutex);
list_for_each_entry(s, &slab_caches, list)
- kmem_cache_shrink(s);
+ __kmem_cache_shrink(s);
mutex_unlock(&slab_mutex);
return 0;
@@ -3650,9 +3653,7 @@ void __init kmem_cache_init(void)
register_cpu_notifier(&slab_notifier);
#endif
- printk(KERN_INFO
- "SLUB: HWalign=%d, Order=%d-%d, MinObjects=%d,"
- " CPUs=%d, Nodes=%d\n",
+ pr_info("SLUB: HWalign=%d, Order=%d-%d, MinObjects=%d, CPUs=%d, Nodes=%d\n",
cache_line_size(),
slub_min_order, slub_max_order, slub_min_objects,
nr_cpu_ids, nr_node_ids);
@@ -3934,8 +3935,8 @@ static int validate_slab_node(struct kmem_cache *s,
count++;
}
if (count != n->nr_partial)
- printk(KERN_ERR "SLUB %s: %ld partial slabs counted but "
- "counter=%ld\n", s->name, count, n->nr_partial);
+ pr_err("SLUB %s: %ld partial slabs counted but counter=%ld\n",
+ s->name, count, n->nr_partial);
if (!(s->flags & SLAB_STORE_USER))
goto out;
@@ -3945,9 +3946,8 @@ static int validate_slab_node(struct kmem_cache *s,
count++;
}
if (count != atomic_long_read(&n->nr_slabs))
- printk(KERN_ERR "SLUB: %s %ld slabs counted but "
- "counter=%ld\n", s->name, count,
- atomic_long_read(&n->nr_slabs));
+ pr_err("SLUB: %s %ld slabs counted but counter=%ld\n",
+ s->name, count, atomic_long_read(&n->nr_slabs));
out:
spin_unlock_irqrestore(&n->list_lock, flags);
@@ -4211,53 +4211,50 @@ static void resiliency_test(void)
BUILD_BUG_ON(KMALLOC_MIN_SIZE > 16 || KMALLOC_SHIFT_HIGH < 10);
- printk(KERN_ERR "SLUB resiliency testing\n");
- printk(KERN_ERR "-----------------------\n");
- printk(KERN_ERR "A. Corruption after allocation\n");
+ pr_err("SLUB resiliency testing\n");
+ pr_err("-----------------------\n");
+ pr_err("A. Corruption after allocation\n");
p = kzalloc(16, GFP_KERNEL);
p[16] = 0x12;
- printk(KERN_ERR "\n1. kmalloc-16: Clobber Redzone/next pointer"
- " 0x12->0x%p\n\n", p + 16);
+ pr_err("\n1. kmalloc-16: Clobber Redzone/next pointer 0x12->0x%p\n\n",
+ p + 16);
validate_slab_cache(kmalloc_caches[4]);
/* Hmmm... The next two are dangerous */
p = kzalloc(32, GFP_KERNEL);
p[32 + sizeof(void *)] = 0x34;
- printk(KERN_ERR "\n2. kmalloc-32: Clobber next pointer/next slab"
- " 0x34 -> -0x%p\n", p);
- printk(KERN_ERR
- "If allocated object is overwritten then not detectable\n\n");
+ pr_err("\n2. kmalloc-32: Clobber next pointer/next slab 0x34 -> -0x%p\n",
+ p);
+ pr_err("If allocated object is overwritten then not detectable\n\n");
validate_slab_cache(kmalloc_caches[5]);
p = kzalloc(64, GFP_KERNEL);
p += 64 + (get_cycles() & 0xff) * sizeof(void *);
*p = 0x56;
- printk(KERN_ERR "\n3. kmalloc-64: corrupting random byte 0x56->0x%p\n",
- p);
- printk(KERN_ERR
- "If allocated object is overwritten then not detectable\n\n");
+ pr_err("\n3. kmalloc-64: corrupting random byte 0x56->0x%p\n",
+ p);
+ pr_err("If allocated object is overwritten then not detectable\n\n");
validate_slab_cache(kmalloc_caches[6]);
- printk(KERN_ERR "\nB. Corruption after free\n");
+ pr_err("\nB. Corruption after free\n");
p = kzalloc(128, GFP_KERNEL);
kfree(p);
*p = 0x78;
- printk(KERN_ERR "1. kmalloc-128: Clobber first word 0x78->0x%p\n\n", p);
+ pr_err("1. kmalloc-128: Clobber first word 0x78->0x%p\n\n", p);
validate_slab_cache(kmalloc_caches[7]);
p = kzalloc(256, GFP_KERNEL);
kfree(p);
p[50] = 0x9a;
- printk(KERN_ERR "\n2. kmalloc-256: Clobber 50th byte 0x9a->0x%p\n\n",
- p);
+ pr_err("\n2. kmalloc-256: Clobber 50th byte 0x9a->0x%p\n\n", p);
validate_slab_cache(kmalloc_caches[8]);
p = kzalloc(512, GFP_KERNEL);
kfree(p);
p[512] = 0xab;
- printk(KERN_ERR "\n3. kmalloc-512: Clobber redzone 0xab->0x%p\n\n", p);
+ pr_err("\n3. kmalloc-512: Clobber redzone 0xab->0x%p\n\n", p);
validate_slab_cache(kmalloc_caches[9]);
}
#else
@@ -4332,7 +4329,7 @@ static ssize_t show_slab_objects(struct kmem_cache *s,
}
}
- lock_memory_hotplug();
+ get_online_mems();
#ifdef CONFIG_SLUB_DEBUG
if (flags & SO_ALL) {
for_each_node_state(node, N_NORMAL_MEMORY) {
@@ -4372,7 +4369,7 @@ static ssize_t show_slab_objects(struct kmem_cache *s,
x += sprintf(buf + x, " N%d=%lu",
node, nodes[node]);
#endif
- unlock_memory_hotplug();
+ put_online_mems();
kfree(nodes);
return x + sprintf(buf + x, "\n");
}
@@ -5303,7 +5300,7 @@ static int __init slab_sysfs_init(void)
slab_kset = kset_create_and_add("slab", &slab_uevent_ops, kernel_kobj);
if (!slab_kset) {
mutex_unlock(&slab_mutex);
- printk(KERN_ERR "Cannot register slab subsystem.\n");
+ pr_err("Cannot register slab subsystem.\n");
return -ENOSYS;
}
@@ -5312,8 +5309,8 @@ static int __init slab_sysfs_init(void)
list_for_each_entry(s, &slab_caches, list) {
err = sysfs_slab_add(s);
if (err)
- printk(KERN_ERR "SLUB: Unable to add boot slab %s"
- " to sysfs\n", s->name);
+ pr_err("SLUB: Unable to add boot slab %s to sysfs\n",
+ s->name);
}
while (alias_list) {
@@ -5322,8 +5319,8 @@ static int __init slab_sysfs_init(void)
alias_list = alias_list->next;
err = sysfs_slab_alias(al->s, al->name);
if (err)
- printk(KERN_ERR "SLUB: Unable to add boot slab alias"
- " %s to sysfs\n", al->name);
+ pr_err("SLUB: Unable to add boot slab alias %s to sysfs\n",
+ al->name);
kfree(al);
}
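
Besides the mechanical printk-to-pr_* conversion, slab_bug() and
slab_fix() drop their fixed 100-byte buffers in favour of %pV and
struct va_format, which lets printk expand a nested format string with no
intermediate copy or truncation. The idiom in isolation:

        static void report(const char *fmt, ...)
        {
                struct va_format vaf;
                va_list args;

                va_start(args, fmt);
                vaf.fmt = fmt;
                vaf.va = &args;
                pr_err("BUG: %pV\n", &vaf);     /* args consumed inside printk */
                va_end(args);
        }
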
diff --git a/mm/swap.c b/mm/swap.c
index 9ce43ba4498b..9e8e3472248b 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -67,7 +67,7 @@ static void __page_cache_release(struct page *page)
static void __put_single_page(struct page *page)
{
__page_cache_release(page);
- free_hot_cold_page(page, 0);
+ free_hot_cold_page(page, false);
}
static void __put_compound_page(struct page *page)
@@ -79,95 +79,88 @@ static void __put_compound_page(struct page *page)
(*dtor)(page);
}
-static void put_compound_page(struct page *page)
+/**
+ * Two special cases here: we could avoid taking compound_lock_irqsave
+ * and could skip the tail refcounting (in _mapcount).
+ *
+ * 1. Hugetlbfs page:
+ *
+ * PageHeadHuge will remain true until the compound page
+ * is released and enters the buddy allocator, and it cannot
+ * be split by __split_huge_page_refcount().
+ *
+ * So if we see PageHeadHuge set, and we have the tail page pin,
+ * then we can safely put the head page.
+ *
+ * 2. Slab THP page:
+ *
+ * PG_slab is cleared before the slab frees the head page, and a
+ * tail pin cannot be the last reference left on the head page,
+ * because the slab code is free to reuse the compound page
+ * after a kfree/kmem_cache_free without having to check if
+ * there's any tail pin left. In turn, all tail pins must always be
+ * released while the head is still pinned by the slab code,
+ * and so we know PG_slab will still be set too.
+ *
+ * So if we see PageSlab set, and we have the tail page pin,
+ * then we can safely put the head page.
+ */
+static __always_inline
+void put_unrefcounted_compound_page(struct page *page_head, struct page *page)
{
- struct page *page_head;
-
- if (likely(!PageTail(page))) {
- if (put_page_testzero(page)) {
- /*
- * By the time all refcounts have been released
- * split_huge_page cannot run anymore from under us.
- */
- if (PageHead(page))
- __put_compound_page(page);
- else
- __put_single_page(page);
- }
- return;
- }
-
- /* __split_huge_page_refcount can run under us */
- page_head = compound_head(page);
-
/*
- * THP can not break up slab pages so avoid taking
- * compound_lock() and skip the tail page refcounting (in
- * _mapcount) too. Slab performs non-atomic bit ops on
- * page->flags for better performance. In particular
- * slab_unlock() in slub used to be a hot path. It is still
- * hot on arches that do not support
- * this_cpu_cmpxchg_double().
- *
- * If "page" is part of a slab or hugetlbfs page it cannot be
- * splitted and the head page cannot change from under us. And
- * if "page" is part of a THP page under splitting, if the
- * head page pointed by the THP tail isn't a THP head anymore,
- * we'll find PageTail clear after smp_rmb() and we'll treat
- * it as a single page.
+ * If @page is a THP tail, we must read the tail page
+ * flags after the head page flags. The
+ * __split_huge_page_refcount side enforces write memory barriers
+ * between clearing PageTail and the point where the head page
+ * can be freed and reallocated.
*/
- if (!__compound_tail_refcounted(page_head)) {
+ smp_rmb();
+ if (likely(PageTail(page))) {
/*
- * If "page" is a THP tail, we must read the tail page
- * flags after the head page flags. The
- * split_huge_page side enforces write memory barriers
- * between clearing PageTail and before the head page
- * can be freed and reallocated.
+ * __split_huge_page_refcount cannot race
+ * here, see the comment above this function.
*/
- smp_rmb();
- if (likely(PageTail(page))) {
- /*
- * __split_huge_page_refcount cannot race
- * here.
- */
- VM_BUG_ON_PAGE(!PageHead(page_head), page_head);
- VM_BUG_ON_PAGE(page_mapcount(page) != 0, page);
- if (put_page_testzero(page_head)) {
- /*
- * If this is the tail of a slab
- * compound page, the tail pin must
- * not be the last reference held on
- * the page, because the PG_slab
- * cannot be cleared before all tail
- * pins (which skips the _mapcount
- * tail refcounting) have been
- * released. For hugetlbfs the tail
- * pin may be the last reference on
- * the page instead, because
- * PageHeadHuge will not go away until
- * the compound page enters the buddy
- * allocator.
- */
- VM_BUG_ON_PAGE(PageSlab(page_head), page_head);
- __put_compound_page(page_head);
- }
- return;
- } else
+ VM_BUG_ON_PAGE(!PageHead(page_head), page_head);
+ VM_BUG_ON_PAGE(page_mapcount(page) != 0, page);
+ if (put_page_testzero(page_head)) {
/*
- * __split_huge_page_refcount run before us,
- * "page" was a THP tail. The split page_head
- * has been freed and reallocated as slab or
- * hugetlbfs page of smaller order (only
- * possible if reallocated as slab on x86).
+ * If this is the tail of a slab THP page,
+ * the tail pin must not be the last reference
+ * held on the page, because the PG_slab cannot
+ * be cleared before all tail pins (which skips
+ * the _mapcount tail refcounting) have been
+ * released.
+ *
+ * If this is the tail of a hugetlbfs page,
+ * the tail pin may be the last reference on
+ * the page instead, because PageHeadHuge will
+ * not go away until the compound page enters
+ * the buddy allocator.
*/
- goto out_put_single;
- }
+ VM_BUG_ON_PAGE(PageSlab(page_head), page_head);
+ __put_compound_page(page_head);
+ }
+ } else
+ /*
+ * __split_huge_page_refcount ran before us;
+ * @page was a THP tail. The split @page_head
+ * has been freed and reallocated as slab or
+ * hugetlbfs page of smaller order (only
+ * possible if reallocated as slab on x86).
+ */
+ if (put_page_testzero(page))
+ __put_single_page(page);
+}
+static __always_inline
+void put_refcounted_compound_page(struct page *page_head, struct page *page)
+{
if (likely(page != page_head && get_page_unless_zero(page_head))) {
unsigned long flags;
/*
- * page_head wasn't a dangling pointer but it may not
+ * @page_head wasn't a dangling pointer but it may not
* be a head page anymore by the time we obtain the
* lock. That is ok as long as it can't be freed from
* under us.
@@ -178,7 +171,7 @@ static void put_compound_page(struct page *page)
compound_unlock_irqrestore(page_head, flags);
if (put_page_testzero(page_head)) {
/*
- * The head page may have been freed
+ * The @page_head may have been freed
* and reallocated as a compound page
* of smaller order and then freed
* again. All we know is that it
@@ -222,12 +215,51 @@ out_put_single:
__put_single_page(page_head);
}
} else {
- /* page_head is a dangling pointer */
+ /* @page_head is a dangling pointer */
VM_BUG_ON_PAGE(PageTail(page), page);
goto out_put_single;
}
}
+static void put_compound_page(struct page *page)
+{
+ struct page *page_head;
+
+ /*
+ * We see PageCompound set and PageTail not set, so @page may be:
+ * 1. hugetlbfs head page, or
+ * 2. THP head page.
+ */
+ if (likely(!PageTail(page))) {
+ if (put_page_testzero(page)) {
+ /*
+ * By the time all refcounts have been released
+ * split_huge_page cannot run anymore from under us.
+ */
+ if (PageHead(page))
+ __put_compound_page(page);
+ else
+ __put_single_page(page);
+ }
+ return;
+ }
+
+ /*
+ * We see PageCompound set and PageTail set, so @page may be:
+ * 1. a tail hugetlbfs page, or
+ * 2. a tail THP page, or
+ * 3. a split THP page.
+ *
+ * Case 3 is possible, as we may race with
+ * __split_huge_page_refcount tearing down a THP page.
+ */
+ page_head = compound_head_by_tail(page);
+ if (!__compound_tail_refcounted(page_head))
+ put_unrefcounted_compound_page(page_head, page);
+ else
+ put_refcounted_compound_page(page_head, page);
+}
+
void put_page(struct page *page)
{
if (unlikely(PageCompound(page)))
@@ -441,7 +473,7 @@ void rotate_reclaimable_page(struct page *page)
page_cache_get(page);
local_irq_save(flags);
- pvec = &__get_cpu_var(lru_rotate_pvecs);
+ pvec = this_cpu_ptr(&lru_rotate_pvecs);
if (!pagevec_add(pvec, page))
pagevec_move_tail(pvec);
local_irq_restore(flags);
@@ -583,12 +615,17 @@ void mark_page_accessed(struct page *page)
EXPORT_SYMBOL(mark_page_accessed);
/*
- * Queue the page for addition to the LRU via pagevec. The decision on whether
- * to add the page to the [in]active [file|anon] list is deferred until the
- * pagevec is drained. This gives a chance for the caller of __lru_cache_add()
- * have the page added to the active list using mark_page_accessed().
+ * Used instead of mark_page_accessed(page) on a page that is not visible
+ * yet, while it is still safe to use non-atomic ops
*/
-void __lru_cache_add(struct page *page)
+void init_page_accessed(struct page *page)
+{
+ if (!PageReferenced(page))
+ __SetPageReferenced(page);
+}
+EXPORT_SYMBOL(init_page_accessed);
+
+static void __lru_cache_add(struct page *page)
{
struct pagevec *pvec = &get_cpu_var(lru_add_pvec);
@@ -598,11 +635,34 @@ void __lru_cache_add(struct page *page)
pagevec_add(pvec, page);
put_cpu_var(lru_add_pvec);
}
-EXPORT_SYMBOL(__lru_cache_add);
+
+/**
+ * lru_cache_add_anon - add a page to the page lists
+ * @page: the page to add
+ */
+void lru_cache_add_anon(struct page *page)
+{
+ if (PageActive(page))
+ ClearPageActive(page);
+ __lru_cache_add(page);
+}
+
+void lru_cache_add_file(struct page *page)
+{
+ if (PageActive(page))
+ ClearPageActive(page);
+ __lru_cache_add(page);
+}
+EXPORT_SYMBOL(lru_cache_add_file);
/**
* lru_cache_add - add a page to a page list
* @page: the page to be added to the LRU.
+ *
+ * Queue the page for addition to the LRU via pagevec. The decision on whether
+ * to add the page to the [in]active [file|anon] list is deferred until the
+ * pagevec is drained. This gives the caller of lru_cache_add() a chance
+ * to have the page added to the active list using mark_page_accessed().
*/
void lru_cache_add(struct page *page)
{
@@ -813,7 +873,7 @@ void lru_add_drain_all(void)
* grabbed the page via the LRU. If it did, give up: shrink_inactive_list()
* will free it.
*/
-void release_pages(struct page **pages, int nr, int cold)
+void release_pages(struct page **pages, int nr, bool cold)
{
int i;
LIST_HEAD(pages_to_free);
@@ -854,7 +914,7 @@ void release_pages(struct page **pages, int nr, int cold)
}
/* Clear Active bit in case of parallel mark_page_accessed */
- ClearPageActive(page);
+ __ClearPageActive(page);
list_add(&page->lru, &pages_to_free);
}
diff --git a/mm/swap_state.c b/mm/swap_state.c
index e76ace30d436..2972eee184a4 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -270,7 +270,7 @@ void free_pages_and_swap_cache(struct page **pages, int nr)
for (i = 0; i < todo; i++)
free_swap_cache(pagep[i]);
- release_pages(pagep, todo, 0);
+ release_pages(pagep, todo, false);
pagep += todo;
nr -= todo;
}
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 4a7f7e6992b6..beeeef8a1b2d 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -51,14 +51,32 @@ atomic_long_t nr_swap_pages;
/* protected with swap_lock. reading in vm_swap_full() doesn't need lock */
long total_swap_pages;
static int least_priority;
-static atomic_t highest_priority_index = ATOMIC_INIT(-1);
static const char Bad_file[] = "Bad swap file entry ";
static const char Unused_file[] = "Unused swap file entry ";
static const char Bad_offset[] = "Bad swap offset entry ";
static const char Unused_offset[] = "Unused swap offset entry ";
-struct swap_list_t swap_list = {-1, -1};
+/*
+ * all active swap_info_structs
+ * protected with swap_lock, and ordered by priority.
+ */
+PLIST_HEAD(swap_active_head);
+
+/*
+ * all available (active, not full) swap_info_structs
+ * protected with swap_avail_lock, ordered by priority.
+ * This is used by get_swap_page() instead of swap_active_head
+ * because swap_active_head includes all swap_info_structs,
+ * but get_swap_page() doesn't need to look at full ones.
+ * This uses its own lock instead of swap_lock because when a
+ * swap_info_struct changes between not-full/full, it needs to
+ * add/remove itself to/from this list, but the swap_info_struct->lock
+ * is held and the locking order requires swap_lock to be taken
+ * before any swap_info_struct->lock.
+ */
+static PLIST_HEAD(swap_avail_head);
+static DEFINE_SPINLOCK(swap_avail_lock);
struct swap_info_struct *swap_info[MAX_SWAPFILES];
@@ -591,6 +609,9 @@ checks:
if (si->inuse_pages == si->pages) {
si->lowest_bit = si->max;
si->highest_bit = 0;
+ spin_lock(&swap_avail_lock);
+ plist_del(&si->avail_list, &swap_avail_head);
+ spin_unlock(&swap_avail_lock);
}
si->swap_map[offset] = usage;
inc_cluster_info_page(si, si->cluster_info, offset);
@@ -640,71 +661,65 @@ no_page:
swp_entry_t get_swap_page(void)
{
- struct swap_info_struct *si;
+ struct swap_info_struct *si, *next;
pgoff_t offset;
- int type, next;
- int wrapped = 0;
- int hp_index;
- spin_lock(&swap_lock);
if (atomic_long_read(&nr_swap_pages) <= 0)
goto noswap;
atomic_long_dec(&nr_swap_pages);
- for (type = swap_list.next; type >= 0 && wrapped < 2; type = next) {
- hp_index = atomic_xchg(&highest_priority_index, -1);
- /*
- * highest_priority_index records current highest priority swap
- * type which just frees swap entries. If its priority is
- * higher than that of swap_list.next swap type, we use it. It
- * isn't protected by swap_lock, so it can be an invalid value
- * if the corresponding swap type is swapoff. We double check
- * the flags here. It's even possible the swap type is swapoff
- * and swapon again and its priority is changed. In such rare
- * case, low prority swap type might be used, but eventually
- * high priority swap will be used after several rounds of
- * swap.
- */
- if (hp_index != -1 && hp_index != type &&
- swap_info[type]->prio < swap_info[hp_index]->prio &&
- (swap_info[hp_index]->flags & SWP_WRITEOK)) {
- type = hp_index;
- swap_list.next = type;
- }
-
- si = swap_info[type];
- next = si->next;
- if (next < 0 ||
- (!wrapped && si->prio != swap_info[next]->prio)) {
- next = swap_list.head;
- wrapped++;
- }
+ spin_lock(&swap_avail_lock);
+start_over:
+ plist_for_each_entry_safe(si, next, &swap_avail_head, avail_list) {
+ /* requeue si to after same-priority siblings */
+ plist_requeue(&si->avail_list, &swap_avail_head);
+ spin_unlock(&swap_avail_lock);
spin_lock(&si->lock);
- if (!si->highest_bit) {
+ if (!si->highest_bit || !(si->flags & SWP_WRITEOK)) {
+ spin_lock(&swap_avail_lock);
+ if (plist_node_empty(&si->avail_list)) {
+ spin_unlock(&si->lock);
+ goto nextsi;
+ }
+ WARN(!si->highest_bit,
+ "swap_info %d in list but !highest_bit\n",
+ si->type);
+ WARN(!(si->flags & SWP_WRITEOK),
+ "swap_info %d in list but !SWP_WRITEOK\n",
+ si->type);
+ plist_del(&si->avail_list, &swap_avail_head);
spin_unlock(&si->lock);
- continue;
+ goto nextsi;
}
- if (!(si->flags & SWP_WRITEOK)) {
- spin_unlock(&si->lock);
- continue;
- }
-
- swap_list.next = next;
- spin_unlock(&swap_lock);
/* This is called for allocating swap entry for cache */
offset = scan_swap_map(si, SWAP_HAS_CACHE);
spin_unlock(&si->lock);
if (offset)
- return swp_entry(type, offset);
- spin_lock(&swap_lock);
- next = swap_list.next;
+ return swp_entry(si->type, offset);
+ pr_debug("scan_swap_map of si %d failed to find offset\n",
+ si->type);
+ spin_lock(&swap_avail_lock);
+nextsi:
+ /*
+ * if we got here, it's likely that si was almost full before,
+ * and since scan_swap_map() can drop the si->lock, multiple
+ * callers probably all tried to get a page from the same si
+ * and it filled up before we could get one; or, the si filled
+ * up between us dropping swap_avail_lock and taking si->lock.
+ * Since we dropped the swap_avail_lock, the swap_avail_head
+ * list may have been modified; so if next is still in the
+ * swap_avail_head list then try it, otherwise start over.
+ */
+ if (plist_node_empty(&next->avail_list))
+ goto start_over;
}
+ spin_unlock(&swap_avail_lock);
+
atomic_long_inc(&nr_swap_pages);
noswap:
- spin_unlock(&swap_lock);
return (swp_entry_t) {0};
}
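
The plist_requeue() call in the loop above is what spreads allocations across
swap devices that share a priority: the entry just used is moved behind its
same-priority siblings before the lock is dropped, so the next caller picks a
different device. A minimal userspace sketch of that rotation (invented names;
a flat array standing in for the kernel's plist):

    #include <stdio.h>

    /* Entries kept sorted by plist key (ascending); equal keys rotate. */
    struct dev { const char *name; int prio; };

    static struct dev devs[] = {
        { "swap-a", -10 }, { "swap-b", -10 }, { "swap-c", -5 },
    };
    #define NDEVS 3

    /* Mimic plist_requeue(): move the head entry behind its same-priority
     * siblings so the next allocation picks a different equal-prio device. */
    static void requeue_head(void)
    {
        struct dev head = devs[0];
        int i = 0;

        while (i + 1 < NDEVS && devs[i + 1].prio == head.prio) {
            devs[i] = devs[i + 1];
            i++;
        }
        devs[i] = head;
    }

    int main(void)
    {
        for (int n = 0; n < 4; n++) {
            printf("allocating from %s\n", devs[0].name);
            requeue_head();
        }
        return 0; /* prints swap-a, swap-b, swap-a, swap-b: round-robin */
    }
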
@@ -766,27 +781,6 @@ out:
return NULL;
}
-/*
- * This swap type frees swap entry, check if it is the highest priority swap
- * type which just frees swap entry. get_swap_page() uses
- * highest_priority_index to search highest priority swap type. The
- * swap_info_struct.lock can't protect us if there are multiple swap types
- * active, so we use atomic_cmpxchg.
- */
-static void set_highest_priority_index(int type)
-{
- int old_hp_index, new_hp_index;
-
- do {
- old_hp_index = atomic_read(&highest_priority_index);
- if (old_hp_index != -1 &&
- swap_info[old_hp_index]->prio >= swap_info[type]->prio)
- break;
- new_hp_index = type;
- } while (atomic_cmpxchg(&highest_priority_index,
- old_hp_index, new_hp_index) != old_hp_index);
-}
-
static unsigned char swap_entry_free(struct swap_info_struct *p,
swp_entry_t entry, unsigned char usage)
{
@@ -828,9 +822,18 @@ static unsigned char swap_entry_free(struct swap_info_struct *p,
dec_cluster_info_page(p, p->cluster_info, offset);
if (offset < p->lowest_bit)
p->lowest_bit = offset;
- if (offset > p->highest_bit)
+ if (offset > p->highest_bit) {
+ bool was_full = !p->highest_bit;
p->highest_bit = offset;
- set_highest_priority_index(p->type);
+ if (was_full && (p->flags & SWP_WRITEOK)) {
+ spin_lock(&swap_avail_lock);
+ WARN_ON(!plist_node_empty(&p->avail_list));
+ if (plist_node_empty(&p->avail_list))
+ plist_add(&p->avail_list,
+ &swap_avail_head);
+ spin_unlock(&swap_avail_lock);
+ }
+ }
atomic_long_inc(&nr_swap_pages);
p->inuse_pages--;
frontswap_invalidate_page(p->type, offset);
@@ -1765,30 +1768,37 @@ static void _enable_swap_info(struct swap_info_struct *p, int prio,
unsigned char *swap_map,
struct swap_cluster_info *cluster_info)
{
- int i, prev;
-
if (prio >= 0)
p->prio = prio;
else
p->prio = --least_priority;
+ /*
+ * the plist prio is negated because plist ordering is
+ * low-to-high, while swap ordering is high-to-low
+ */
+ p->list.prio = -p->prio;
+ p->avail_list.prio = -p->prio;
p->swap_map = swap_map;
p->cluster_info = cluster_info;
p->flags |= SWP_WRITEOK;
atomic_long_add(p->pages, &nr_swap_pages);
total_swap_pages += p->pages;
- /* insert swap space into swap_list: */
- prev = -1;
- for (i = swap_list.head; i >= 0; i = swap_info[i]->next) {
- if (p->prio >= swap_info[i]->prio)
- break;
- prev = i;
- }
- p->next = i;
- if (prev < 0)
- swap_list.head = swap_list.next = p->type;
- else
- swap_info[prev]->next = p->type;
+ assert_spin_locked(&swap_lock);
+ /*
+ * both lists are plists, and thus priority ordered.
+ * swap_active_head needs to be priority ordered for swapoff(),
+ * which on removal of any swap_info_struct with an auto-assigned
+ * (i.e. negative) priority increments the auto-assigned priority
+ * of any lower-priority swap_info_structs.
+ * swap_avail_head needs to be priority ordered for get_swap_page(),
+ * which allocates swap pages from the highest available priority
+ * swap_info_struct.
+ */
+ plist_add(&p->list, &swap_active_head);
+ spin_lock(&swap_avail_lock);
+ plist_add(&p->avail_list, &swap_avail_head);
+ spin_unlock(&swap_avail_lock);
}
static void enable_swap_info(struct swap_info_struct *p, int prio,
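
The negation commented above ("plist ordering is low-to-high, while swap
ordering is high-to-low") can be seen in a small userspace mimic, with qsort()
standing in for ordered plist insertion; all names here are illustrative:

    #include <stdio.h>
    #include <stdlib.h>

    struct si { const char *name; int swap_prio; int plist_prio; };

    static int by_plist_prio(const void *a, const void *b)
    {
        return ((const struct si *)a)->plist_prio -
               ((const struct si *)b)->plist_prio;
    }

    int main(void)
    {
        struct si v[] = {
            { "sda2", 5, 0 }, { "zram0", 100, 0 }, { "file", -2, 0 },
        };
        int i;

        for (i = 0; i < 3; i++)
            v[i].plist_prio = -v[i].swap_prio;  /* p->list.prio = -p->prio */

        qsort(v, 3, sizeof(v[0]), by_plist_prio);
        for (i = 0; i < 3; i++)                 /* zram0, sda2, file */
            printf("%s (swap prio %d)\n", v[i].name, v[i].swap_prio);
        return 0;
    }

The highest swap priority ends up first in iteration order because its plist
key is the most negative.
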
@@ -1823,8 +1833,7 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
struct address_space *mapping;
struct inode *inode;
struct filename *pathname;
- int i, type, prev;
- int err;
+ int err, found = 0;
unsigned int old_block_size;
if (!capable(CAP_SYS_ADMIN))
@@ -1842,17 +1851,16 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
goto out;
mapping = victim->f_mapping;
- prev = -1;
spin_lock(&swap_lock);
- for (type = swap_list.head; type >= 0; type = swap_info[type]->next) {
- p = swap_info[type];
+ plist_for_each_entry(p, &swap_active_head, list) {
if (p->flags & SWP_WRITEOK) {
- if (p->swap_file->f_mapping == mapping)
+ if (p->swap_file->f_mapping == mapping) {
+ found = 1;
break;
+ }
}
- prev = type;
}
- if (type < 0) {
+ if (!found) {
err = -EINVAL;
spin_unlock(&swap_lock);
goto out_dput;
@@ -1864,20 +1872,21 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
spin_unlock(&swap_lock);
goto out_dput;
}
- if (prev < 0)
- swap_list.head = p->next;
- else
- swap_info[prev]->next = p->next;
- if (type == swap_list.next) {
- /* just pick something that's safe... */
- swap_list.next = swap_list.head;
- }
+ spin_lock(&swap_avail_lock);
+ plist_del(&p->avail_list, &swap_avail_head);
+ spin_unlock(&swap_avail_lock);
spin_lock(&p->lock);
if (p->prio < 0) {
- for (i = p->next; i >= 0; i = swap_info[i]->next)
- swap_info[i]->prio = p->prio--;
+ struct swap_info_struct *si = p;
+
+ plist_for_each_entry_continue(si, &swap_active_head, list) {
+ si->prio++;
+ si->list.prio--;
+ si->avail_list.prio--;
+ }
least_priority++;
}
+ plist_del(&p->list, &swap_active_head);
atomic_long_sub(p->pages, &nr_swap_pages);
total_swap_pages -= p->pages;
p->flags &= ~SWP_WRITEOK;
@@ -1885,7 +1894,7 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
spin_unlock(&swap_lock);
set_current_oom_origin();
- err = try_to_unuse(type, false, 0); /* force all pages to be unused */
+ err = try_to_unuse(p->type, false, 0); /* force unuse all pages */
clear_current_oom_origin();
if (err) {
@@ -1926,7 +1935,7 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
frontswap_map = frontswap_map_get(p);
spin_unlock(&p->lock);
spin_unlock(&swap_lock);
- frontswap_invalidate_area(type);
+ frontswap_invalidate_area(p->type);
frontswap_map_set(p, NULL);
mutex_unlock(&swapon_mutex);
free_percpu(p->percpu_cluster);
@@ -1935,7 +1944,7 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
vfree(cluster_info);
vfree(frontswap_map);
/* Destroy swap account information */
- swap_cgroup_swapoff(type);
+ swap_cgroup_swapoff(p->type);
inode = mapping->host;
if (S_ISBLK(inode->i_mode)) {
@@ -2142,8 +2151,9 @@ static struct swap_info_struct *alloc_swap_info(void)
*/
}
INIT_LIST_HEAD(&p->first_swap_extent.list);
+ plist_node_init(&p->list, 0);
+ plist_node_init(&p->avail_list, 0);
p->flags = SWP_USED;
- p->next = -1;
spin_unlock(&swap_lock);
spin_lock_init(&p->lock);
diff --git a/mm/util.c b/mm/util.c
index d5ea733c5082..8f326edce752 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -3,6 +3,7 @@
#include <linux/string.h>
#include <linux/compiler.h>
#include <linux/export.h>
+#include <linux/ctype.h>
#include <linux/err.h>
#include <linux/sched.h>
#include <linux/security.h>
@@ -65,6 +66,35 @@ char *kstrndup(const char *s, size_t max, gfp_t gfp)
EXPORT_SYMBOL(kstrndup);
/**
+ * kstrimdup - Trim and copy a %NUL terminated string.
+ * @s: the string to trim and duplicate
+ * @gfp: the GFP mask used in the kmalloc() call when allocating memory
+ *
+ * Returns an address, which the caller must kfree(), containing
+ * a duplicate of the passed string with leading and/or trailing
+ * whitespace (as defined by isspace()) removed; %NULL on allocation failure.
+ */
+char *kstrimdup(const char *s, gfp_t gfp)
+{
+ char *buf;
+ char *begin = skip_spaces(s);
+ size_t len = strlen(begin);
+
+ while (len && isspace(begin[len - 1]))
+ len--;
+
+ buf = kmalloc_track_caller(len + 1, gfp);
+ if (!buf)
+ return NULL;
+
+ memcpy(buf, begin, len);
+ buf[len] = '\0';
+
+ return buf;
+}
+EXPORT_SYMBOL(kstrimdup);
+
+/**
* kmemdup - duplicate region of memory
*
* @src: memory region to duplicate
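
For reference, kstrimdup() above maps almost one-to-one onto portable C; a
sketch assuming malloc() and an explicit isspace() loop in place of the
kernel's kmalloc_track_caller() and skip_spaces():

    #include <ctype.h>
    #include <stdlib.h>
    #include <string.h>

    /* Userspace analog of kstrimdup(); not kernel code. */
    char *strimdup(const char *s)
    {
        const char *begin = s;
        size_t len;
        char *buf;

        while (isspace((unsigned char)*begin))
            begin++;                              /* skip_spaces() */
        len = strlen(begin);
        while (len && isspace((unsigned char)begin[len - 1]))
            len--;                                /* trim trailing blanks */

        buf = malloc(len + 1);                    /* kmalloc_track_caller() */
        if (!buf)
            return NULL;
        memcpy(buf, begin, len);
        buf[len] = '\0';
        return buf;
    }
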
diff --git a/mm/vmacache.c b/mm/vmacache.c
index 1037a3bab505..9f25af825dec 100644
--- a/mm/vmacache.c
+++ b/mm/vmacache.c
@@ -17,6 +17,16 @@ void vmacache_flush_all(struct mm_struct *mm)
{
struct task_struct *g, *p;
+ /*
+ * Single threaded tasks need not iterate the entire
+ * list of processes. We can avoid the flushing as well
+ * since the mm's seqnum was increased and we don't have
+ * to worry about other threads' seqnums. Current's
+ * flush will occur upon the next lookup.
+ */
+ if (atomic_read(&mm->mm_users) == 1)
+ return;
+
rcu_read_lock();
for_each_process_thread(g, p) {
/*
@@ -78,6 +88,8 @@ struct vm_area_struct *vmacache_find(struct mm_struct *mm, unsigned long addr)
if (!vmacache_valid(mm))
return NULL;
+ count_vm_vmacache_event(VMACACHE_FIND_CALLS);
+
for (i = 0; i < VMACACHE_SIZE; i++) {
struct vm_area_struct *vma = current->vmacache[i];
@@ -85,8 +97,10 @@ struct vm_area_struct *vmacache_find(struct mm_struct *mm, unsigned long addr)
continue;
if (WARN_ON_ONCE(vma->vm_mm != mm))
break;
- if (vma->vm_start <= addr && vma->vm_end > addr)
+ if (vma->vm_start <= addr && vma->vm_end > addr) {
+ count_vm_vmacache_event(VMACACHE_FIND_HITS);
return vma;
+ }
}
return NULL;
@@ -102,11 +116,15 @@ struct vm_area_struct *vmacache_find_exact(struct mm_struct *mm,
if (!vmacache_valid(mm))
return NULL;
+ count_vm_vmacache_event(VMACACHE_FIND_CALLS);
+
for (i = 0; i < VMACACHE_SIZE; i++) {
struct vm_area_struct *vma = current->vmacache[i];
- if (vma && vma->vm_start == start && vma->vm_end == end)
+ if (vma && vma->vm_start == start && vma->vm_end == end) {
+ count_vm_vmacache_event(VMACACHE_FIND_HITS);
return vma;
+ }
}
return NULL;
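
The new VMACACHE_FIND_CALLS/VMACACHE_FIND_HITS counters simply bracket the
lookup, so hits divided by calls gives the cache's hit ratio in /proc/vmstat.
The same accounting in miniature (invented names; a flat array in place of the
per-task cache):

    #include <stdio.h>

    #define CACHE_SIZE 4 /* mirrors VMACACHE_SIZE */

    struct range { unsigned long start, end; };

    static struct range cache[CACHE_SIZE];
    static unsigned long find_calls, find_hits;

    /* Every lookup bumps find_calls; only matches bump find_hits. */
    static struct range *cache_find(unsigned long addr)
    {
        int i;

        find_calls++;
        for (i = 0; i < CACHE_SIZE; i++) {
            struct range *r = &cache[i];

            if (r->start <= addr && r->end > addr) {
                find_hits++;
                return r;
            }
        }
        return NULL;
    }

    int main(void)
    {
        cache[0] = (struct range){ 0x1000, 0x2000 };
        cache_find(0x1800);  /* hit */
        cache_find(0x3000);  /* miss */
        printf("calls=%lu hits=%lu\n", find_calls, find_hits);
        return 0;
    }
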
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index bf233b283319..2ed75fb89fc1 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -1496,7 +1496,7 @@ void vfree(const void *addr)
if (!addr)
return;
if (unlikely(in_interrupt())) {
- struct vfree_deferred *p = &__get_cpu_var(vfree_deferred);
+ struct vfree_deferred *p = this_cpu_ptr(&vfree_deferred);
if (llist_add((struct llist_node *)addr, &p->list))
schedule_work(&p->wq);
} else
@@ -2619,19 +2619,19 @@ static int s_show(struct seq_file *m, void *p)
seq_printf(m, " phys=%llx", (unsigned long long)v->phys_addr);
if (v->flags & VM_IOREMAP)
- seq_printf(m, " ioremap");
+ seq_puts(m, " ioremap");
if (v->flags & VM_ALLOC)
- seq_printf(m, " vmalloc");
+ seq_puts(m, " vmalloc");
if (v->flags & VM_MAP)
- seq_printf(m, " vmap");
+ seq_puts(m, " vmap");
if (v->flags & VM_USERMAP)
- seq_printf(m, " user");
+ seq_puts(m, " user");
if (v->flags & VM_VPAGES)
- seq_printf(m, " vpages");
+ seq_puts(m, " vpages");
show_numa_info(m, v);
seq_putc(m, '\n');
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 9c2dba6ac685..7f8504198d41 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -11,6 +11,8 @@
* Multiqueue VM started 5.8.00, Rik van Riel.
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/gfp.h>
@@ -43,6 +45,7 @@
#include <linux/sysctl.h>
#include <linux/oom.h>
#include <linux/prefetch.h>
+#include <linux/printk.h>
#include <asm/tlbflush.h>
#include <asm/div64.h>
@@ -83,6 +86,9 @@ struct scan_control {
/* Scan (total_size >> priority) pages at once */
int priority;
+ /* anon vs. file LRUs scanning "ratio" */
+ int swappiness;
+
/*
* The memory cgroup that hit its limit and as a result is the
* primary target of this reclaim invocation.
@@ -324,7 +330,7 @@ shrink_slab_node(struct shrink_control *shrinkctl, struct shrinker *shrinker,
else
new_nr = atomic_long_read(&shrinker->nr_deferred[nid]);
- trace_mm_shrink_slab_end(shrinker, freed, nr, new_nr);
+ trace_mm_shrink_slab_end(shrinker, nid, freed, nr, new_nr, total_scan);
return freed;
}
@@ -477,7 +483,7 @@ static pageout_t pageout(struct page *page, struct address_space *mapping,
if (page_has_private(page)) {
if (try_to_free_buffers(page)) {
ClearPageDirty(page);
- printk("%s: orphaned page\n", __func__);
+ pr_info("%s: orphaned page\n", __func__);
return PAGE_CLEAN;
}
}
@@ -1121,7 +1127,7 @@ keep:
VM_BUG_ON_PAGE(PageLRU(page) || PageUnevictable(page), page);
}
- free_hot_cold_page_list(&free_pages, 1);
+ free_hot_cold_page_list(&free_pages, true);
list_splice(&ret_pages, page_list);
count_vm_events(PGACTIVATE, pgactivate);
@@ -1439,6 +1445,19 @@ putback_inactive_pages(struct lruvec *lruvec, struct list_head *page_list)
}
/*
+ * If a kernel thread (such as nfsd for loop-back mounts) services
+ * a backing device by writing to the page cache, it sets PF_LESS_THROTTLE.
+ * In that case we should only throttle if the backing device it is
+ * writing to is congested. In other cases it is safe to throttle.
+ */
+static int current_may_throttle(void)
+{
+ return !(current->flags & PF_LESS_THROTTLE) ||
+ current->backing_dev_info == NULL ||
+ bdi_write_congested(current->backing_dev_info);
+}
+
+/*
* shrink_inactive_list() is a helper for shrink_zone(). It returns the number
* of reclaimed pages
*/
@@ -1519,7 +1538,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
spin_unlock_irq(&zone->lru_lock);
- free_hot_cold_page_list(&page_list, 1);
+ free_hot_cold_page_list(&page_list, true);
/*
* If reclaim is isolating dirty pages under writeback, it implies
@@ -1566,7 +1585,8 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
* implies that pages are cycling through the LRU faster than
* they are written so also forcibly stall.
*/
- if (nr_unqueued_dirty == nr_taken || nr_immediate)
+ if ((nr_unqueued_dirty == nr_taken || nr_immediate) &&
+ current_may_throttle())
congestion_wait(BLK_RW_ASYNC, HZ/10);
}
@@ -1575,7 +1595,8 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
* is congested. Allow kswapd to continue until it starts encountering
* unqueued dirty pages or cycling through the LRU too quickly.
*/
- if (!sc->hibernation_mode && !current_is_kswapd())
+ if (!sc->hibernation_mode && !current_is_kswapd() &&
+ current_may_throttle())
wait_iff_congested(zone, BLK_RW_ASYNC, HZ/10);
trace_mm_vmscan_lru_shrink_inactive(zone->zone_pgdat->node_id,
@@ -1740,7 +1761,7 @@ static void shrink_active_list(unsigned long nr_to_scan,
__mod_zone_page_state(zone, NR_ISOLATED_ANON + file, -nr_taken);
spin_unlock_irq(&zone->lru_lock);
- free_hot_cold_page_list(&l_hold, 1);
+ free_hot_cold_page_list(&l_hold, true);
}
#ifdef CONFIG_SWAP
@@ -1830,13 +1851,6 @@ static unsigned long shrink_list(enum lru_list lru, unsigned long nr_to_scan,
return shrink_inactive_list(nr_to_scan, lruvec, sc, lru);
}
-static int vmscan_swappiness(struct scan_control *sc)
-{
- if (global_reclaim(sc))
- return vm_swappiness;
- return mem_cgroup_swappiness(sc->target_mem_cgroup);
-}
-
enum scan_balance {
SCAN_EQUAL,
SCAN_FRACT,
@@ -1866,6 +1880,8 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc,
bool force_scan = false;
unsigned long ap, fp;
enum lru_list lru;
+ bool some_scanned;
+ int pass;
/*
* If the zone or memcg is small, nr[l] can be 0. This
@@ -1895,7 +1911,7 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc,
* using the memory controller's swap limit feature would be
* too expensive.
*/
- if (!global_reclaim(sc) && !vmscan_swappiness(sc)) {
+ if (!global_reclaim(sc) && !sc->swappiness) {
scan_balance = SCAN_FILE;
goto out;
}
@@ -1905,7 +1921,7 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc,
* system is close to OOM, scan both anon and file equally
* (unless the swappiness setting disagrees with swapping).
*/
- if (!sc->priority && vmscan_swappiness(sc)) {
+ if (!sc->priority && sc->swappiness) {
scan_balance = SCAN_EQUAL;
goto out;
}
@@ -1948,7 +1964,7 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc,
* With swappiness at 100, anonymous and file have the same priority.
* This scanning priority is essentially the inverse of IO cost.
*/
- anon_prio = vmscan_swappiness(sc);
+ anon_prio = sc->swappiness;
file_prio = 200 - anon_prio;
/*
@@ -1989,39 +2005,49 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc,
fraction[1] = fp;
denominator = ap + fp + 1;
out:
- for_each_evictable_lru(lru) {
- int file = is_file_lru(lru);
- unsigned long size;
- unsigned long scan;
+ some_scanned = false;
+ /* Only use force_scan on second pass. */
+ for (pass = 0; !some_scanned && pass < 2; pass++) {
+ for_each_evictable_lru(lru) {
+ int file = is_file_lru(lru);
+ unsigned long size;
+ unsigned long scan;
- size = get_lru_size(lruvec, lru);
- scan = size >> sc->priority;
+ size = get_lru_size(lruvec, lru);
+ scan = size >> sc->priority;
- if (!scan && force_scan)
- scan = min(size, SWAP_CLUSTER_MAX);
+ if (!scan && pass && force_scan)
+ scan = min(size, SWAP_CLUSTER_MAX);
- switch (scan_balance) {
- case SCAN_EQUAL:
- /* Scan lists relative to size */
- break;
- case SCAN_FRACT:
+ switch (scan_balance) {
+ case SCAN_EQUAL:
+ /* Scan lists relative to size */
+ break;
+ case SCAN_FRACT:
+ /*
+ * Scan types proportional to swappiness and
+ * their relative recent reclaim efficiency.
+ */
+ scan = div64_u64(scan * fraction[file],
+ denominator);
+ break;
+ case SCAN_FILE:
+ case SCAN_ANON:
+ /* Scan one type exclusively */
+ if ((scan_balance == SCAN_FILE) != file)
+ scan = 0;
+ break;
+ default:
+ /* Look ma, no brain */
+ BUG();
+ }
+ nr[lru] = scan;
/*
- * Scan types proportional to swappiness and
- * their relative recent reclaim efficiency.
+ * Skip the second pass and don't force_scan,
+ * if we found something to scan.
*/
- scan = div64_u64(scan * fraction[file], denominator);
- break;
- case SCAN_FILE:
- case SCAN_ANON:
- /* Scan one type exclusively */
- if ((scan_balance == SCAN_FILE) != file)
- scan = 0;
- break;
- default:
- /* Look ma, no brain */
- BUG();
+ some_scanned |= !!scan;
}
- nr[lru] = scan;
}
}
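
The reshaped get_scan_count() loop only honors force_scan on its second pass,
and the second pass only runs when the first produced all-zero scan targets.
Stripped to its control flow (toy sizes and invented names):

    #include <stdbool.h>
    #include <stdio.h>

    #define NLRU 4
    #define SWAP_CLUSTER_MAX 32UL

    static unsigned long lru_size[NLRU] = { 10, 0, 7, 3 };

    static void fill_targets(unsigned long nr[], int priority, bool force_scan)
    {
        bool some_scanned = false;
        int pass, lru;

        /* Only use force_scan on the second pass. */
        for (pass = 0; !some_scanned && pass < 2; pass++) {
            for (lru = 0; lru < NLRU; lru++) {
                unsigned long scan = lru_size[lru] >> priority;

                if (!scan && pass && force_scan)
                    scan = lru_size[lru] < SWAP_CLUSTER_MAX ?
                           lru_size[lru] : SWAP_CLUSTER_MAX;
                nr[lru] = scan;
                some_scanned |= !!scan;
            }
        }
    }

    int main(void)
    {
        unsigned long nr[NLRU];

        fill_targets(nr, 12, true);  /* high priority shift zeroes pass 0 */
        for (int i = 0; i < NLRU; i++)
            printf("nr[%d]=%lu\n", i, nr[i]);
        return 0;
    }
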
@@ -2203,9 +2229,21 @@ static inline bool should_continue_reclaim(struct zone *zone,
}
}
-static void shrink_zone(struct zone *zone, struct scan_control *sc)
+/**
+ * __shrink_zone - shrinks a given zone
+ *
+ * @zone: zone to shrink
+ * @sc: scan control with additional reclaim parameters
+ * @honor_memcg_guarantee: do not reclaim memcgs which are within their memory
+ * guarantee
+ *
+ * Returns the number of memcgs scanned.
+ */
+static unsigned __shrink_zone(struct zone *zone, struct scan_control *sc,
+ bool honor_memcg_guarantee)
{
unsigned long nr_reclaimed, nr_scanned;
+ unsigned nr_scanned_groups = 0;
do {
struct mem_cgroup *root = sc->target_mem_cgroup;
@@ -2222,8 +2260,22 @@ static void shrink_zone(struct zone *zone, struct scan_control *sc)
do {
struct lruvec *lruvec;
+ /* Memcg might be protected from the reclaim */
+ if (honor_memcg_guarantee &&
+ mem_cgroup_within_guarantee(memcg, root)) {
+ /*
+ * It would be better to skip the memcg
+ * subtree now, but we do not have a memcg
+ * iterator helper for that. Anyone?
+ */
+ memcg = mem_cgroup_iter(root, memcg, &reclaim);
+ continue;
+ }
+
lruvec = mem_cgroup_zone_lruvec(zone, memcg);
+ nr_scanned_groups++;
+ sc->swappiness = mem_cgroup_swappiness(memcg);
shrink_lruvec(lruvec, sc);
/*
@@ -2250,6 +2302,20 @@ static void shrink_zone(struct zone *zone, struct scan_control *sc)
} while (should_continue_reclaim(zone, sc->nr_reclaimed - nr_reclaimed,
sc->nr_scanned - nr_scanned, sc));
+
+ return nr_scanned_groups;
+}
+
+static void shrink_zone(struct zone *zone, struct scan_control *sc)
+{
+ if (!__shrink_zone(zone, sc, true)) {
+ /*
+ * First round of reclaim didn't find anything to reclaim
+ * because of the memory guarantees for all memcgs in the
+ * reclaim target, so try again and ignore guarantees this time.
+ */
+ __shrink_zone(zone, sc, false);
+ }
}
/* Returns true if compaction should go ahead for a high-order request */
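
shrink_zone() above is now a two-attempt fallback: reclaim honoring the memcg
guarantees first, and retry without them only if nothing was scanned. The
shape in isolation (try_reclaim() is an invented stand-in for __shrink_zone()):

    #include <stdbool.h>

    /* Invented stand-in for __shrink_zone(); returns groups scanned. */
    static unsigned try_reclaim(bool honor_guarantees)
    {
        /* Pretend every memcg is within its guarantee when honored. */
        return honor_guarantees ? 0 : 1;
    }

    static void reclaim(void)
    {
        if (!try_reclaim(true))    /* all groups protected: nothing scanned */
            try_reclaim(false);    /* retry, ignoring guarantees */
    }

    int main(void)
    {
        reclaim();
        return 0;
    }
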
@@ -2525,10 +2591,17 @@ static bool pfmemalloc_watermark_ok(pg_data_t *pgdat)
for (i = 0; i <= ZONE_NORMAL; i++) {
zone = &pgdat->node_zones[i];
+ if (!populated_zone(zone))
+ continue;
+
pfmemalloc_reserve += min_wmark_pages(zone);
free_pages += zone_page_state(zone, NR_FREE_PAGES);
}
+ /* If there are no reserves (unexpected config) then do not throttle */
+ if (!pfmemalloc_reserve)
+ return true;
+
wmark_ok = free_pages > pfmemalloc_reserve / 2;
/* kswapd must be awake if processes are being throttled */
@@ -2553,9 +2626,9 @@ static bool pfmemalloc_watermark_ok(pg_data_t *pgdat)
static bool throttle_direct_reclaim(gfp_t gfp_mask, struct zonelist *zonelist,
nodemask_t *nodemask)
{
+ struct zoneref *z;
struct zone *zone;
- int high_zoneidx = gfp_zone(gfp_mask);
- pg_data_t *pgdat;
+ pg_data_t *pgdat = NULL;
/*
* Kernel threads should not be throttled as they may be indirectly
@@ -2574,10 +2647,34 @@ static bool throttle_direct_reclaim(gfp_t gfp_mask, struct zonelist *zonelist,
if (fatal_signal_pending(current))
goto out;
- /* Check if the pfmemalloc reserves are ok */
- first_zones_zonelist(zonelist, high_zoneidx, NULL, &zone);
- pgdat = zone->zone_pgdat;
- if (pfmemalloc_watermark_ok(pgdat))
+ /*
+ * Check if the pfmemalloc reserves are ok by finding the first node
+ * with a usable ZONE_NORMAL or lower zone. The expectation is that
+ * GFP_KERNEL will be required for allocating network buffers when
+ * swapping over the network so ZONE_HIGHMEM is unusable.
+ *
+ * Throttling is based on the first usable node and throttled processes
+ * wait on a queue until kswapd makes progress and wakes them. There
+ * is an affinity then between processes waking up and where reclaim
+ * progress has been made assuming the process wakes on the same node.
+ * More importantly, processes running on remote nodes will not compete
+ * for remote pfmemalloc reserves and processes on different nodes
+ * should make reasonable progress.
+ */
+ for_each_zone_zonelist_nodemask(zone, z, zonelist,
+ gfp_mask, nodemask) {
+ if (zone_idx(zone) > ZONE_NORMAL)
+ continue;
+
+ /* Throttle based on the first usable node */
+ pgdat = zone->zone_pgdat;
+ if (pfmemalloc_watermark_ok(pgdat))
+ goto out;
+ break;
+ }
+
+ /* If no zone was usable by the allocation flags then do not throttle */
+ if (!pgdat)
goto out;
/* Account for the throttling */
@@ -2660,6 +2757,7 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *memcg,
.may_swap = !noswap,
.order = 0,
.priority = 0,
+ .swappiness = mem_cgroup_swappiness(memcg),
.target_mem_cgroup = memcg,
};
struct lruvec *lruvec = mem_cgroup_zone_lruvec(zone, memcg);
@@ -3422,7 +3520,7 @@ int kswapd_run(int nid)
/*
* Called by memory hotplug when all memory in a node is offlined. Caller must
- * hold lock_memory_hotplug().
+ * hold mem_hotplug_begin/end().
*/
void kswapd_stop(int nid)
{
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 302dd076b8bf..b37bd49bfd55 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -207,7 +207,9 @@ void set_pgdat_percpu_threshold(pg_data_t *pgdat,
}
/*
- * For use when we know that interrupts are disabled.
+ * For use when we know that interrupts are disabled,
+ * or when we know that preemption is disabled and that
+ * particular counter cannot be updated from interrupt context.
*/
void __mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
int delta)
@@ -489,7 +491,7 @@ static void refresh_cpu_vm_stats(void)
continue;
if (__this_cpu_read(p->pcp.count))
- drain_zone_pages(zone, __this_cpu_ptr(&p->pcp));
+ drain_zone_pages(zone, this_cpu_ptr(&p->pcp));
#endif
}
fold_diff(global_diff);
@@ -866,6 +868,10 @@ const char * const vmstat_text[] = {
"nr_tlb_local_flush_one",
#endif /* CONFIG_DEBUG_TLBFLUSH */
+#ifdef CONFIG_DEBUG_VM_VMACACHE
+ "vmacache_find_calls",
+ "vmacache_find_hits",
+#endif
#endif /* CONFIG_VM_EVENTS_COUNTERS */
};
#endif /* CONFIG_PROC_FS || CONFIG_SYSFS || CONFIG_NUMA */
@@ -1226,7 +1232,7 @@ int sysctl_stat_interval __read_mostly = HZ;
static void vmstat_update(struct work_struct *w)
{
refresh_cpu_vm_stats();
- schedule_delayed_work(&__get_cpu_var(vmstat_work),
+ schedule_delayed_work(this_cpu_ptr(&vmstat_work),
round_jiffies_relative(sysctl_stat_interval));
}
diff --git a/mm/zbud.c b/mm/zbud.c
index 9451361e6aa7..01df13a7e2e1 100644
--- a/mm/zbud.c
+++ b/mm/zbud.c
@@ -247,7 +247,7 @@ void zbud_destroy_pool(struct zbud_pool *pool)
* gfp arguments are invalid or -ENOMEM if the pool was unable to allocate
* a new page.
*/
-int zbud_alloc(struct zbud_pool *pool, int size, gfp_t gfp,
+int zbud_alloc(struct zbud_pool *pool, unsigned int size, gfp_t gfp,
unsigned long *handle)
{
int chunks, i, freechunks;
@@ -255,7 +255,7 @@ int zbud_alloc(struct zbud_pool *pool, int size, gfp_t gfp,
enum buddy bud;
struct page *page;
- if (size <= 0 || gfp & __GFP_HIGHMEM)
+ if (!size || (gfp & __GFP_HIGHMEM))
return -EINVAL;
if (size > PAGE_SIZE - ZHDR_SIZE_ALIGNED - CHUNK_SIZE)
return -ENOSPC;
diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
index 36b4591a7a2d..5ae5d85b629d 100644
--- a/mm/zsmalloc.c
+++ b/mm/zsmalloc.c
@@ -1082,7 +1082,7 @@ void zs_unmap_object(struct zs_pool *pool, unsigned long handle)
class = &pool->size_class[class_idx];
off = obj_idx_to_offset(page, obj_idx, class->size);
- area = &__get_cpu_var(zs_map_area);
+ area = this_cpu_ptr(&zs_map_area);
if (off + class->size <= PAGE_SIZE)
kunmap_atomic(area->vm_addr);
else {
diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index 34eb2160489d..e7ff52a39ec9 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -24,6 +24,7 @@ my $emacs = 0;
my $terse = 0;
my $file = 0;
my $check = 0;
+my $check_orig = 0;
my $summary = 1;
my $mailback = 0;
my $summary_file = 0;
@@ -146,6 +147,7 @@ GetOptions(
help(0) if ($help);
$fix = 1 if ($fix_inplace);
+$check_orig = $check;
my $exit = 0;
@@ -397,6 +399,11 @@ foreach my $entry (@mode_permission_funcs) {
$mode_perms_search .= $entry->[0];
}
+our $declaration_macros = qr{(?x:
+ (?:$Storage\s+)?(?:DECLARE|DEFINE)_[A-Z]+\s*\(|
+ (?:$Storage\s+)?LIST_HEAD\s*\(
+)};
+
our $allowed_asm_includes = qr{(?x:
irq|
memory
@@ -1808,11 +1815,13 @@ sub process {
$here = "#$linenr: " if (!$file);
$here = "#$realline: " if ($file);
+ my $found_file = 0;
# extract the filename as it passes
if ($line =~ /^diff --git.*?(\S+)$/) {
$realfile = $1;
$realfile =~ s@^([^/]*)/@@ if (!$file);
$in_commit_log = 0;
+ $found_file = 1;
} elsif ($line =~ /^\+\+\+\s+(\S+)/) {
$realfile = $1;
$realfile =~ s@^([^/]*)/@@ if (!$file);
@@ -1829,6 +1838,15 @@ sub process {
ERROR("MODIFIED_INCLUDE_ASM",
"do not modify files in include/asm, change architecture specific files in include/asm-<architecture>\n" . "$here$rawline\n");
}
+ $found_file = 1;
+ }
+
+ if ($found_file) {
+ if ($realfile =~ m@^(drivers/net/|net/)@) {
+ $check = 1;
+ } else {
+ $check = $check_orig;
+ }
next;
}
@@ -2093,8 +2111,10 @@ sub process {
foreach my $compat (@compats) {
my $compat2 = $compat;
- $compat2 =~ s/\,[a-z]*\-/\,<\.\*>\-/;
- `grep -Erq "$compat|$compat2" $dt_path`;
+ $compat2 =~ s/\,[a-zA-Z0-9]*\-/\,<\.\*>\-/;
+ my $compat3 = $compat;
+ $compat3 =~ s/\,([a-z]*)[0-9]*\-/\,$1<\.\*>\-/;
+ `grep -Erq "$compat|$compat2|$compat3" $dt_path`;
if ( $? >> 8 ) {
WARN("UNDOCUMENTED_DT_STRING",
"DT compatible string \"$compat\" appears un-documented -- check $dt_path\n" . $herecurr);
@@ -2266,18 +2286,37 @@ sub process {
}
# check for missing blank lines after declarations
- if ($realfile =~ m@^(drivers/net/|net/)@ &&
- $prevline =~ /^\+\s+$Declare\s+$Ident/ &&
- !($prevline =~ /(?:$Compare|$Assignment|$Operators)\s*$/ ||
- $prevline =~ /(?:\{\s*|\\)$/) && #extended lines
- $sline =~ /^\+\s+/ && #Not at char 1
- !($sline =~ /^\+\s+$Declare/ ||
- $sline =~ /^\+\s+$Ident\s+$Ident/ || #eg: typedef foo
+ if ($sline =~ /^\+\s+\S/ && #Not at char 1
+ # actual declarations
+ ($prevline =~ /^\+\s+$Declare\s*$Ident\s*[=,;:\[]/ ||
+ # foo bar; where foo is some local typedef or #define
+ $prevline =~ /^\+\s+$Ident(?:\s+|\s*\*\s*)$Ident\s*[=,;\[]/ ||
+ # known declaration macros
+ $prevline =~ /^\+\s+$declaration_macros/) &&
+ # for "else if" which can look like "$Ident $Ident"
+ !($prevline =~ /^\+\s+$c90_Keywords\b/ ||
+ # other possible extensions of declaration lines
+ $prevline =~ /(?:$Compare|$Assignment|$Operators)\s*$/ ||
+ # not starting a section or a macro "\" extended line
+ $prevline =~ /(?:\{\s*|\\)$/) &&
+ # looks like a declaration
+ !($sline =~ /^\+\s+$Declare\s*$Ident\s*[=,;:\[]/ ||
+ # foo bar; where foo is some local typedef or #define
+ $sline =~ /^\+\s+$Ident(?:\s+|\s*\*\s*)$Ident\s*[=,;\[]/ ||
+ # known declaration macros
+ $sline =~ /^\+\s+$declaration_macros/ ||
+ # start of struct or union or enum
$sline =~ /^\+\s+(?:union|struct|enum|typedef)\b/ ||
- $sline =~ /^\+\s+(?:$|[\{\}\.\#\"\?\:\(])/ ||
- $sline =~ /^\+\s+\(?\s*(?:$Compare|$Assignment|$Operators)/)) {
+ # start or end of block or continuation of declaration
+ $sline =~ /^\+\s+(?:$|[\{\}\.\#\"\?\:\(\[])/ ||
+ # bitfield continuation
+ $sline =~ /^\+\s+$Ident\s*:\s*\d+\s*[,;]/ ||
+ # other possible extensions of declaration lines
+ $sline =~ /^\+\s+\(?\s*(?:$Compare|$Assignment|$Operators)/) &&
+ # indentation of previous and current line are the same
+ (($prevline =~ /\+(\s+)\S/) && $sline =~ /^\+$1\S/)) {
WARN("SPACING",
- "networking uses a blank line after declarations\n" . $hereprev);
+ "Missing a blank line after declarations\n" . $hereprev);
}
# check for spaces at the beginning of a line.
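
With the drivers/net|net restriction dropped, the blank-line check now applies
tree-wide; an illustrative C function it would flag:

    /* checkpatch would now emit:
     *   WARNING: Missing a blank line after declarations
     * for the line following "int ret = 0;" below. */
    static int demo(int x)
    {
        int ret = 0;
        ret += x;     /* flagged: no blank line after the declaration */

        return ret;
    }
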
@@ -3782,6 +3821,17 @@ sub process {
WARN("DO_WHILE_MACRO_WITH_TRAILING_SEMICOLON",
"do {} while (0) macros should not be semicolon terminated\n" . "$herectx");
}
+ } elsif ($dstat =~ /^\+\s*#\s*define\s+$Ident.*;\s*$/) {
+ $ctx =~ s/\n*$//;
+ my $cnt = statement_rawlines($ctx);
+ my $herectx = $here . "\n";
+
+ for (my $n = 0; $n < $cnt; $n++) {
+ $herectx .= raw_line($linenr, $n) . "\n";
+ }
+
+ WARN("TRAILING_SEMICOLON",
+ "macros should not use a trailing semicolon\n" . "$herectx");
}
}
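
The TRAILING_SEMICOLON warning targets a classic macro footgun: a semicolon
baked into the definition detaches any following else branch once the caller
adds its own. A hedged illustration (names invented), showing the safe
spelling and noting the flagged one:

    #include <string.h>

    struct foo { int a, b; };

    /* Correct: no trailing semicolon, so the caller supplies it and the
     * if/else pairing survives. checkpatch now warns on the variant
     *   #define INIT_FOO(x) memset(&(x), 0, sizeof(x));
     * because its expansion leaves a stray ';' after the if body,
     * turning the else below into a syntax error. */
    #define INIT_FOO(x) memset(&(x), 0, sizeof(x))

    void bar(struct foo *f, int cond)
    {
        if (cond)
            INIT_FOO(*f);
        else
            f->a = 1;
    }
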
diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index 32487ed18354..e66e710cc595 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -10,6 +10,7 @@ TARGETS += timers
TARGETS += vm
TARGETS += powerpc
TARGETS += user
+TARGETS += sysctl
all:
for TARGET in $(TARGETS); do \
diff --git a/tools/testing/selftests/sysctl/Makefile b/tools/testing/selftests/sysctl/Makefile
new file mode 100644
index 000000000000..0a92adaf0865
--- /dev/null
+++ b/tools/testing/selftests/sysctl/Makefile
@@ -0,0 +1,19 @@
+# Makefile for sysctl selftests.
+# Expects kernel.sysctl_writes_strict=1.
+
+# No binaries, but make sure arg-less "make" doesn't trigger "run_tests".
+all:
+
+# Allow specific tests to be selected.
+test_num:
+ @/bin/sh ./run_numerictests
+
+test_string:
+ @/bin/sh ./run_stringtests
+
+run_tests: all test_num test_string
+
+# Nothing to clean up.
+clean:
+
+.PHONY: all run_tests clean test_num test_string
diff --git a/tools/testing/selftests/sysctl/common_tests b/tools/testing/selftests/sysctl/common_tests
new file mode 100644
index 000000000000..17d534b1b7b4
--- /dev/null
+++ b/tools/testing/selftests/sysctl/common_tests
@@ -0,0 +1,109 @@
+#!/bin/sh
+
+TEST_FILE=$(mktemp)
+
+echo "== Testing sysctl behavior against ${TARGET} =="
+
+set_orig()
+{
+ echo "${ORIG}" > "${TARGET}"
+}
+
+set_test()
+{
+ echo "${TEST_STR}" > "${TARGET}"
+}
+
+verify()
+{
+ local seen
+ seen=$(cat "$1")
+ if [ "${seen}" != "${TEST_STR}" ]; then
+ return 1
+ fi
+ return 0
+}
+
+trap 'set_orig; rm -f "${TEST_FILE}"' EXIT
+
+rc=0
+
+echo -n "Writing test file ... "
+echo "${TEST_STR}" > "${TEST_FILE}"
+if ! verify "${TEST_FILE}"; then
+ echo "FAIL" >&2
+ exit 1
+else
+ echo "ok"
+fi
+
+echo -n "Checking sysctl is not set to test value ... "
+if verify "${TARGET}"; then
+ echo "FAIL" >&2
+ exit 1
+else
+ echo "ok"
+fi
+
+echo -n "Writing sysctl from shell ... "
+set_test
+if ! verify "${TARGET}"; then
+ echo "FAIL" >&2
+ exit 1
+else
+ echo "ok"
+fi
+
+echo -n "Resetting sysctl to original value ... "
+set_orig
+if verify "${TARGET}"; then
+ echo "FAIL" >&2
+ exit 1
+else
+ echo "ok"
+fi
+
+# Now that we've validated the sanity of "set_test" and "set_orig",
+# we can use those functions to set starting states before running
+# specific behavioral tests.
+
+echo -n "Writing entire sysctl in single write ... "
+set_orig
+dd if="${TEST_FILE}" of="${TARGET}" bs=4096 2>/dev/null
+if ! verify "${TARGET}"; then
+ echo "FAIL" >&2
+ rc=1
+else
+ echo "ok"
+fi
+
+echo -n "Writing middle of sysctl after synchronized seek ... "
+set_test
+dd if="${TEST_FILE}" of="${TARGET}" bs=1 seek=1 skip=1 2>/dev/null
+if ! verify "${TARGET}"; then
+ echo "FAIL" >&2
+ rc=1
+else
+ echo "ok"
+fi
+
+echo -n "Writing beyond end of sysctl ... "
+set_orig
+dd if="${TEST_FILE}" of="${TARGET}" bs=20 seek=2 2>/dev/null
+if verify "${TARGET}"; then
+ echo "FAIL" >&2
+ rc=1
+else
+ echo "ok"
+fi
+
+echo -n "Writing sysctl with multiple long writes ... "
+set_orig
+(perl -e 'print "A" x 50;'; echo "${TEST_STR}") | \
+ dd of="${TARGET}" bs=50 2>/dev/null
+if verify "${TARGET}"; then
+ echo "FAIL" >&2
+ rc=1
+else
+ echo "ok"
+fi
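+
+# The dd invocations above reduce to plain open/lseek/write sequences; the
+# "synchronized seek" case, for instance, corresponds to roughly the following
+# C (path and string illustrative, assumes root and sysctl_writes_strict=1):
+#
+#     #include <fcntl.h>
+#     #include <stdio.h>
+#     #include <string.h>
+#     #include <unistd.h>
+#
+#     int main(void)
+#     {
+#         const char *val = "Testing sysctl";
+#         int fd = open("/proc/sys/kernel/domainname", O_WRONLY);
+#
+#         if (fd < 0) {
+#             perror("open");
+#             return 1;
+#         }
+#         /* Equivalent of: dd if=... of=... bs=1 seek=1 skip=1 */
+#         if (lseek(fd, 1, SEEK_SET) < 0 ||
+#             write(fd, val + 1, strlen(val + 1)) < 0)
+#             perror("write");
+#         close(fd);
+#         return 0;
+#     }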
diff --git a/tools/testing/selftests/sysctl/run_numerictests b/tools/testing/selftests/sysctl/run_numerictests
new file mode 100644
index 000000000000..8510f93f2d14
--- /dev/null
+++ b/tools/testing/selftests/sysctl/run_numerictests
@@ -0,0 +1,10 @@
+#!/bin/sh
+
+SYSCTL="/proc/sys"
+TARGET="${SYSCTL}/vm/swappiness"
+ORIG=$(cat "${TARGET}")
+TEST_STR=$(( $ORIG + 1 ))
+
+. ./common_tests
+
+exit $rc
diff --git a/tools/testing/selftests/sysctl/run_stringtests b/tools/testing/selftests/sysctl/run_stringtests
new file mode 100644
index 000000000000..90a9293d520c
--- /dev/null
+++ b/tools/testing/selftests/sysctl/run_stringtests
@@ -0,0 +1,77 @@
+#!/bin/sh
+
+SYSCTL="/proc/sys"
+TARGET="${SYSCTL}/kernel/domainname"
+ORIG=$(cat "${TARGET}")
+TEST_STR="Testing sysctl"
+
+. ./common_tests
+
+# Only string sysctls support seeking/appending.
+MAXLEN=65
+
+echo -n "Writing entire sysctl in short writes ... "
+set_orig
+dd if="${TEST_FILE}" of="${TARGET}" bs=1 2>/dev/null
+if ! verify "${TARGET}"; then
+ echo "FAIL" >&2
+ rc=1
+else
+ echo "ok"
+fi
+
+echo -n "Writing middle of sysctl after unsynchronized seek ... "
+set_test
+dd if="${TEST_FILE}" of="${TARGET}" bs=1 seek=1 2>/dev/null
+if verify "${TARGET}"; then
+ echo "FAIL" >&2
+ rc=1
+else
+ echo "ok"
+fi
+
+echo -n "Checking sysctl maxlen is at least $MAXLEN ... "
+set_orig
+perl -e 'print "A" x ('"${MAXLEN}"'-2), "B";' | \
+ dd of="${TARGET}" bs="${MAXLEN}" 2>/dev/null
+if ! grep -q B "${TARGET}"; then
+ echo "FAIL" >&2
+ rc=1
+else
+ echo "ok"
+fi
+
+echo -n "Checking sysctl keeps original string on overflow append ... "
+set_orig
+perl -e 'print "A" x ('"${MAXLEN}"'-1), "B";' | \
+ dd of="${TARGET}" bs=$(( MAXLEN - 1 )) 2>/dev/null
+if grep -q B "${TARGET}"; then
+ echo "FAIL" >&2
+ rc=1
+else
+ echo "ok"
+fi
+
+echo -n "Checking sysctl stays NULL terminated on write ... "
+set_orig
+perl -e 'print "A" x ('"${MAXLEN}"'-1), "B";' | \
+ dd of="${TARGET}" bs="${MAXLEN}" 2>/dev/null
+if grep -q B "${TARGET}"; then
+ echo "FAIL" >&2
+ rc=1
+else
+ echo "ok"
+fi
+
+echo -n "Checking sysctl stays NULL terminated on overwrite ... "
+set_orig
+perl -e 'print "A" x ('"${MAXLEN}"'-1), "BB";' | \
+ dd of="${TARGET}" bs=$(( $MAXLEN + 1 )) 2>/dev/null
+if grep -q B "${TARGET}"; then
+ echo "FAIL" >&2
+ rc=1
+else
+ echo "ok"
+fi
+
+exit $rc
diff --git a/usr/Kconfig b/usr/Kconfig
index 642f503d3e9f..2d4c77eecf2e 100644
--- a/usr/Kconfig
+++ b/usr/Kconfig
@@ -98,80 +98,3 @@ config RD_LZ4
help
Support loading of a LZ4 encoded initial ramdisk or cpio buffer
If unsure, say N.
-
-choice
- prompt "Built-in initramfs compression mode" if INITRAMFS_SOURCE!=""
- help
- This option decides by which algorithm the builtin initramfs
- will be compressed. Several compression algorithms are
- available, which differ in efficiency, compression and
- decompression speed. Compression speed is only relevant
- when building a kernel. Decompression speed is relevant at
- each boot.
-
- If you have any problems with bzip2 or LZMA compressed
- initramfs, mail me (Alain Knaff) <alain@knaff.lu>.
-
- High compression options are mostly useful for users who are
- low on RAM, since it reduces the memory consumption during
- boot.
-
- If in doubt, select 'gzip'
-
-config INITRAMFS_COMPRESSION_NONE
- bool "None"
- help
- Do not compress the built-in initramfs at all. This may
- sound wasteful in space, but, you should be aware that the
- built-in initramfs will be compressed at a later stage
- anyways along with the rest of the kernel, on those
- architectures that support this.
- However, not compressing the initramfs may lead to slightly
- higher memory consumption during a short time at boot, while
- both the cpio image and the unpacked filesystem image will
- be present in memory simultaneously
-
-config INITRAMFS_COMPRESSION_GZIP
- bool "Gzip"
- depends on RD_GZIP
- help
- The old and tried gzip compression. It provides a good balance
- between compression ratio and decompression speed.
-
-config INITRAMFS_COMPRESSION_BZIP2
- bool "Bzip2"
- depends on RD_BZIP2
- help
- Its compression ratio and speed is intermediate.
- Decompression speed is slowest among the choices. The initramfs
- size is about 10% smaller with bzip2, in comparison to gzip.
- Bzip2 uses a large amount of memory. For modern kernels you
- will need at least 8MB RAM or more for booting.
-
-config INITRAMFS_COMPRESSION_LZMA
- bool "LZMA"
- depends on RD_LZMA
- help
- This algorithm's compression ratio is best.
- Decompression speed is between the other choices.
- Compression is slowest. The initramfs size is about 33%
- smaller with LZMA in comparison to gzip.
-
-config INITRAMFS_COMPRESSION_XZ
- bool "XZ"
- depends on RD_XZ
- help
- XZ uses the LZMA2 algorithm. The initramfs size is about 30%
- smaller with XZ in comparison to gzip. Decompression speed
- is better than that of bzip2 but worse than gzip and LZO.
- Compression is slow.
-
-config INITRAMFS_COMPRESSION_LZO
- bool "LZO"
- depends on RD_LZO
- help
- Its compression ratio is the poorest among the choices. The kernel
- size is about 10% bigger than gzip; however its speed
- (both compression and decompression) is the fastest.
-
-endchoice