summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--.mailmap3
-rw-r--r--Documentation/ABI/testing/securityfs-secrets-coco51
-rw-r--r--Documentation/ABI/testing/sysfs-fs-erofs5
-rw-r--r--Documentation/RCU/Design/Data-Structures/Data-Structures.rst2
-rw-r--r--Documentation/RCU/Design/Expedited-Grace-Periods/Expedited-Grace-Periods.rst2
-rw-r--r--Documentation/RCU/Design/Requirements/Requirements.rst36
-rw-r--r--Documentation/RCU/arrayRCU.rst4
-rw-r--r--Documentation/RCU/checklist.rst9
-rw-r--r--Documentation/RCU/rcu.rst13
-rw-r--r--Documentation/RCU/rculist_nulls.rst2
-rw-r--r--Documentation/RCU/stallwarn.rst20
-rw-r--r--Documentation/RCU/whatisRCU.rst18
-rw-r--r--Documentation/admin-guide/kernel-parameters.txt156
-rw-r--r--Documentation/arm64/memory-tagging-extension.rst4
-rw-r--r--Documentation/arm64/silicon-errata.rst3
-rw-r--r--Documentation/cdrom/cdrom-standard.rst10
-rw-r--r--Documentation/devicetree/bindings/ata/renesas,rcar-sata.yaml1
-rw-r--r--Documentation/devicetree/bindings/clock/imx8m-clock.yaml4
-rw-r--r--Documentation/devicetree/bindings/clock/microchip,mpfs.yaml13
-rw-r--r--Documentation/devicetree/bindings/display/bridge/renesas,lvds.yaml4
-rw-r--r--Documentation/devicetree/bindings/display/renesas,du.yaml23
-rw-r--r--Documentation/devicetree/bindings/dma/qcom,gpi.yaml1
-rw-r--r--Documentation/devicetree/bindings/hwmon/ti,tmp421.yaml7
-rw-r--r--Documentation/devicetree/bindings/iio/adc/st,stm32-adc.yaml2
-rw-r--r--Documentation/devicetree/bindings/input/mediatek,mt6779-keypad.yaml2
-rw-r--r--Documentation/devicetree/bindings/leds/leds-mt6360.yaml2
-rw-r--r--Documentation/devicetree/bindings/mfd/atmel-flexcom.txt2
-rw-r--r--Documentation/devicetree/bindings/mmc/nvidia,tegra20-sdhci.yaml7
-rw-r--r--Documentation/devicetree/bindings/mtd/gpmi-nand.yaml2
-rw-r--r--Documentation/devicetree/bindings/net/can/bosch,c_can.yaml3
-rw-r--r--Documentation/devicetree/bindings/net/dsa/realtek.yaml35
-rw-r--r--Documentation/devicetree/bindings/pci/apple,pcie.yaml3
-rw-r--r--Documentation/devicetree/bindings/phy/brcm,sata-phy.yaml10
-rw-r--r--Documentation/devicetree/bindings/pinctrl/aspeed,ast2600-pinctrl.yaml4
-rw-r--r--Documentation/devicetree/bindings/pinctrl/pincfg-node.yaml12
-rw-r--r--Documentation/devicetree/bindings/regulator/richtek,rt5190a-regulator.yaml2
-rw-r--r--Documentation/devicetree/bindings/rtc/allwinner,sun6i-a31-rtc.yaml10
-rw-r--r--Documentation/devicetree/bindings/rtc/microchip,mfps-rtc.yaml15
-rw-r--r--Documentation/devicetree/bindings/serial/samsung_uart.yaml4
-rw-r--r--Documentation/devicetree/bindings/sound/allwinner,sun4i-a10-i2s.yaml1
-rw-r--r--Documentation/devicetree/bindings/sound/ti,j721e-cpb-audio.yaml2
-rw-r--r--Documentation/devicetree/bindings/thermal/rcar-gen3-thermal.yaml1
-rw-r--r--Documentation/devicetree/bindings/ufs/cdns,ufshc.yaml3
-rw-r--r--Documentation/devicetree/bindings/usb/samsung,exynos-usb2.yaml1
-rw-r--r--Documentation/driver-api/libata.rst11
-rw-r--r--Documentation/filesystems/ext4/attributes.rst2
-rw-r--r--Documentation/filesystems/f2fs.rst70
-rw-r--r--Documentation/filesystems/proc.rst92
-rw-r--r--Documentation/filesystems/zonefs.rst52
-rw-r--r--Documentation/networking/ip-sysctl.rst7
-rw-r--r--Documentation/process/embargoed-hardware-issues.rst7
-rw-r--r--Documentation/security/index.rst1
-rw-r--r--Documentation/security/secrets/coco.rst103
-rw-r--r--Documentation/security/secrets/index.rst9
-rw-r--r--Documentation/security/siphash.rst46
-rw-r--r--Documentation/userspace-api/ioctl/cdrom.rst6
-rw-r--r--Documentation/virt/kvm/api.rst24
-rw-r--r--Documentation/vm/page_owner.rst5
-rw-r--r--MAINTAINERS57
-rw-r--r--Makefile2
-rw-r--r--arch/Kconfig7
-rw-r--r--arch/arc/boot/dts/hsdk.dts2
-rw-r--r--arch/arc/include/asm/atomic-llsc.h32
-rw-r--r--arch/arc/include/asm/pgtable-levels.h3
-rw-r--r--arch/arc/kernel/disasm.c3
-rw-r--r--arch/arc/kernel/entry.S1
-rw-r--r--arch/arc/kernel/signal.c2
-rw-r--r--arch/arc/kernel/smp.c4
-rw-r--r--arch/arc/kernel/unaligned.c2
-rw-r--r--arch/arc/mm/cache.c2
-rw-r--r--arch/arm/boot/dts/am33xx-l4.dtsi2
-rw-r--r--arch/arm/boot/dts/am3517-evm.dts45
-rw-r--r--arch/arm/boot/dts/am3517-som.dtsi9
-rw-r--r--arch/arm/boot/dts/aspeed-bmc-asrock-romed8hm3.dts4
-rw-r--r--arch/arm/boot/dts/aspeed-g6-pinctrl.dtsi10
-rw-r--r--arch/arm/boot/dts/aspeed-g6.dtsi10
-rw-r--r--arch/arm/boot/dts/at91-dvk_su60_somc.dtsi2
-rw-r--r--arch/arm/boot/dts/at91-kizbox3-hs.dts2
-rw-r--r--arch/arm/boot/dts/at91-kizbox3_common.dtsi2
-rw-r--r--arch/arm/boot/dts/at91-q5xr5.dts2
-rw-r--r--arch/arm/boot/dts/at91-sama5d27_wlsom1.dtsi2
-rw-r--r--arch/arm/boot/dts/at91-sama5d27_wlsom1_ek.dts2
-rw-r--r--arch/arm/boot/dts/at91-sama5d2_xplained.dts2
-rw-r--r--arch/arm/boot/dts/at91-sama5d3_xplained.dts8
-rw-r--r--arch/arm/boot/dts/at91-sama5d4_ma5d4.dtsi2
-rw-r--r--arch/arm/boot/dts/at91-sama5d4_xplained.dts6
-rw-r--r--arch/arm/boot/dts/at91-sama5d4ek.dts2
-rw-r--r--arch/arm/boot/dts/at91-sama7g5ek.dts4
-rw-r--r--arch/arm/boot/dts/at91-vinco.dts2
-rw-r--r--arch/arm/boot/dts/at91rm9200ek.dts4
-rw-r--r--arch/arm/boot/dts/at91sam9260ek.dts2
-rw-r--r--arch/arm/boot/dts/at91sam9261ek.dts2
-rw-r--r--arch/arm/boot/dts/at91sam9263ek.dts2
-rw-r--r--arch/arm/boot/dts/at91sam9g20ek_common.dtsi45
-rw-r--r--arch/arm/boot/dts/at91sam9m10g45ek.dts2
-rw-r--r--arch/arm/boot/dts/at91sam9n12ek.dts2
-rw-r--r--arch/arm/boot/dts/at91sam9rlek.dts2
-rw-r--r--arch/arm/boot/dts/at91sam9x5ek.dtsi2
-rw-r--r--arch/arm/boot/dts/dra7-l4.dtsi4
-rw-r--r--arch/arm/boot/dts/imx6qdl-apalis.dtsi10
-rw-r--r--arch/arm/boot/dts/imx6ull-colibri.dtsi2
-rw-r--r--arch/arm/boot/dts/logicpd-som-lv-35xx-devkit.dts15
-rw-r--r--arch/arm/boot/dts/logicpd-som-lv-37xx-devkit.dts15
-rw-r--r--arch/arm/boot/dts/logicpd-som-lv.dtsi15
-rw-r--r--arch/arm/boot/dts/omap3-gta04.dtsi2
-rw-r--r--arch/arm/boot/dts/sama5d3xmb.dtsi2
-rw-r--r--arch/arm/boot/dts/sama5d3xmb_cmp.dtsi2
-rw-r--r--arch/arm/boot/dts/sama7g5.dtsi18
-rw-r--r--arch/arm/boot/dts/usb_a9263.dts2
-rw-r--r--arch/arm/configs/multi_v7_defconfig1
-rw-r--r--arch/arm/configs/tegra_defconfig3
-rw-r--r--arch/arm/include/asm/io.h3
-rw-r--r--arch/arm/kernel/entry-armv.S2
-rw-r--r--arch/arm/mach-exynos/Kconfig1
-rw-r--r--arch/arm/mach-omap2/omap4-common.c2
-rw-r--r--arch/arm/mm/ioremap.c8
-rw-r--r--arch/arm/mm/proc-v7-bugs.c1
-rw-r--r--arch/arm/xen/enlighten.c9
-rw-r--r--arch/arm64/Kconfig15
-rw-r--r--arch/arm64/boot/dts/amlogic/meson-g12b-a311d.dtsi40
-rw-r--r--arch/arm64/boot/dts/amlogic/meson-g12b-s922x.dtsi40
-rw-r--r--arch/arm64/boot/dts/amlogic/meson-s4.dtsi8
-rw-r--r--arch/arm64/boot/dts/amlogic/meson-sm1-bananapi-m5.dts1
-rw-r--r--arch/arm64/boot/dts/amlogic/meson-sm1.dtsi20
-rw-r--r--arch/arm64/boot/dts/freescale/imx8mm-venice-gw71xx.dtsi4
-rw-r--r--arch/arm64/boot/dts/freescale/imx8mm-venice-gw72xx.dtsi4
-rw-r--r--arch/arm64/boot/dts/freescale/imx8mm-venice-gw73xx.dtsi4
-rw-r--r--arch/arm64/boot/dts/freescale/imx8mn-ddr4-evk.dts4
-rw-r--r--arch/arm64/boot/dts/freescale/imx8mn.dtsi10
-rw-r--r--arch/arm64/boot/dts/freescale/imx8mq-tqma8mq.dtsi2
-rw-r--r--arch/arm64/boot/dts/freescale/imx8qm.dtsi2
-rw-r--r--arch/arm64/boot/dts/nvidia/tegra186-p3310.dtsi8
-rw-r--r--arch/arm64/boot/dts/nvidia/tegra186-p3509-0000+p3636-0001.dts8
-rw-r--r--arch/arm64/boot/dts/nvidia/tegra194-p2888.dtsi6
-rw-r--r--arch/arm64/boot/dts/nvidia/tegra194-p3668.dtsi6
-rw-r--r--arch/arm64/boot/dts/nvidia/tegra210-p2180.dtsi6
-rw-r--r--arch/arm64/boot/dts/nvidia/tegra210-p2894.dtsi8
-rw-r--r--arch/arm64/boot/dts/nvidia/tegra210-p3450-0000.dts8
-rw-r--r--arch/arm64/boot/dts/nvidia/tegra210-smaug.dts4
-rw-r--r--arch/arm64/boot/dts/qcom/sm8250-mtp.dts12
-rw-r--r--arch/arm64/boot/dts/qcom/sm8250.dtsi4
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3568-bpi-r2-pro.dts40
-rw-r--r--arch/arm64/include/asm/io.h4
-rw-r--r--arch/arm64/include/asm/kvm_emulate.h1
-rw-r--r--arch/arm64/include/asm/pgtable.h4
-rw-r--r--arch/arm64/kernel/Makefile4
-rw-r--r--arch/arm64/kernel/cpu_errata.c2
-rw-r--r--arch/arm64/kernel/cpufeature.c3
-rw-r--r--arch/arm64/kernel/elfcore.c2
-rw-r--r--arch/arm64/kernel/mte.c3
-rw-r--r--arch/arm64/kernel/paravirt.c29
-rw-r--r--arch/arm64/kernel/relocate_kernel.S22
-rw-r--r--arch/arm64/kernel/vdso/Makefile3
-rw-r--r--arch/arm64/kernel/vdso32/Makefile3
-rw-r--r--arch/arm64/kvm/arm.c3
-rw-r--r--arch/arm64/kvm/hyp/nvhe/host.S18
-rw-r--r--arch/arm64/kvm/inject_fault.c28
-rw-r--r--arch/arm64/kvm/mmu.c19
-rw-r--r--arch/arm64/kvm/pmu-emul.c23
-rw-r--r--arch/arm64/kvm/psci.c3
-rw-r--r--arch/arm64/kvm/sys_regs.c3
-rw-r--r--arch/arm64/mm/ioremap.c8
-rw-r--r--arch/mips/include/asm/timex.h8
-rw-r--r--arch/mips/kernel/time.c11
-rw-r--r--arch/parisc/Kconfig1
-rw-r--r--arch/parisc/configs/generic-32bit_defconfig4
-rw-r--r--arch/parisc/configs/generic-64bit_defconfig3
-rw-r--r--arch/parisc/include/asm/cacheflush.h31
-rw-r--r--arch/parisc/include/asm/page.h6
-rw-r--r--arch/parisc/include/asm/pgtable.h2
-rw-r--r--arch/parisc/kernel/cache.c328
-rw-r--r--arch/parisc/kernel/kprobes.c2
-rw-r--r--arch/parisc/kernel/processor.c11
-rw-r--r--arch/parisc/kernel/setup.c2
-rw-r--r--arch/parisc/kernel/time.c6
-rw-r--r--arch/parisc/kernel/traps.c2
-rw-r--r--arch/parisc/math-emu/dfadd.c2
-rw-r--r--arch/parisc/math-emu/dfsub.c2
-rw-r--r--arch/parisc/math-emu/sfadd.c2
-rw-r--r--arch/parisc/math-emu/sfsub.c2
-rw-r--r--arch/parisc/mm/fault.c6
-rw-r--r--arch/powerpc/kernel/module.c2
-rw-r--r--arch/powerpc/kernel/time.c29
-rw-r--r--arch/powerpc/kernel/vdso/gettimeofday.S9
-rw-r--r--arch/powerpc/kvm/book3s_32_sr.S26
-rw-r--r--arch/powerpc/kvm/book3s_64_mmu_radix.c9
-rw-r--r--arch/powerpc/kvm/book3s_64_vio.c45
-rw-r--r--arch/powerpc/kvm/book3s_64_vio_hv.c44
-rw-r--r--arch/powerpc/kvm/book3s_hv_nested.c16
-rw-r--r--arch/powerpc/kvm/book3s_rtas.c4
-rw-r--r--arch/powerpc/kvm/powerpc.c4
-rw-r--r--arch/powerpc/perf/Makefile4
-rw-r--r--arch/powerpc/perf/power10-pmu.c2
-rw-r--r--arch/powerpc/perf/power9-pmu.c8
-rw-r--r--arch/powerpc/platforms/pseries/papr_scm.c7
-rw-r--r--arch/powerpc/platforms/pseries/vas-sysfs.c19
-rw-r--r--arch/powerpc/platforms/pseries/vas.c23
-rw-r--r--arch/powerpc/platforms/pseries/vas.h2
-rw-r--r--arch/riscv/Kconfig.socs2
-rw-r--r--arch/riscv/boot/dts/microchip/microchip-mpfs-fabric.dtsi16
-rw-r--r--arch/riscv/boot/dts/microchip/microchip-mpfs-icicle-kit.dts2
-rw-r--r--arch/riscv/boot/dts/microchip/microchip-mpfs.dtsi12
-rw-r--r--arch/riscv/boot/dts/sifive/fu540-c000.dtsi2
-rw-r--r--arch/riscv/configs/defconfig1
-rw-r--r--arch/riscv/configs/rv32_defconfig1
-rw-r--r--arch/riscv/include/asm/kvm_host.h3
-rw-r--r--arch/riscv/kernel/patch.c2
-rw-r--r--arch/riscv/kvm/vcpu.c37
-rw-r--r--arch/riscv/kvm/vcpu_exit.c4
-rw-r--r--arch/riscv/kvm/vcpu_sbi.c5
-rw-r--r--arch/riscv/mm/init.c22
-rw-r--r--arch/s390/Makefile10
-rw-r--r--arch/s390/kvm/interrupt.c4
-rw-r--r--arch/s390/kvm/kvm-s390.c19
-rw-r--r--arch/s390/kvm/pv.c7
-rw-r--r--arch/s390/kvm/vsie.c4
-rw-r--r--arch/s390/mm/gmap.c7
-rw-r--r--arch/sparc/include/asm/cacheflush_32.h1
-rw-r--r--arch/um/drivers/ubd_kern.c3
-rw-r--r--arch/x86/Kconfig2
-rw-r--r--arch/x86/boot/header.S4
-rw-r--r--arch/x86/entry/entry_64.S3
-rw-r--r--arch/x86/events/intel/cstate.c7
-rw-r--r--arch/x86/include/asm/efi.h5
-rw-r--r--arch/x86/include/asm/intel-family.h3
-rw-r--r--arch/x86/include/asm/kvm-x86-ops.h1
-rw-r--r--arch/x86/include/asm/kvm_host.h1
-rw-r--r--arch/x86/include/asm/microcode.h2
-rw-r--r--arch/x86/include/asm/pgtable_types.h4
-rw-r--r--arch/x86/include/asm/static_call.h1
-rw-r--r--arch/x86/kernel/cpu/microcode/core.c6
-rw-r--r--arch/x86/kernel/fpu/core.c67
-rw-r--r--arch/x86/kernel/kvm.c13
-rw-r--r--arch/x86/kernel/unwind_orc.c8
-rw-r--r--arch/x86/kvm/cpuid.c24
-rw-r--r--arch/x86/kvm/hyperv.c4
-rw-r--r--arch/x86/kvm/mmu.h24
-rw-r--r--arch/x86/kvm/mmu/mmu.c107
-rw-r--r--arch/x86/kvm/mmu/spte.c28
-rw-r--r--arch/x86/kvm/mmu/spte.h10
-rw-r--r--arch/x86/kvm/mmu/tdp_iter.h34
-rw-r--r--arch/x86/kvm/mmu/tdp_mmu.c97
-rw-r--r--arch/x86/kvm/pmu.c7
-rw-r--r--arch/x86/kvm/pmu.h9
-rw-r--r--arch/x86/kvm/svm/pmu.c29
-rw-r--r--arch/x86/kvm/svm/sev.c109
-rw-r--r--arch/x86/kvm/svm/svm.c1
-rw-r--r--arch/x86/kvm/svm/svm.h2
-rw-r--r--arch/x86/kvm/vmx/nested.c5
-rw-r--r--arch/x86/kvm/vmx/pmu_intel.c8
-rw-r--r--arch/x86/kvm/vmx/vmx.c7
-rw-r--r--arch/x86/kvm/vmx/vmx.h1
-rw-r--r--arch/x86/kvm/x86.c68
-rw-r--r--arch/x86/lib/copy_user_64.S87
-rw-r--r--arch/x86/lib/putuser.S4
-rw-r--r--arch/x86/lib/retpoline.S2
-rw-r--r--arch/x86/lib/usercopy_64.c2
-rw-r--r--arch/x86/mm/init_64.c5
-rw-r--r--arch/x86/mm/pat/set_memory.c11
-rw-r--r--arch/x86/pci/xen.c6
-rw-r--r--arch/x86/platform/efi/efi.c3
-rw-r--r--arch/x86/platform/pvh/head.S1
-rw-r--r--arch/x86/power/cpu.c10
-rw-r--r--arch/x86/xen/smp_pv.c5
-rw-r--r--arch/x86/xen/xen-head.S1
-rw-r--r--arch/xtensa/kernel/coprocessor.S4
-rw-r--r--arch/xtensa/kernel/jump_label.c2
-rw-r--r--arch/xtensa/platforms/iss/console.c8
-rw-r--r--block/Makefile1
-rw-r--r--block/badblocks.c2
-rw-r--r--block/bdev.c16
-rw-r--r--block/bfq-cgroup.c111
-rw-r--r--block/bfq-iosched.c107
-rw-r--r--block/bfq-iosched.h11
-rw-r--r--block/bio.c146
-rw-r--r--block/blk-cgroup-fc-appid.c57
-rw-r--r--block/blk-cgroup.c168
-rw-r--r--block/blk-cgroup.h140
-rw-r--r--block/blk-core.c85
-rw-r--r--block/blk-crypto-fallback.c15
-rw-r--r--block/blk-iocost.c88
-rw-r--r--block/blk-iolatency.c8
-rw-r--r--block/blk-lib.c124
-rw-r--r--block/blk-map.c47
-rw-r--r--block/blk-mq-debugfs.c2
-rw-r--r--block/blk-mq.c120
-rw-r--r--block/blk-settings.c74
-rw-r--r--block/blk-throttle.c5
-rw-r--r--block/blk.h21
-rw-r--r--block/bounce.c1
-rw-r--r--block/fops.c35
-rw-r--r--block/genhd.c4
-rw-r--r--block/ioctl.c48
-rw-r--r--block/mq-deadline.c1
-rw-r--r--block/partitions/acorn.c4
-rw-r--r--block/partitions/atari.c1
-rw-r--r--block/partitions/core.c14
-rw-r--r--block/partitions/ldm.c15
-rw-r--r--drivers/acpi/processor_idle.c8
-rw-r--r--drivers/android/binder.c10
-rw-r--r--drivers/ata/ahci.c5
-rw-r--r--drivers/ata/ahci_brcm.c7
-rw-r--r--drivers/ata/libata-core.c327
-rw-r--r--drivers/ata/libata-sata.c25
-rw-r--r--drivers/ata/libata-scsi.c46
-rw-r--r--drivers/ata/libata.h13
-rw-r--r--drivers/ata/pata_ftide010.c7
-rw-r--r--drivers/ata/pata_marvell.c2
-rw-r--r--drivers/ata/pata_mpc52xx.c3
-rw-r--r--drivers/ata/pata_sil680.c30
-rw-r--r--drivers/ata/pata_via.c6
-rw-r--r--drivers/ata/sata_gemini.c7
-rw-r--r--drivers/base/arch_topology.c11
-rw-r--r--drivers/base/firmware_loader/main.c17
-rw-r--r--drivers/base/topology.c10
-rw-r--r--drivers/block/Kconfig16
-rw-r--r--drivers/block/aoe/aoe.h2
-rw-r--r--drivers/block/aoe/aoeblk.c2
-rw-r--r--drivers/block/aoe/aoecmd.c2
-rw-r--r--drivers/block/aoe/aoedev.c4
-rw-r--r--drivers/block/aoe/aoemain.c10
-rw-r--r--drivers/block/ataflop.c10
-rw-r--r--drivers/block/drbd/drbd_bitmap.c2
-rw-r--r--drivers/block/drbd/drbd_main.c62
-rw-r--r--drivers/block/drbd/drbd_nl.c127
-rw-r--r--drivers/block/drbd/drbd_receiver.c28
-rw-r--r--drivers/block/drbd/drbd_req.c2
-rw-r--r--drivers/block/drbd/drbd_state.c3
-rw-r--r--drivers/block/drbd/drbd_worker.c2
-rw-r--r--drivers/block/floppy.c61
-rw-r--r--drivers/block/loop.c393
-rw-r--r--drivers/block/loop.h72
-rw-r--r--drivers/block/mtip32xx/mtip32xx.c2
-rw-r--r--drivers/block/nbd.c37
-rw-r--r--drivers/block/null_blk/main.c93
-rw-r--r--drivers/block/null_blk/null_blk.h8
-rw-r--r--drivers/block/null_blk/zoned.c7
-rw-r--r--drivers/block/pktcdvd.c36
-rw-r--r--drivers/block/rbd.c1
-rw-r--r--drivers/block/rnbd/rnbd-clt.c20
-rw-r--r--drivers/block/rnbd/rnbd-srv-dev.h10
-rw-r--r--drivers/block/rnbd/rnbd-srv.c5
-rw-r--r--drivers/block/virtio_blk.c9
-rw-r--r--drivers/block/xen-blkback/blkback.c15
-rw-r--r--drivers/block/xen-blkback/xenbus.c14
-rw-r--r--drivers/block/xen-blkfront.c8
-rw-r--r--drivers/block/zram/zram_drv.c35
-rw-r--r--drivers/bus/fsl-mc/fsl-mc-msi.c6
-rw-r--r--drivers/bus/imx-weim.c5
-rw-r--r--drivers/bus/mhi/host/pci_generic.c2
-rw-r--r--drivers/bus/sunxi-rsb.c2
-rw-r--r--drivers/bus/ti-sysc.c16
-rw-r--r--drivers/cdrom/cdrom.c38
-rw-r--r--drivers/char/ipmi/ipmi_msghandler.c7
-rw-r--r--drivers/char/ipmi/ipmi_si_intf.c5
-rw-r--r--drivers/char/random.c9
-rw-r--r--drivers/clk/at91/clk-generated.c4
-rw-r--r--drivers/clk/bcm/clk-bcm2835.c1
-rw-r--r--drivers/clk/microchip/clk-mpfs.c195
-rw-r--r--drivers/clk/qcom/clk-rcg2.c2
-rw-r--r--drivers/clk/sunxi-ng/ccu-sun6i-rtc.c17
-rw-r--r--drivers/clk/sunxi/clk-sun9i-mmc.c2
-rw-r--r--drivers/cpufreq/qcom-cpufreq-hw.c70
-rw-r--r--drivers/cpufreq/sun50i-cpufreq-nvmem.c4
-rw-r--r--drivers/cpuidle/cpuidle-riscv-sbi.c1
-rw-r--r--drivers/crypto/qcom-rng.c1
-rw-r--r--drivers/dma-buf/dma-buf.c16
-rw-r--r--drivers/dma/at_xdmac.c12
-rw-r--r--drivers/dma/dw-edma/dw-edma-v0-core.c16
-rw-r--r--drivers/dma/idxd/device.c6
-rw-r--r--drivers/dma/idxd/submit.c5
-rw-r--r--drivers/dma/idxd/sysfs.c6
-rw-r--r--drivers/dma/imx-sdma.c32
-rw-r--r--drivers/dma/mediatek/mtk-uart-apdma.c9
-rw-r--r--drivers/edac/synopsys_edac.c16
-rw-r--r--drivers/firewire/core-card.c3
-rw-r--r--drivers/firewire/core-cdev.c4
-rw-r--r--drivers/firewire/core-topology.c9
-rw-r--r--drivers/firewire/core-transaction.c30
-rw-r--r--drivers/firewire/sbp2.c13
-rw-r--r--drivers/firmware/cirrus/cs_dsp.c3
-rw-r--r--drivers/firmware/efi/Kconfig43
-rw-r--r--drivers/firmware/efi/efi.c13
-rw-r--r--drivers/firmware/efi/libstub/arm32-stub.c3
-rw-r--r--drivers/firmware/efi/libstub/arm64-stub.c15
-rw-r--r--drivers/firmware/efi/libstub/efi-stub.c2
-rw-r--r--drivers/firmware/efi/libstub/efistub.h84
-rw-r--r--drivers/firmware/efi/libstub/randomalloc.c11
-rw-r--r--drivers/firmware/efi/libstub/riscv-stub.c32
-rw-r--r--drivers/firmware/efi/libstub/x86-stub.c119
-rw-r--r--drivers/gpio/gpio-mvebu.c10
-rw-r--r--drivers/gpio/gpio-pca953x.c4
-rw-r--r--drivers/gpio/gpio-vf610.c8
-rw-r--r--drivers/gpio/gpio-visconti.c7
-rw-r--r--drivers/gpio/gpiolib-of.c2
-rw-r--r--drivers/gpio/gpiolib.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu.h2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c14
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c21
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c7
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c107
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c10
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vi.c17
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c83
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_priv.h2
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c10
-rw-r--r--drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c1
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c5
-rw-r--r--drivers/gpu/drm/amd/pm/amdgpu_dpm.c39
-rw-r--r--drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c10
-rw-r--r--drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c35
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c10
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c8
-rw-r--r--drivers/gpu/drm/bridge/Kconfig1
-rw-r--r--drivers/gpu/drm/dp/drm_dp_mst_topology.c1
-rw-r--r--drivers/gpu/drm/drm_of.c84
-rw-r--r--drivers/gpu/drm/i915/display/intel_dmc.c44
-rw-r--r--drivers/gpu/drm/i915/display/intel_dp.c17
-rw-r--r--drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c34
-rw-r--r--drivers/gpu/drm/i915/display/intel_fbc.c2
-rw-r--r--drivers/gpu/drm/i915/display/intel_psr.c38
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c6
-rw-r--r--drivers/gpu/drm/i915/gt/intel_reset.c2
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc.h2
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c16
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_uc.c2
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_uc.h2
-rw-r--r--drivers/gpu/drm/i915/i915_reg.h22
-rw-r--r--drivers/gpu/drm/i915/i915_vma.c11
-rw-r--r--drivers/gpu/drm/msm/dp/dp_display.c6
-rw-r--r--drivers/gpu/drm/msm/dp/dp_panel.c11
-rw-r--r--drivers/gpu/drm/msm/dp/dp_panel.h1
-rw-r--r--drivers/gpu/drm/msm/msm_drv.c2
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_backlight.c9
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c2
-rw-r--r--drivers/gpu/drm/panel/panel-raspberrypi-touchscreen.c13
-rw-r--r--drivers/gpu/drm/radeon/radeon_sync.c2
-rw-r--r--drivers/gpu/drm/sun4i/sun4i_frontend.c3
-rw-r--r--drivers/gpu/drm/vc4/Kconfig3
-rw-r--r--drivers/gpu/drm/vc4/vc4_dsi.c2
-rw-r--r--drivers/gpu/drm/vc4/vc4_hdmi.c1
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_bo.c43
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_cmd.c13
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_drv.c8
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_drv.h8
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_fb.c2
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_fence.c28
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_irq.c26
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_kms.c8
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_surface.c7
-rw-r--r--drivers/hwmon/Kconfig2
-rw-r--r--drivers/hwmon/adt7470.c4
-rw-r--r--drivers/hwmon/asus_wmi_sensors.c2
-rw-r--r--drivers/hwmon/f71882fg.c5
-rw-r--r--drivers/hwmon/pmbus/delta-ahe50dc-fan.c16
-rw-r--r--drivers/hwmon/pmbus/pmbus_core.c3
-rw-r--r--drivers/hwmon/pmbus/xdpe12284.c2
-rw-r--r--drivers/hwmon/tmp401.c11
-rw-r--r--drivers/i2c/busses/i2c-ismt.c14
-rw-r--r--drivers/i2c/busses/i2c-mt7621.c10
-rw-r--r--drivers/i2c/busses/i2c-thunderx-pcidrv.c1
-rw-r--r--drivers/idle/intel_idle.c27
-rw-r--r--drivers/iio/adc/ad7280a.c12
-rw-r--r--drivers/iio/chemical/scd4x.c5
-rw-r--r--drivers/iio/dac/ad3552r.c6
-rw-r--r--drivers/iio/dac/ad5446.c2
-rw-r--r--drivers/iio/dac/ad5592r-base.c2
-rw-r--r--drivers/iio/dac/ltc2688.c2
-rw-r--r--drivers/iio/dac/ti-dac5571.c28
-rw-r--r--drivers/iio/filter/Kconfig1
-rw-r--r--drivers/iio/imu/bmi160/bmi160_core.c20
-rw-r--r--drivers/iio/imu/inv_icm42600/inv_icm42600_i2c.c15
-rw-r--r--drivers/iio/magnetometer/ak8975.c1
-rw-r--r--drivers/iio/proximity/sx9324.c32
-rw-r--r--drivers/iio/proximity/sx_common.c1
-rw-r--r--drivers/infiniband/hw/irdma/cm.c33
-rw-r--r--drivers/infiniband/hw/irdma/utils.c21
-rw-r--r--drivers/infiniband/hw/irdma/verbs.c4
-rw-r--r--drivers/infiniband/sw/rxe/rxe_mcast.c81
-rw-r--r--drivers/infiniband/sw/rxe/rxe_resp.c35
-rw-r--r--drivers/infiniband/sw/siw/siw_cm.c7
-rw-r--r--drivers/input/keyboard/cypress-sf.c14
-rw-r--r--drivers/input/keyboard/omap4-keypad.c2
-rw-r--r--drivers/input/touchscreen/ili210x.c20
-rw-r--r--drivers/interconnect/core.c8
-rw-r--r--drivers/interconnect/qcom/sc7180.c21
-rw-r--r--drivers/interconnect/qcom/sdx55.c21
-rw-r--r--drivers/iommu/apple-dart.c10
-rw-r--r--drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c9
-rw-r--r--drivers/iommu/arm/arm-smmu/arm-smmu-nvidia.c30
-rw-r--r--drivers/iommu/intel/iommu.c27
-rw-r--r--drivers/iommu/intel/svm.c4
-rw-r--r--drivers/iommu/iommu.c9
-rw-r--r--drivers/md/bcache/alloc.c2
-rw-r--r--drivers/md/bcache/debug.c10
-rw-r--r--drivers/md/bcache/journal.c2
-rw-r--r--drivers/md/bcache/request.c6
-rw-r--r--drivers/md/bcache/super.c3
-rw-r--r--drivers/md/bcache/sysfs.c2
-rw-r--r--drivers/md/dm-bufio.c9
-rw-r--r--drivers/md/dm-cache-target.c9
-rw-r--r--drivers/md/dm-clone-target.c9
-rw-r--r--drivers/md/dm-io.c2
-rw-r--r--drivers/md/dm-log-writes.c3
-rw-r--r--drivers/md/dm-raid.c9
-rw-r--r--drivers/md/dm-table.c25
-rw-r--r--drivers/md/dm-thin.c15
-rw-r--r--drivers/md/dm-zoned-target.c2
-rw-r--r--drivers/md/dm.c3
-rw-r--r--drivers/md/md-bitmap.c45
-rw-r--r--drivers/md/md-cluster.c2
-rw-r--r--drivers/md/md-linear.c11
-rw-r--r--drivers/md/md.c67
-rw-r--r--drivers/md/md.h62
-rw-r--r--drivers/md/raid0.c38
-rw-r--r--drivers/md/raid1.c73
-rw-r--r--drivers/md/raid10.c81
-rw-r--r--drivers/md/raid5-cache.c8
-rw-r--r--drivers/md/raid5-ppl.c13
-rw-r--r--drivers/md/raid5.c241
-rw-r--r--drivers/md/raid5.h23
-rw-r--r--drivers/memory/renesas-rpc-if.c60
-rw-r--r--drivers/misc/eeprom/at25.c19
-rw-r--r--drivers/mmc/core/mmc.c23
-rw-r--r--drivers/mmc/core/mmc_ops.c2
-rw-r--r--drivers/mmc/core/queue.c3
-rw-r--r--drivers/mmc/host/sdhci-msm.c42
-rw-r--r--drivers/mmc/host/sunxi-mmc.c5
-rw-r--r--drivers/mtd/mtd_blkdevs.c1
-rw-r--r--drivers/mtd/nand/raw/mtk_ecc.c12
-rw-r--r--drivers/mtd/nand/raw/qcom_nandc.c24
-rw-r--r--drivers/mtd/nand/raw/sh_flctl.c14
-rw-r--r--drivers/net/bonding/bond_main.c13
-rw-r--r--drivers/net/can/grcan.c46
-rw-r--r--drivers/net/can/m_can/m_can.c24
-rw-r--r--drivers/net/can/m_can/m_can.h3
-rw-r--r--drivers/net/can/m_can/m_can_pci.c48
-rw-r--r--drivers/net/dsa/b53/b53_common.c36
-rw-r--r--drivers/net/dsa/b53/b53_priv.h24
-rw-r--r--drivers/net/dsa/b53/b53_serdes.c74
-rw-r--r--drivers/net/dsa/b53/b53_serdes.h9
-rw-r--r--drivers/net/dsa/b53/b53_srab.c4
-rw-r--r--drivers/net/dsa/bcm_sf2.c3
-rw-r--r--drivers/net/dsa/lantiq_gswip.c3
-rw-r--r--drivers/net/dsa/microchip/ksz9477.c38
-rw-r--r--drivers/net/dsa/mt7530.c1
-rw-r--r--drivers/net/dsa/mv88e6xxx/port_hidden.c5
-rw-r--r--drivers/net/dsa/ocelot/felix.c7
-rw-r--r--drivers/net/dsa/realtek/realtek-mdio.c1
-rw-r--r--drivers/net/dsa/realtek/realtek-smi.c4
-rw-r--r--drivers/net/ethernet/Kconfig26
-rw-r--r--drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c4
-rw-r--r--drivers/net/ethernet/aquantia/atlantic/aq_ring.c20
-rw-r--r--drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c7
-rw-r--r--drivers/net/ethernet/broadcom/bcmsysport.c6
-rw-r--r--drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c9
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt.c13
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c15
-rw-r--r--drivers/net/ethernet/broadcom/genet/bcmgenet.c11
-rw-r--r--drivers/net/ethernet/cadence/macb_main.c2
-rw-r--r--drivers/net/ethernet/cavium/thunder/nic_main.c16
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/t4_hw.c10
-rw-r--r--drivers/net/ethernet/dec/tulip/tulip_core.c5
-rw-r--r--drivers/net/ethernet/faraday/ftgmac100.c5
-rw-r--r--drivers/net/ethernet/freescale/enetc/enetc_qos.c4
-rw-r--r--drivers/net/ethernet/freescale/fec_main.c2
-rw-r--r--drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c6
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_tqp_stats.c4
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c84
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3_enet.c9
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c31
-rw-r--r--drivers/net/ethernet/huawei/hinic/hinic_hw_wq.c7
-rw-r--r--drivers/net/ethernet/ibm/ibmvnic.c129
-rw-r--r--drivers/net/ethernet/ibm/ibmvnic.h6
-rw-r--r--drivers/net/ethernet/intel/e1000e/ich8lan.c4
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_main.c27
-rw-r--r--drivers/net/ethernet/intel/ice/ice.h1
-rw-r--r--drivers/net/ethernet/intel/ice/ice_eswitch.c3
-rw-r--r--drivers/net/ethernet/intel/ice/ice_eswitch.h2
-rw-r--r--drivers/net/ethernet/intel/ice/ice_idc.c25
-rw-r--r--drivers/net/ethernet/intel/ice/ice_lib.c16
-rw-r--r--drivers/net/ethernet/intel/ice/ice_main.c12
-rw-r--r--drivers/net/ethernet/intel/ice/ice_nvm.c1
-rw-r--r--drivers/net/ethernet/intel/ice/ice_ptp.c29
-rw-r--r--drivers/net/ethernet/intel/ice/ice_sriov.c2
-rw-r--r--drivers/net/ethernet/intel/ice/ice_txrx.h11
-rw-r--r--drivers/net/ethernet/intel/ice/ice_virtchnl.c95
-rw-r--r--drivers/net/ethernet/intel/ice/ice_xsk.c7
-rw-r--r--drivers/net/ethernet/intel/igb/igb_main.c3
-rw-r--r--drivers/net/ethernet/intel/igc/igc_i225.c11
-rw-r--r--drivers/net/ethernet/intel/igc/igc_phy.c4
-rw-r--r--drivers/net/ethernet/intel/igc/igc_ptp.c15
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c3
-rw-r--r--drivers/net/ethernet/mediatek/mtk_ppe.c2
-rw-r--r--drivers/net/ethernet/mediatek/mtk_sgmii.c1
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/diag/rsc_dump.c31
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.c3
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ct.c34
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs_smfs.c58
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c24
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h11
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c3
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/trap.c13
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c10
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_main.c27
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_tc.c11
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fs_core.c131
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fs_core.h6
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c85
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fw_reset.h1
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c38
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lag/mp.h7
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/main.c19
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c71
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/dr_fw.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v0.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h3
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h3
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c11
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c2
-rw-r--r--drivers/net/ethernet/microchip/lan966x/lan966x_mac.c4
-rw-r--r--drivers/net/ethernet/microchip/lan966x/lan966x_main.c31
-rw-r--r--drivers/net/ethernet/mscc/ocelot.c27
-rw-r--r--drivers/net/ethernet/mscc/ocelot_flower.c9
-rw-r--r--drivers/net/ethernet/mscc/ocelot_vcap.c9
-rw-r--r--drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c3
-rw-r--r--drivers/net/ethernet/qlogic/qla3xxx.c3
-rw-r--r--drivers/net/ethernet/sfc/ef10.c5
-rw-r--r--drivers/net/ethernet/sfc/efx_channels.c7
-rw-r--r--drivers/net/ethernet/sfc/ptp.c14
-rw-r--r--drivers/net/ethernet/sfc/ptp.h1
-rw-r--r--drivers/net/ethernet/smsc/smsc911x.c2
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c1
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c12
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c1
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c4
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_main.c2
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c4
-rw-r--r--drivers/net/ethernet/ti/cpsw_new.c5
-rw-r--r--drivers/net/ethernet/xilinx/xilinx_emaclite.c30
-rw-r--r--drivers/net/hippi/rrunner.c2
-rw-r--r--drivers/net/ipa/gsi.c6
-rw-r--r--drivers/net/ipa/ipa_endpoint.c13
-rw-r--r--drivers/net/ipa/ipa_qmi.c2
-rw-r--r--drivers/net/mdio/mdio-mux-bcm6368.c2
-rw-r--r--drivers/net/phy/marvell10g.c2
-rw-r--r--drivers/net/phy/micrel.c10
-rw-r--r--drivers/net/phy/microchip_t1.c1
-rw-r--r--drivers/net/phy/phy.c7
-rw-r--r--drivers/net/phy/sfp.c12
-rw-r--r--drivers/net/ppp/pppoe.c1
-rw-r--r--drivers/net/virtio_net.c20
-rw-r--r--drivers/net/vmxnet3/vmxnet3_drv.c6
-rw-r--r--drivers/net/wan/cosa.c2
-rw-r--r--drivers/net/wireguard/device.c3
-rw-r--r--drivers/net/wireless/ath/ath11k/core.c1
-rw-r--r--drivers/net/wireless/ath/ath11k/core.h13
-rw-r--r--drivers/net/wireless/ath/ath11k/mac.c71
-rw-r--r--drivers/net/wireless/ath/ath11k/mac.h2
-rw-r--r--drivers/net/wireless/ath/ath11k/reg.c43
-rw-r--r--drivers/net/wireless/ath/ath11k/reg.h2
-rw-r--r--drivers/net/wireless/ath/ath11k/wmi.c16
-rw-r--r--drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c2
-rw-r--r--drivers/net/wireless/mac80211_hwsim.c10
-rw-r--r--drivers/nfc/nfcmrvl/main.c2
-rw-r--r--drivers/nfc/pn533/pn533.c5
-rw-r--r--drivers/nvme/host/constants.c5
-rw-r--r--drivers/nvme/host/core.c111
-rw-r--r--drivers/nvme/host/fabrics.h8
-rw-r--r--drivers/nvme/host/fc.c26
-rw-r--r--drivers/nvme/host/ioctl.c278
-rw-r--r--drivers/nvme/host/multipath.c1
-rw-r--r--drivers/nvme/host/nvme.h6
-rw-r--r--drivers/nvme/host/pci.c5
-rw-r--r--drivers/nvme/host/rdma.c5
-rw-r--r--drivers/nvme/host/tcp.c5
-rw-r--r--drivers/nvme/target/io-cmd-bdev.c2
-rw-r--r--drivers/nvme/target/zns.c3
-rw-r--r--drivers/pci/controller/dwc/pcie-qcom.c7
-rw-r--r--drivers/pci/controller/pci-aardvark.c48
-rw-r--r--drivers/pci/pci.c10
-rw-r--r--drivers/perf/arm_pmu.c10
-rw-r--r--drivers/phy/amlogic/phy-meson-g12a-usb3-pcie.c20
-rw-r--r--drivers/phy/motorola/phy-mapphone-mdm6600.c3
-rw-r--r--drivers/phy/samsung/phy-exynos5250-sata.c21
-rw-r--r--drivers/phy/ti/phy-am654-serdes.c2
-rw-r--r--drivers/phy/ti/phy-omap-usb2.c2
-rw-r--r--drivers/phy/ti/phy-ti-pipe3.c1
-rw-r--r--drivers/phy/ti/phy-tusb1210.c12
-rw-r--r--drivers/pinctrl/aspeed/pinctrl-aspeed-g6.c17
-rw-r--r--drivers/pinctrl/intel/pinctrl-alderlake.c60
-rw-r--r--drivers/pinctrl/mediatek/Kconfig1
-rw-r--r--drivers/pinctrl/mediatek/pinctrl-mt8365.c2
-rw-r--r--drivers/pinctrl/pinctrl-ocelot.c4
-rw-r--r--drivers/pinctrl/pinctrl-pistachio.c6
-rw-r--r--drivers/pinctrl/pinctrl-rockchip.c69
-rw-r--r--drivers/pinctrl/qcom/pinctrl-sm6350.c16
-rw-r--r--drivers/pinctrl/samsung/Kconfig11
-rw-r--r--drivers/pinctrl/samsung/pinctrl-exynos-arm64.c2
-rw-r--r--drivers/pinctrl/stm32/pinctrl-stm32.c23
-rw-r--r--drivers/pinctrl/sunplus/sppctl_sp7021.c8
-rw-r--r--drivers/pinctrl/sunxi/pinctrl-suniv-f1c100s.c4
-rw-r--r--drivers/platform/surface/aggregator/core.c2
-rw-r--r--drivers/platform/surface/surface_gpe.c8
-rw-r--r--drivers/platform/x86/asus-wmi.c15
-rw-r--r--drivers/platform/x86/dell/dell-laptop.c13
-rw-r--r--drivers/platform/x86/gigabyte-wmi.c1
-rw-r--r--drivers/platform/x86/intel/pmc/core.h2
-rw-r--r--drivers/platform/x86/intel/pmt/telemetry.c2
-rw-r--r--drivers/platform/x86/intel/sdsi.c44
-rw-r--r--drivers/platform/x86/intel/uncore-frequency/uncore-frequency.c3
-rw-r--r--drivers/platform/x86/thinkpad_acpi.c175
-rw-r--r--drivers/ptp/ptp_ocp.c64
-rw-r--r--drivers/rtc/rtc-sun6i.c17
-rw-r--r--drivers/s390/block/dasd.c18
-rw-r--r--drivers/s390/block/dasd_eckd.c33
-rw-r--r--drivers/s390/block/dasd_fba.c2
-rw-r--r--drivers/s390/block/dasd_int.h14
-rw-r--r--drivers/s390/net/ctcm_mpc.c6
-rw-r--r--drivers/s390/net/ctcm_sysfs.c5
-rw-r--r--drivers/s390/net/lcs.c7
-rw-r--r--drivers/scsi/device_handler/scsi_dh_alua.c3
-rw-r--r--drivers/scsi/lpfc/lpfc_els.c2
-rw-r--r--drivers/scsi/lpfc/lpfc_scsi.c4
-rw-r--r--drivers/scsi/lpfc/lpfc_sli.c6
-rw-r--r--drivers/scsi/qla2xxx/qla_target.c3
-rw-r--r--drivers/scsi/sd.c2
-rw-r--r--drivers/scsi/sr_ioctl.c15
-rw-r--r--drivers/scsi/ufs/ufshpb.c19
-rw-r--r--drivers/slimbus/qcom-ctrl.c4
-rw-r--r--drivers/soc/imx/imx8m-blk-ctrl.c2
-rw-r--r--drivers/spi/atmel-quadspi.c3
-rw-r--r--drivers/spi/spi-cadence-quadspi.c19
-rw-r--r--drivers/spi/spi-intel-pci.c1
-rw-r--r--drivers/spi/spi-mtk-nor.c12
-rw-r--r--drivers/target/iscsi/iscsi_target.c32
-rw-r--r--drivers/target/iscsi/iscsi_target_configfs.c24
-rw-r--r--drivers/target/target_core_device.c20
-rw-r--r--drivers/target/target_core_file.c10
-rw-r--r--drivers/target/target_core_iblock.c17
-rw-r--r--drivers/target/target_core_pscsi.c46
-rw-r--r--drivers/tee/optee/ffa_abi.c1
-rw-r--r--drivers/thermal/Kconfig6
-rw-r--r--drivers/thermal/gov_user_space.c3
-rw-r--r--drivers/thermal/intel/int340x_thermal/int3400_thermal.c52
-rw-r--r--drivers/thermal/thermal_sysfs.c3
-rw-r--r--drivers/tty/n_gsm.c495
-rw-r--r--drivers/tty/serial/8250/8250_mtk.c29
-rw-r--r--drivers/tty/serial/8250/8250_pci.c8
-rw-r--r--drivers/tty/serial/8250/8250_port.c6
-rw-r--r--drivers/tty/serial/amba-pl011.c9
-rw-r--r--drivers/tty/serial/digicolor-usart.c5
-rw-r--r--drivers/tty/serial/fsl_lpuart.c18
-rw-r--r--drivers/tty/serial/imx.c2
-rw-r--r--drivers/tty/serial/sc16is7xx.c6
-rw-r--r--drivers/usb/cdns3/cdns3-gadget.c7
-rw-r--r--drivers/usb/class/cdc-wdm.c1
-rw-r--r--drivers/usb/core/devio.c14
-rw-r--r--drivers/usb/core/quirks.c6
-rw-r--r--drivers/usb/dwc3/core.c34
-rw-r--r--drivers/usb/dwc3/drd.c11
-rw-r--r--drivers/usb/dwc3/dwc3-pci.c8
-rw-r--r--drivers/usb/dwc3/gadget.c31
-rw-r--r--drivers/usb/gadget/configfs.c2
-rw-r--r--drivers/usb/gadget/function/f_uvc.c25
-rw-r--r--drivers/usb/gadget/function/uvc.h2
-rw-r--r--drivers/usb/gadget/function/uvc_queue.c2
-rw-r--r--drivers/usb/gadget/function/uvc_v4l2.c3
-rw-r--r--drivers/usb/gadget/legacy/raw_gadget.c2
-rw-r--r--drivers/usb/host/ehci-hcd.c23
-rw-r--r--drivers/usb/host/ehci-pci.c4
-rw-r--r--drivers/usb/host/ehci.h1
-rw-r--r--drivers/usb/host/xhci-hub.c2
-rw-r--r--drivers/usb/host/xhci-mtk-sch.c90
-rw-r--r--drivers/usb/host/xhci-mtk.h2
-rw-r--r--drivers/usb/host/xhci-pci.c4
-rw-r--r--drivers/usb/host/xhci-ring.c1
-rw-r--r--drivers/usb/host/xhci-tegra.c4
-rw-r--r--drivers/usb/host/xhci.c11
-rw-r--r--drivers/usb/misc/qcom_eud.c10
-rw-r--r--drivers/usb/misc/uss720.c3
-rw-r--r--drivers/usb/mtu3/mtu3_dr.c6
-rw-r--r--drivers/usb/phy/phy-generic.c7
-rw-r--r--drivers/usb/serial/cp210x.c2
-rw-r--r--drivers/usb/serial/option.c16
-rw-r--r--drivers/usb/serial/pl2303.c1
-rw-r--r--drivers/usb/serial/pl2303.h1
-rw-r--r--drivers/usb/serial/qcserial.c2
-rw-r--r--drivers/usb/serial/whiteheat.c5
-rw-r--r--drivers/usb/typec/Kconfig1
-rw-r--r--drivers/usb/typec/tcpm/tcpci.c2
-rw-r--r--drivers/usb/typec/tcpm/tcpci_mt6360.c26
-rw-r--r--drivers/usb/typec/ucsi/ucsi.c24
-rw-r--r--drivers/vdpa/mlx5/net/mlx5_vnet.c61
-rw-r--r--drivers/vhost/net.c15
-rw-r--r--drivers/video/fbdev/arkfb.c3
-rw-r--r--drivers/video/fbdev/aty/aty128fb.c1
-rw-r--r--drivers/video/fbdev/aty/atyfb_base.c1
-rw-r--r--drivers/video/fbdev/aty/radeon_pm.c1
-rw-r--r--drivers/video/fbdev/aty/radeonfb.h2
-rw-r--r--drivers/video/fbdev/clps711x-fb.c3
-rw-r--r--drivers/video/fbdev/controlfb.c3
-rw-r--r--drivers/video/fbdev/core/fbsysfs.c4
-rw-r--r--drivers/video/fbdev/efifb.c9
-rw-r--r--drivers/video/fbdev/i740fb.c5
-rw-r--r--drivers/video/fbdev/imxfb.c2
-rw-r--r--drivers/video/fbdev/kyro/fbdev.c2
-rw-r--r--drivers/video/fbdev/matrox/matroxfb_base.h1
-rw-r--r--drivers/video/fbdev/mb862xx/mb862xxfbdrv.c2
-rw-r--r--drivers/video/fbdev/mmp/core.c11
-rw-r--r--drivers/video/fbdev/neofb.c2
-rw-r--r--drivers/video/fbdev/omap/hwa742.c6
-rw-r--r--drivers/video/fbdev/omap/lcdc.c6
-rw-r--r--drivers/video/fbdev/omap/sossi.c5
-rw-r--r--drivers/video/fbdev/platinumfb.c2
-rw-r--r--drivers/video/fbdev/pm2fb.c8
-rw-r--r--drivers/video/fbdev/pxafb.c4
-rw-r--r--drivers/video/fbdev/s3fb.c3
-rw-r--r--drivers/video/fbdev/sh_mobile_lcdcfb.c3
-rw-r--r--drivers/video/fbdev/simplefb.c8
-rw-r--r--drivers/video/fbdev/sis/sis_main.c2
-rw-r--r--drivers/video/fbdev/tridentfb.c3
-rw-r--r--drivers/video/fbdev/udlfb.c14
-rw-r--r--drivers/video/fbdev/valkyriefb.c3
-rw-r--r--drivers/video/fbdev/vesafb.c8
-rw-r--r--drivers/video/fbdev/vt8623fb.c3
-rw-r--r--drivers/video/of_display_timing.c2
-rw-r--r--drivers/virt/Kconfig3
-rw-r--r--drivers/virt/Makefile1
-rw-r--r--drivers/virt/coco/efi_secret/Kconfig16
-rw-r--r--drivers/virt/coco/efi_secret/Makefile2
-rw-r--r--drivers/virt/coco/efi_secret/efi_secret.c349
-rw-r--r--drivers/xen/gntalloc.c4
-rw-r--r--fs/afs/inode.c14
-rw-r--r--fs/btrfs/btrfs_inode.h11
-rw-r--r--fs/btrfs/ctree.h1
-rw-r--r--fs/btrfs/dev-replace.c7
-rw-r--r--fs/btrfs/disk-io.c23
-rw-r--r--fs/btrfs/extent-tree.c8
-rw-r--r--fs/btrfs/extent_io.c44
-rw-r--r--fs/btrfs/inode.c33
-rw-r--r--fs/btrfs/ioctl.c12
-rw-r--r--fs/btrfs/props.c59
-rw-r--r--fs/btrfs/props.h4
-rw-r--r--fs/btrfs/scrub.c26
-rw-r--r--fs/btrfs/sysfs.c3
-rw-r--r--fs/btrfs/tree-log.c54
-rw-r--r--fs/btrfs/volumes.c15
-rw-r--r--fs/btrfs/volumes.h7
-rw-r--r--fs/btrfs/xattr.c11
-rw-r--r--fs/btrfs/zoned.c37
-rw-r--r--fs/btrfs/zoned.h4
-rw-r--r--fs/ceph/addr.c11
-rw-r--r--fs/ceph/caps.c7
-rw-r--r--fs/ceph/file.c16
-rw-r--r--fs/ceph/mds_client.c6
-rw-r--r--fs/cifs/connect.c11
-rw-r--r--fs/cifs/dfs_cache.c19
-rw-r--r--fs/cifs/smb2ops.c8
-rw-r--r--fs/cifs/transport.c3
-rw-r--r--fs/direct-io.c32
-rw-r--r--fs/erofs/zdata.c12
-rw-r--r--fs/erofs/zdata.h2
-rw-r--r--fs/exfat/file.c5
-rw-r--r--fs/exfat/super.c10
-rw-r--r--fs/ext4/ext4.h7
-rw-r--r--fs/ext4/extents.c32
-rw-r--r--fs/ext4/inode.c18
-rw-r--r--fs/ext4/ioctl.c26
-rw-r--r--fs/ext4/mballoc.c10
-rw-r--r--fs/ext4/namei.c4
-rw-r--r--fs/ext4/page-io.c4
-rw-r--r--fs/ext4/super.c50
-rw-r--r--fs/f2fs/checkpoint.c6
-rw-r--r--fs/f2fs/data.c33
-rw-r--r--fs/f2fs/f2fs.h12
-rw-r--r--fs/f2fs/file.c19
-rw-r--r--fs/f2fs/inode.c3
-rw-r--r--fs/f2fs/segment.c103
-rw-r--r--fs/f2fs/super.c32
-rw-r--r--fs/fat/file.c5
-rw-r--r--fs/fat/inode.c10
-rw-r--r--fs/fs-writeback.c17
-rw-r--r--fs/gfs2/bmap.c11
-rw-r--r--fs/gfs2/file.c143
-rw-r--r--fs/gfs2/rgrp.c7
-rw-r--r--fs/hugetlbfs/inode.c9
-rw-r--r--fs/internal.h29
-rw-r--r--fs/io-wq.c4
-rw-r--r--fs/io-wq.h1
-rw-r--r--fs/io_uring.c3584
-rw-r--r--fs/iomap/direct-io.c10
-rw-r--r--fs/jbd2/commit.c4
-rw-r--r--fs/jbd2/journal.c9
-rw-r--r--fs/jfs/ioctl.c5
-rw-r--r--fs/jfs/super.c8
-rw-r--r--fs/kernfs/dir.c7
-rw-r--r--fs/ksmbd/misc.c40
-rw-r--r--fs/ksmbd/misc.h3
-rw-r--r--fs/ksmbd/oplock.c30
-rw-r--r--fs/ksmbd/oplock.h2
-rw-r--r--fs/ksmbd/smb2pdu.c34
-rw-r--r--fs/ksmbd/vfs.c6
-rw-r--r--fs/ksmbd/vfs_cache.c2
-rw-r--r--fs/ksmbd/vfs_cache.h1
-rw-r--r--fs/namespace.c14
-rw-r--r--fs/nfs/fs_context.c2
-rw-r--r--fs/nfs/nfs4proc.c12
-rw-r--r--fs/nilfs2/ioctl.c6
-rw-r--r--fs/nilfs2/sufile.c4
-rw-r--r--fs/nilfs2/the_nilfs.c4
-rw-r--r--fs/notify/fanotify/fanotify_user.c13
-rw-r--r--fs/ntfs3/file.c6
-rw-r--r--fs/ntfs3/super.c10
-rw-r--r--fs/ocfs2/ioctl.c5
-rw-r--r--fs/pipe.c9
-rw-r--r--fs/posix_acl.c10
-rw-r--r--fs/proc/fd.c23
-rw-r--r--fs/squashfs/block.c20
-rw-r--r--fs/super.c2
-rw-r--r--fs/udf/namei.c8
-rw-r--r--fs/xattr.c143
-rw-r--r--fs/xfs/xfs_buf.c6
-rw-r--r--fs/xfs/xfs_buf.h42
-rw-r--r--fs/xfs/xfs_discard.c8
-rw-r--r--fs/xfs/xfs_inode.c24
-rw-r--r--fs/xfs/xfs_log_cil.c2
-rw-r--r--fs/xfs/xfs_super.c12
-rw-r--r--fs/xfs/xfs_trans.h2
-rw-r--r--fs/zonefs/Makefile2
-rw-r--r--fs/zonefs/super.c227
-rw-r--r--fs/zonefs/sysfs.c139
-rw-r--r--fs/zonefs/zonefs.h18
-rw-r--r--include/asm-generic/bug.h11
-rw-r--r--include/dt-bindings/clock/microchip,mpfs-clock.h5
-rw-r--r--include/linux/audit.h2
-rw-r--r--include/linux/backing-dev.h6
-rw-r--r--include/linux/bio.h15
-rw-r--r--include/linux/blk-cgroup.h258
-rw-r--r--include/linux/blk-mq.h1
-rw-r--r--include/linux/blk_types.h23
-rw-r--r--include/linux/blkdev.h134
-rw-r--r--include/linux/blktrace_api.h10
-rw-r--r--include/linux/cdrom.h1
-rw-r--r--include/linux/ceph/osd_client.h3
-rw-r--r--include/linux/cpu.h2
-rw-r--r--include/linux/dma-buf-map.h266
-rw-r--r--include/linux/efi.h27
-rw-r--r--include/linux/fs.h2
-rw-r--r--include/linux/hugetlb.h6
-rw-r--r--include/linux/io_uring.h38
-rw-r--r--include/linux/kernel.h2
-rw-r--r--include/linux/kthread.h4
-rw-r--r--include/linux/kvm_host.h26
-rw-r--r--include/linux/libata.h3
-rw-r--r--include/linux/memcontrol.h5
-rw-r--r--include/linux/mm.h8
-rw-r--r--include/linux/mtd/mtd.h6
-rw-r--r--include/linux/netdev_features.h4
-rw-r--r--include/linux/netdevice.h23
-rw-r--r--include/linux/nvme.h46
-rw-r--r--include/linux/posix_acl_xattr.h4
-rw-r--r--include/linux/rcupdate.h1
-rw-r--r--include/linux/sched.h42
-rw-r--r--include/linux/sched/mm.h8
-rw-r--r--include/linux/sched/signal.h13
-rw-r--r--include/linux/socket.h7
-rw-r--r--include/linux/srcutree.h32
-rw-r--r--include/linux/stmmac.h1
-rw-r--r--include/linux/sunrpc/clnt.h1
-rw-r--r--include/linux/task_work.h1
-rw-r--r--include/linux/torture.h2
-rw-r--r--include/linux/usb/pd_bdo.h2
-rw-r--r--include/linux/vmalloc.h4
-rw-r--r--include/memory/renesas-rpc-if.h1
-rw-r--r--include/net/bluetooth/hci.h1
-rw-r--r--include/net/bluetooth/hci_core.h5
-rw-r--r--include/net/esp.h2
-rw-r--r--include/net/inet_hashtables.h2
-rw-r--r--include/net/inet_timewait_sock.h3
-rw-r--r--include/net/ip.h1
-rw-r--r--include/net/ip6_tunnel.h2
-rw-r--r--include/net/ip_tunnels.h13
-rw-r--r--include/net/netns/ipv6.h4
-rw-r--r--include/net/secure_seq.h4
-rw-r--r--include/net/tc_act/tc_pedit.h1
-rw-r--r--include/net/tcp.h8
-rw-r--r--include/net/xfrm.h14
-rw-r--r--include/net/xsk_buff_pool.h1
-rw-r--r--include/soc/mscc/ocelot_vcap.h2
-rw-r--r--include/sound/soc-component.h4
-rw-r--r--include/target/target_core_backend.h4
-rw-r--r--include/trace/events/io_uring.h107
-rw-r--r--include/trace/events/sched.h6
-rw-r--r--include/uapi/linux/cdrom.h2
-rw-r--r--include/uapi/linux/dma-buf.h4
-rw-r--r--include/uapi/linux/elf.h2
-rw-r--r--include/uapi/linux/fb.h2
-rw-r--r--include/uapi/linux/input-event-codes.h21
-rw-r--r--include/uapi/linux/io_uring.h128
-rw-r--r--include/uapi/linux/kvm.h10
-rw-r--r--include/uapi/linux/loop.h7
-rw-r--r--include/uapi/linux/nvme_ioctl.h28
-rw-r--r--include/uapi/linux/rfkill.h2
-rw-r--r--include/uapi/linux/virtio_ids.h14
-rw-r--r--kernel/auditsc.c6
-rw-r--r--kernel/bpf/Kconfig1
-rw-r--r--kernel/cgroup/cpuset.c7
-rw-r--r--kernel/cpu.c22
-rw-r--r--kernel/events/core.c16
-rw-r--r--kernel/events/internal.h5
-rw-r--r--kernel/events/ring_buffer.c5
-rw-r--r--kernel/fork.c2
-rw-r--r--kernel/irq/internals.h2
-rw-r--r--kernel/irq/irqdesc.c3
-rw-r--r--kernel/irq/manage.c39
-rw-r--r--kernel/kcov.c7
-rw-r--r--kernel/kprobes.c2
-rw-r--r--kernel/kthread.c1
-rw-r--r--kernel/rcu/Kconfig73
-rw-r--r--kernel/rcu/Kconfig.debug23
-rw-r--r--kernel/rcu/rcu.h13
-rw-r--r--kernel/rcu/rcu_segcblist.c8
-rw-r--r--kernel/rcu/rcuscale.c22
-rw-r--r--kernel/rcu/rcutorture.c129
-rw-r--r--kernel/rcu/refscale.c22
-rw-r--r--kernel/rcu/srcutree.c639
-rw-r--r--kernel/rcu/sync.c2
-rw-r--r--kernel/rcu/tasks.h89
-rw-r--r--kernel/rcu/tree.c146
-rw-r--r--kernel/rcu/tree.h9
-rw-r--r--kernel/rcu/tree_exp.h151
-rw-r--r--kernel/rcu/tree_nocb.h39
-rw-r--r--kernel/rcu/tree_plugin.h28
-rw-r--r--kernel/rcu/tree_stall.h36
-rw-r--r--kernel/rcu/update.c2
-rw-r--r--kernel/scftorture.c5
-rw-r--r--kernel/sched/core.c14
-rw-r--r--kernel/sched/fair.c10
-rw-r--r--kernel/smp.c7
-rw-r--r--kernel/smpboot.c7
-rw-r--r--kernel/task_work.c25
-rw-r--r--kernel/time/timekeeping.c4
-rw-r--r--kernel/trace/Kconfig1
-rw-r--r--kernel/trace/blktrace.c26
-rw-r--r--kernel/trace/fgraph.c4
-rw-r--r--kernel/trace/ftrace.c4
-rw-r--r--kernel/trace/trace_events.c8
-rw-r--r--kernel/trace/trace_osnoise.c4
-rw-r--r--kernel/trace/trace_sched_switch.c4
-rw-r--r--kernel/trace/trace_sched_wakeup.c4
-rw-r--r--lib/debugobjects.c5
-rw-r--r--lib/dim/net_dim.c44
-rw-r--r--lib/hexdump.c41
-rw-r--r--lib/irq_poll.c8
-rw-r--r--lib/percpu-refcount.c1
-rw-r--r--lib/strncpy_from_user.c2
-rw-r--r--lib/strnlen_user.c2
-rw-r--r--lib/xarray.c2
-rw-r--r--mm/backing-dev.c19
-rw-r--r--mm/huge_memory.c7
-rw-r--r--mm/hugetlb.c10
-rw-r--r--mm/kasan/quarantine.c7
-rw-r--r--mm/kfence/core.c10
-rw-r--r--mm/memcontrol.c12
-rw-r--r--mm/memory-failure.c147
-rw-r--r--mm/mmap.c8
-rw-r--r--mm/mmu_notifier.c14
-rw-r--r--mm/mremap.c2
-rw-r--r--mm/nommu.c2
-rw-r--r--mm/oom_kill.c54
-rw-r--r--mm/page_alloc.c2
-rw-r--r--mm/page_io.c4
-rw-r--r--mm/readahead.c16
-rw-r--r--mm/swapfile.c32
-rw-r--r--mm/userfaultfd.c15
-rw-r--r--mm/util.c11
-rw-r--r--mm/vmalloc.c53
-rw-r--r--mm/workingset.c2
-rw-r--r--net/batman-adv/fragmentation.c11
-rw-r--r--net/bluetooth/hci_conn.c32
-rw-r--r--net/bluetooth/hci_core.c6
-rw-r--r--net/bluetooth/hci_event.c80
-rw-r--r--net/bluetooth/hci_sync.c11
-rw-r--r--net/bpf/test_run.c5
-rw-r--r--net/bridge/br_input.c7
-rw-r--r--net/bridge/br_switchdev.c2
-rw-r--r--net/can/isotp.c35
-rw-r--r--net/ceph/osd_client.c317
-rw-r--r--net/core/dev.c16
-rw-r--r--net/core/lwt_bpf.c7
-rw-r--r--net/core/secure_seq.c16
-rw-r--r--net/core/skbuff.c4
-rw-r--r--net/dccp/ipv4.c6
-rw-r--r--net/dccp/ipv6.c6
-rw-r--r--net/decnet/dn_dev.c4
-rw-r--r--net/decnet/dn_neigh.c3
-rw-r--r--net/decnet/dn_route.c4
-rw-r--r--net/dsa/port.c3
-rw-r--r--net/dsa/slave.c2
-rw-r--r--net/dsa/tag_hellcreek.c8
-rw-r--r--net/ipv4/esp4.c5
-rw-r--r--net/ipv4/igmp.c9
-rw-r--r--net/ipv4/inet_hashtables.c42
-rw-r--r--net/ipv4/inet_timewait_sock.c58
-rw-r--r--net/ipv4/ip_gre.c16
-rw-r--r--net/ipv4/ip_tunnel.c9
-rw-r--r--net/ipv4/netfilter/nf_flow_table_ipv4.c0
-rw-r--r--net/ipv4/ping.c12
-rw-r--r--net/ipv4/route.c24
-rw-r--r--net/ipv4/syncookies.c8
-rw-r--r--net/ipv4/tcp.c16
-rw-r--r--net/ipv4/tcp_input.c15
-rw-r--r--net/ipv4/tcp_ipv4.c2
-rw-r--r--net/ipv4/tcp_minisocks.c2
-rw-r--r--net/ipv4/tcp_output.c1
-rw-r--r--net/ipv4/tcp_rate.c11
-rw-r--r--net/ipv6/esp6.c5
-rw-r--r--net/ipv6/inet6_hashtables.c4
-rw-r--r--net/ipv6/ip6_gre.c28
-rw-r--r--net/ipv6/mcast.c8
-rw-r--r--net/ipv6/netfilter.c10
-rw-r--r--net/ipv6/route.c11
-rw-r--r--net/ipv6/syncookies.c3
-rw-r--r--net/ipv6/tcp_ipv6.c6
-rw-r--r--net/key/af_key.c12
-rw-r--r--net/l3mdev/l3mdev.c2
-rw-r--r--net/mac80211/mlme.c6
-rw-r--r--net/mac80211/rx.c3
-rw-r--r--net/mctp/device.c2
-rw-r--r--net/mptcp/options.c36
-rw-r--r--net/mptcp/pm.c5
-rw-r--r--net/mptcp/protocol.h19
-rw-r--r--net/mptcp/subflow.c35
-rw-r--r--net/netfilter/ipvs/ip_vs_conn.c2
-rw-r--r--net/netfilter/nf_conntrack_proto_tcp.c21
-rw-r--r--net/netfilter/nf_conntrack_standalone.c2
-rw-r--r--net/netfilter/nf_flow_table_core.c60
-rw-r--r--net/netfilter/nf_flow_table_ip.c19
-rw-r--r--net/netfilter/nf_tables_api.c11
-rw-r--r--net/netfilter/nft_flow_offload.c28
-rw-r--r--net/netfilter/nft_set_rbtree.c6
-rw-r--r--net/netfilter/nft_socket.c52
-rw-r--r--net/netlink/af_netlink.c8
-rw-r--r--net/nfc/core.c29
-rw-r--r--net/nfc/nci/data.c2
-rw-r--r--net/nfc/nci/hci.c4
-rw-r--r--net/nfc/netlink.c4
-rw-r--r--net/openvswitch/flow_netlink.c2
-rw-r--r--net/packet/af_packet.c13
-rw-r--r--net/rds/tcp.c18
-rw-r--r--net/rds/tcp.h2
-rw-r--r--net/rds/tcp_connect.c5
-rw-r--r--net/rds/tcp_listen.c5
-rw-r--r--net/rxrpc/local_object.c3
-rw-r--r--net/rxrpc/net_ns.c2
-rw-r--r--net/sched/act_pedit.c30
-rw-r--r--net/sched/cls_u32.c24
-rw-r--r--net/sctp/sm_sideeffect.c4
-rw-r--r--net/smc/af_smc.c141
-rw-r--r--net/smc/smc.h29
-rw-r--r--net/smc/smc_close.c5
-rw-r--r--net/smc/smc_rx.c4
-rw-r--r--net/socket.c52
-rw-r--r--net/sunrpc/auth_gss/gss_rpc_upcall.c1
-rw-r--r--net/sunrpc/clnt.c44
-rw-r--r--net/sunrpc/xprtsock.c35
-rw-r--r--net/tls/tls_device.c15
-rw-r--r--net/wireless/nl80211.c18
-rw-r--r--net/wireless/scan.c2
-rw-r--r--net/xdp/xsk.c15
-rw-r--r--net/xdp/xsk_buff_pool.c16
-rw-r--r--net/xfrm/xfrm_policy.c6
-rw-r--r--samples/trace_events/trace_custom_sched.h6
-rw-r--r--scripts/Makefile.build2
-rwxr-xr-xscripts/link-vmlinux.sh2
-rw-r--r--security/selinux/ss/hashtab.c3
-rw-r--r--sound/firewire/fireworks/fireworks_hwdep.c1
-rw-r--r--sound/hda/hdac_i915.c7
-rw-r--r--sound/hda/intel-dsp-config.c9
-rw-r--r--sound/isa/wavefront/wavefront_synth.c3
-rw-r--r--sound/oss/dmasound/dmasound_core.c2
-rw-r--r--sound/pci/hda/patch_hdmi.c7
-rw-r--r--sound/pci/hda/patch_realtek.c98
-rw-r--r--sound/soc/atmel/mchp-pdmc.c1
-rw-r--r--sound/soc/atmel/sam9g20_wm8731.c61
-rw-r--r--sound/soc/codecs/cs35l41-lib.c10
-rw-r--r--sound/soc/codecs/da7219.c14
-rw-r--r--sound/soc/codecs/lpass-rx-macro.c14
-rw-r--r--sound/soc/codecs/lpass-tx-macro.c14
-rw-r--r--sound/soc/codecs/lpass-va-macro.c8
-rw-r--r--sound/soc/codecs/max98090.c5
-rw-r--r--sound/soc/codecs/msm8916-wcd-digital.c9
-rw-r--r--sound/soc/codecs/rk817_codec.c2
-rw-r--r--sound/soc/codecs/rt5682.c20
-rw-r--r--sound/soc/codecs/rt5682s.c11
-rw-r--r--sound/soc/codecs/rt711.c7
-rw-r--r--sound/soc/codecs/rt9120.c1
-rw-r--r--sound/soc/codecs/wcd934x.c26
-rw-r--r--sound/soc/codecs/wm8731.c19
-rw-r--r--sound/soc/codecs/wm8958-dsp2.c8
-rw-r--r--sound/soc/fsl/fsl_sai.c2
-rw-r--r--sound/soc/generic/simple-card-utils.c8
-rw-r--r--sound/soc/intel/boards/sof_es8336.c117
-rw-r--r--sound/soc/intel/boards/sof_rt5682.c13
-rw-r--r--sound/soc/intel/common/soc-acpi-intel-tgl-match.c4
-rw-r--r--sound/soc/meson/aiu-acodec-ctrl.c5
-rw-r--r--sound/soc/meson/aiu-codec-ctrl.c5
-rw-r--r--sound/soc/meson/aiu.c3
-rw-r--r--sound/soc/meson/axg-card.c1
-rw-r--r--sound/soc/meson/axg-tdm-interface.c26
-rw-r--r--sound/soc/meson/g12a-tohdmitx.c2
-rw-r--r--sound/soc/soc-core.c5
-rw-r--r--sound/soc/soc-dapm.c6
-rw-r--r--sound/soc/soc-generic-dmaengine-pcm.c6
-rw-r--r--sound/soc/soc-ops.c20
-rw-r--r--sound/soc/soc-pcm.c2
-rw-r--r--sound/soc/soc-topology.c4
-rw-r--r--sound/soc/sof/sof-pci-dev.c14
-rw-r--r--sound/soc/sof/topology.c50
-rw-r--r--sound/usb/midi.c1
-rw-r--r--sound/usb/mixer_maps.c4
-rw-r--r--sound/usb/quirks-table.h9
-rw-r--r--sound/usb/quirks.c2
-rw-r--r--tools/Makefile4
-rw-r--r--tools/build/Makefile.feature1
-rw-r--r--tools/build/feature/Makefile4
-rw-r--r--tools/build/feature/test-libbpf-btf__load_from_kernel_by_id.c7
-rw-r--r--tools/include/linux/slab.h8
-rw-r--r--tools/include/nolibc/Makefile42
-rw-r--r--tools/include/nolibc/arch-aarch64.h199
-rw-r--r--tools/include/nolibc/arch-arm.h204
-rw-r--r--tools/include/nolibc/arch-i386.h219
-rw-r--r--tools/include/nolibc/arch-mips.h215
-rw-r--r--tools/include/nolibc/arch-riscv.h204
-rw-r--r--tools/include/nolibc/arch-x86_64.h215
-rw-r--r--tools/include/nolibc/arch.h32
-rw-r--r--tools/include/nolibc/ctype.h99
-rw-r--r--tools/include/nolibc/errno.h27
-rw-r--r--tools/include/nolibc/nolibc.h2540
-rw-r--r--tools/include/nolibc/signal.h22
-rw-r--r--tools/include/nolibc/std.h49
-rw-r--r--tools/include/nolibc/stdio.h306
-rw-r--r--tools/include/nolibc/stdlib.h423
-rw-r--r--tools/include/nolibc/string.h285
-rw-r--r--tools/include/nolibc/sys.h1247
-rw-r--r--tools/include/nolibc/time.h28
-rw-r--r--tools/include/nolibc/types.h205
-rw-r--r--tools/include/nolibc/unistd.h54
-rw-r--r--tools/include/uapi/linux/kvm.h10
-rw-r--r--tools/memory-model/README3
-rw-r--r--tools/objtool/check.c94
-rw-r--r--tools/objtool/elf.c189
-rw-r--r--tools/objtool/include/objtool/elf.h4
-rw-r--r--tools/objtool/include/objtool/objtool.h2
-rw-r--r--tools/objtool/objtool.c1
-rw-r--r--tools/perf/Makefile.config7
-rw-r--r--tools/perf/arch/arm64/util/arm-spe.c10
-rw-r--r--tools/perf/arch/arm64/util/machine.c21
-rw-r--r--tools/perf/arch/powerpc/util/Build1
-rw-r--r--tools/perf/arch/powerpc/util/machine.c25
-rw-r--r--tools/perf/arch/s390/util/machine.c16
-rw-r--r--tools/perf/arch/x86/util/perf_regs.c12
-rw-r--r--tools/perf/bench/numa.c4
-rw-r--r--tools/perf/builtin-report.c14
-rw-r--r--tools/perf/builtin-script.c2
-rw-r--r--tools/perf/tests/attr/README1
-rw-r--r--tools/perf/tests/attr/test-record-spe-physical-address12
-rw-r--r--tools/perf/tests/bpf.c10
-rw-r--r--tools/perf/tests/builtin-test.c8
-rw-r--r--tools/perf/tests/perf-time-to-tsc.c4
-rwxr-xr-xtools/perf/tests/shell/stat_all_pmu.sh10
-rwxr-xr-xtools/perf/tests/shell/test_arm_coresight.sh1
-rw-r--r--tools/perf/tests/topology.c11
-rw-r--r--tools/perf/util/arm-spe.c5
-rw-r--r--tools/perf/util/bpf-event.c4
-rw-r--r--tools/perf/util/c++/clang.cpp4
-rw-r--r--tools/perf/util/session.c28
-rw-r--r--tools/perf/util/stat.c17
-rw-r--r--tools/perf/util/symbol-elf.c2
-rw-r--r--tools/perf/util/symbol.c37
-rw-r--r--tools/perf/util/symbol.h3
-rw-r--r--tools/power/x86/intel-speed-select/Makefile2
-rw-r--r--tools/testing/radix-tree/linux.c3
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/spectrum-2/vxlan_flooding_ipv6.sh17
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/vxlan_flooding.sh17
-rwxr-xr-xtools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh2
-rw-r--r--tools/testing/selftests/kvm/include/x86_64/processor.h17
-rw-r--r--tools/testing/selftests/kvm/kvm_page_table_test.c2
-rw-r--r--tools/testing/selftests/kvm/lib/x86_64/processor.c202
-rw-r--r--tools/testing/selftests/kvm/x86_64/amx_test.c1
-rw-r--r--tools/testing/selftests/kvm/x86_64/emulator_error_test.c1
-rw-r--r--tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c37
-rw-r--r--tools/testing/selftests/kvm/x86_64/smm_test.c2
-rw-r--r--tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c1
-rw-r--r--tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c1
-rw-r--r--tools/testing/selftests/kvm/x86_64/xen_vmcall_test.c1
-rw-r--r--tools/testing/selftests/net/Makefile6
-rw-r--r--tools/testing/selftests/net/bpf/Makefile14
-rw-r--r--tools/testing/selftests/net/bpf/nat6to4.c285
-rwxr-xr-xtools/testing/selftests/net/fcnal-test.sh12
-rw-r--r--tools/testing/selftests/net/forwarding/Makefile33
-rwxr-xr-xtools/testing/selftests/net/forwarding/mirror_gre_bridge_1q.sh3
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_join.sh48
-rw-r--r--tools/testing/selftests/net/so_txtime.c4
-rwxr-xr-xtools/testing/selftests/net/udpgro_frglist.sh101
-rw-r--r--tools/testing/selftests/rcutorture/bin/functions.sh2
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-find-errors.sh2
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-recheck.sh7
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-remote.sh14
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm.sh10
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/torture.sh29
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/RUDE012
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/SRCU-N2
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TASKS011
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TASKS024
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TASKS02.boot1
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TASKS032
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TRACE013
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TRACE022
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TREE045
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TREE071
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TREE092
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TREE101
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/ver_functions.sh16
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcuscale/CFcommon7
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcuscale/TREE2
-rw-r--r--tools/testing/selftests/rcutorture/configs/refscale/CFcommon4
-rw-r--r--tools/testing/selftests/rcutorture/configs/refscale/NOPREEMPT2
-rw-r--r--tools/testing/selftests/rcutorture/configs/scf/NOPREEMPT2
-rw-r--r--tools/testing/selftests/rcutorture/configs/scf/PREEMPT1
-rw-r--r--tools/testing/selftests/rcutorture/configs/scf/ver_functions.sh3
-rw-r--r--tools/testing/selftests/seccomp/seccomp_bpf.c10
-rw-r--r--tools/testing/selftests/vm/Makefile10
-rw-r--r--tools/testing/selftests/vm/mremap_test.c85
-rwxr-xr-xtools/testing/selftests/vm/run_vmtests.sh11
-rwxr-xr-xtools/testing/selftests/wireguard/netns.sh34
-rw-r--r--tools/testing/selftests/wireguard/qemu/.gitignore1
-rw-r--r--tools/testing/selftests/wireguard/qemu/Makefile205
-rw-r--r--tools/testing/selftests/wireguard/qemu/arch/aarch64.config5
-rw-r--r--tools/testing/selftests/wireguard/qemu/arch/aarch64_be.config5
-rw-r--r--tools/testing/selftests/wireguard/qemu/arch/arm.config5
-rw-r--r--tools/testing/selftests/wireguard/qemu/arch/armeb.config5
-rw-r--r--tools/testing/selftests/wireguard/qemu/arch/i686.config3
-rw-r--r--tools/testing/selftests/wireguard/qemu/arch/m68k.config2
-rw-r--r--tools/testing/selftests/wireguard/qemu/arch/mips.config2
-rw-r--r--tools/testing/selftests/wireguard/qemu/arch/mips64.config2
-rw-r--r--tools/testing/selftests/wireguard/qemu/arch/mips64el.config2
-rw-r--r--tools/testing/selftests/wireguard/qemu/arch/mipsel.config2
-rw-r--r--tools/testing/selftests/wireguard/qemu/arch/powerpc.config2
-rw-r--r--tools/testing/selftests/wireguard/qemu/arch/powerpc64.config13
-rw-r--r--tools/testing/selftests/wireguard/qemu/arch/powerpc64le.config2
-rw-r--r--tools/testing/selftests/wireguard/qemu/arch/riscv32.config12
-rw-r--r--tools/testing/selftests/wireguard/qemu/arch/riscv64.config12
-rw-r--r--tools/testing/selftests/wireguard/qemu/arch/s390x.config6
-rw-r--r--tools/testing/selftests/wireguard/qemu/arch/x86_64.config3
-rw-r--r--tools/testing/selftests/wireguard/qemu/init.c6
-rw-r--r--virt/kvm/dirty_ring.c2
-rw-r--r--virt/kvm/eventfd.c3
-rw-r--r--virt/kvm/kvm_main.c46
-rw-r--r--virt/kvm/kvm_mm.h2
1369 files changed, 22818 insertions, 12737 deletions
diff --git a/.mailmap b/.mailmap
index 93458154ce7d..0b04aa20c431 100644
--- a/.mailmap
+++ b/.mailmap
@@ -45,6 +45,7 @@ Andrey Konovalov <andreyknvl@gmail.com> <andreyknvl@google.com>
Andrey Ryabinin <ryabinin.a.a@gmail.com> <a.ryabinin@samsung.com>
Andrey Ryabinin <ryabinin.a.a@gmail.com> <aryabinin@virtuozzo.com>
Andrzej Hajda <andrzej.hajda@intel.com> <a.hajda@samsung.com>
+André Almeida <andrealmeid@igalia.com> <andrealmeid@collabora.com>
Andy Adamson <andros@citi.umich.edu>
Antoine Tenart <atenart@kernel.org> <antoine.tenart@bootlin.com>
Antoine Tenart <atenart@kernel.org> <antoine.tenart@free-electrons.com>
@@ -204,6 +205,7 @@ Juha Yrjola <at solidboot.com>
Juha Yrjola <juha.yrjola@nokia.com>
Juha Yrjola <juha.yrjola@solidboot.com>
Julien Thierry <julien.thierry.kdev@gmail.com> <julien.thierry@arm.com>
+Kalle Valo <kvalo@kernel.org> <kvalo@codeaurora.org>
Kalyan Thota <quic_kalyant@quicinc.com> <kalyan_t@codeaurora.org>
Kay Sievers <kay.sievers@vrfy.org>
Kees Cook <keescook@chromium.org> <kees.cook@canonical.com>
@@ -249,6 +251,7 @@ Mark Yao <markyao0591@gmail.com> <mark.yao@rock-chips.com>
Martin Kepplinger <martink@posteo.de> <martin.kepplinger@ginzinger.com>
Martin Kepplinger <martink@posteo.de> <martin.kepplinger@puri.sm>
Martin Kepplinger <martink@posteo.de> <martin.kepplinger@theobroma-systems.com>
+Martyna Szapar-Mudlaw <martyna.szapar-mudlaw@linux.intel.com> <martyna.szapar-mudlaw@intel.com>
Mathieu Othacehe <m.othacehe@gmail.com>
Matthew Wilcox <willy@infradead.org> <matthew.r.wilcox@intel.com>
Matthew Wilcox <willy@infradead.org> <matthew@wil.cx>
diff --git a/Documentation/ABI/testing/securityfs-secrets-coco b/Documentation/ABI/testing/securityfs-secrets-coco
new file mode 100644
index 000000000000..f2b6909155f9
--- /dev/null
+++ b/Documentation/ABI/testing/securityfs-secrets-coco
@@ -0,0 +1,51 @@
+What: security/secrets/coco
+Date: February 2022
+Contact: Dov Murik <dovmurik@linux.ibm.com>
+Description:
+ Exposes confidential computing (coco) EFI secrets to
+ userspace via securityfs.
+
+ EFI can declare memory area used by confidential computing
+ platforms (such as AMD SEV and SEV-ES) for secret injection by
+ the Guest Owner during VM's launch. The secrets are encrypted
+ by the Guest Owner and decrypted inside the trusted enclave,
+ and therefore are not readable by the untrusted host.
+
+ The efi_secret module exposes the secrets to userspace. Each
+ secret appears as a file under <securityfs>/secrets/coco,
+ where the filename is the GUID of the entry in the secrets
+ table. This module is loaded automatically by the EFI driver
+ if the EFI secret area is populated.
+
+ Two operations are supported for the files: read and unlink.
+ Reading the file returns the content of secret entry.
+ Unlinking the file overwrites the secret data with zeroes and
+ removes the entry from the filesystem. A secret cannot be read
+ after it has been unlinked.
+
+ For example, listing the available secrets::
+
+ # modprobe efi_secret
+ # ls -l /sys/kernel/security/secrets/coco
+ -r--r----- 1 root root 0 Jun 28 11:54 736870e5-84f0-4973-92ec-06879ce3da0b
+ -r--r----- 1 root root 0 Jun 28 11:54 83c83f7f-1356-4975-8b7e-d3a0b54312c6
+ -r--r----- 1 root root 0 Jun 28 11:54 9553f55d-3da2-43ee-ab5d-ff17f78864d2
+ -r--r----- 1 root root 0 Jun 28 11:54 e6f5a162-d67f-4750-a67c-5d065f2a9910
+
+ Reading the secret data by reading a file::
+
+ # cat /sys/kernel/security/secrets/coco/e6f5a162-d67f-4750-a67c-5d065f2a9910
+ the-content-of-the-secret-data
+
+ Wiping a secret by unlinking a file::
+
+ # rm /sys/kernel/security/secrets/coco/e6f5a162-d67f-4750-a67c-5d065f2a9910
+ # ls -l /sys/kernel/security/secrets/coco
+ -r--r----- 1 root root 0 Jun 28 11:54 736870e5-84f0-4973-92ec-06879ce3da0b
+ -r--r----- 1 root root 0 Jun 28 11:54 83c83f7f-1356-4975-8b7e-d3a0b54312c6
+ -r--r----- 1 root root 0 Jun 28 11:54 9553f55d-3da2-43ee-ab5d-ff17f78864d2
+
+ Note: The binary format of the secrets table injected by the
+ Guest Owner is described in
+ drivers/virt/coco/efi_secret/efi_secret.c under "Structure of
+ the EFI secret area".
diff --git a/Documentation/ABI/testing/sysfs-fs-erofs b/Documentation/ABI/testing/sysfs-fs-erofs
index 05482374a741..bb4681a01811 100644
--- a/Documentation/ABI/testing/sysfs-fs-erofs
+++ b/Documentation/ABI/testing/sysfs-fs-erofs
@@ -9,8 +9,9 @@ Description: Shows all enabled kernel features.
What: /sys/fs/erofs/<disk>/sync_decompress
Date: November 2021
Contact: "Huang Jianan" <huangjianan@oppo.com>
-Description: Control strategy of sync decompression
+Description: Control strategy of sync decompression:
+
- 0 (default, auto): enable for readpage, and enable for
- readahead on atomic contexts only,
+ readahead on atomic contexts only.
- 1 (force on): enable for readpage and readahead.
- 2 (force off): disable for all situations.
diff --git a/Documentation/RCU/Design/Data-Structures/Data-Structures.rst b/Documentation/RCU/Design/Data-Structures/Data-Structures.rst
index f4efd6897b09..b34990c7c377 100644
--- a/Documentation/RCU/Design/Data-Structures/Data-Structures.rst
+++ b/Documentation/RCU/Design/Data-Structures/Data-Structures.rst
@@ -973,7 +973,7 @@ The ``->dynticks`` field counts the corresponding CPU's transitions to
and from either dyntick-idle or user mode, so that this counter has an
even value when the CPU is in dyntick-idle mode or user mode and an odd
value otherwise. The transitions to/from user mode need to be counted
-for user mode adaptive-ticks support (see timers/NO_HZ.txt).
+for user mode adaptive-ticks support (see Documentation/timers/no_hz.rst).
The ``->rcu_need_heavy_qs`` field is used to record the fact that the
RCU core code would really like to see a quiescent state from the
diff --git a/Documentation/RCU/Design/Expedited-Grace-Periods/Expedited-Grace-Periods.rst b/Documentation/RCU/Design/Expedited-Grace-Periods/Expedited-Grace-Periods.rst
index 6f89cf1e567d..c9c957c85bac 100644
--- a/Documentation/RCU/Design/Expedited-Grace-Periods/Expedited-Grace-Periods.rst
+++ b/Documentation/RCU/Design/Expedited-Grace-Periods/Expedited-Grace-Periods.rst
@@ -406,7 +406,7 @@ In earlier implementations, the task requesting the expedited grace
period also drove it to completion. This straightforward approach had
the disadvantage of needing to account for POSIX signals sent to user
tasks, so more recent implemementations use the Linux kernel's
-`workqueues <https://www.kernel.org/doc/Documentation/core-api/workqueue.rst>`__.
+workqueues (see Documentation/core-api/workqueue.rst).
The requesting task still does counter snapshotting and funnel-lock
processing, but the task reaching the top of the funnel lock does a
diff --git a/Documentation/RCU/Design/Requirements/Requirements.rst b/Documentation/RCU/Design/Requirements/Requirements.rst
index 45278e2974c0..04ed8bf27a0e 100644
--- a/Documentation/RCU/Design/Requirements/Requirements.rst
+++ b/Documentation/RCU/Design/Requirements/Requirements.rst
@@ -370,8 +370,8 @@ pointer fetched by rcu_dereference() may not be used outside of the
outermost RCU read-side critical section containing that
rcu_dereference(), unless protection of the corresponding data
element has been passed from RCU to some other synchronization
-mechanism, most commonly locking or `reference
-counting <https://www.kernel.org/doc/Documentation/RCU/rcuref.txt>`__.
+mechanism, most commonly locking or reference counting
+(see ../../rcuref.rst).
.. |high-quality implementation of C11 memory_order_consume [PDF]| replace:: high-quality implementation of C11 ``memory_order_consume`` [PDF]
.. _high-quality implementation of C11 memory_order_consume [PDF]: http://www.rdrop.com/users/paulmck/RCU/consume.2015.07.13a.pdf
@@ -2654,6 +2654,38 @@ synchronize_rcu(), and rcu_barrier(), respectively. In
three APIs are therefore implemented by separate functions that check
for voluntary context switches.
+Tasks Rude RCU
+~~~~~~~~~~~~~~
+
+Some forms of tracing need to wait for all preemption-disabled regions
+of code running on any online CPU, including those executed when RCU is
+not watching. This means that synchronize_rcu() is insufficient, and
+Tasks Rude RCU must be used instead. This flavor of RCU does its work by
+forcing a workqueue to be scheduled on each online CPU, hence the "Rude"
+moniker. And this operation is considered to be quite rude by real-time
+workloads that don't want their ``nohz_full`` CPUs receiving IPIs and
+by battery-powered systems that don't want their idle CPUs to be awakened.
+
+The tasks-rude-RCU API is also reader-marking-free and thus quite compact,
+consisting of call_rcu_tasks_rude(), synchronize_rcu_tasks_rude(),
+and rcu_barrier_tasks_rude().
+
+Tasks Trace RCU
+~~~~~~~~~~~~~~~
+
+Some forms of tracing need to sleep in readers, but cannot tolerate
+SRCU's read-side overhead, which includes a full memory barrier in both
+srcu_read_lock() and srcu_read_unlock(). This need is handled by a
+Tasks Trace RCU that uses scheduler locking and IPIs to synchronize with
+readers. Real-time systems that cannot tolerate IPIs may build their
+kernels with ``CONFIG_TASKS_TRACE_RCU_READ_MB=y``, which avoids the IPIs at
+the expense of adding full memory barriers to the read-side primitives.
+
+The tasks-trace-RCU API is also reasonably compact,
+consisting of rcu_read_lock_trace(), rcu_read_unlock_trace(),
+rcu_read_lock_trace_held(), call_rcu_tasks_trace(),
+synchronize_rcu_tasks_trace(), and rcu_barrier_tasks_trace().
+
Possible Future Changes
-----------------------
diff --git a/Documentation/RCU/arrayRCU.rst b/Documentation/RCU/arrayRCU.rst
index 4051ea3871ef..a5f2ff8fc54c 100644
--- a/Documentation/RCU/arrayRCU.rst
+++ b/Documentation/RCU/arrayRCU.rst
@@ -33,8 +33,8 @@ Situation 1: Hash Tables
Hash tables are often implemented as an array, where each array entry
has a linked-list hash chain. Each hash chain can be protected by RCU
-as described in the listRCU.txt document. This approach also applies
-to other array-of-list situations, such as radix trees.
+as described in listRCU.rst. This approach also applies to other
+array-of-list situations, such as radix trees.
.. _static_arrays:
diff --git a/Documentation/RCU/checklist.rst b/Documentation/RCU/checklist.rst
index f4545b7c9a63..42cc5d891bd2 100644
--- a/Documentation/RCU/checklist.rst
+++ b/Documentation/RCU/checklist.rst
@@ -140,8 +140,7 @@ over a rather long period of time, but improvements are always welcome!
prevents destructive compiler optimizations. However,
with a bit of devious creativity, it is possible to
mishandle the return value from rcu_dereference().
- Please see rcu_dereference.txt in this directory for
- more information.
+ Please see rcu_dereference.rst for more information.
The rcu_dereference() primitive is used by the
various "_rcu()" list-traversal primitives, such
@@ -151,7 +150,7 @@ over a rather long period of time, but improvements are always welcome!
primitives. This is particularly useful in code that
is common to readers and updaters. However, lockdep
will complain if you access rcu_dereference() outside
- of an RCU read-side critical section. See lockdep.txt
+ of an RCU read-side critical section. See lockdep.rst
to learn what to do about this.
Of course, neither rcu_dereference() nor the "_rcu()"
@@ -323,7 +322,7 @@ over a rather long period of time, but improvements are always welcome!
primitives when the update-side lock is held is that doing so
can be quite helpful in reducing code bloat when common code is
shared between readers and updaters. Additional primitives
- are provided for this case, as discussed in lockdep.txt.
+ are provided for this case, as discussed in lockdep.rst.
One exception to this rule is when data is only ever added to
the linked data structure, and is never removed during any
@@ -480,4 +479,4 @@ over a rather long period of time, but improvements are always welcome!
both rcu_barrier() and synchronize_rcu(), if necessary, using
something like workqueues to to execute them concurrently.
- See rcubarrier.txt for more information.
+ See rcubarrier.rst for more information.
diff --git a/Documentation/RCU/rcu.rst b/Documentation/RCU/rcu.rst
index 0e03c6ef3147..3cfe01ba9a49 100644
--- a/Documentation/RCU/rcu.rst
+++ b/Documentation/RCU/rcu.rst
@@ -10,9 +10,8 @@ A "grace period" must elapse between the two parts, and this grace period
must be long enough that any readers accessing the item being deleted have
since dropped their references. For example, an RCU-protected deletion
from a linked list would first remove the item from the list, wait for
-a grace period to elapse, then free the element. See the
-:ref:`Documentation/RCU/listRCU.rst <list_rcu_doc>` for more information on
-using RCU with linked lists.
+a grace period to elapse, then free the element. See listRCU.rst for more
+information on using RCU with linked lists.
Frequently Asked Questions
--------------------------
@@ -50,7 +49,7 @@ Frequently Asked Questions
- If I am running on a uniprocessor kernel, which can only do one
thing at a time, why should I wait for a grace period?
- See :ref:`Documentation/RCU/UP.rst <up_doc>` for more information.
+ See UP.rst for more information.
- How can I see where RCU is currently used in the Linux kernel?
@@ -64,13 +63,13 @@ Frequently Asked Questions
- What guidelines should I follow when writing code that uses RCU?
- See the checklist.txt file in this directory.
+ See checklist.rst.
- Why the name "RCU"?
"RCU" stands for "read-copy update".
- :ref:`Documentation/RCU/listRCU.rst <list_rcu_doc>` has more information on where
- this name came from, search for "read-copy update" to find it.
+ listRCU.rst has more information on where this name came from, search
+ for "read-copy update" to find it.
- I hear that RCU is patented? What is with that?
diff --git a/Documentation/RCU/rculist_nulls.rst b/Documentation/RCU/rculist_nulls.rst
index a9fc774bc400..ca4692775ad4 100644
--- a/Documentation/RCU/rculist_nulls.rst
+++ b/Documentation/RCU/rculist_nulls.rst
@@ -8,7 +8,7 @@ This section describes how to use hlist_nulls to
protect read-mostly linked lists and
objects using SLAB_TYPESAFE_BY_RCU allocations.
-Please read the basics in Documentation/RCU/listRCU.rst
+Please read the basics in listRCU.rst.
Using 'nulls'
=============
diff --git a/Documentation/RCU/stallwarn.rst b/Documentation/RCU/stallwarn.rst
index 78404625bad2..794837eb519b 100644
--- a/Documentation/RCU/stallwarn.rst
+++ b/Documentation/RCU/stallwarn.rst
@@ -162,6 +162,26 @@ CONFIG_RCU_CPU_STALL_TIMEOUT
Stall-warning messages may be enabled and disabled completely via
/sys/module/rcupdate/parameters/rcu_cpu_stall_suppress.
+CONFIG_RCU_EXP_CPU_STALL_TIMEOUT
+--------------------------------
+
+ Same as the CONFIG_RCU_CPU_STALL_TIMEOUT parameter but only for
+ the expedited grace period. This parameter defines the period
+ of time that RCU will wait from the beginning of an expedited
+ grace period until it issues an RCU CPU stall warning. This time
+ period is normally 20 milliseconds on Android devices. A zero
+ value causes the CONFIG_RCU_CPU_STALL_TIMEOUT value to be used,
+ after conversion to milliseconds.
+
+ This configuration parameter may be changed at runtime via the
+ /sys/module/rcupdate/parameters/rcu_exp_cpu_stall_timeout, however
+ this parameter is checked only at the beginning of a cycle. If you
+ are in a current stall cycle, setting it to a new value will change
+ the timeout for the -next- stall.
+
+ Stall-warning messages may be enabled and disabled completely via
+ /sys/module/rcupdate/parameters/rcu_cpu_stall_suppress.
+
RCU_STALL_DELAY_DELTA
---------------------
diff --git a/Documentation/RCU/whatisRCU.rst b/Documentation/RCU/whatisRCU.rst
index c34d2212eaca..77ea260efd12 100644
--- a/Documentation/RCU/whatisRCU.rst
+++ b/Documentation/RCU/whatisRCU.rst
@@ -224,7 +224,7 @@ synchronize_rcu()
be delayed. This property results in system resilience in face
of denial-of-service attacks. Code using call_rcu() should limit
update rate in order to gain this same sort of resilience. See
- checklist.txt for some approaches to limiting the update rate.
+ checklist.rst for some approaches to limiting the update rate.
rcu_assign_pointer()
^^^^^^^^^^^^^^^^^^^^
@@ -318,7 +318,7 @@ rcu_dereference()
must prohibit. The rcu_dereference_protected() variant takes
a lockdep expression to indicate which locks must be acquired
by the caller. If the indicated protection is not provided,
- a lockdep splat is emitted. See Documentation/RCU/Design/Requirements/Requirements.rst
+ a lockdep splat is emitted. See Design/Requirements/Requirements.rst
and the API's code comments for more details and example usage.
.. [2] If the list_for_each_entry_rcu() instance might be used by
@@ -399,8 +399,7 @@ for specialized uses, but are relatively uncommon.
This section shows a simple use of the core RCU API to protect a
global pointer to a dynamically allocated structure. More-typical
-uses of RCU may be found in :ref:`listRCU.rst <list_rcu_doc>`,
-:ref:`arrayRCU.rst <array_rcu_doc>`, and :ref:`NMI-RCU.rst <NMI_rcu_doc>`.
+uses of RCU may be found in listRCU.rst, arrayRCU.rst, and NMI-RCU.rst.
::
struct foo {
@@ -482,10 +481,9 @@ So, to sum up:
RCU read-side critical sections that might be referencing that
data item.
-See checklist.txt for additional rules to follow when using RCU.
-And again, more-typical uses of RCU may be found in :ref:`listRCU.rst
-<list_rcu_doc>`, :ref:`arrayRCU.rst <array_rcu_doc>`, and :ref:`NMI-RCU.rst
-<NMI_rcu_doc>`.
+See checklist.rst for additional rules to follow when using RCU.
+And again, more-typical uses of RCU may be found in listRCU.rst,
+arrayRCU.rst, and NMI-RCU.rst.
.. _4_whatisRCU:
@@ -579,7 +577,7 @@ to avoid having to write your own callback::
kfree_rcu(old_fp, rcu);
-Again, see checklist.txt for additional rules governing the use of RCU.
+Again, see checklist.rst for additional rules governing the use of RCU.
.. _5_whatisRCU:
@@ -663,7 +661,7 @@ been able to write-acquire the lock otherwise. The smp_mb__after_spinlock()
promotes synchronize_rcu() to a full memory barrier in compliance with
the "Memory-Barrier Guarantees" listed in:
- Documentation/RCU/Design/Requirements/Requirements.rst
+ Design/Requirements/Requirements.rst
It is possible to nest rcu_read_lock(), since reader-writer locks may
be recursively acquired. Note also that rcu_read_lock() is immune
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 3f1cc5e317ed..297c852c53f8 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -2622,14 +2622,14 @@
when set.
Format: <int>
- libata.force= [LIBATA] Force configurations. The format is comma-
- separated list of "[ID:]VAL" where ID is
- PORT[.DEVICE]. PORT and DEVICE are decimal numbers
- matching port, link or device. Basically, it matches
- the ATA ID string printed on console by libata. If
- the whole ID part is omitted, the last PORT and DEVICE
- values are used. If ID hasn't been specified yet, the
- configuration applies to all ports, links and devices.
+ libata.force= [LIBATA] Force configurations. The format is a comma-
+ separated list of "[ID:]VAL" where ID is PORT[.DEVICE].
+ PORT and DEVICE are decimal numbers matching port, link
+ or device. Basically, it matches the ATA ID string
+ printed on console by libata. If the whole ID part is
+ omitted, the last PORT and DEVICE values are used. If
+ ID hasn't been specified yet, the configuration applies
+ to all ports, links and devices.
If only DEVICE is omitted, the parameter applies to
the port and all links and devices behind it. DEVICE
@@ -2639,7 +2639,7 @@
host link and device attached to it.
The VAL specifies the configuration to force. As long
- as there's no ambiguity shortcut notation is allowed.
+ as there is no ambiguity, shortcut notation is allowed.
For example, both 1.5 and 1.5G would work for 1.5Gbps.
The following configurations can be forced.
@@ -2652,19 +2652,58 @@
udma[/][16,25,33,44,66,100,133] notation is also
allowed.
+ * nohrst, nosrst, norst: suppress hard, soft and both
+ resets.
+
+ * rstonce: only attempt one reset during hot-unplug
+ link recovery.
+
+ * [no]dbdelay: Enable or disable the extra 200ms delay
+ before debouncing a link PHY and device presence
+ detection.
+
* [no]ncq: Turn on or off NCQ.
- * [no]ncqtrim: Turn off queued DSM TRIM.
+ * [no]ncqtrim: Enable or disable queued DSM TRIM.
+
+ * [no]ncqati: Enable or disable NCQ trim on ATI chipset.
+
+ * [no]trim: Enable or disable (unqueued) TRIM.
+
+ * trim_zero: Indicate that TRIM command zeroes data.
+
+ * max_trim_128m: Set 128M maximum trim size limit.
+
+ * [no]dma: Turn on or off DMA transfers.
+
+ * atapi_dmadir: Enable ATAPI DMADIR bridge support.
+
+ * atapi_mod16_dma: Enable the use of ATAPI DMA for
+ commands that are not a multiple of 16 bytes.
- * nohrst, nosrst, norst: suppress hard, soft
- and both resets.
+ * [no]dmalog: Enable or disable the use of the
+ READ LOG DMA EXT command to access logs.
- * rstonce: only attempt one reset during
- hot-unplug link recovery
+ * [no]iddevlog: Enable or disable access to the
+ identify device data log.
- * dump_id: dump IDENTIFY data.
+ * [no]logdir: Enable or disable access to the general
+ purpose log directory.
- * atapi_dmadir: Enable ATAPI DMADIR bridge support
+ * max_sec_128: Set transfer size limit to 128 sectors.
+
+ * max_sec_1024: Set or clear transfer size limit to
+ 1024 sectors.
+
+ * max_sec_lba48: Set or clear transfer size limit to
+ 65535 sectors.
+
+ * [no]lpm: Enable or disable link power management.
+
+ * [no]setxfer: Indicate if transfer speed mode setting
+ should be skipped.
+
+ * dump_id: Dump IDENTIFY data.
* disable: Disable this device.
@@ -4893,6 +4932,18 @@
rcupdate.rcu_cpu_stall_timeout= [KNL]
Set timeout for RCU CPU stall warning messages.
+ The value is in seconds and the maximum allowed
+ value is 300 seconds.
+
+ rcupdate.rcu_exp_cpu_stall_timeout= [KNL]
+ Set timeout for expedited RCU CPU stall warning
+ messages. The value is in milliseconds
+ and the maximum allowed value is 21000
+ milliseconds. Please note that this value is
+ adjusted to an arch timer tick resolution.
+ Setting this to zero causes the value from
+ rcupdate.rcu_cpu_stall_timeout to be used (after
+ conversion from seconds to milliseconds).
rcupdate.rcu_expedited= [KNL]
Use expedited grace-period primitives, for
@@ -4955,10 +5006,34 @@
number avoids disturbing real-time workloads,
but lengthens grace periods.
+ rcupdate.rcu_task_stall_info= [KNL]
+ Set initial timeout in jiffies for RCU task stall
+ informational messages, which give some indication
+ of the problem for those not patient enough to
+ wait for ten minutes. Informational messages are
+ only printed prior to the stall-warning message
+ for a given grace period. Disable with a value
+ less than or equal to zero. Defaults to ten
+ seconds. A change in value does not take effect
+ until the beginning of the next grace period.
+
+ rcupdate.rcu_task_stall_info_mult= [KNL]
+ Multiplier for time interval between successive
+ RCU task stall informational messages for a given
+ RCU tasks grace period. This value is clamped
+ to one through ten, inclusive. It defaults to
+ the value three, so that the first informational
+ message is printed 10 seconds into the grace
+ period, the second at 40 seconds, the third at
+ 160 seconds, and then the stall warning at 600
+ seconds would prevent a fourth at 640 seconds.
+
rcupdate.rcu_task_stall_timeout= [KNL]
- Set timeout in jiffies for RCU task stall warning
- messages. Disable with a value less than or equal
- to zero.
+ Set timeout in jiffies for RCU task stall
+ warning messages. Disable with a value less
+ than or equal to zero. Defaults to ten minutes.
+ A change in value does not take effect until
+ the beginning of the next grace period.
rcupdate.rcu_self_test= [KNL]
Run the RCU early boot self tests
@@ -5377,6 +5452,17 @@
smart2= [HW]
Format: <io1>[,<io2>[,...,<io8>]]
+ smp.csd_lock_timeout= [KNL]
+ Specify the period of time in milliseconds
+ that smp_call_function() and friends will wait
+ for a CPU to release the CSD lock. This is
+ useful when diagnosing bugs involving CPUs
+ disabling interrupts for extended periods
+ of time. Defaults to 5,000 milliseconds, and
+ setting a value of zero disables this feature.
+ This feature may be more efficiently disabled
+ using the csdlock_debug- kernel parameter.
+
smsc-ircc2.nopnp [HW] Don't use PNP to discover SMC devices
smsc-ircc2.ircc_cfg= [HW] Device configuration I/O port
smsc-ircc2.ircc_sir= [HW] SIR base I/O port
@@ -5608,6 +5694,30 @@
off: Disable mitigation and remove
performance impact to RDRAND and RDSEED
+ srcutree.big_cpu_lim [KNL]
+ Specifies the number of CPUs constituting a
+ large system, such that srcu_struct structures
+ should immediately allocate an srcu_node array.
+ This kernel-boot parameter defaults to 128,
+ but takes effect only when the low-order four
+ bits of srcutree.convert_to_big is equal to 3
+ (decide at boot).
+
+ srcutree.convert_to_big [KNL]
+ Specifies under what conditions an SRCU tree
+ srcu_struct structure will be converted to big
+ form, that is, with an rcu_node tree:
+
+ 0: Never.
+ 1: At init_srcu_struct() time.
+ 2: When rcutorture decides to.
+ 3: Decide at boot time (default).
+ 0x1X: Above plus if high contention.
+
+ Either way, the srcu_node tree will be sized based
+ on the actual runtime number of CPUs (nr_cpu_ids)
+ instead of the compile-time CONFIG_NR_CPUS.
+
srcutree.counter_wrap_check [KNL]
Specifies how frequently to check for
grace-period sequence counter wrap for the
@@ -5625,6 +5735,14 @@
expediting. Set to zero to disable automatic
expediting.
+ srcutree.small_contention_lim [KNL]
+ Specifies the number of update-side contention
+ events per jiffy will be tolerated before
+ initiating a conversion of an srcu_struct
+ structure to big form. Note that the value of
+ srcutree.convert_to_big must have the 0x10 bit
+ set for contention-based conversions to occur.
+
ssbd= [ARM64,HW]
Speculative Store Bypass Disable control
diff --git a/Documentation/arm64/memory-tagging-extension.rst b/Documentation/arm64/memory-tagging-extension.rst
index dd27f78d7608..dbae47bba25e 100644
--- a/Documentation/arm64/memory-tagging-extension.rst
+++ b/Documentation/arm64/memory-tagging-extension.rst
@@ -228,10 +228,10 @@ Core dump support
-----------------
The allocation tags for user memory mapped with ``PROT_MTE`` are dumped
-in the core file as additional ``PT_ARM_MEMTAG_MTE`` segments. The
+in the core file as additional ``PT_AARCH64_MEMTAG_MTE`` segments. The
program header for such segment is defined as:
-:``p_type``: ``PT_ARM_MEMTAG_MTE``
+:``p_type``: ``PT_AARCH64_MEMTAG_MTE``
:``p_flags``: 0
:``p_offset``: segment file offset
:``p_vaddr``: segment virtual address, same as the corresponding
diff --git a/Documentation/arm64/silicon-errata.rst b/Documentation/arm64/silicon-errata.rst
index 466cb9e89047..d27db84d585e 100644
--- a/Documentation/arm64/silicon-errata.rst
+++ b/Documentation/arm64/silicon-errata.rst
@@ -189,6 +189,9 @@ stable kernels.
+----------------+-----------------+-----------------+-----------------------------+
| Qualcomm Tech. | Kryo4xx Silver | N/A | ARM64_ERRATUM_1024718 |
+----------------+-----------------+-----------------+-----------------------------+
+| Qualcomm Tech. | Kryo4xx Gold | N/A | ARM64_ERRATUM_1286807 |
++----------------+-----------------+-----------------+-----------------------------+
+
+----------------+-----------------+-----------------+-----------------------------+
| Fujitsu | A64FX | E#010001 | FUJITSU_ERRATUM_010001 |
+----------------+-----------------+-----------------+-----------------------------+
diff --git a/Documentation/cdrom/cdrom-standard.rst b/Documentation/cdrom/cdrom-standard.rst
index 52ea7b6b2fe8..7964fe134277 100644
--- a/Documentation/cdrom/cdrom-standard.rst
+++ b/Documentation/cdrom/cdrom-standard.rst
@@ -218,7 +218,6 @@ current *struct* is::
int (*tray_move)(struct cdrom_device_info *, int);
int (*lock_door)(struct cdrom_device_info *, int);
int (*select_speed)(struct cdrom_device_info *, int);
- int (*select_disc)(struct cdrom_device_info *, int);
int (*get_last_session) (struct cdrom_device_info *,
struct cdrom_multisession *);
int (*get_mcn)(struct cdrom_device_info *, struct cdrom_mcn *);
@@ -421,15 +420,6 @@ return value indicates an error.
::
- int select_disc(struct cdrom_device_info *cdi, int number)
-
-If the drive can store multiple discs (a juke-box) this function
-will perform disc selection. It should return the number of the
-selected disc on success, a negative value on error. Currently, only
-the ide-cd driver supports this functionality.
-
-::
-
int get_last_session(struct cdrom_device_info *cdi,
struct cdrom_multisession *ms_info)
diff --git a/Documentation/devicetree/bindings/ata/renesas,rcar-sata.yaml b/Documentation/devicetree/bindings/ata/renesas,rcar-sata.yaml
index c060c7914cae..c4e4a9eab658 100644
--- a/Documentation/devicetree/bindings/ata/renesas,rcar-sata.yaml
+++ b/Documentation/devicetree/bindings/ata/renesas,rcar-sata.yaml
@@ -26,6 +26,7 @@ properties:
- items:
- enum:
- renesas,sata-r8a774b1 # RZ/G2N
+ - renesas,sata-r8a774e1 # RZ/G2H
- renesas,sata-r8a7795 # R-Car H3
- renesas,sata-r8a77965 # R-Car M3-N
- const: renesas,rcar-gen3-sata # generic R-Car Gen3 or RZ/G2
diff --git a/Documentation/devicetree/bindings/clock/imx8m-clock.yaml b/Documentation/devicetree/bindings/clock/imx8m-clock.yaml
index 625f573a7b90..458c7645ee68 100644
--- a/Documentation/devicetree/bindings/clock/imx8m-clock.yaml
+++ b/Documentation/devicetree/bindings/clock/imx8m-clock.yaml
@@ -55,8 +55,6 @@ allOf:
then:
properties:
clocks:
- minItems: 7
- maxItems: 7
items:
- description: 32k osc
- description: 25m osc
@@ -66,8 +64,6 @@ allOf:
- description: ext3 clock input
- description: ext4 clock input
clock-names:
- minItems: 7
- maxItems: 7
items:
- const: ckil
- const: osc_25m
diff --git a/Documentation/devicetree/bindings/clock/microchip,mpfs.yaml b/Documentation/devicetree/bindings/clock/microchip,mpfs.yaml
index 0c15afa2214c..016a4f378b9b 100644
--- a/Documentation/devicetree/bindings/clock/microchip,mpfs.yaml
+++ b/Documentation/devicetree/bindings/clock/microchip,mpfs.yaml
@@ -22,7 +22,16 @@ properties:
const: microchip,mpfs-clkcfg
reg:
- maxItems: 1
+ items:
+ - description: |
+ clock config registers:
+ These registers contain enable, reset & divider tables for the, cpu,
+ axi, ahb and rtc/mtimer reference clocks as well as enable and reset
+ for the peripheral clocks.
+ - description: |
+ mss pll dri registers:
+ Block of registers responsible for dynamic reconfiguration of the mss
+ pll
clocks:
maxItems: 1
@@ -51,7 +60,7 @@ examples:
#size-cells = <2>;
clkcfg: clock-controller@20002000 {
compatible = "microchip,mpfs-clkcfg";
- reg = <0x0 0x20002000 0x0 0x1000>;
+ reg = <0x0 0x20002000 0x0 0x1000>, <0x0 0x3E001000 0x0 0x1000>;
clocks = <&ref>;
#clock-cells = <1>;
};
diff --git a/Documentation/devicetree/bindings/display/bridge/renesas,lvds.yaml b/Documentation/devicetree/bindings/display/bridge/renesas,lvds.yaml
index a51baf8a4c76..bb9dbfb9beaf 100644
--- a/Documentation/devicetree/bindings/display/bridge/renesas,lvds.yaml
+++ b/Documentation/devicetree/bindings/display/bridge/renesas,lvds.yaml
@@ -95,7 +95,6 @@ then:
properties:
clocks:
minItems: 1
- maxItems: 4
items:
- description: Functional clock
- description: EXTAL input clock
@@ -104,7 +103,6 @@ then:
clock-names:
minItems: 1
- maxItems: 4
items:
- const: fck
# The LVDS encoder can use the EXTAL or DU_DOTCLKINx clocks.
@@ -128,12 +126,10 @@ then:
else:
properties:
clocks:
- maxItems: 1
items:
- description: Functional clock
clock-names:
- maxItems: 1
items:
- const: fck
diff --git a/Documentation/devicetree/bindings/display/renesas,du.yaml b/Documentation/devicetree/bindings/display/renesas,du.yaml
index 56cedcd6d576..b3e588022082 100644
--- a/Documentation/devicetree/bindings/display/renesas,du.yaml
+++ b/Documentation/devicetree/bindings/display/renesas,du.yaml
@@ -109,7 +109,6 @@ allOf:
properties:
clocks:
minItems: 1
- maxItems: 3
items:
- description: Functional clock
- description: DU_DOTCLKIN0 input clock
@@ -117,7 +116,6 @@ allOf:
clock-names:
minItems: 1
- maxItems: 3
items:
- const: du.0
- pattern: '^dclkin\.[01]$'
@@ -159,7 +157,6 @@ allOf:
properties:
clocks:
minItems: 2
- maxItems: 4
items:
- description: Functional clock for DU0
- description: Functional clock for DU1
@@ -168,7 +165,6 @@ allOf:
clock-names:
minItems: 2
- maxItems: 4
items:
- const: du.0
- const: du.1
@@ -216,7 +212,6 @@ allOf:
properties:
clocks:
minItems: 2
- maxItems: 4
items:
- description: Functional clock for DU0
- description: Functional clock for DU1
@@ -225,7 +220,6 @@ allOf:
clock-names:
minItems: 2
- maxItems: 4
items:
- const: du.0
- const: du.1
@@ -271,7 +265,6 @@ allOf:
properties:
clocks:
minItems: 2
- maxItems: 4
items:
- description: Functional clock for DU0
- description: Functional clock for DU1
@@ -280,7 +273,6 @@ allOf:
clock-names:
minItems: 2
- maxItems: 4
items:
- const: du.0
- const: du.1
@@ -327,7 +319,6 @@ allOf:
properties:
clocks:
minItems: 2
- maxItems: 4
items:
- description: Functional clock for DU0
- description: Functional clock for DU1
@@ -336,7 +327,6 @@ allOf:
clock-names:
minItems: 2
- maxItems: 4
items:
- const: du.0
- const: du.1
@@ -386,7 +376,6 @@ allOf:
properties:
clocks:
minItems: 3
- maxItems: 6
items:
- description: Functional clock for DU0
- description: Functional clock for DU1
@@ -397,7 +386,6 @@ allOf:
clock-names:
minItems: 3
- maxItems: 6
items:
- const: du.0
- const: du.1
@@ -448,7 +436,6 @@ allOf:
properties:
clocks:
minItems: 4
- maxItems: 8
items:
- description: Functional clock for DU0
- description: Functional clock for DU1
@@ -461,7 +448,6 @@ allOf:
clock-names:
minItems: 4
- maxItems: 8
items:
- const: du.0
- const: du.1
@@ -525,7 +511,6 @@ allOf:
properties:
clocks:
minItems: 3
- maxItems: 6
items:
- description: Functional clock for DU0
- description: Functional clock for DU1
@@ -536,7 +521,6 @@ allOf:
clock-names:
minItems: 3
- maxItems: 6
items:
- const: du.0
- const: du.1
@@ -596,7 +580,6 @@ allOf:
properties:
clocks:
minItems: 3
- maxItems: 6
items:
- description: Functional clock for DU0
- description: Functional clock for DU1
@@ -607,7 +590,6 @@ allOf:
clock-names:
minItems: 3
- maxItems: 6
items:
- const: du.0
- const: du.1
@@ -666,14 +648,12 @@ allOf:
properties:
clocks:
minItems: 1
- maxItems: 2
items:
- description: Functional clock for DU0
- description: DU_DOTCLKIN0 input clock
clock-names:
minItems: 1
- maxItems: 2
items:
- const: du.0
- const: dclkin.0
@@ -723,7 +703,6 @@ allOf:
properties:
clocks:
minItems: 2
- maxItems: 4
items:
- description: Functional clock for DU0
- description: Functional clock for DU1
@@ -732,7 +711,6 @@ allOf:
clock-names:
minItems: 2
- maxItems: 4
items:
- const: du.0
- const: du.1
@@ -791,7 +769,6 @@ allOf:
- description: Functional clock
clock-names:
- maxItems: 1
items:
- const: du.0
diff --git a/Documentation/devicetree/bindings/dma/qcom,gpi.yaml b/Documentation/devicetree/bindings/dma/qcom,gpi.yaml
index e614fe3187bb..d09d79d7406a 100644
--- a/Documentation/devicetree/bindings/dma/qcom,gpi.yaml
+++ b/Documentation/devicetree/bindings/dma/qcom,gpi.yaml
@@ -29,6 +29,7 @@ properties:
interrupts:
description:
Interrupt lines for each GPI instance
+ minItems: 1
maxItems: 13
"#dma-cells":
diff --git a/Documentation/devicetree/bindings/hwmon/ti,tmp421.yaml b/Documentation/devicetree/bindings/hwmon/ti,tmp421.yaml
index 36f649938fb7..a6f1fa75a67c 100644
--- a/Documentation/devicetree/bindings/hwmon/ti,tmp421.yaml
+++ b/Documentation/devicetree/bindings/hwmon/ti,tmp421.yaml
@@ -58,10 +58,9 @@ patternProperties:
description: |
The value (two's complement) to be programmed in the channel specific N correction register.
For remote channels only.
- $ref: /schemas/types.yaml#/definitions/uint32
- items:
- minimum: 0
- maximum: 255
+ $ref: /schemas/types.yaml#/definitions/int32
+ minimum: -128
+ maximum: 127
required:
- reg
diff --git a/Documentation/devicetree/bindings/iio/adc/st,stm32-adc.yaml b/Documentation/devicetree/bindings/iio/adc/st,stm32-adc.yaml
index 4d6074518b5c..fa8da42cb1e6 100644
--- a/Documentation/devicetree/bindings/iio/adc/st,stm32-adc.yaml
+++ b/Documentation/devicetree/bindings/iio/adc/st,stm32-adc.yaml
@@ -138,7 +138,6 @@ allOf:
- const: bus
- const: adc
minItems: 1
- maxItems: 2
interrupts:
items:
@@ -170,7 +169,6 @@ allOf:
- const: bus
- const: adc
minItems: 1
- maxItems: 2
interrupts:
items:
diff --git a/Documentation/devicetree/bindings/input/mediatek,mt6779-keypad.yaml b/Documentation/devicetree/bindings/input/mediatek,mt6779-keypad.yaml
index b1770640f94b..03ebd2665d07 100644
--- a/Documentation/devicetree/bindings/input/mediatek,mt6779-keypad.yaml
+++ b/Documentation/devicetree/bindings/input/mediatek,mt6779-keypad.yaml
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
title: Mediatek's Keypad Controller device tree bindings
maintainers:
- - Fengping Yu <fengping.yu@mediatek.com>
+ - Mattijs Korpershoek <mkorpershoek@baylibre.com>
allOf:
- $ref: "/schemas/input/matrix-keymap.yaml#"
diff --git a/Documentation/devicetree/bindings/leds/leds-mt6360.yaml b/Documentation/devicetree/bindings/leds/leds-mt6360.yaml
index b2fe6eb89389..10f95bf1d666 100644
--- a/Documentation/devicetree/bindings/leds/leds-mt6360.yaml
+++ b/Documentation/devicetree/bindings/leds/leds-mt6360.yaml
@@ -43,8 +43,6 @@ patternProperties:
- 4 # LED output FLASH1
- 5 # LED output FLASH2
-unevaluatedProperties: false
-
required:
- compatible
- "#address-cells"
diff --git a/Documentation/devicetree/bindings/mfd/atmel-flexcom.txt b/Documentation/devicetree/bindings/mfd/atmel-flexcom.txt
index 692300117c64..9d837535637b 100644
--- a/Documentation/devicetree/bindings/mfd/atmel-flexcom.txt
+++ b/Documentation/devicetree/bindings/mfd/atmel-flexcom.txt
@@ -54,7 +54,7 @@ flexcom@f8034000 {
clock-names = "spi_clk";
atmel,fifo-size = <32>;
- mtd_dataflash@0 {
+ flash@0 {
compatible = "atmel,at25f512b";
reg = <0>;
spi-max-frequency = <20000000>;
diff --git a/Documentation/devicetree/bindings/mmc/nvidia,tegra20-sdhci.yaml b/Documentation/devicetree/bindings/mmc/nvidia,tegra20-sdhci.yaml
index f3f4d5b02744..fe0270207622 100644
--- a/Documentation/devicetree/bindings/mmc/nvidia,tegra20-sdhci.yaml
+++ b/Documentation/devicetree/bindings/mmc/nvidia,tegra20-sdhci.yaml
@@ -202,22 +202,17 @@ allOf:
clocks:
items:
- description: module clock
- minItems: 1
- maxItems: 1
else:
properties:
clocks:
items:
- description: module clock
- description: timeout clock
- minItems: 2
- maxItems: 2
+
clock-names:
items:
- const: sdhci
- const: tmclk
- minItems: 2
- maxItems: 2
required:
- clock-names
diff --git a/Documentation/devicetree/bindings/mtd/gpmi-nand.yaml b/Documentation/devicetree/bindings/mtd/gpmi-nand.yaml
index 9d764e654e1d..849aeae319a9 100644
--- a/Documentation/devicetree/bindings/mtd/gpmi-nand.yaml
+++ b/Documentation/devicetree/bindings/mtd/gpmi-nand.yaml
@@ -147,8 +147,6 @@ allOf:
- description: SoC gpmi io clock
- description: SoC gpmi bch apb clock
clock-names:
- minItems: 2
- maxItems: 2
items:
- const: gpmi_io
- const: gpmi_bch_apb
diff --git a/Documentation/devicetree/bindings/net/can/bosch,c_can.yaml b/Documentation/devicetree/bindings/net/can/bosch,c_can.yaml
index 8bad328b184d..51aa89ac7e85 100644
--- a/Documentation/devicetree/bindings/net/can/bosch,c_can.yaml
+++ b/Documentation/devicetree/bindings/net/can/bosch,c_can.yaml
@@ -80,8 +80,6 @@ if:
then:
properties:
interrupts:
- minItems: 4
- maxItems: 4
items:
- description: Error and status IRQ
- description: Message object IRQ
@@ -91,7 +89,6 @@ then:
else:
properties:
interrupts:
- maxItems: 1
items:
- description: Error and status IRQ
diff --git a/Documentation/devicetree/bindings/net/dsa/realtek.yaml b/Documentation/devicetree/bindings/net/dsa/realtek.yaml
index 8756060895a8..99ee4b5b9346 100644
--- a/Documentation/devicetree/bindings/net/dsa/realtek.yaml
+++ b/Documentation/devicetree/bindings/net/dsa/realtek.yaml
@@ -27,32 +27,25 @@ description:
The realtek-mdio driver is an MDIO driver and it must be inserted inside
an MDIO node.
+ The compatible string is only used to identify which (silicon) family the
+ switch belongs to. Roughly speaking, a family is any set of Realtek switches
+ whose chip identification register(s) have a common location and semantics.
+ The different models in a given family can be automatically disambiguated by
+ parsing the chip identification register(s) according to the given family,
+ avoiding the need for a unique compatible string for each model.
+
properties:
compatible:
enum:
- realtek,rtl8365mb
- - realtek,rtl8366
- realtek,rtl8366rb
- - realtek,rtl8366s
- - realtek,rtl8367
- - realtek,rtl8367b
- - realtek,rtl8367rb
- - realtek,rtl8367s
- - realtek,rtl8368s
- - realtek,rtl8369
- - realtek,rtl8370
description: |
- realtek,rtl8365mb: 4+1 ports
- realtek,rtl8366: 5+1 ports
- realtek,rtl8366rb: 5+1 ports
- realtek,rtl8366s: 5+1 ports
- realtek,rtl8367:
- realtek,rtl8367b:
- realtek,rtl8367rb: 5+2 ports
- realtek,rtl8367s: 5+2 ports
- realtek,rtl8368s: 8 ports
- realtek,rtl8369: 8+1 ports
- realtek,rtl8370: 8+2 ports
+ realtek,rtl8365mb:
+ Use with models RTL8363NB, RTL8363NB-VB, RTL8363SC, RTL8363SC-VB,
+ RTL8364NB, RTL8364NB-VB, RTL8365MB, RTL8366SC, RTL8367RB-VB, RTL8367S,
+ RTL8367SB, RTL8370MB, RTL8310SR
+ realtek,rtl8366rb:
+ Use with models RTL8366RB, RTL8366S
mdc-gpios:
description: GPIO line for the MDC clock line.
@@ -335,7 +328,7 @@ examples:
#size-cells = <0>;
switch@29 {
- compatible = "realtek,rtl8367s";
+ compatible = "realtek,rtl8365mb";
reg = <29>;
reset-gpios = <&gpio2 20 GPIO_ACTIVE_LOW>;
diff --git a/Documentation/devicetree/bindings/pci/apple,pcie.yaml b/Documentation/devicetree/bindings/pci/apple,pcie.yaml
index 7f01e15fc81c..daf602ac0d0f 100644
--- a/Documentation/devicetree/bindings/pci/apple,pcie.yaml
+++ b/Documentation/devicetree/bindings/pci/apple,pcie.yaml
@@ -142,7 +142,6 @@ examples:
device_type = "pci";
reg = <0x0 0x0 0x0 0x0 0x0>;
reset-gpios = <&pinctrl_ap 152 0>;
- max-link-speed = <2>;
#address-cells = <3>;
#size-cells = <2>;
@@ -153,7 +152,6 @@ examples:
device_type = "pci";
reg = <0x800 0x0 0x0 0x0 0x0>;
reset-gpios = <&pinctrl_ap 153 0>;
- max-link-speed = <2>;
#address-cells = <3>;
#size-cells = <2>;
@@ -164,7 +162,6 @@ examples:
device_type = "pci";
reg = <0x1000 0x0 0x0 0x0 0x0>;
reset-gpios = <&pinctrl_ap 33 0>;
- max-link-speed = <1>;
#address-cells = <3>;
#size-cells = <2>;
diff --git a/Documentation/devicetree/bindings/phy/brcm,sata-phy.yaml b/Documentation/devicetree/bindings/phy/brcm,sata-phy.yaml
index cb1aa325336f..435b971dfd9b 100644
--- a/Documentation/devicetree/bindings/phy/brcm,sata-phy.yaml
+++ b/Documentation/devicetree/bindings/phy/brcm,sata-phy.yaml
@@ -102,19 +102,17 @@ if:
then:
properties:
reg:
- maxItems: 2
+ minItems: 2
+
reg-names:
- items:
- - const: "phy"
- - const: "phy-ctrl"
+ minItems: 2
else:
properties:
reg:
maxItems: 1
+
reg-names:
maxItems: 1
- items:
- - const: "phy"
required:
- compatible
diff --git a/Documentation/devicetree/bindings/pinctrl/aspeed,ast2600-pinctrl.yaml b/Documentation/devicetree/bindings/pinctrl/aspeed,ast2600-pinctrl.yaml
index 57b68d6c7c70..3666ac5b6518 100644
--- a/Documentation/devicetree/bindings/pinctrl/aspeed,ast2600-pinctrl.yaml
+++ b/Documentation/devicetree/bindings/pinctrl/aspeed,ast2600-pinctrl.yaml
@@ -33,7 +33,7 @@ patternProperties:
$ref: "/schemas/types.yaml#/definitions/string"
enum: [ ADC0, ADC1, ADC10, ADC11, ADC12, ADC13, ADC14, ADC15, ADC2,
ADC3, ADC4, ADC5, ADC6, ADC7, ADC8, ADC9, BMCINT, EMMC, ESPI, ESPIALT,
- FSI1, FSI2, FWSPIABR, FWSPID, FWSPIWP, GPIT0, GPIT1, GPIT2, GPIT3,
+ FSI1, FSI2, FWQSPI, FWSPIABR, FWSPID, FWSPIWP, GPIT0, GPIT1, GPIT2, GPIT3,
GPIT4, GPIT5, GPIT6, GPIT7, GPIU0, GPIU1, GPIU2, GPIU3, GPIU4, GPIU5,
GPIU6, GPIU7, I2C1, I2C10, I2C11, I2C12, I2C13, I2C14, I2C15, I2C16,
I2C2, I2C3, I2C4, I2C5, I2C6, I2C7, I2C8, I2C9, I3C3, I3C4, I3C5,
@@ -58,7 +58,7 @@ patternProperties:
$ref: "/schemas/types.yaml#/definitions/string"
enum: [ ADC0, ADC1, ADC10, ADC11, ADC12, ADC13, ADC14, ADC15, ADC2,
ADC3, ADC4, ADC5, ADC6, ADC7, ADC8, ADC9, BMCINT, EMMCG1, EMMCG4,
- EMMCG8, ESPI, ESPIALT, FSI1, FSI2, FWSPIABR, FWSPID, FWQSPID, FWSPIWP,
+ EMMCG8, ESPI, ESPIALT, FSI1, FSI2, FWQSPI, FWSPIABR, FWSPID, FWSPIWP,
GPIT0, GPIT1, GPIT2, GPIT3, GPIT4, GPIT5, GPIT6, GPIT7, GPIU0, GPIU1,
GPIU2, GPIU3, GPIU4, GPIU5, GPIU6, GPIU7, HVI3C3, HVI3C4, I2C1, I2C10,
I2C11, I2C12, I2C13, I2C14, I2C15, I2C16, I2C2, I2C3, I2C4, I2C5,
diff --git a/Documentation/devicetree/bindings/pinctrl/pincfg-node.yaml b/Documentation/devicetree/bindings/pinctrl/pincfg-node.yaml
index 4b22a9e3a447..f5a121311f61 100644
--- a/Documentation/devicetree/bindings/pinctrl/pincfg-node.yaml
+++ b/Documentation/devicetree/bindings/pinctrl/pincfg-node.yaml
@@ -52,11 +52,19 @@ properties:
hardware supporting it the pull strength in Ohm.
drive-push-pull:
- type: boolean
+ oneOf:
+ - type: boolean
+ - $ref: /schemas/types.yaml#/definitions/uint32
+ enum: [ 0, 1 ]
+ deprecated: true
description: drive actively high and low
drive-open-drain:
- type: boolean
+ oneOf:
+ - type: boolean
+ - $ref: /schemas/types.yaml#/definitions/uint32
+ const: 1 # No known cases of 0
+ deprecated: true
description: drive with open drain
drive-open-source:
diff --git a/Documentation/devicetree/bindings/regulator/richtek,rt5190a-regulator.yaml b/Documentation/devicetree/bindings/regulator/richtek,rt5190a-regulator.yaml
index 28725c5467fc..edb411be0390 100644
--- a/Documentation/devicetree/bindings/regulator/richtek,rt5190a-regulator.yaml
+++ b/Documentation/devicetree/bindings/regulator/richtek,rt5190a-regulator.yaml
@@ -58,7 +58,7 @@ properties:
type: object
$ref: regulator.yaml#
description: |
- regulator description for buck1 and buck4.
+ regulator description for buck1 to buck4, and ldo.
properties:
regulator-allowed-modes:
diff --git a/Documentation/devicetree/bindings/rtc/allwinner,sun6i-a31-rtc.yaml b/Documentation/devicetree/bindings/rtc/allwinner,sun6i-a31-rtc.yaml
index 0b767fec39d8..6b38bd7eb3b4 100644
--- a/Documentation/devicetree/bindings/rtc/allwinner,sun6i-a31-rtc.yaml
+++ b/Documentation/devicetree/bindings/rtc/allwinner,sun6i-a31-rtc.yaml
@@ -71,7 +71,6 @@ allOf:
then:
properties:
clock-output-names:
- minItems: 1
maxItems: 1
- if:
@@ -102,7 +101,6 @@ allOf:
properties:
clock-output-names:
minItems: 3
- maxItems: 3
- if:
properties:
@@ -113,16 +111,12 @@ allOf:
then:
properties:
clocks:
- minItems: 3
- maxItems: 3
items:
- description: Bus clock for register access
- description: 24 MHz oscillator
- description: 32 kHz clock from the CCU
clock-names:
- minItems: 3
- maxItems: 3
items:
- const: bus
- const: hosc
@@ -142,7 +136,6 @@ allOf:
properties:
clocks:
minItems: 3
- maxItems: 4
items:
- description: Bus clock for register access
- description: 24 MHz oscillator
@@ -151,7 +144,6 @@ allOf:
clock-names:
minItems: 3
- maxItems: 4
items:
- const: bus
- const: hosc
@@ -174,14 +166,12 @@ allOf:
then:
properties:
interrupts:
- minItems: 1
maxItems: 1
else:
properties:
interrupts:
minItems: 2
- maxItems: 2
required:
- "#clock-cells"
diff --git a/Documentation/devicetree/bindings/rtc/microchip,mfps-rtc.yaml b/Documentation/devicetree/bindings/rtc/microchip,mfps-rtc.yaml
index a2e984ea3553..500c62becd6b 100644
--- a/Documentation/devicetree/bindings/rtc/microchip,mfps-rtc.yaml
+++ b/Documentation/devicetree/bindings/rtc/microchip,mfps-rtc.yaml
@@ -31,11 +31,19 @@ properties:
to that of the RTC's count register.
clocks:
- maxItems: 1
+ items:
+ - description: |
+ AHB clock
+ - description: |
+ Reference clock: divided by the prescaler to create a time-based
+ strobe (typically 1 Hz) for the calendar counter. By default, the rtc
+ on the PolarFire SoC shares it's reference with MTIMER so this will
+ be a 1 MHz clock.
clock-names:
items:
- const: rtc
+ - const: rtcref
required:
- compatible
@@ -48,11 +56,12 @@ additionalProperties: false
examples:
- |
+ #include "dt-bindings/clock/microchip,mpfs-clock.h"
rtc@20124000 {
compatible = "microchip,mpfs-rtc";
reg = <0x20124000 0x1000>;
- clocks = <&clkcfg 21>;
- clock-names = "rtc";
+ clocks = <&clkcfg CLK_RTC>, <&clkcfg CLK_RTCREF>;
+ clock-names = "rtc", "rtcref";
interrupts = <80>, <81>;
};
...
diff --git a/Documentation/devicetree/bindings/serial/samsung_uart.yaml b/Documentation/devicetree/bindings/serial/samsung_uart.yaml
index d4688e317fc5..901c1e2cea28 100644
--- a/Documentation/devicetree/bindings/serial/samsung_uart.yaml
+++ b/Documentation/devicetree/bindings/serial/samsung_uart.yaml
@@ -100,7 +100,6 @@ allOf:
maxItems: 3
clock-names:
minItems: 2
- maxItems: 3
items:
- const: uart
- pattern: '^clk_uart_baud[0-1]$'
@@ -118,11 +117,8 @@ allOf:
then:
properties:
clocks:
- minItems: 2
maxItems: 2
clock-names:
- minItems: 2
- maxItems: 2
items:
- const: uart
- const: clk_uart_baud0
diff --git a/Documentation/devicetree/bindings/sound/allwinner,sun4i-a10-i2s.yaml b/Documentation/devicetree/bindings/sound/allwinner,sun4i-a10-i2s.yaml
index c21c807b667c..34f6ee9de392 100644
--- a/Documentation/devicetree/bindings/sound/allwinner,sun4i-a10-i2s.yaml
+++ b/Documentation/devicetree/bindings/sound/allwinner,sun4i-a10-i2s.yaml
@@ -89,7 +89,6 @@ allOf:
properties:
dmas:
minItems: 1
- maxItems: 2
items:
- description: RX DMA Channel
- description: TX DMA Channel
diff --git a/Documentation/devicetree/bindings/sound/ti,j721e-cpb-audio.yaml b/Documentation/devicetree/bindings/sound/ti,j721e-cpb-audio.yaml
index 6806f53a4aed..20ea5883b7ff 100644
--- a/Documentation/devicetree/bindings/sound/ti,j721e-cpb-audio.yaml
+++ b/Documentation/devicetree/bindings/sound/ti,j721e-cpb-audio.yaml
@@ -80,7 +80,6 @@ allOf:
then:
properties:
clocks:
- minItems: 6
items:
- description: AUXCLK clock for McASP used by CPB audio
- description: Parent for CPB_McASP auxclk (for 48KHz)
@@ -107,7 +106,6 @@ allOf:
then:
properties:
clocks:
- maxItems: 4
items:
- description: AUXCLK clock for McASP used by CPB audio
- description: Parent for CPB_McASP auxclk (for 48KHz)
diff --git a/Documentation/devicetree/bindings/thermal/rcar-gen3-thermal.yaml b/Documentation/devicetree/bindings/thermal/rcar-gen3-thermal.yaml
index f963204e0b16..1368d90da0e8 100644
--- a/Documentation/devicetree/bindings/thermal/rcar-gen3-thermal.yaml
+++ b/Documentation/devicetree/bindings/thermal/rcar-gen3-thermal.yaml
@@ -67,7 +67,6 @@ then:
properties:
reg:
minItems: 2
- maxItems: 3
items:
- description: TSC1 registers
- description: TSC2 registers
diff --git a/Documentation/devicetree/bindings/ufs/cdns,ufshc.yaml b/Documentation/devicetree/bindings/ufs/cdns,ufshc.yaml
index d227dea368be..fb45f66d6454 100644
--- a/Documentation/devicetree/bindings/ufs/cdns,ufshc.yaml
+++ b/Documentation/devicetree/bindings/ufs/cdns,ufshc.yaml
@@ -43,6 +43,9 @@ properties:
- const: phy_clk
- const: ref_clk
+ power-domains:
+ maxItems: 1
+
reg:
maxItems: 1
diff --git a/Documentation/devicetree/bindings/usb/samsung,exynos-usb2.yaml b/Documentation/devicetree/bindings/usb/samsung,exynos-usb2.yaml
index 340dff8d19c3..9c92defbba01 100644
--- a/Documentation/devicetree/bindings/usb/samsung,exynos-usb2.yaml
+++ b/Documentation/devicetree/bindings/usb/samsung,exynos-usb2.yaml
@@ -62,6 +62,7 @@ required:
- interrupts
- phys
- phy-names
+ - reg
allOf:
- if:
diff --git a/Documentation/driver-api/libata.rst b/Documentation/driver-api/libata.rst
index d477e296bda5..311af516a3fd 100644
--- a/Documentation/driver-api/libata.rst
+++ b/Documentation/driver-api/libata.rst
@@ -424,12 +424,6 @@ How commands are issued
-----------------------
Internal commands
- First, qc is allocated and initialized using :c:func:`ata_qc_new_init`.
- Although :c:func:`ata_qc_new_init` doesn't implement any wait or retry
- mechanism when qc is not available, internal commands are currently
- issued only during initialization and error recovery, so no other
- command is active and allocation is guaranteed to succeed.
-
Once allocated qc's taskfile is initialized for the command to be
executed. qc currently has two mechanisms to notify completion. One
is via ``qc->complete_fn()`` callback and the other is completion
@@ -447,11 +441,6 @@ SCSI commands
translated. No qc is involved in processing a simulated scmd. The
result is computed right away and the scmd is completed.
- For a translated scmd, :c:func:`ata_qc_new_init` is invoked to allocate a
- qc and the scmd is translated into the qc. SCSI midlayer's
- completion notification function pointer is stored into
- ``qc->scsidone``.
-
``qc->complete_fn()`` callback is used for completion notification. ATA
commands use :c:func:`ata_scsi_qc_complete` while ATAPI commands use
:c:func:`atapi_qc_complete`. Both functions end up calling ``qc->scsidone``
diff --git a/Documentation/filesystems/ext4/attributes.rst b/Documentation/filesystems/ext4/attributes.rst
index 54386a010a8d..871d2da7a0a9 100644
--- a/Documentation/filesystems/ext4/attributes.rst
+++ b/Documentation/filesystems/ext4/attributes.rst
@@ -76,7 +76,7 @@ The beginning of an extended attribute block is in
- Checksum of the extended attribute block.
* - 0x14
- \_\_u32
- - h\_reserved[2]
+ - h\_reserved[3]
- Zero.
The checksum is calculated against the FS UUID, the 64-bit block number
diff --git a/Documentation/filesystems/f2fs.rst b/Documentation/filesystems/f2fs.rst
index 4a2426f0485a..ad8dc8c040a2 100644
--- a/Documentation/filesystems/f2fs.rst
+++ b/Documentation/filesystems/f2fs.rst
@@ -235,12 +235,6 @@ offgrpjquota Turn off group journalled quota.
offprjjquota Turn off project journalled quota.
quota Enable plain user disk quota accounting.
noquota Disable all plain disk quota option.
-whint_mode=%s Control which write hints are passed down to block
- layer. This supports "off", "user-based", and
- "fs-based". In "off" mode (default), f2fs does not pass
- down hints. In "user-based" mode, f2fs tries to pass
- down hints given by users. And in "fs-based" mode, f2fs
- passes down hints with its policy.
alloc_mode=%s Adjust block allocation policy, which supports "reuse"
and "default".
fsync_mode=%s Control the policy of fsync. Currently supports "posix",
@@ -751,70 +745,6 @@ In order to identify whether the data in the victim segment are valid or not,
F2FS manages a bitmap. Each bit represents the validity of a block, and the
bitmap is composed of a bit stream covering whole blocks in main area.
-Write-hint Policy
------------------
-
-1) whint_mode=off. F2FS only passes down WRITE_LIFE_NOT_SET.
-
-2) whint_mode=user-based. F2FS tries to pass down hints given by
-users.
-
-===================== ======================== ===================
-User F2FS Block
-===================== ======================== ===================
-N/A META WRITE_LIFE_NOT_SET
-N/A HOT_NODE "
-N/A WARM_NODE "
-N/A COLD_NODE "
-ioctl(COLD) COLD_DATA WRITE_LIFE_EXTREME
-extension list " "
-
--- buffered io
-WRITE_LIFE_EXTREME COLD_DATA WRITE_LIFE_EXTREME
-WRITE_LIFE_SHORT HOT_DATA WRITE_LIFE_SHORT
-WRITE_LIFE_NOT_SET WARM_DATA WRITE_LIFE_NOT_SET
-WRITE_LIFE_NONE " "
-WRITE_LIFE_MEDIUM " "
-WRITE_LIFE_LONG " "
-
--- direct io
-WRITE_LIFE_EXTREME COLD_DATA WRITE_LIFE_EXTREME
-WRITE_LIFE_SHORT HOT_DATA WRITE_LIFE_SHORT
-WRITE_LIFE_NOT_SET WARM_DATA WRITE_LIFE_NOT_SET
-WRITE_LIFE_NONE " WRITE_LIFE_NONE
-WRITE_LIFE_MEDIUM " WRITE_LIFE_MEDIUM
-WRITE_LIFE_LONG " WRITE_LIFE_LONG
-===================== ======================== ===================
-
-3) whint_mode=fs-based. F2FS passes down hints with its policy.
-
-===================== ======================== ===================
-User F2FS Block
-===================== ======================== ===================
-N/A META WRITE_LIFE_MEDIUM;
-N/A HOT_NODE WRITE_LIFE_NOT_SET
-N/A WARM_NODE "
-N/A COLD_NODE WRITE_LIFE_NONE
-ioctl(COLD) COLD_DATA WRITE_LIFE_EXTREME
-extension list " "
-
--- buffered io
-WRITE_LIFE_EXTREME COLD_DATA WRITE_LIFE_EXTREME
-WRITE_LIFE_SHORT HOT_DATA WRITE_LIFE_SHORT
-WRITE_LIFE_NOT_SET WARM_DATA WRITE_LIFE_LONG
-WRITE_LIFE_NONE " "
-WRITE_LIFE_MEDIUM " "
-WRITE_LIFE_LONG " "
-
--- direct io
-WRITE_LIFE_EXTREME COLD_DATA WRITE_LIFE_EXTREME
-WRITE_LIFE_SHORT HOT_DATA WRITE_LIFE_SHORT
-WRITE_LIFE_NOT_SET WARM_DATA WRITE_LIFE_NOT_SET
-WRITE_LIFE_NONE " WRITE_LIFE_NONE
-WRITE_LIFE_MEDIUM " WRITE_LIFE_MEDIUM
-WRITE_LIFE_LONG " WRITE_LIFE_LONG
-===================== ======================== ===================
-
Fallocate(2) Policy
-------------------
diff --git a/Documentation/filesystems/proc.rst b/Documentation/filesystems/proc.rst
index 061744c436d9..6a0dd99786f9 100644
--- a/Documentation/filesystems/proc.rst
+++ b/Documentation/filesystems/proc.rst
@@ -1183,85 +1183,7 @@ Provides counts of softirq handlers serviced since boot time, for each CPU.
HRTIMER: 0 0 0 0
RCU: 1678 1769 2178 2250
-
-1.3 IDE devices in /proc/ide
-----------------------------
-
-The subdirectory /proc/ide contains information about all IDE devices of which
-the kernel is aware. There is one subdirectory for each IDE controller, the
-file drivers and a link for each IDE device, pointing to the device directory
-in the controller specific subtree.
-
-The file 'drivers' contains general information about the drivers used for the
-IDE devices::
-
- > cat /proc/ide/drivers
- ide-cdrom version 4.53
- ide-disk version 1.08
-
-More detailed information can be found in the controller specific
-subdirectories. These are named ide0, ide1 and so on. Each of these
-directories contains the files shown in table 1-6.
-
-
-.. table:: Table 1-6: IDE controller info in /proc/ide/ide?
-
- ======= =======================================
- File Content
- ======= =======================================
- channel IDE channel (0 or 1)
- config Configuration (only for PCI/IDE bridge)
- mate Mate name
- model Type/Chipset of IDE controller
- ======= =======================================
-
-Each device connected to a controller has a separate subdirectory in the
-controllers directory. The files listed in table 1-7 are contained in these
-directories.
-
-
-.. table:: Table 1-7: IDE device information
-
- ================ ==========================================
- File Content
- ================ ==========================================
- cache The cache
- capacity Capacity of the medium (in 512Byte blocks)
- driver driver and version
- geometry physical and logical geometry
- identify device identify block
- media media type
- model device identifier
- settings device setup
- smart_thresholds IDE disk management thresholds
- smart_values IDE disk management values
- ================ ==========================================
-
-The most interesting file is ``settings``. This file contains a nice
-overview of the drive parameters::
-
- # cat /proc/ide/ide0/hda/settings
- name value min max mode
- ---- ----- --- --- ----
- bios_cyl 526 0 65535 rw
- bios_head 255 0 255 rw
- bios_sect 63 0 63 rw
- breada_readahead 4 0 127 rw
- bswap 0 0 1 r
- file_readahead 72 0 2097151 rw
- io_32bit 0 0 3 rw
- keepsettings 0 0 1 rw
- max_kb_per_request 122 1 127 rw
- multcount 0 0 8 rw
- nice1 1 0 1 rw
- nowerr 0 0 1 rw
- pio_mode write-only 0 255 w
- slow 0 0 1 rw
- unmaskirq 0 0 1 rw
- using_dma 0 0 1 rw
-
-
-1.4 Networking info in /proc/net
+1.3 Networking info in /proc/net
--------------------------------
The subdirectory /proc/net follows the usual pattern. Table 1-8 shows the
@@ -1340,7 +1262,7 @@ It will contain information that is specific to that bond, such as the
current slaves of the bond, the link status of the slaves, and how
many times the slaves link has failed.
-1.5 SCSI info
+1.4 SCSI info
-------------
If you have a SCSI host adapter in your system, you'll find a subdirectory
@@ -1403,7 +1325,7 @@ AHA-2940 SCSI adapter::
Total transfers 0 (0 reads and 0 writes)
-1.6 Parallel port info in /proc/parport
+1.5 Parallel port info in /proc/parport
---------------------------------------
The directory /proc/parport contains information about the parallel ports of
@@ -1428,7 +1350,7 @@ These directories contain the four files shown in Table 1-10.
number or none).
========= ====================================================================
-1.7 TTY info in /proc/tty
+1.6 TTY info in /proc/tty
-------------------------
Information about the available and actually used tty's can be found in the
@@ -1463,7 +1385,7 @@ To see which tty's are currently in use, you can simply look into the file
unknown /dev/tty 4 1-63 console
-1.8 Miscellaneous kernel statistics in /proc/stat
+1.7 Miscellaneous kernel statistics in /proc/stat
-------------------------------------------------
Various pieces of information about kernel activity are available in the
@@ -1536,7 +1458,7 @@ softirqs serviced; each subsequent column is the total for that particular
softirq.
-1.9 Ext4 file system parameters
+1.8 Ext4 file system parameters
-------------------------------
Information about mounted ext4 file systems can be found in
@@ -1552,7 +1474,7 @@ in Table 1-12, below.
mb_groups details of multiblock allocator buddy cache of free blocks
============== ==========================================================
-1.10 /proc/consoles
+1.9 /proc/consoles
-------------------
Shows registered system console lines.
diff --git a/Documentation/filesystems/zonefs.rst b/Documentation/filesystems/zonefs.rst
index 6b213fe9a33e..394b9f15dce0 100644
--- a/Documentation/filesystems/zonefs.rst
+++ b/Documentation/filesystems/zonefs.rst
@@ -306,8 +306,15 @@ Further notes:
Mount options
-------------
-zonefs define the "errors=<behavior>" mount option to allow the user to specify
-zonefs behavior in response to I/O errors, inode size inconsistencies or zone
+zonefs defines several mount options:
+* errors=<behavior>
+* explicit-open
+
+"errors=<behavior>" option
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The "errors=<behavior>" option mount option allows the user to specify zonefs
+behavior in response to I/O errors, inode size inconsistencies or zone
condition changes. The defined behaviors are as follow:
* remount-ro (default)
@@ -326,6 +333,9 @@ discover the amount of data that has been written to the zone. In the case of a
read-only zone discovered at run-time, as indicated in the previous section.
The size of the zone file is left unchanged from its last updated value.
+"explicit-open" option
+~~~~~~~~~~~~~~~~~~~~~~
+
A zoned block device (e.g. an NVMe Zoned Namespace device) may have limits on
the number of zones that can be active, that is, zones that are in the
implicit open, explicit open or closed conditions. This potential limitation
@@ -341,6 +351,44 @@ guaranteed that write requests can be processed. Conversely, the
to the device on the last close() of a zone file if the zone is not full nor
empty.
+Runtime sysfs attributes
+------------------------
+
+zonefs defines several sysfs attributes for mounted devices. All attributes
+are user readable and can be found in the directory /sys/fs/zonefs/<dev>/,
+where <dev> is the name of the mounted zoned block device.
+
+The attributes defined are as follows.
+
+* **max_wro_seq_files**: This attribute reports the maximum number of
+ sequential zone files that can be open for writing. This number corresponds
+ to the maximum number of explicitly or implicitly open zones that the device
+ supports. A value of 0 means that the device has no limit and that any zone
+ (any file) can be open for writing and written at any time, regardless of the
+ state of other zones. When the *explicit-open* mount option is used, zonefs
+ will fail any open() system call requesting to open a sequential zone file for
+ writing when the number of sequential zone files already open for writing has
+ reached the *max_wro_seq_files* limit.
+* **nr_wro_seq_files**: This attribute reports the current number of sequential
+ zone files open for writing. When the "explicit-open" mount option is used,
+ this number can never exceed *max_wro_seq_files*. If the *explicit-open*
+ mount option is not used, the reported number can be greater than
+ *max_wro_seq_files*. In such case, it is the responsibility of the
+ application to not write simultaneously more than *max_wro_seq_files*
+ sequential zone files. Failure to do so can result in write errors.
+* **max_active_seq_files**: This attribute reports the maximum number of
+ sequential zone files that are in an active state, that is, sequential zone
+ files that are partially writen (not empty nor full) or that have a zone that
+ is explicitly open (which happens only if the *explicit-open* mount option is
+ used). This number is always equal to the maximum number of active zones that
+ the device supports. A value of 0 means that the mounted device has no limit
+ on the number of sequential zone files that can be active.
+* **nr_active_seq_files**: This attributes reports the current number of
+ sequential zone files that are active. If *max_active_seq_files* is not 0,
+ then the value of *nr_active_seq_files* can never exceed the value of
+ *nr_active_seq_files*, regardless of the use of the *explicit-open* mount
+ option.
+
Zonefs User Space Tools
=======================
diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst
index b0024aa7b051..66828293d9cb 100644
--- a/Documentation/networking/ip-sysctl.rst
+++ b/Documentation/networking/ip-sysctl.rst
@@ -267,6 +267,13 @@ ipfrag_max_dist - INTEGER
from different IP datagrams, which could result in data corruption.
Default: 64
+bc_forwarding - INTEGER
+ bc_forwarding enables the feature described in rfc1812#section-5.3.5.2
+ and rfc2644. It allows the router to forward directed broadcast.
+ To enable this feature, the 'all' entry and the input interface entry
+ should be set to 1.
+ Default: 0
+
INET peer storage
=================
diff --git a/Documentation/process/embargoed-hardware-issues.rst b/Documentation/process/embargoed-hardware-issues.rst
index 6f8f36e10e8b..95999302d279 100644
--- a/Documentation/process/embargoed-hardware-issues.rst
+++ b/Documentation/process/embargoed-hardware-issues.rst
@@ -244,10 +244,11 @@ disclosure of a particular issue, unless requested by a response team or by
an involved disclosed party. The current ambassadors list:
============= ========================================================
- ARM Grant Likely <grant.likely@arm.com>
AMD Tom Lendacky <tom.lendacky@amd.com>
- IBM Z Christian Borntraeger <borntraeger@de.ibm.com>
- IBM Power Anton Blanchard <anton@linux.ibm.com>
+ Ampere Darren Hart <darren@os.amperecomputing.com>
+ ARM Catalin Marinas <catalin.marinas@arm.com>
+ IBM Power Anton Blanchard <anton@linux.ibm.com>
+ IBM Z Christian Borntraeger <borntraeger@de.ibm.com>
Intel Tony Luck <tony.luck@intel.com>
Qualcomm Trilok Soni <tsoni@codeaurora.org>
diff --git a/Documentation/security/index.rst b/Documentation/security/index.rst
index 16335de04e8c..6ed8d2fa6f9e 100644
--- a/Documentation/security/index.rst
+++ b/Documentation/security/index.rst
@@ -17,3 +17,4 @@ Security Documentation
tpm/index
digsig
landlock
+ secrets/index
diff --git a/Documentation/security/secrets/coco.rst b/Documentation/security/secrets/coco.rst
new file mode 100644
index 000000000000..262e7abb1b24
--- /dev/null
+++ b/Documentation/security/secrets/coco.rst
@@ -0,0 +1,103 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+==============================
+Confidential Computing secrets
+==============================
+
+This document describes how Confidential Computing secret injection is handled
+from the firmware to the operating system, in the EFI driver and the efi_secret
+kernel module.
+
+
+Introduction
+============
+
+Confidential Computing (coco) hardware such as AMD SEV (Secure Encrypted
+Virtualization) allows guest owners to inject secrets into the VMs
+memory without the host/hypervisor being able to read them. In SEV,
+secret injection is performed early in the VM launch process, before the
+guest starts running.
+
+The efi_secret kernel module allows userspace applications to access these
+secrets via securityfs.
+
+
+Secret data flow
+================
+
+The guest firmware may reserve a designated memory area for secret injection,
+and publish its location (base GPA and length) in the EFI configuration table
+under a ``LINUX_EFI_COCO_SECRET_AREA_GUID`` entry
+(``adf956ad-e98c-484c-ae11-b51c7d336447``). This memory area should be marked
+by the firmware as ``EFI_RESERVED_TYPE``, and therefore the kernel should not
+be use it for its own purposes.
+
+During the VM's launch, the virtual machine manager may inject a secret to that
+area. In AMD SEV and SEV-ES this is performed using the
+``KVM_SEV_LAUNCH_SECRET`` command (see [sev]_). The strucutre of the injected
+Guest Owner secret data should be a GUIDed table of secret values; the binary
+format is described in ``drivers/virt/coco/efi_secret/efi_secret.c`` under
+"Structure of the EFI secret area".
+
+On kernel start, the kernel's EFI driver saves the location of the secret area
+(taken from the EFI configuration table) in the ``efi.coco_secret`` field.
+Later it checks if the secret area is populated: it maps the area and checks
+whether its content begins with ``EFI_SECRET_TABLE_HEADER_GUID``
+(``1e74f542-71dd-4d66-963e-ef4287ff173b``). If the secret area is populated,
+the EFI driver will autoload the efi_secret kernel module, which exposes the
+secrets to userspace applications via securityfs. The details of the
+efi_secret filesystem interface are in [secrets-coco-abi]_.
+
+
+Application usage example
+=========================
+
+Consider a guest performing computations on encrypted files. The Guest Owner
+provides the decryption key (= secret) using the secret injection mechanism.
+The guest application reads the secret from the efi_secret filesystem and
+proceeds to decrypt the files into memory and then performs the needed
+computations on the content.
+
+In this example, the host can't read the files from the disk image
+because they are encrypted. Host can't read the decryption key because
+it is passed using the secret injection mechanism (= secure channel).
+Host can't read the decrypted content from memory because it's a
+confidential (memory-encrypted) guest.
+
+Here is a simple example for usage of the efi_secret module in a guest
+to which an EFI secret area with 4 secrets was injected during launch::
+
+ # ls -la /sys/kernel/security/secrets/coco
+ total 0
+ drwxr-xr-x 2 root root 0 Jun 28 11:54 .
+ drwxr-xr-x 3 root root 0 Jun 28 11:54 ..
+ -r--r----- 1 root root 0 Jun 28 11:54 736870e5-84f0-4973-92ec-06879ce3da0b
+ -r--r----- 1 root root 0 Jun 28 11:54 83c83f7f-1356-4975-8b7e-d3a0b54312c6
+ -r--r----- 1 root root 0 Jun 28 11:54 9553f55d-3da2-43ee-ab5d-ff17f78864d2
+ -r--r----- 1 root root 0 Jun 28 11:54 e6f5a162-d67f-4750-a67c-5d065f2a9910
+
+ # hd /sys/kernel/security/secrets/coco/e6f5a162-d67f-4750-a67c-5d065f2a9910
+ 00000000 74 68 65 73 65 2d 61 72 65 2d 74 68 65 2d 6b 61 |these-are-the-ka|
+ 00000010 74 61 2d 73 65 63 72 65 74 73 00 01 02 03 04 05 |ta-secrets......|
+ 00000020 06 07 |..|
+ 00000022
+
+ # rm /sys/kernel/security/secrets/coco/e6f5a162-d67f-4750-a67c-5d065f2a9910
+
+ # ls -la /sys/kernel/security/secrets/coco
+ total 0
+ drwxr-xr-x 2 root root 0 Jun 28 11:55 .
+ drwxr-xr-x 3 root root 0 Jun 28 11:54 ..
+ -r--r----- 1 root root 0 Jun 28 11:54 736870e5-84f0-4973-92ec-06879ce3da0b
+ -r--r----- 1 root root 0 Jun 28 11:54 83c83f7f-1356-4975-8b7e-d3a0b54312c6
+ -r--r----- 1 root root 0 Jun 28 11:54 9553f55d-3da2-43ee-ab5d-ff17f78864d2
+
+
+References
+==========
+
+See [sev-api-spec]_ for more info regarding SEV ``LAUNCH_SECRET`` operation.
+
+.. [sev] Documentation/virt/kvm/amd-memory-encryption.rst
+.. [secrets-coco-abi] Documentation/ABI/testing/securityfs-secrets-coco
+.. [sev-api-spec] https://www.amd.com/system/files/TechDocs/55766_SEV-KM_API_Specification.pdf
diff --git a/Documentation/security/secrets/index.rst b/Documentation/security/secrets/index.rst
new file mode 100644
index 000000000000..ced34e9c43bd
--- /dev/null
+++ b/Documentation/security/secrets/index.rst
@@ -0,0 +1,9 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=====================
+Secrets documentation
+=====================
+
+.. toctree::
+
+ coco
diff --git a/Documentation/security/siphash.rst b/Documentation/security/siphash.rst
index bd9363025fcb..a10380cb78e5 100644
--- a/Documentation/security/siphash.rst
+++ b/Documentation/security/siphash.rst
@@ -121,26 +121,36 @@ even scarier, uses an easily brute-forcable 64-bit key (with a 32-bit output)
instead of SipHash's 128-bit key. However, this may appeal to some
high-performance `jhash` users.
-Danger!
-
-Do not ever use HalfSipHash except for as a hashtable key function, and only
-then when you can be absolutely certain that the outputs will never be
-transmitted out of the kernel. This is only remotely useful over `jhash` as a
-means of mitigating hashtable flooding denial of service attacks.
-
-Generating a HalfSipHash key
-============================
+HalfSipHash support is provided through the "hsiphash" family of functions.
+
+.. warning::
+ Do not ever use the hsiphash functions except for as a hashtable key
+ function, and only then when you can be absolutely certain that the outputs
+ will never be transmitted out of the kernel. This is only remotely useful
+ over `jhash` as a means of mitigating hashtable flooding denial of service
+ attacks.
+
+On 64-bit kernels, the hsiphash functions actually implement SipHash-1-3, a
+reduced-round variant of SipHash, instead of HalfSipHash-1-3. This is because in
+64-bit code, SipHash-1-3 is no slower than HalfSipHash-1-3, and can be faster.
+Note, this does *not* mean that in 64-bit kernels the hsiphash functions are the
+same as the siphash ones, or that they are secure; the hsiphash functions still
+use a less secure reduced-round algorithm and truncate their outputs to 32
+bits.
+
+Generating a hsiphash key
+=========================
Keys should always be generated from a cryptographically secure source of
-random numbers, either using get_random_bytes or get_random_once:
+random numbers, either using get_random_bytes or get_random_once::
-hsiphash_key_t key;
-get_random_bytes(&key, sizeof(key));
+ hsiphash_key_t key;
+ get_random_bytes(&key, sizeof(key));
If you're not deriving your key from here, you're doing it wrong.
-Using the HalfSipHash functions
-===============================
+Using the hsiphash functions
+============================
There are two variants of the function, one that takes a list of integers, and
one that takes a buffer::
@@ -183,7 +193,7 @@ You may then iterate like usual over the returned hash bucket.
Performance
===========
-HalfSipHash is roughly 3 times slower than JenkinsHash. For many replacements,
-this will not be a problem, as the hashtable lookup isn't the bottleneck. And
-in general, this is probably a good sacrifice to make for the security and DoS
-resistance of HalfSipHash.
+hsiphash() is roughly 3 times slower than jhash(). For many replacements, this
+will not be a problem, as the hashtable lookup isn't the bottleneck. And in
+general, this is probably a good sacrifice to make for the security and DoS
+resistance of hsiphash().
diff --git a/Documentation/userspace-api/ioctl/cdrom.rst b/Documentation/userspace-api/ioctl/cdrom.rst
index 682948fc88a3..2ad91dbebd7c 100644
--- a/Documentation/userspace-api/ioctl/cdrom.rst
+++ b/Documentation/userspace-api/ioctl/cdrom.rst
@@ -718,6 +718,9 @@ CDROMPLAYBLK
CDROMGETSPINDOWN
+ Obsolete, was ide-cd only
+
+
usage::
char spindown;
@@ -736,6 +739,9 @@ CDROMGETSPINDOWN
CDROMSETSPINDOWN
+ Obsolete, was ide-cd only
+
+
usage::
char spindown
diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index 85c7abc51af5..4a900cdbc62e 100644
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -5986,16 +5986,16 @@ should put the acknowledged interrupt vector into the 'epr' field.
#define KVM_SYSTEM_EVENT_RESET 2
#define KVM_SYSTEM_EVENT_CRASH 3
__u32 type;
- __u64 flags;
+ __u32 ndata;
+ __u64 data[16];
} system_event;
If exit_reason is KVM_EXIT_SYSTEM_EVENT then the vcpu has triggered
a system-level event using some architecture specific mechanism (hypercall
or some special instruction). In case of ARM64, this is triggered using
-HVC instruction based PSCI call from the vcpu. The 'type' field describes
-the system-level event type. The 'flags' field describes architecture
-specific flags for the system-level event.
+HVC instruction based PSCI call from the vcpu.
+The 'type' field describes the system-level event type.
Valid values for 'type' are:
- KVM_SYSTEM_EVENT_SHUTDOWN -- the guest has requested a shutdown of the
@@ -6010,10 +6010,20 @@ Valid values for 'type' are:
to ignore the request, or to gather VM memory core dump and/or
reset/shutdown of the VM.
-Valid flags are:
+If KVM_CAP_SYSTEM_EVENT_DATA is present, the 'data' field can contain
+architecture specific information for the system-level event. Only
+the first `ndata` items (possibly zero) of the data array are valid.
- - KVM_SYSTEM_EVENT_RESET_FLAG_PSCI_RESET2 (arm64 only) -- the guest issued
- a SYSTEM_RESET2 call according to v1.1 of the PSCI specification.
+ - for arm64, data[0] is set to KVM_SYSTEM_EVENT_RESET_FLAG_PSCI_RESET2 if
+ the guest issued a SYSTEM_RESET2 call according to v1.1 of the PSCI
+ specification.
+
+ - for RISC-V, data[0] is set to the value of the second argument of the
+ ``sbi_system_reset`` call.
+
+Previous versions of Linux defined a `flags` member in this struct. The
+field is now aliased to `data[0]`. Userspace can assume that it is only
+written if ndata is greater than 0.
::
diff --git a/Documentation/vm/page_owner.rst b/Documentation/vm/page_owner.rst
index 65204d7f004f..7e0c3f574e78 100644
--- a/Documentation/vm/page_owner.rst
+++ b/Documentation/vm/page_owner.rst
@@ -110,7 +110,7 @@ Usage
If you want to sort by the page nums of buf, use the ``-m`` parameter.
The detailed parameters are:
- fundamental function:
+ fundamental function::
Sort:
-a Sort by memory allocation time.
@@ -122,7 +122,7 @@ Usage
-s Sort by stack trace.
-t Sort by times (default).
- additional function:
+ additional function::
Cull:
--cull <rules>
@@ -153,6 +153,7 @@ Usage
STANDARD FORMAT SPECIFIERS
==========================
+::
KEY LONG DESCRIPTION
p pid process ID
diff --git a/MAINTAINERS b/MAINTAINERS
index 40fa1955ca3f..f468864fd268 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2644,6 +2644,7 @@ L: linux-samsung-soc@vger.kernel.org
S: Maintained
C: irc://irc.libera.chat/linux-exynos
Q: https://patchwork.kernel.org/project/linux-samsung-soc/list/
+B: mailto:linux-samsung-soc@vger.kernel.org
T: git git://git.kernel.org/pub/scm/linux/kernel/git/krzk/linux.git
F: Documentation/arm/samsung/
F: Documentation/devicetree/bindings/arm/samsung/
@@ -3570,8 +3571,9 @@ M: Andy Gospodarek <andy@greyhouse.net>
L: netdev@vger.kernel.org
S: Supported
W: http://sourceforge.net/projects/bonding/
+F: Documentation/networking/bonding.rst
F: drivers/net/bonding/
-F: include/net/bonding.h
+F: include/net/bond*
F: include/uapi/linux/if_bonding.h
BOSCH SENSORTEC BMA400 ACCELEROMETER IIO DRIVER
@@ -3913,7 +3915,9 @@ BROADCOM BNXT_EN 50 GIGABIT ETHERNET DRIVER
M: Michael Chan <michael.chan@broadcom.com>
L: netdev@vger.kernel.org
S: Supported
+F: drivers/firmware/broadcom/tee_bnxt_fw.c
F: drivers/net/ethernet/broadcom/bnxt/
+F: include/linux/firmware/broadcom/tee_bnxt_fw.h
BROADCOM BRCM80211 IEEE802.11n WIRELESS DRIVER
M: Arend van Spriel <aspriel@gmail.com>
@@ -5436,6 +5440,7 @@ F: net/ax25/sysctl_net_ax25.c
DATA ACCESS MONITOR
M: SeongJae Park <sj@kernel.org>
+L: damon@lists.linux.dev
L: linux-mm@kvack.org
S: Maintained
F: Documentation/ABI/testing/sysfs-kernel-mm-damon
@@ -5914,7 +5919,7 @@ R: Benjamin Gaignard <benjamin.gaignard@collabora.com>
R: Liam Mark <lmark@codeaurora.org>
R: Laura Abbott <labbott@redhat.com>
R: Brian Starkey <Brian.Starkey@arm.com>
-R: John Stultz <john.stultz@linaro.org>
+R: John Stultz <jstultz@google.com>
L: linux-media@vger.kernel.org
L: dri-devel@lists.freedesktop.org
L: linaro-mm-sig@lists.linaro.org (moderated for non-subscribers)
@@ -6584,7 +6589,7 @@ F: drivers/gpu/drm/gma500/
DRM DRIVERS FOR HISILICON
M: Xinliang Liu <xinliang.liu@linaro.org>
M: Tian Tao <tiantao6@hisilicon.com>
-R: John Stultz <john.stultz@linaro.org>
+R: John Stultz <jstultz@google.com>
R: Xinwei Kong <kong.kongxinwei@hisilicon.com>
R: Chen Feng <puck.chen@hisilicon.com>
L: dri-devel@lists.freedesktop.org
@@ -7496,7 +7501,7 @@ F: Documentation/hwmon/f71805f.rst
F: drivers/hwmon/f71805f.c
FADDR2LINE
-M: Josh Poimboeuf <jpoimboe@redhat.com>
+M: Josh Poimboeuf <jpoimboe@kernel.org>
S: Maintained
F: scripts/faddr2line
@@ -8109,7 +8114,7 @@ M: Ingo Molnar <mingo@redhat.com>
R: Peter Zijlstra <peterz@infradead.org>
R: Darren Hart <dvhart@infradead.org>
R: Davidlohr Bueso <dave@stgolabs.net>
-R: André Almeida <andrealmeid@collabora.com>
+R: André Almeida <andrealmeid@igalia.com>
L: linux-kernel@vger.kernel.org
S: Maintained
T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git locking/core
@@ -8382,7 +8387,7 @@ M: Linus Walleij <linus.walleij@linaro.org>
M: Bartosz Golaszewski <brgl@bgdev.pl>
L: linux-gpio@vger.kernel.org
S: Maintained
-T: git git://git.kernel.org/pub/scm/linux/kernel/git/linusw/linux-gpio.git
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/brgl/linux.git
F: Documentation/ABI/obsolete/sysfs-gpio
F: Documentation/ABI/testing/gpio-cdev
F: Documentation/admin-guide/gpio/
@@ -8845,7 +8850,7 @@ F: Documentation/devicetree/bindings/net/hisilicon*.txt
F: drivers/net/ethernet/hisilicon/
HIKEY960 ONBOARD USB GPIO HUB DRIVER
-M: John Stultz <john.stultz@linaro.org>
+M: John Stultz <jstultz@google.com>
L: linux-kernel@vger.kernel.org
S: Maintained
F: drivers/misc/hisi_hikey_usb.c
@@ -9337,14 +9342,12 @@ F: drivers/pci/hotplug/rpaphp*
IBM Power SRIOV Virtual NIC Device Driver
M: Dany Madden <drt@linux.ibm.com>
-M: Sukadev Bhattiprolu <sukadev@linux.ibm.com>
R: Thomas Falcon <tlfalcon@linux.ibm.com>
L: netdev@vger.kernel.org
S: Supported
F: drivers/net/ethernet/ibm/ibmvnic.*
IBM Power Virtual Accelerator Switchboard
-M: Sukadev Bhattiprolu <sukadev@linux.ibm.com>
L: linuxppc-dev@lists.ozlabs.org
S: Supported
F: arch/powerpc/include/asm/vas.h
@@ -10130,7 +10133,7 @@ S: Supported
F: drivers/net/wireless/intel/iwlegacy/
INTEL WIRELESS WIFI LINK (iwlwifi)
-M: Luca Coelho <luciano.coelho@intel.com>
+M: Gregory Greenman <gregory.greenman@intel.com>
L: linux-wireless@vger.kernel.org
S: Supported
W: https://wireless.wiki.kernel.org/en/users/drivers/iwlwifi
@@ -10239,8 +10242,6 @@ F: drivers/net/ethernet/sgi/ioc3-eth.c
IOMAP FILESYSTEM LIBRARY
M: Christoph Hellwig <hch@infradead.org>
M: Darrick J. Wong <djwong@kernel.org>
-M: linux-xfs@vger.kernel.org
-M: linux-fsdevel@vger.kernel.org
L: linux-xfs@vger.kernel.org
L: linux-fsdevel@vger.kernel.org
S: Supported
@@ -10549,6 +10550,7 @@ M: Andrey Ryabinin <ryabinin.a.a@gmail.com>
R: Alexander Potapenko <glider@google.com>
R: Andrey Konovalov <andreyknvl@gmail.com>
R: Dmitry Vyukov <dvyukov@google.com>
+R: Vincenzo Frascino <vincenzo.frascino@arm.com>
L: kasan-dev@googlegroups.com
S: Maintained
F: Documentation/dev-tools/kasan.rst
@@ -11348,7 +11350,7 @@ F: drivers/mmc/host/litex_mmc.c
N: litex
LIVE PATCHING
-M: Josh Poimboeuf <jpoimboe@redhat.com>
+M: Josh Poimboeuf <jpoimboe@kernel.org>
M: Jiri Kosina <jikos@kernel.org>
M: Miroslav Benes <mbenes@suse.cz>
M: Petr Mladek <pmladek@suse.com>
@@ -11974,6 +11976,7 @@ M: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
M: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
L: linux-pm@vger.kernel.org
S: Supported
+B: mailto:linux-samsung-soc@vger.kernel.org
F: Documentation/devicetree/bindings/power/supply/maxim,max14577.yaml
F: Documentation/devicetree/bindings/power/supply/maxim,max77693.yaml
F: drivers/power/supply/max14577_charger.c
@@ -11985,6 +11988,7 @@ M: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
M: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
L: linux-kernel@vger.kernel.org
S: Supported
+B: mailto:linux-samsung-soc@vger.kernel.org
F: Documentation/devicetree/bindings/*/maxim,max14577.yaml
F: Documentation/devicetree/bindings/*/maxim,max77686.yaml
F: Documentation/devicetree/bindings/*/maxim,max77693.yaml
@@ -12678,6 +12682,7 @@ MEMORY CONTROLLER DRIVERS
M: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
L: linux-kernel@vger.kernel.org
S: Maintained
+B: mailto:krzysztof.kozlowski@linaro.org
T: git git://git.kernel.org/pub/scm/linux/kernel/git/krzk/linux-mem-ctrl.git
F: Documentation/devicetree/bindings/memory-controllers/
F: drivers/memory/
@@ -13626,6 +13631,7 @@ F: net/core/drop_monitor.c
NETWORKING DRIVERS
M: "David S. Miller" <davem@davemloft.net>
+M: Eric Dumazet <edumazet@google.com>
M: Jakub Kicinski <kuba@kernel.org>
M: Paolo Abeni <pabeni@redhat.com>
L: netdev@vger.kernel.org
@@ -13673,6 +13679,7 @@ F: tools/testing/selftests/drivers/net/dsa/
NETWORKING [GENERAL]
M: "David S. Miller" <davem@davemloft.net>
+M: Eric Dumazet <edumazet@google.com>
M: Jakub Kicinski <kuba@kernel.org>
M: Paolo Abeni <pabeni@redhat.com>
L: netdev@vger.kernel.org
@@ -13823,6 +13830,7 @@ M: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
L: linux-nfc@lists.01.org (subscribers-only)
L: netdev@vger.kernel.org
S: Maintained
+B: mailto:linux-nfc@lists.01.org
F: Documentation/devicetree/bindings/net/nfc/
F: drivers/nfc/
F: include/linux/platform_data/nfcmrvl.h
@@ -14218,7 +14226,7 @@ F: lib/objagg.c
F: lib/test_objagg.c
OBJTOOL
-M: Josh Poimboeuf <jpoimboe@redhat.com>
+M: Josh Poimboeuf <jpoimboe@kernel.org>
M: Peter Zijlstra <peterz@infradead.org>
S: Supported
F: tools/objtool/
@@ -14366,7 +14374,6 @@ F: arch/arm/*omap*/*pm*
F: drivers/cpufreq/omap-cpufreq.c
OMAP POWERDOMAIN SOC ADAPTATION LAYER SUPPORT
-M: Rajendra Nayak <rnayak@codeaurora.org>
M: Paul Walmsley <paul@pwsan.com>
L: linux-omap@vger.kernel.org
S: Maintained
@@ -15469,7 +15476,8 @@ F: tools/perf/
PERFORMANCE EVENTS TOOLING ARM64
R: John Garry <john.garry@huawei.com>
R: Will Deacon <will@kernel.org>
-R: Mathieu Poirier <mathieu.poirier@linaro.org>
+R: James Clark <james.clark@arm.com>
+R: Mike Leach <mike.leach@linaro.org>
R: Leo Yan <leo.yan@linaro.org>
L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
S: Supported
@@ -15609,6 +15617,7 @@ L: linux-samsung-soc@vger.kernel.org
S: Maintained
C: irc://irc.libera.chat/linux-exynos
Q: https://patchwork.kernel.org/project/linux-samsung-soc/list/
+B: mailto:linux-samsung-soc@vger.kernel.org
T: git git://git.kernel.org/pub/scm/linux/kernel/git/pinctrl/samsung.git
F: Documentation/devicetree/bindings/pinctrl/samsung,pinctrl*yaml
F: drivers/pinctrl/samsung/
@@ -17325,6 +17334,7 @@ M: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
M: Sylwester Nawrocki <s.nawrocki@samsung.com>
L: alsa-devel@alsa-project.org (moderated for non-subscribers)
S: Supported
+B: mailto:linux-samsung-soc@vger.kernel.org
F: Documentation/devicetree/bindings/sound/samsung*
F: sound/soc/samsung/
@@ -17369,6 +17379,7 @@ M: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
L: linux-kernel@vger.kernel.org
L: linux-samsung-soc@vger.kernel.org
S: Supported
+B: mailto:linux-samsung-soc@vger.kernel.org
F: Documentation/devicetree/bindings/clock/samsung,s2mps11.yaml
F: Documentation/devicetree/bindings/mfd/samsung,s2m*.yaml
F: Documentation/devicetree/bindings/mfd/samsung,s5m*.yaml
@@ -18783,7 +18794,7 @@ F: include/dt-bindings/reset/starfive-jh7100.h
STATIC BRANCH/CALL
M: Peter Zijlstra <peterz@infradead.org>
-M: Josh Poimboeuf <jpoimboe@redhat.com>
+M: Josh Poimboeuf <jpoimboe@kernel.org>
M: Jason Baron <jbaron@akamai.com>
R: Steven Rostedt <rostedt@goodmis.org>
R: Ard Biesheuvel <ardb@kernel.org>
@@ -19784,7 +19795,7 @@ F: drivers/net/wireless/ti/
F: include/linux/wl12xx.h
TIMEKEEPING, CLOCKSOURCE CORE, NTP, ALARMTIMER
-M: John Stultz <john.stultz@linaro.org>
+M: John Stultz <jstultz@google.com>
M: Thomas Gleixner <tglx@linutronix.de>
R: Stephen Boyd <sboyd@kernel.org>
L: linux-kernel@vger.kernel.org
@@ -21434,6 +21445,15 @@ F: arch/x86/include/asm/uv/
F: arch/x86/kernel/apic/x2apic_uv_x.c
F: arch/x86/platform/uv/
+X86 STACK UNWINDING
+M: Josh Poimboeuf <jpoimboe@kernel.org>
+M: Peter Zijlstra <peterz@infradead.org>
+S: Supported
+F: arch/x86/include/asm/unwind*.h
+F: arch/x86/kernel/dumpstack.c
+F: arch/x86/kernel/stacktrace.c
+F: arch/x86/kernel/unwind_*.c
+
X86 VDSO
M: Andy Lutomirski <luto@kernel.org>
L: linux-kernel@vger.kernel.org
@@ -21596,7 +21616,6 @@ F: drivers/xen/*swiotlb*
XFS FILESYSTEM
C: irc://irc.oftc.net/xfs
M: Darrick J. Wong <djwong@kernel.org>
-M: linux-xfs@vger.kernel.org
L: linux-xfs@vger.kernel.org
S: Supported
W: http://xfs.org/
diff --git a/Makefile b/Makefile
index fa5112a0ec1b..7d5b0bfe7960 100644
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@
VERSION = 5
PATCHLEVEL = 18
SUBLEVEL = 0
-EXTRAVERSION = -rc3
+EXTRAVERSION =
NAME = Superb Owl
# *DOCUMENTATION*
diff --git a/arch/Kconfig b/arch/Kconfig
index 29b0167c088b..513a29c62a9b 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -35,6 +35,7 @@ config KPROBES
depends on MODULES
depends on HAVE_KPROBES
select KALLSYMS
+ select TASKS_RCU if PREEMPTION
help
Kprobes allows you to trap at almost any kernel address and
execute a callback function. register_kprobe() establishes
@@ -854,10 +855,8 @@ config HAVE_ARCH_HUGE_VMAP
#
# Archs that select this would be capable of PMD-sized vmaps (i.e.,
-# arch_vmap_pmd_supported() returns true), and they must make no assumptions
-# that vmalloc memory is mapped with PAGE_SIZE ptes. The VM_NO_HUGE_VMAP flag
-# can be used to prohibit arch-specific allocations from using hugepages to
-# help with this (e.g., modules may require it).
+# arch_vmap_pmd_supported() returns true). The VM_ALLOW_HUGE_VMAP flag
+# must be used to enable allocations to use hugepages.
#
config HAVE_ARCH_HUGE_VMALLOC
depends on HAVE_ARCH_HUGE_VMAP
diff --git a/arch/arc/boot/dts/hsdk.dts b/arch/arc/boot/dts/hsdk.dts
index dcaa44e408ac..f48ba03e9b5e 100644
--- a/arch/arc/boot/dts/hsdk.dts
+++ b/arch/arc/boot/dts/hsdk.dts
@@ -275,7 +275,7 @@
cs-gpios = <&creg_gpio 0 GPIO_ACTIVE_LOW>,
<&creg_gpio 1 GPIO_ACTIVE_LOW>;
- spi-flash@0 {
+ flash@0 {
compatible = "sst26wf016b", "jedec,spi-nor";
reg = <0>;
#address-cells = <1>;
diff --git a/arch/arc/include/asm/atomic-llsc.h b/arch/arc/include/asm/atomic-llsc.h
index 088d348781c1..1b0ffaeee16d 100644
--- a/arch/arc/include/asm/atomic-llsc.h
+++ b/arch/arc/include/asm/atomic-llsc.h
@@ -5,7 +5,7 @@
#define arch_atomic_set(v, i) WRITE_ONCE(((v)->counter), (i))
-#define ATOMIC_OP(op, c_op, asm_op) \
+#define ATOMIC_OP(op, asm_op) \
static inline void arch_atomic_##op(int i, atomic_t *v) \
{ \
unsigned int val; \
@@ -21,7 +21,7 @@ static inline void arch_atomic_##op(int i, atomic_t *v) \
: "cc"); \
} \
-#define ATOMIC_OP_RETURN(op, c_op, asm_op) \
+#define ATOMIC_OP_RETURN(op, asm_op) \
static inline int arch_atomic_##op##_return_relaxed(int i, atomic_t *v) \
{ \
unsigned int val; \
@@ -42,7 +42,7 @@ static inline int arch_atomic_##op##_return_relaxed(int i, atomic_t *v) \
#define arch_atomic_add_return_relaxed arch_atomic_add_return_relaxed
#define arch_atomic_sub_return_relaxed arch_atomic_sub_return_relaxed
-#define ATOMIC_FETCH_OP(op, c_op, asm_op) \
+#define ATOMIC_FETCH_OP(op, asm_op) \
static inline int arch_atomic_fetch_##op##_relaxed(int i, atomic_t *v) \
{ \
unsigned int val, orig; \
@@ -69,23 +69,23 @@ static inline int arch_atomic_fetch_##op##_relaxed(int i, atomic_t *v) \
#define arch_atomic_fetch_or_relaxed arch_atomic_fetch_or_relaxed
#define arch_atomic_fetch_xor_relaxed arch_atomic_fetch_xor_relaxed
-#define ATOMIC_OPS(op, c_op, asm_op) \
- ATOMIC_OP(op, c_op, asm_op) \
- ATOMIC_OP_RETURN(op, c_op, asm_op) \
- ATOMIC_FETCH_OP(op, c_op, asm_op)
+#define ATOMIC_OPS(op, asm_op) \
+ ATOMIC_OP(op, asm_op) \
+ ATOMIC_OP_RETURN(op, asm_op) \
+ ATOMIC_FETCH_OP(op, asm_op)
-ATOMIC_OPS(add, +=, add)
-ATOMIC_OPS(sub, -=, sub)
+ATOMIC_OPS(add, add)
+ATOMIC_OPS(sub, sub)
#undef ATOMIC_OPS
-#define ATOMIC_OPS(op, c_op, asm_op) \
- ATOMIC_OP(op, c_op, asm_op) \
- ATOMIC_FETCH_OP(op, c_op, asm_op)
+#define ATOMIC_OPS(op, asm_op) \
+ ATOMIC_OP(op, asm_op) \
+ ATOMIC_FETCH_OP(op, asm_op)
-ATOMIC_OPS(and, &=, and)
-ATOMIC_OPS(andnot, &= ~, bic)
-ATOMIC_OPS(or, |=, or)
-ATOMIC_OPS(xor, ^=, xor)
+ATOMIC_OPS(and, and)
+ATOMIC_OPS(andnot, bic)
+ATOMIC_OPS(or, or)
+ATOMIC_OPS(xor, xor)
#define arch_atomic_andnot arch_atomic_andnot
diff --git a/arch/arc/include/asm/pgtable-levels.h b/arch/arc/include/asm/pgtable-levels.h
index 7848348719b2..64ca25d199be 100644
--- a/arch/arc/include/asm/pgtable-levels.h
+++ b/arch/arc/include/asm/pgtable-levels.h
@@ -98,9 +98,6 @@
/*
* 1st level paging: pgd
*/
-#define pgd_index(addr) ((addr) >> PGDIR_SHIFT)
-#define pgd_offset(mm, addr) (((mm)->pgd) + pgd_index(addr))
-#define pgd_offset_k(addr) pgd_offset(&init_mm, addr)
#define pgd_ERROR(e) \
pr_crit("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e))
diff --git a/arch/arc/kernel/disasm.c b/arch/arc/kernel/disasm.c
index 03f8b1be0c3a..1e1db51b6941 100644
--- a/arch/arc/kernel/disasm.c
+++ b/arch/arc/kernel/disasm.c
@@ -366,7 +366,7 @@ void __kprobes disasm_instr(unsigned long addr, struct disasm_state *state,
case op_SP: /* LD_S|LDB_S b,[sp,u7], ST_S|STB_S b,[sp,u7] */
/* note: we are ignoring possibility of:
* ADD_S, SUB_S, PUSH_S, POP_S as these should not
- * cause unaliged exception anyway */
+ * cause unaligned exception anyway */
state->write = BITS(state->words[0], 6, 6);
state->zz = BITS(state->words[0], 5, 5);
if (state->zz)
@@ -503,7 +503,6 @@ int __kprobes disasm_next_pc(unsigned long pc, struct pt_regs *regs,
{
struct disasm_state instr;
- memset(&instr, 0, sizeof(struct disasm_state));
disasm_instr(pc, &instr, 0, regs, cregs);
*next_pc = pc + instr.instr_len;
diff --git a/arch/arc/kernel/entry.S b/arch/arc/kernel/entry.S
index dd77a0c8f740..66ba549b520f 100644
--- a/arch/arc/kernel/entry.S
+++ b/arch/arc/kernel/entry.S
@@ -196,6 +196,7 @@ tracesys_exit:
st r0, [sp, PT_r0] ; sys call return value in pt_regs
;POST Sys Call Ptrace Hook
+ mov r0, sp ; pt_regs needed
bl @syscall_trace_exit
b ret_from_exception ; NOT ret_from_system_call at is saves r0 which
; we'd done before calling post hook above
diff --git a/arch/arc/kernel/signal.c b/arch/arc/kernel/signal.c
index f748483628f2..3c1590c27fae 100644
--- a/arch/arc/kernel/signal.c
+++ b/arch/arc/kernel/signal.c
@@ -319,7 +319,7 @@ setup_rt_frame(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs)
regs->ret = (unsigned long)ksig->ka.sa.sa_handler;
/*
- * handler returns using sigreturn stub provided already by userpsace
+ * handler returns using sigreturn stub provided already by userspace
* If not, nuke the process right away
*/
if(!(ksig->ka.sa.sa_flags & SA_RESTORER))
diff --git a/arch/arc/kernel/smp.c b/arch/arc/kernel/smp.c
index 78e6d069b1c1..d947473f1e6d 100644
--- a/arch/arc/kernel/smp.c
+++ b/arch/arc/kernel/smp.c
@@ -35,7 +35,7 @@ EXPORT_SYMBOL_GPL(smp_atomic_ops_lock);
struct plat_smp_ops __weak plat_smp_ops;
-/* XXX: per cpu ? Only needed once in early seconday boot */
+/* XXX: per cpu ? Only needed once in early secondary boot */
struct task_struct *secondary_idle_tsk;
/* Called from start_kernel */
@@ -274,7 +274,7 @@ static void ipi_send_msg_one(int cpu, enum ipi_msg_type msg)
* and read back old value
*/
do {
- new = old = READ_ONCE(*ipi_data_ptr);
+ new = old = *ipi_data_ptr;
new |= 1U << msg;
} while (cmpxchg(ipi_data_ptr, old, new) != old);
diff --git a/arch/arc/kernel/unaligned.c b/arch/arc/kernel/unaligned.c
index d63ebd81f1c6..99a9b92ed98d 100644
--- a/arch/arc/kernel/unaligned.c
+++ b/arch/arc/kernel/unaligned.c
@@ -237,7 +237,7 @@ int misaligned_fixup(unsigned long address, struct pt_regs *regs,
if (state.fault)
goto fault;
- /* clear any remanants of delay slot */
+ /* clear any remnants of delay slot */
if (delay_mode(regs)) {
regs->ret = regs->bta & ~1U;
regs->status32 &= ~STATUS_DE_MASK;
diff --git a/arch/arc/mm/cache.c b/arch/arc/mm/cache.c
index 8aa1231865d1..5446967ea98d 100644
--- a/arch/arc/mm/cache.c
+++ b/arch/arc/mm/cache.c
@@ -401,7 +401,7 @@ static inline void __before_dc_op(const int op)
{
if (op == OP_FLUSH_N_INV) {
/* Dcache provides 2 cmd: FLUSH or INV
- * INV inturn has sub-modes: DISCARD or FLUSH-BEFORE
+ * INV in turn has sub-modes: DISCARD or FLUSH-BEFORE
* flush-n-inv is achieved by INV cmd but with IM=1
* So toggle INV sub-mode depending on op request and default
*/
diff --git a/arch/arm/boot/dts/am33xx-l4.dtsi b/arch/arm/boot/dts/am33xx-l4.dtsi
index c9629cb5ccd1..7da42a5b959c 100644
--- a/arch/arm/boot/dts/am33xx-l4.dtsi
+++ b/arch/arm/boot/dts/am33xx-l4.dtsi
@@ -263,6 +263,8 @@
compatible = "ti,am3359-tscadc";
reg = <0x0 0x1000>;
interrupts = <16>;
+ clocks = <&adc_tsc_fck>;
+ clock-names = "fck";
status = "disabled";
dmas = <&edma 53 0>, <&edma 57 0>;
dma-names = "fifo0", "fifo1";
diff --git a/arch/arm/boot/dts/am3517-evm.dts b/arch/arm/boot/dts/am3517-evm.dts
index 0d2fac98ce7d..c8b80f156ec9 100644
--- a/arch/arm/boot/dts/am3517-evm.dts
+++ b/arch/arm/boot/dts/am3517-evm.dts
@@ -161,6 +161,8 @@
/* HS USB Host PHY on PORT 1 */
hsusb1_phy: hsusb1_phy {
+ pinctrl-names = "default";
+ pinctrl-0 = <&hsusb1_rst_pins>;
compatible = "usb-nop-xceiv";
reset-gpios = <&gpio2 25 GPIO_ACTIVE_LOW>; /* gpio_57 */
#phy-cells = <0>;
@@ -168,7 +170,9 @@
};
&davinci_emac {
- status = "okay";
+ pinctrl-names = "default";
+ pinctrl-0 = <&ethernet_pins>;
+ status = "okay";
};
&davinci_mdio {
@@ -193,6 +197,8 @@
};
&i2c2 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&i2c2_pins>;
clock-frequency = <400000>;
/* User DIP swithes [1:8] / User LEDS [1:2] */
tca6416: gpio@21 {
@@ -205,6 +211,8 @@
};
&i2c3 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&i2c3_pins>;
clock-frequency = <400000>;
};
@@ -223,6 +231,8 @@
};
&usbhshost {
+ pinctrl-names = "default";
+ pinctrl-0 = <&hsusb1_pins>;
port1-mode = "ehci-phy";
};
@@ -231,8 +241,35 @@
};
&omap3_pmx_core {
- pinctrl-names = "default";
- pinctrl-0 = <&hsusb1_rst_pins>;
+
+ ethernet_pins: pinmux_ethernet_pins {
+ pinctrl-single,pins = <
+ OMAP3_CORE1_IOPAD(0x21fe, PIN_INPUT | MUX_MODE0) /* rmii_mdio_data */
+ OMAP3_CORE1_IOPAD(0x2200, MUX_MODE0) /* rmii_mdio_clk */
+ OMAP3_CORE1_IOPAD(0x2202, PIN_INPUT_PULLDOWN | MUX_MODE0) /* rmii_rxd0 */
+ OMAP3_CORE1_IOPAD(0x2204, PIN_INPUT_PULLDOWN | MUX_MODE0) /* rmii_rxd1 */
+ OMAP3_CORE1_IOPAD(0x2206, PIN_INPUT_PULLDOWN | MUX_MODE0) /* rmii_crs_dv */
+ OMAP3_CORE1_IOPAD(0x2208, PIN_OUTPUT_PULLDOWN | MUX_MODE0) /* rmii_rxer */
+ OMAP3_CORE1_IOPAD(0x220a, PIN_OUTPUT_PULLDOWN | MUX_MODE0) /* rmii_txd0 */
+ OMAP3_CORE1_IOPAD(0x220c, PIN_OUTPUT_PULLDOWN | MUX_MODE0) /* rmii_txd1 */
+ OMAP3_CORE1_IOPAD(0x220e, PIN_OUTPUT_PULLDOWN |MUX_MODE0) /* rmii_txen */
+ OMAP3_CORE1_IOPAD(0x2210, PIN_INPUT_PULLDOWN | MUX_MODE0) /* rmii_50mhz_clk */
+ >;
+ };
+
+ i2c2_pins: pinmux_i2c2_pins {
+ pinctrl-single,pins = <
+ OMAP3_CORE1_IOPAD(0x21be, PIN_INPUT_PULLUP | MUX_MODE0) /* i2c2_scl */
+ OMAP3_CORE1_IOPAD(0x21c0, PIN_INPUT_PULLUP | MUX_MODE0) /* i2c2_sda */
+ >;
+ };
+
+ i2c3_pins: pinmux_i2c3_pins {
+ pinctrl-single,pins = <
+ OMAP3_CORE1_IOPAD(0x21c2, PIN_INPUT_PULLUP | MUX_MODE0) /* i2c3_scl */
+ OMAP3_CORE1_IOPAD(0x21c4, PIN_INPUT_PULLUP | MUX_MODE0) /* i2c3_sda */
+ >;
+ };
leds_pins: pinmux_leds_pins {
pinctrl-single,pins = <
@@ -300,8 +337,6 @@
};
&omap3_pmx_core2 {
- pinctrl-names = "default";
- pinctrl-0 = <&hsusb1_pins>;
hsusb1_pins: pinmux_hsusb1_pins {
pinctrl-single,pins = <
diff --git a/arch/arm/boot/dts/am3517-som.dtsi b/arch/arm/boot/dts/am3517-som.dtsi
index 8b669e2eafec..f7b680f6c48a 100644
--- a/arch/arm/boot/dts/am3517-som.dtsi
+++ b/arch/arm/boot/dts/am3517-som.dtsi
@@ -69,6 +69,8 @@
};
&i2c1 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&i2c1_pins>;
clock-frequency = <400000>;
s35390a: s35390a@30 {
@@ -179,6 +181,13 @@
&omap3_pmx_core {
+ i2c1_pins: pinmux_i2c1_pins {
+ pinctrl-single,pins = <
+ OMAP3_CORE1_IOPAD(0x21ba, PIN_INPUT_PULLUP | MUX_MODE0) /* i2c1_scl */
+ OMAP3_CORE1_IOPAD(0x21bc, PIN_INPUT_PULLUP | MUX_MODE0) /* i2c1_sda */
+ >;
+ };
+
wl12xx_buffer_pins: pinmux_wl12xx_buffer_pins {
pinctrl-single,pins = <
OMAP3_CORE1_IOPAD(0x2156, PIN_OUTPUT | MUX_MODE4) /* mmc1_dat7.gpio_129 */
diff --git a/arch/arm/boot/dts/aspeed-bmc-asrock-romed8hm3.dts b/arch/arm/boot/dts/aspeed-bmc-asrock-romed8hm3.dts
index e71ccfd1df63..ff4c07c69af1 100644
--- a/arch/arm/boot/dts/aspeed-bmc-asrock-romed8hm3.dts
+++ b/arch/arm/boot/dts/aspeed-bmc-asrock-romed8hm3.dts
@@ -100,12 +100,14 @@
lm25066@40 {
compatible = "lm25066";
reg = <0x40>;
+ shunt-resistor-micro-ohms = <1000>;
};
/* 12VSB PMIC */
lm25066@41 {
compatible = "lm25066";
reg = <0x41>;
+ shunt-resistor-micro-ohms = <10000>;
};
};
@@ -196,7 +198,7 @@
gpio-line-names =
/* A */ "LOCATORLED_STATUS_N", "BMC_MAC2_INTB", "NMI_BTN_N", "BMC_NMI",
"", "", "", "",
- /* B */ "DDR_MEM_TEMP", "", "", "", "", "", "", "",
+ /* B */ "POST_COMPLETE_N", "", "", "", "", "", "", "",
/* C */ "", "", "", "", "PCIE_HP_SEL_N", "PCIE_SATA_SEL_N", "LOCATORBTN", "",
/* D */ "BMC_PSIN", "BMC_PSOUT", "BMC_RESETCON", "RESETCON",
"", "", "", "PSU_FAN_FAIL_N",
diff --git a/arch/arm/boot/dts/aspeed-g6-pinctrl.dtsi b/arch/arm/boot/dts/aspeed-g6-pinctrl.dtsi
index e4775bbceecc..7cd4f075e325 100644
--- a/arch/arm/boot/dts/aspeed-g6-pinctrl.dtsi
+++ b/arch/arm/boot/dts/aspeed-g6-pinctrl.dtsi
@@ -117,9 +117,9 @@
groups = "FWSPID";
};
- pinctrl_fwqspid_default: fwqspid_default {
- function = "FWSPID";
- groups = "FWQSPID";
+ pinctrl_fwqspi_default: fwqspi_default {
+ function = "FWQSPI";
+ groups = "FWQSPI";
};
pinctrl_fwspiwp_default: fwspiwp_default {
@@ -653,12 +653,12 @@
};
pinctrl_qspi1_default: qspi1_default {
- function = "QSPI1";
+ function = "SPI1";
groups = "QSPI1";
};
pinctrl_qspi2_default: qspi2_default {
- function = "QSPI2";
+ function = "SPI2";
groups = "QSPI2";
};
diff --git a/arch/arm/boot/dts/aspeed-g6.dtsi b/arch/arm/boot/dts/aspeed-g6.dtsi
index 3d5ce9da42c3..9d2a0ce4ca06 100644
--- a/arch/arm/boot/dts/aspeed-g6.dtsi
+++ b/arch/arm/boot/dts/aspeed-g6.dtsi
@@ -389,6 +389,16 @@
reg = <0x1e6f2000 0x1000>;
};
+ video: video@1e700000 {
+ compatible = "aspeed,ast2600-video-engine";
+ reg = <0x1e700000 0x1000>;
+ clocks = <&syscon ASPEED_CLK_GATE_VCLK>,
+ <&syscon ASPEED_CLK_GATE_ECLK>;
+ clock-names = "vclk", "eclk";
+ interrupts = <GIC_SPI 7 IRQ_TYPE_LEVEL_HIGH>;
+ status = "disabled";
+ };
+
gpio0: gpio@1e780000 {
#gpio-cells = <2>;
gpio-controller;
diff --git a/arch/arm/boot/dts/at91-dvk_su60_somc.dtsi b/arch/arm/boot/dts/at91-dvk_su60_somc.dtsi
index c1c8650dafce..3542ad8a243e 100644
--- a/arch/arm/boot/dts/at91-dvk_su60_somc.dtsi
+++ b/arch/arm/boot/dts/at91-dvk_su60_somc.dtsi
@@ -44,7 +44,7 @@
status = "okay";
/* spi0.0: 4M Flash Macronix MX25R4035FM1IL0 */
- spi-flash@0 {
+ flash@0 {
compatible = "mxicy,mx25u4035", "jedec,spi-nor";
spi-max-frequency = <33000000>;
reg = <0>;
diff --git a/arch/arm/boot/dts/at91-kizbox3-hs.dts b/arch/arm/boot/dts/at91-kizbox3-hs.dts
index f7d90cf1bb77..2799b2a1f4d2 100644
--- a/arch/arm/boot/dts/at91-kizbox3-hs.dts
+++ b/arch/arm/boot/dts/at91-kizbox3-hs.dts
@@ -225,7 +225,7 @@
pinctrl_pio_io_reset: gpio_io_reset {
pinmux = <PIN_PB30__GPIO>;
bias-disable;
- drive-open-drain;
+ drive-open-drain = <1>;
output-low;
};
pinctrl_pio_input: gpio_input {
diff --git a/arch/arm/boot/dts/at91-kizbox3_common.dtsi b/arch/arm/boot/dts/at91-kizbox3_common.dtsi
index 465664628419..abe27adfa4d6 100644
--- a/arch/arm/boot/dts/at91-kizbox3_common.dtsi
+++ b/arch/arm/boot/dts/at91-kizbox3_common.dtsi
@@ -211,7 +211,7 @@
pinmux = <PIN_PD12__FLEXCOM4_IO0>, //DATA
<PIN_PD13__FLEXCOM4_IO1>; //CLK
bias-disable;
- drive-open-drain;
+ drive-open-drain = <1>;
};
pinctrl_pwm0 {
diff --git a/arch/arm/boot/dts/at91-q5xr5.dts b/arch/arm/boot/dts/at91-q5xr5.dts
index 47a00062f01f..9cf60b6f695c 100644
--- a/arch/arm/boot/dts/at91-q5xr5.dts
+++ b/arch/arm/boot/dts/at91-q5xr5.dts
@@ -125,7 +125,7 @@
cs-gpios = <&pioA 3 GPIO_ACTIVE_HIGH>, <&pioC 11 GPIO_ACTIVE_LOW>, <0>, <0>;
status = "okay";
- m25p80@0 {
+ flash@0 {
compatible = "jedec,spi-nor";
spi-max-frequency = <20000000>;
reg = <0>;
diff --git a/arch/arm/boot/dts/at91-sama5d27_wlsom1.dtsi b/arch/arm/boot/dts/at91-sama5d27_wlsom1.dtsi
index 21c86171e462..ba621783acdb 100644
--- a/arch/arm/boot/dts/at91-sama5d27_wlsom1.dtsi
+++ b/arch/arm/boot/dts/at91-sama5d27_wlsom1.dtsi
@@ -214,7 +214,7 @@
pinctrl-0 = <&pinctrl_qspi1_default>;
status = "disabled";
- qspi1_flash: spi_flash@0 {
+ qspi1_flash: flash@0 {
#address-cells = <1>;
#size-cells = <1>;
compatible = "jedec,spi-nor";
diff --git a/arch/arm/boot/dts/at91-sama5d27_wlsom1_ek.dts b/arch/arm/boot/dts/at91-sama5d27_wlsom1_ek.dts
index c145c4e5ef58..5e8755f22784 100644
--- a/arch/arm/boot/dts/at91-sama5d27_wlsom1_ek.dts
+++ b/arch/arm/boot/dts/at91-sama5d27_wlsom1_ek.dts
@@ -191,7 +191,7 @@
&qspi1 {
status = "okay";
- qspi1_flash: spi_flash@0 {
+ qspi1_flash: flash@0 {
status = "okay";
};
};
diff --git a/arch/arm/boot/dts/at91-sama5d2_xplained.dts b/arch/arm/boot/dts/at91-sama5d2_xplained.dts
index 9bf2ec0ba3e2..cdfe891f9a9e 100644
--- a/arch/arm/boot/dts/at91-sama5d2_xplained.dts
+++ b/arch/arm/boot/dts/at91-sama5d2_xplained.dts
@@ -137,7 +137,7 @@
pinctrl-0 = <&pinctrl_spi0_default>;
status = "okay";
- m25p80@0 {
+ flash@0 {
compatible = "atmel,at25df321a";
reg = <0>;
spi-max-frequency = <50000000>;
diff --git a/arch/arm/boot/dts/at91-sama5d3_xplained.dts b/arch/arm/boot/dts/at91-sama5d3_xplained.dts
index d72c042f2850..a49c2966b41e 100644
--- a/arch/arm/boot/dts/at91-sama5d3_xplained.dts
+++ b/arch/arm/boot/dts/at91-sama5d3_xplained.dts
@@ -57,8 +57,8 @@
};
spi0: spi@f0004000 {
- pinctrl-names = "default";
- pinctrl-0 = <&pinctrl_spi0_cs>;
+ pinctrl-names = "default", "cs";
+ pinctrl-1 = <&pinctrl_spi0_cs>;
cs-gpios = <&pioD 13 0>, <0>, <0>, <&pioD 16 0>;
status = "okay";
};
@@ -171,8 +171,8 @@
};
spi1: spi@f8008000 {
- pinctrl-names = "default";
- pinctrl-0 = <&pinctrl_spi1_cs>;
+ pinctrl-names = "default", "cs";
+ pinctrl-1 = <&pinctrl_spi1_cs>;
cs-gpios = <&pioC 25 0>;
status = "okay";
};
diff --git a/arch/arm/boot/dts/at91-sama5d4_ma5d4.dtsi b/arch/arm/boot/dts/at91-sama5d4_ma5d4.dtsi
index 710cb72bda5a..fd1086f52b40 100644
--- a/arch/arm/boot/dts/at91-sama5d4_ma5d4.dtsi
+++ b/arch/arm/boot/dts/at91-sama5d4_ma5d4.dtsi
@@ -49,7 +49,7 @@
cs-gpios = <&pioC 3 0>, <0>, <0>, <0>;
status = "okay";
- m25p80@0 {
+ flash@0 {
compatible = "atmel,at25df321a";
spi-max-frequency = <50000000>;
reg = <0>;
diff --git a/arch/arm/boot/dts/at91-sama5d4_xplained.dts b/arch/arm/boot/dts/at91-sama5d4_xplained.dts
index d241c24f0d83..e519d2747936 100644
--- a/arch/arm/boot/dts/at91-sama5d4_xplained.dts
+++ b/arch/arm/boot/dts/at91-sama5d4_xplained.dts
@@ -81,8 +81,8 @@
};
spi1: spi@fc018000 {
- pinctrl-names = "default";
- pinctrl-0 = <&pinctrl_spi0_cs>;
+ pinctrl-names = "default", "cs";
+ pinctrl-1 = <&pinctrl_spi1_cs>;
cs-gpios = <&pioB 21 0>;
status = "okay";
};
@@ -140,7 +140,7 @@
atmel,pins =
<AT91_PIOE 1 AT91_PERIPH_GPIO AT91_PINCTRL_PULL_UP_DEGLITCH>;
};
- pinctrl_spi0_cs: spi0_cs_default {
+ pinctrl_spi1_cs: spi1_cs_default {
atmel,pins =
<AT91_PIOB 21 AT91_PERIPH_GPIO AT91_PINCTRL_NONE>;
};
diff --git a/arch/arm/boot/dts/at91-sama5d4ek.dts b/arch/arm/boot/dts/at91-sama5d4ek.dts
index fe432b6b7e95..7017f626f362 100644
--- a/arch/arm/boot/dts/at91-sama5d4ek.dts
+++ b/arch/arm/boot/dts/at91-sama5d4ek.dts
@@ -65,7 +65,7 @@
spi0: spi@f8010000 {
cs-gpios = <&pioC 3 0>, <0>, <0>, <0>;
status = "okay";
- m25p80@0 {
+ flash@0 {
compatible = "atmel,at25df321a";
spi-max-frequency = <50000000>;
reg = <0>;
diff --git a/arch/arm/boot/dts/at91-sama7g5ek.dts b/arch/arm/boot/dts/at91-sama7g5ek.dts
index 08685a10eda1..d83f76a6cd6a 100644
--- a/arch/arm/boot/dts/at91-sama7g5ek.dts
+++ b/arch/arm/boot/dts/at91-sama7g5ek.dts
@@ -495,7 +495,7 @@
pinctrl_flx3_default: flx3_default {
pinmux = <PIN_PD16__FLEXCOM3_IO0>,
<PIN_PD17__FLEXCOM3_IO1>;
- bias-disable;
+ bias-pull-up;
};
pinctrl_flx4_default: flx4_default {
@@ -655,7 +655,7 @@
<PIN_PB21__QSPI0_INT>;
bias-disable;
slew-rate = <0>;
- atmel,drive-strength = <ATMEL_PIO_DRVSTR_HI>;
+ atmel,drive-strength = <ATMEL_PIO_DRVSTR_ME>;
};
pinctrl_sdmmc0_default: sdmmc0_default {
diff --git a/arch/arm/boot/dts/at91-vinco.dts b/arch/arm/boot/dts/at91-vinco.dts
index a51a3372afa1..ebeaa6ab500e 100644
--- a/arch/arm/boot/dts/at91-vinco.dts
+++ b/arch/arm/boot/dts/at91-vinco.dts
@@ -59,7 +59,7 @@
spi0: spi@f8010000 {
cs-gpios = <&pioC 3 0>, <0>, <0>, <0>;
status = "okay";
- m25p80@0 {
+ flash@0 {
compatible = "n25q32b", "jedec,spi-nor";
spi-max-frequency = <50000000>;
reg = <0>;
diff --git a/arch/arm/boot/dts/at91rm9200ek.dts b/arch/arm/boot/dts/at91rm9200ek.dts
index e1ef4e44e663..4624a6f076f8 100644
--- a/arch/arm/boot/dts/at91rm9200ek.dts
+++ b/arch/arm/boot/dts/at91rm9200ek.dts
@@ -73,7 +73,7 @@
spi0: spi@fffe0000 {
status = "okay";
cs-gpios = <&pioA 3 0>, <0>, <0>, <0>;
- mtd_dataflash@0 {
+ flash@0 {
compatible = "atmel,at45", "atmel,dataflash";
spi-max-frequency = <15000000>;
reg = <0>;
@@ -94,7 +94,7 @@
status = "okay";
};
- nor_flash@10000000 {
+ flash@10000000 {
compatible = "cfi-flash";
reg = <0x10000000 0x800000>;
linux,mtd-name = "physmap-flash.0";
diff --git a/arch/arm/boot/dts/at91sam9260ek.dts b/arch/arm/boot/dts/at91sam9260ek.dts
index ce96345d28a3..6381088ba24f 100644
--- a/arch/arm/boot/dts/at91sam9260ek.dts
+++ b/arch/arm/boot/dts/at91sam9260ek.dts
@@ -92,7 +92,7 @@
spi0: spi@fffc8000 {
cs-gpios = <0>, <&pioC 11 0>, <0>, <0>;
- mtd_dataflash@1 {
+ flash@1 {
compatible = "atmel,at45", "atmel,dataflash";
spi-max-frequency = <50000000>;
reg = <1>;
diff --git a/arch/arm/boot/dts/at91sam9261ek.dts b/arch/arm/boot/dts/at91sam9261ek.dts
index beed819609e8..8f11c0b7d76d 100644
--- a/arch/arm/boot/dts/at91sam9261ek.dts
+++ b/arch/arm/boot/dts/at91sam9261ek.dts
@@ -145,7 +145,7 @@
cs-gpios = <&pioA 3 0>, <0>, <&pioA 28 0>, <0>;
status = "okay";
- mtd_dataflash@0 {
+ flash@0 {
compatible = "atmel,at45", "atmel,dataflash";
reg = <0>;
spi-max-frequency = <15000000>;
diff --git a/arch/arm/boot/dts/at91sam9263ek.dts b/arch/arm/boot/dts/at91sam9263ek.dts
index 71f60576761a..42e734020235 100644
--- a/arch/arm/boot/dts/at91sam9263ek.dts
+++ b/arch/arm/boot/dts/at91sam9263ek.dts
@@ -95,7 +95,7 @@
spi0: spi@fffa4000 {
status = "okay";
cs-gpios = <&pioA 5 0>, <0>, <0>, <0>;
- mtd_dataflash@0 {
+ flash@0 {
compatible = "atmel,at45", "atmel,dataflash";
spi-max-frequency = <50000000>;
reg = <0>;
diff --git a/arch/arm/boot/dts/at91sam9g20ek_common.dtsi b/arch/arm/boot/dts/at91sam9g20ek_common.dtsi
index 87bb39060e8b..85c17dd1c8d5 100644
--- a/arch/arm/boot/dts/at91sam9g20ek_common.dtsi
+++ b/arch/arm/boot/dts/at91sam9g20ek_common.dtsi
@@ -110,7 +110,7 @@
spi0: spi@fffc8000 {
cs-gpios = <0>, <&pioC 11 0>, <0>, <0>;
- mtd_dataflash@1 {
+ flash@1 {
compatible = "atmel,at45", "atmel,dataflash";
spi-max-frequency = <50000000>;
reg = <1>;
@@ -214,11 +214,23 @@
24c512@50 {
compatible = "atmel,24c512";
reg = <0x50>;
+ vcc-supply = <&reg_3v3>;
};
wm8731: wm8731@1b {
compatible = "wm8731";
reg = <0x1b>;
+
+ /* PCK0 at 12MHz */
+ clocks = <&pmc PMC_TYPE_SYSTEM 8>;
+ clock-names = "mclk";
+ assigned-clocks = <&pmc PMC_TYPE_SYSTEM 8>;
+ assigned-clock-rates = <12000000>;
+
+ HPVDD-supply = <&vcc_dac>;
+ AVDD-supply = <&vcc_dac>;
+ DCVDD-supply = <&reg_3v3>;
+ DBVDD-supply = <&reg_3v3>;
};
};
@@ -254,4 +266,35 @@
atmel,ssc-controller = <&ssc0>;
atmel,audio-codec = <&wm8731>;
};
+
+ reg_5v: fixedregulator0 {
+ compatible = "regulator-fixed";
+ regulator-name = "5V";
+ regulator-min-microvolt = <5000000>;
+ regulator-max-microvolt = <5000000>;
+ };
+
+ reg_3v3: fixedregulator1 {
+ compatible = "regulator-fixed";
+ regulator-name = "3V3";
+ vin-supply = <&reg_5v>;
+ regulator-min-microvolt = <3300000>;
+ regulator-max-microvolt = <3300000>;
+ };
+
+ reg_1v: fixedregulator2 {
+ compatible = "regulator-fixed";
+ regulator-name = "1V";
+ vin-supply = <&reg_5v>;
+ regulator-min-microvolt = <1000000>;
+ regulator-max-microvolt = <1000000>;
+ };
+
+ vcc_dac: fixedregulator3 {
+ compatible = "regulator-fixed";
+ regulator-name = "VCC_DAC";
+ vin-supply = <&reg_3v3>;
+ regulator-min-microvolt = <3300000>;
+ regulator-max-microvolt = <3300000>;
+ };
};
diff --git a/arch/arm/boot/dts/at91sam9m10g45ek.dts b/arch/arm/boot/dts/at91sam9m10g45ek.dts
index b6256a20fbc7..e5db198a87a8 100644
--- a/arch/arm/boot/dts/at91sam9m10g45ek.dts
+++ b/arch/arm/boot/dts/at91sam9m10g45ek.dts
@@ -167,7 +167,7 @@
spi0: spi@fffa4000{
status = "okay";
cs-gpios = <&pioB 3 0>, <0>, <0>, <0>;
- mtd_dataflash@0 {
+ flash@0 {
compatible = "atmel,at45", "atmel,dataflash";
spi-max-frequency = <13000000>;
reg = <0>;
diff --git a/arch/arm/boot/dts/at91sam9n12ek.dts b/arch/arm/boot/dts/at91sam9n12ek.dts
index 2bc4e6e0a923..c905d7bfc771 100644
--- a/arch/arm/boot/dts/at91sam9n12ek.dts
+++ b/arch/arm/boot/dts/at91sam9n12ek.dts
@@ -119,7 +119,7 @@
spi0: spi@f0000000 {
status = "okay";
cs-gpios = <&pioA 14 0>, <0>, <0>, <0>;
- m25p80@0 {
+ flash@0 {
compatible = "atmel,at25df321a";
spi-max-frequency = <50000000>;
reg = <0>;
diff --git a/arch/arm/boot/dts/at91sam9rlek.dts b/arch/arm/boot/dts/at91sam9rlek.dts
index 62981b39c815..d74b8d9d84aa 100644
--- a/arch/arm/boot/dts/at91sam9rlek.dts
+++ b/arch/arm/boot/dts/at91sam9rlek.dts
@@ -180,7 +180,7 @@
spi0: spi@fffcc000 {
status = "okay";
cs-gpios = <&pioA 28 0>, <0>, <0>, <0>;
- mtd_dataflash@0 {
+ flash@0 {
compatible = "atmel,at45", "atmel,dataflash";
spi-max-frequency = <15000000>;
reg = <0>;
diff --git a/arch/arm/boot/dts/at91sam9x5ek.dtsi b/arch/arm/boot/dts/at91sam9x5ek.dtsi
index 6d1264de6060..5f4eaa618ab4 100644
--- a/arch/arm/boot/dts/at91sam9x5ek.dtsi
+++ b/arch/arm/boot/dts/at91sam9x5ek.dtsi
@@ -125,7 +125,7 @@
cs-gpios = <&pioA 14 0>, <0>, <0>, <0>;
status = "disabled"; /* conflicts with mmc1 */
- m25p80@0 {
+ flash@0 {
compatible = "atmel,at25df321a";
spi-max-frequency = <50000000>;
reg = <0>;
diff --git a/arch/arm/boot/dts/dra7-l4.dtsi b/arch/arm/boot/dts/dra7-l4.dtsi
index 0a11bacffc1f..5733e3a4ea8e 100644
--- a/arch/arm/boot/dts/dra7-l4.dtsi
+++ b/arch/arm/boot/dts/dra7-l4.dtsi
@@ -4188,11 +4188,11 @@
reg = <0x1d0010 0x4>;
reg-names = "sysc";
ti,sysc-midle = <SYSC_IDLE_FORCE>,
- <SYSC_IDLE_NO>,
- <SYSC_IDLE_SMART>;
+ <SYSC_IDLE_NO>;
ti,sysc-sidle = <SYSC_IDLE_FORCE>,
<SYSC_IDLE_NO>,
<SYSC_IDLE_SMART>;
+ power-domains = <&prm_vpe>;
clocks = <&vpe_clkctrl DRA7_VPE_VPE_CLKCTRL 0>;
clock-names = "fck";
#address-cells = <1>;
diff --git a/arch/arm/boot/dts/imx6qdl-apalis.dtsi b/arch/arm/boot/dts/imx6qdl-apalis.dtsi
index ed2739e39085..bd763bae596b 100644
--- a/arch/arm/boot/dts/imx6qdl-apalis.dtsi
+++ b/arch/arm/boot/dts/imx6qdl-apalis.dtsi
@@ -286,6 +286,8 @@
codec: sgtl5000@a {
compatible = "fsl,sgtl5000";
reg = <0x0a>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_sgtl5000>;
clocks = <&clks IMX6QDL_CLK_CKO>;
VDDA-supply = <&reg_module_3v3_audio>;
VDDIO-supply = <&reg_module_3v3>;
@@ -517,8 +519,6 @@
MX6QDL_PAD_DISP0_DAT21__AUD4_TXD 0x130b0
MX6QDL_PAD_DISP0_DAT22__AUD4_TXFS 0x130b0
MX6QDL_PAD_DISP0_DAT23__AUD4_RXD 0x130b0
- /* SGTL5000 sys_mclk */
- MX6QDL_PAD_GPIO_5__CCM_CLKO1 0x130b0
>;
};
@@ -811,6 +811,12 @@
>;
};
+ pinctrl_sgtl5000: sgtl5000grp {
+ fsl,pins = <
+ MX6QDL_PAD_GPIO_5__CCM_CLKO1 0x130b0
+ >;
+ };
+
pinctrl_spdif: spdifgrp {
fsl,pins = <
MX6QDL_PAD_GPIO_16__SPDIF_IN 0x1b0b0
diff --git a/arch/arm/boot/dts/imx6ull-colibri.dtsi b/arch/arm/boot/dts/imx6ull-colibri.dtsi
index 7f35a06dff95..951a2a6c5a65 100644
--- a/arch/arm/boot/dts/imx6ull-colibri.dtsi
+++ b/arch/arm/boot/dts/imx6ull-colibri.dtsi
@@ -37,7 +37,7 @@
reg_sd1_vmmc: regulator-sd1-vmmc {
compatible = "regulator-gpio";
- gpio = <&gpio5 9 GPIO_ACTIVE_HIGH>;
+ gpios = <&gpio5 9 GPIO_ACTIVE_HIGH>;
pinctrl-names = "default";
pinctrl-0 = <&pinctrl_snvs_reg_sd>;
regulator-always-on;
diff --git a/arch/arm/boot/dts/logicpd-som-lv-35xx-devkit.dts b/arch/arm/boot/dts/logicpd-som-lv-35xx-devkit.dts
index 2a0a98fe67f0..3240c67e0c39 100644
--- a/arch/arm/boot/dts/logicpd-som-lv-35xx-devkit.dts
+++ b/arch/arm/boot/dts/logicpd-som-lv-35xx-devkit.dts
@@ -11,3 +11,18 @@
model = "LogicPD Zoom OMAP35xx SOM-LV Development Kit";
compatible = "logicpd,dm3730-som-lv-devkit", "ti,omap3430", "ti,omap3";
};
+
+&omap3_pmx_core2 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&hsusb2_2_pins>;
+ hsusb2_2_pins: pinmux_hsusb2_2_pins {
+ pinctrl-single,pins = <
+ OMAP3430_CORE2_IOPAD(0x25f0, PIN_OUTPUT | MUX_MODE3) /* etk_d10.hsusb2_clk */
+ OMAP3430_CORE2_IOPAD(0x25f2, PIN_OUTPUT | MUX_MODE3) /* etk_d11.hsusb2_stp */
+ OMAP3430_CORE2_IOPAD(0x25f4, PIN_INPUT_PULLDOWN | MUX_MODE3) /* etk_d12.hsusb2_dir */
+ OMAP3430_CORE2_IOPAD(0x25f6, PIN_INPUT_PULLDOWN | MUX_MODE3) /* etk_d13.hsusb2_nxt */
+ OMAP3430_CORE2_IOPAD(0x25f8, PIN_INPUT_PULLDOWN | MUX_MODE3) /* etk_d14.hsusb2_data0 */
+ OMAP3430_CORE2_IOPAD(0x25fa, PIN_INPUT_PULLDOWN | MUX_MODE3) /* etk_d15.hsusb2_data1 */
+ >;
+ };
+};
diff --git a/arch/arm/boot/dts/logicpd-som-lv-37xx-devkit.dts b/arch/arm/boot/dts/logicpd-som-lv-37xx-devkit.dts
index a604d92221a4..c757f0d7781c 100644
--- a/arch/arm/boot/dts/logicpd-som-lv-37xx-devkit.dts
+++ b/arch/arm/boot/dts/logicpd-som-lv-37xx-devkit.dts
@@ -11,3 +11,18 @@
model = "LogicPD Zoom DM3730 SOM-LV Development Kit";
compatible = "logicpd,dm3730-som-lv-devkit", "ti,omap3630", "ti,omap3";
};
+
+&omap3_pmx_core2 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&hsusb2_2_pins>;
+ hsusb2_2_pins: pinmux_hsusb2_2_pins {
+ pinctrl-single,pins = <
+ OMAP3630_CORE2_IOPAD(0x25f0, PIN_OUTPUT | MUX_MODE3) /* etk_d10.hsusb2_clk */
+ OMAP3630_CORE2_IOPAD(0x25f2, PIN_OUTPUT | MUX_MODE3) /* etk_d11.hsusb2_stp */
+ OMAP3630_CORE2_IOPAD(0x25f4, PIN_INPUT_PULLDOWN | MUX_MODE3) /* etk_d12.hsusb2_dir */
+ OMAP3630_CORE2_IOPAD(0x25f6, PIN_INPUT_PULLDOWN | MUX_MODE3) /* etk_d13.hsusb2_nxt */
+ OMAP3630_CORE2_IOPAD(0x25f8, PIN_INPUT_PULLDOWN | MUX_MODE3) /* etk_d14.hsusb2_data0 */
+ OMAP3630_CORE2_IOPAD(0x25fa, PIN_INPUT_PULLDOWN | MUX_MODE3) /* etk_d15.hsusb2_data1 */
+ >;
+ };
+};
diff --git a/arch/arm/boot/dts/logicpd-som-lv.dtsi b/arch/arm/boot/dts/logicpd-som-lv.dtsi
index b56524cc7fe2..55b619c99e24 100644
--- a/arch/arm/boot/dts/logicpd-som-lv.dtsi
+++ b/arch/arm/boot/dts/logicpd-som-lv.dtsi
@@ -265,21 +265,6 @@
};
};
-&omap3_pmx_core2 {
- pinctrl-names = "default";
- pinctrl-0 = <&hsusb2_2_pins>;
- hsusb2_2_pins: pinmux_hsusb2_2_pins {
- pinctrl-single,pins = <
- OMAP3630_CORE2_IOPAD(0x25f0, PIN_OUTPUT | MUX_MODE3) /* etk_d10.hsusb2_clk */
- OMAP3630_CORE2_IOPAD(0x25f2, PIN_OUTPUT | MUX_MODE3) /* etk_d11.hsusb2_stp */
- OMAP3630_CORE2_IOPAD(0x25f4, PIN_INPUT_PULLDOWN | MUX_MODE3) /* etk_d12.hsusb2_dir */
- OMAP3630_CORE2_IOPAD(0x25f6, PIN_INPUT_PULLDOWN | MUX_MODE3) /* etk_d13.hsusb2_nxt */
- OMAP3630_CORE2_IOPAD(0x25f8, PIN_INPUT_PULLDOWN | MUX_MODE3) /* etk_d14.hsusb2_data0 */
- OMAP3630_CORE2_IOPAD(0x25fa, PIN_INPUT_PULLDOWN | MUX_MODE3) /* etk_d15.hsusb2_data1 */
- >;
- };
-};
-
&uart2 {
interrupts-extended = <&intc 73 &omap3_pmx_core OMAP3_UART2_RX>;
pinctrl-names = "default";
diff --git a/arch/arm/boot/dts/omap3-gta04.dtsi b/arch/arm/boot/dts/omap3-gta04.dtsi
index 7e3d8147e2c1..0365f06165e9 100644
--- a/arch/arm/boot/dts/omap3-gta04.dtsi
+++ b/arch/arm/boot/dts/omap3-gta04.dtsi
@@ -31,6 +31,8 @@
aliases {
display0 = &lcd;
display1 = &tv0;
+ /delete-property/ mmc2;
+ /delete-property/ mmc3;
};
ldo_3v3: fixedregulator {
diff --git a/arch/arm/boot/dts/sama5d3xmb.dtsi b/arch/arm/boot/dts/sama5d3xmb.dtsi
index a499de8a7a64..3652c9e24124 100644
--- a/arch/arm/boot/dts/sama5d3xmb.dtsi
+++ b/arch/arm/boot/dts/sama5d3xmb.dtsi
@@ -26,7 +26,7 @@
spi0: spi@f0004000 {
dmas = <0>, <0>; /* Do not use DMA for spi0 */
- m25p80@0 {
+ flash@0 {
compatible = "atmel,at25df321a";
spi-max-frequency = <50000000>;
reg = <0>;
diff --git a/arch/arm/boot/dts/sama5d3xmb_cmp.dtsi b/arch/arm/boot/dts/sama5d3xmb_cmp.dtsi
index fa9e5e2a745d..5d9e97fecf83 100644
--- a/arch/arm/boot/dts/sama5d3xmb_cmp.dtsi
+++ b/arch/arm/boot/dts/sama5d3xmb_cmp.dtsi
@@ -25,7 +25,7 @@
spi0: spi@f0004000 {
dmas = <0>, <0>; /* Do not use DMA for spi0 */
- m25p80@0 {
+ flash@0 {
compatible = "atmel,at25df321a";
spi-max-frequency = <50000000>;
reg = <0>;
diff --git a/arch/arm/boot/dts/sama7g5.dtsi b/arch/arm/boot/dts/sama7g5.dtsi
index 4decd3a91a76..f691c8f08d04 100644
--- a/arch/arm/boot/dts/sama7g5.dtsi
+++ b/arch/arm/boot/dts/sama7g5.dtsi
@@ -601,9 +601,9 @@
#size-cells = <0>;
clocks = <&pmc PMC_TYPE_PERIPHERAL 39>;
atmel,fifo-size = <32>;
- dmas = <&dma0 AT91_XDMAC_DT_PERID(7)>,
- <&dma0 AT91_XDMAC_DT_PERID(8)>;
- dma-names = "rx", "tx";
+ dmas = <&dma0 AT91_XDMAC_DT_PERID(8)>,
+ <&dma0 AT91_XDMAC_DT_PERID(7)>;
+ dma-names = "tx", "rx";
status = "disabled";
};
};
@@ -786,9 +786,9 @@
#size-cells = <0>;
clocks = <&pmc PMC_TYPE_PERIPHERAL 46>;
atmel,fifo-size = <32>;
- dmas = <&dma0 AT91_XDMAC_DT_PERID(21)>,
- <&dma0 AT91_XDMAC_DT_PERID(22)>;
- dma-names = "rx", "tx";
+ dmas = <&dma0 AT91_XDMAC_DT_PERID(22)>,
+ <&dma0 AT91_XDMAC_DT_PERID(21)>;
+ dma-names = "tx", "rx";
status = "disabled";
};
};
@@ -810,9 +810,9 @@
#size-cells = <0>;
clocks = <&pmc PMC_TYPE_PERIPHERAL 47>;
atmel,fifo-size = <32>;
- dmas = <&dma0 AT91_XDMAC_DT_PERID(23)>,
- <&dma0 AT91_XDMAC_DT_PERID(24)>;
- dma-names = "rx", "tx";
+ dmas = <&dma0 AT91_XDMAC_DT_PERID(24)>,
+ <&dma0 AT91_XDMAC_DT_PERID(23)>;
+ dma-names = "tx", "rx";
status = "disabled";
};
};
diff --git a/arch/arm/boot/dts/usb_a9263.dts b/arch/arm/boot/dts/usb_a9263.dts
index 8a0cfbfd0c45..b6cb9cdf8197 100644
--- a/arch/arm/boot/dts/usb_a9263.dts
+++ b/arch/arm/boot/dts/usb_a9263.dts
@@ -60,7 +60,7 @@
spi0: spi@fffa4000 {
cs-gpios = <&pioB 15 GPIO_ACTIVE_HIGH>;
status = "okay";
- mtd_dataflash@0 {
+ flash@0 {
compatible = "atmel,at45", "atmel,dataflash";
reg = <0>;
spi-max-frequency = <15000000>;
diff --git a/arch/arm/configs/multi_v7_defconfig b/arch/arm/configs/multi_v7_defconfig
index 6e0c8c19b35c..d6a6811f0539 100644
--- a/arch/arm/configs/multi_v7_defconfig
+++ b/arch/arm/configs/multi_v7_defconfig
@@ -673,6 +673,7 @@ CONFIG_VIDEO_STI_DELTA=m
CONFIG_VIDEO_RENESAS_FDP1=m
CONFIG_VIDEO_RENESAS_JPU=m
CONFIG_VIDEO_RENESAS_VSP1=m
+CONFIG_VIDEO_TEGRA_VDE=m
CONFIG_V4L_TEST_DRIVERS=y
CONFIG_VIDEO_VIVID=m
CONFIG_VIDEO_ADV7180=m
diff --git a/arch/arm/configs/tegra_defconfig b/arch/arm/configs/tegra_defconfig
index 289d022acc4b..c209722399d7 100644
--- a/arch/arm/configs/tegra_defconfig
+++ b/arch/arm/configs/tegra_defconfig
@@ -286,7 +286,8 @@ CONFIG_SERIO_NVEC_PS2=y
CONFIG_NVEC_POWER=y
CONFIG_NVEC_PAZ00=y
CONFIG_STAGING_MEDIA=y
-CONFIG_TEGRA_VDE=y
+CONFIG_V4L_MEM2MEM_DRIVERS=y
+CONFIG_VIDEO_TEGRA_VDE=y
CONFIG_CHROME_PLATFORMS=y
CONFIG_CROS_EC=y
CONFIG_CROS_EC_I2C=m
diff --git a/arch/arm/include/asm/io.h b/arch/arm/include/asm/io.h
index 0c70eb688a00..2a0739a2350b 100644
--- a/arch/arm/include/asm/io.h
+++ b/arch/arm/include/asm/io.h
@@ -440,6 +440,9 @@ extern void pci_iounmap(struct pci_dev *dev, void __iomem *addr);
#define ARCH_HAS_VALID_PHYS_ADDR_RANGE
extern int valid_phys_addr_range(phys_addr_t addr, size_t size);
extern int valid_mmap_phys_addr_range(unsigned long pfn, size_t size);
+extern bool arch_memremap_can_ram_remap(resource_size_t offset, size_t size,
+ unsigned long flags);
+#define arch_memremap_can_ram_remap arch_memremap_can_ram_remap
#endif
/*
diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
index 06508698abb8..7a8682468a84 100644
--- a/arch/arm/kernel/entry-armv.S
+++ b/arch/arm/kernel/entry-armv.S
@@ -1145,7 +1145,7 @@ vector_bhb_loop8_\name:
@ bhb workaround
mov r0, #8
-3: b . + 4
+3: W(b) . + 4
subs r0, r0, #1
bne 3b
dsb
diff --git a/arch/arm/mach-exynos/Kconfig b/arch/arm/mach-exynos/Kconfig
index f7d993628cb7..a9c1efcf7c9c 100644
--- a/arch/arm/mach-exynos/Kconfig
+++ b/arch/arm/mach-exynos/Kconfig
@@ -17,7 +17,6 @@ menuconfig ARCH_EXYNOS
select EXYNOS_PMU
select EXYNOS_SROM
select EXYNOS_PM_DOMAINS if PM_GENERIC_DOMAINS
- select GPIOLIB
select HAVE_ARM_ARCH_TIMER if ARCH_EXYNOS5
select HAVE_ARM_SCU if SMP
select PINCTRL
diff --git a/arch/arm/mach-omap2/omap4-common.c b/arch/arm/mach-omap2/omap4-common.c
index 5c3845730dbf..0b80f8bcd304 100644
--- a/arch/arm/mach-omap2/omap4-common.c
+++ b/arch/arm/mach-omap2/omap4-common.c
@@ -314,10 +314,12 @@ void __init omap_gic_of_init(void)
np = of_find_compatible_node(NULL, NULL, "arm,cortex-a9-gic");
gic_dist_base_addr = of_iomap(np, 0);
+ of_node_put(np);
WARN_ON(!gic_dist_base_addr);
np = of_find_compatible_node(NULL, NULL, "arm,cortex-a9-twd-timer");
twd_base = of_iomap(np, 0);
+ of_node_put(np);
WARN_ON(!twd_base);
skip_errata_init:
diff --git a/arch/arm/mm/ioremap.c b/arch/arm/mm/ioremap.c
index aa08bcb72db9..290702328a33 100644
--- a/arch/arm/mm/ioremap.c
+++ b/arch/arm/mm/ioremap.c
@@ -493,3 +493,11 @@ void __init early_ioremap_init(void)
{
early_ioremap_setup();
}
+
+bool arch_memremap_can_ram_remap(resource_size_t offset, size_t size,
+ unsigned long flags)
+{
+ unsigned long pfn = PHYS_PFN(offset);
+
+ return memblock_is_map_memory(pfn);
+}
diff --git a/arch/arm/mm/proc-v7-bugs.c b/arch/arm/mm/proc-v7-bugs.c
index 06dbfb968182..fb9f3eb6bf48 100644
--- a/arch/arm/mm/proc-v7-bugs.c
+++ b/arch/arm/mm/proc-v7-bugs.c
@@ -288,6 +288,7 @@ void cpu_v7_ca15_ibe(void)
{
if (check_spectre_auxcr(this_cpu_ptr(&spectre_warned), BIT(0)))
cpu_v7_spectre_v2_init();
+ cpu_v7_spectre_bhb_init();
}
void cpu_v7_bugs_init(void)
diff --git a/arch/arm/xen/enlighten.c b/arch/arm/xen/enlighten.c
index ec5b082f3de6..07eb69f9e7df 100644
--- a/arch/arm/xen/enlighten.c
+++ b/arch/arm/xen/enlighten.c
@@ -337,12 +337,15 @@ int __init arch_xen_unpopulated_init(struct resource **res)
if (!nr_reg) {
pr_err("No extended regions are found\n");
+ of_node_put(np);
return -EINVAL;
}
regs = kcalloc(nr_reg, sizeof(*regs), GFP_KERNEL);
- if (!regs)
+ if (!regs) {
+ of_node_put(np);
return -ENOMEM;
+ }
/*
* Create resource from extended regions provided by the hypervisor to be
@@ -403,8 +406,8 @@ int __init arch_xen_unpopulated_init(struct resource **res)
*res = &xen_resource;
err:
+ of_node_put(np);
kfree(regs);
-
return rc;
}
#endif
@@ -424,8 +427,10 @@ static void __init xen_dt_guest_init(void)
if (of_address_to_resource(xen_node, GRANT_TABLE_INDEX, &res)) {
pr_err("Xen grant table region is not found\n");
+ of_node_put(xen_node);
return;
}
+ of_node_put(xen_node);
xen_grant_frames = res.start;
}
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 57c4c995965f..20ea89d9ac2f 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -175,8 +175,6 @@ config ARM64
select HAVE_DEBUG_KMEMLEAK
select HAVE_DMA_CONTIGUOUS
select HAVE_DYNAMIC_FTRACE
- select HAVE_DYNAMIC_FTRACE_WITH_REGS \
- if $(cc-option,-fpatchable-function-entry=2)
select FTRACE_MCOUNT_USE_PATCHABLE_FUNCTION_ENTRY \
if DYNAMIC_FTRACE_WITH_REGS
select HAVE_EFFICIENT_UNALIGNED_ACCESS
@@ -228,6 +226,17 @@ config ARM64
help
ARM 64-bit (AArch64) Linux support.
+config CLANG_SUPPORTS_DYNAMIC_FTRACE_WITH_REGS
+ def_bool CC_IS_CLANG
+ # https://github.com/ClangBuiltLinux/linux/issues/1507
+ depends on AS_IS_GNU || (AS_IS_LLVM && (LD_IS_LLD || LD_VERSION >= 23600))
+ select HAVE_DYNAMIC_FTRACE_WITH_REGS
+
+config GCC_SUPPORTS_DYNAMIC_FTRACE_WITH_REGS
+ def_bool CC_IS_GCC
+ depends on $(cc-option,-fpatchable-function-entry=2)
+ select HAVE_DYNAMIC_FTRACE_WITH_REGS
+
config 64BIT
def_bool y
@@ -678,7 +687,7 @@ config ARM64_ERRATUM_2051678
default y
help
This options adds the workaround for ARM Cortex-A510 erratum ARM64_ERRATUM_2051678.
- Affected Coretex-A510 might not respect the ordering rules for
+ Affected Cortex-A510 might not respect the ordering rules for
hardware update of the page table's dirty bit. The workaround
is to not enable the feature on affected CPUs.
diff --git a/arch/arm64/boot/dts/amlogic/meson-g12b-a311d.dtsi b/arch/arm64/boot/dts/amlogic/meson-g12b-a311d.dtsi
index d61f43052a34..8e9ad1e51d66 100644
--- a/arch/arm64/boot/dts/amlogic/meson-g12b-a311d.dtsi
+++ b/arch/arm64/boot/dts/amlogic/meson-g12b-a311d.dtsi
@@ -11,26 +11,6 @@
compatible = "operating-points-v2";
opp-shared;
- opp-100000000 {
- opp-hz = /bits/ 64 <100000000>;
- opp-microvolt = <731000>;
- };
-
- opp-250000000 {
- opp-hz = /bits/ 64 <250000000>;
- opp-microvolt = <731000>;
- };
-
- opp-500000000 {
- opp-hz = /bits/ 64 <500000000>;
- opp-microvolt = <731000>;
- };
-
- opp-667000000 {
- opp-hz = /bits/ 64 <667000000>;
- opp-microvolt = <731000>;
- };
-
opp-1000000000 {
opp-hz = /bits/ 64 <1000000000>;
opp-microvolt = <761000>;
@@ -71,26 +51,6 @@
compatible = "operating-points-v2";
opp-shared;
- opp-100000000 {
- opp-hz = /bits/ 64 <100000000>;
- opp-microvolt = <731000>;
- };
-
- opp-250000000 {
- opp-hz = /bits/ 64 <250000000>;
- opp-microvolt = <731000>;
- };
-
- opp-500000000 {
- opp-hz = /bits/ 64 <500000000>;
- opp-microvolt = <731000>;
- };
-
- opp-667000000 {
- opp-hz = /bits/ 64 <667000000>;
- opp-microvolt = <731000>;
- };
-
opp-1000000000 {
opp-hz = /bits/ 64 <1000000000>;
opp-microvolt = <731000>;
diff --git a/arch/arm64/boot/dts/amlogic/meson-g12b-s922x.dtsi b/arch/arm64/boot/dts/amlogic/meson-g12b-s922x.dtsi
index 1e5d0ee5d541..44c23c984034 100644
--- a/arch/arm64/boot/dts/amlogic/meson-g12b-s922x.dtsi
+++ b/arch/arm64/boot/dts/amlogic/meson-g12b-s922x.dtsi
@@ -11,26 +11,6 @@
compatible = "operating-points-v2";
opp-shared;
- opp-100000000 {
- opp-hz = /bits/ 64 <100000000>;
- opp-microvolt = <731000>;
- };
-
- opp-250000000 {
- opp-hz = /bits/ 64 <250000000>;
- opp-microvolt = <731000>;
- };
-
- opp-500000000 {
- opp-hz = /bits/ 64 <500000000>;
- opp-microvolt = <731000>;
- };
-
- opp-667000000 {
- opp-hz = /bits/ 64 <667000000>;
- opp-microvolt = <731000>;
- };
-
opp-1000000000 {
opp-hz = /bits/ 64 <1000000000>;
opp-microvolt = <731000>;
@@ -76,26 +56,6 @@
compatible = "operating-points-v2";
opp-shared;
- opp-100000000 {
- opp-hz = /bits/ 64 <100000000>;
- opp-microvolt = <751000>;
- };
-
- opp-250000000 {
- opp-hz = /bits/ 64 <250000000>;
- opp-microvolt = <751000>;
- };
-
- opp-500000000 {
- opp-hz = /bits/ 64 <500000000>;
- opp-microvolt = <751000>;
- };
-
- opp-667000000 {
- opp-hz = /bits/ 64 <667000000>;
- opp-microvolt = <751000>;
- };
-
opp-1000000000 {
opp-hz = /bits/ 64 <1000000000>;
opp-microvolt = <771000>;
diff --git a/arch/arm64/boot/dts/amlogic/meson-s4.dtsi b/arch/arm64/boot/dts/amlogic/meson-s4.dtsi
index bf9ae1e1016b..480afa2cc61f 100644
--- a/arch/arm64/boot/dts/amlogic/meson-s4.dtsi
+++ b/arch/arm64/boot/dts/amlogic/meson-s4.dtsi
@@ -13,28 +13,28 @@
cpu0: cpu@0 {
device_type = "cpu";
- compatible = "arm,cortex-a35","arm,armv8";
+ compatible = "arm,cortex-a35";
reg = <0x0 0x0>;
enable-method = "psci";
};
cpu1: cpu@1 {
device_type = "cpu";
- compatible = "arm,cortex-a35","arm,armv8";
+ compatible = "arm,cortex-a35";
reg = <0x0 0x1>;
enable-method = "psci";
};
cpu2: cpu@2 {
device_type = "cpu";
- compatible = "arm,cortex-a35","arm,armv8";
+ compatible = "arm,cortex-a35";
reg = <0x0 0x2>;
enable-method = "psci";
};
cpu3: cpu@3 {
device_type = "cpu";
- compatible = "arm,cortex-a35","arm,armv8";
+ compatible = "arm,cortex-a35";
reg = <0x0 0x3>;
enable-method = "psci";
};
diff --git a/arch/arm64/boot/dts/amlogic/meson-sm1-bananapi-m5.dts b/arch/arm64/boot/dts/amlogic/meson-sm1-bananapi-m5.dts
index 5751c48620ed..cadba194b149 100644
--- a/arch/arm64/boot/dts/amlogic/meson-sm1-bananapi-m5.dts
+++ b/arch/arm64/boot/dts/amlogic/meson-sm1-bananapi-m5.dts
@@ -437,6 +437,7 @@
"",
"eMMC_RST#", /* BOOT_12 */
"eMMC_DS", /* BOOT_13 */
+ "", "",
/* GPIOC */
"SD_D0_B", /* GPIOC_0 */
"SD_D1_B", /* GPIOC_1 */
diff --git a/arch/arm64/boot/dts/amlogic/meson-sm1.dtsi b/arch/arm64/boot/dts/amlogic/meson-sm1.dtsi
index 3c07a89bfd27..80737731af3f 100644
--- a/arch/arm64/boot/dts/amlogic/meson-sm1.dtsi
+++ b/arch/arm64/boot/dts/amlogic/meson-sm1.dtsi
@@ -95,26 +95,6 @@
compatible = "operating-points-v2";
opp-shared;
- opp-100000000 {
- opp-hz = /bits/ 64 <100000000>;
- opp-microvolt = <730000>;
- };
-
- opp-250000000 {
- opp-hz = /bits/ 64 <250000000>;
- opp-microvolt = <730000>;
- };
-
- opp-500000000 {
- opp-hz = /bits/ 64 <500000000>;
- opp-microvolt = <730000>;
- };
-
- opp-667000000 {
- opp-hz = /bits/ 64 <666666666>;
- opp-microvolt = <750000>;
- };
-
opp-1000000000 {
opp-hz = /bits/ 64 <1000000000>;
opp-microvolt = <770000>;
diff --git a/arch/arm64/boot/dts/freescale/imx8mm-venice-gw71xx.dtsi b/arch/arm64/boot/dts/freescale/imx8mm-venice-gw71xx.dtsi
index 73addc0b8e57..cce55c3c5df0 100644
--- a/arch/arm64/boot/dts/freescale/imx8mm-venice-gw71xx.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8mm-venice-gw71xx.dtsi
@@ -146,12 +146,14 @@
&usbotg1 {
dr_mode = "otg";
+ over-current-active-low;
vbus-supply = <&reg_usb_otg1_vbus>;
status = "okay";
};
&usbotg2 {
dr_mode = "host";
+ disable-over-current;
status = "okay";
};
@@ -215,7 +217,7 @@
fsl,pins = <
MX8MM_IOMUXC_ECSPI2_SCLK_ECSPI2_SCLK 0xd6
MX8MM_IOMUXC_ECSPI2_MOSI_ECSPI2_MOSI 0xd6
- MX8MM_IOMUXC_ECSPI2_SCLK_ECSPI2_SCLK 0xd6
+ MX8MM_IOMUXC_ECSPI2_MISO_ECSPI2_MISO 0xd6
MX8MM_IOMUXC_ECSPI2_SS0_GPIO5_IO13 0xd6
>;
};
diff --git a/arch/arm64/boot/dts/freescale/imx8mm-venice-gw72xx.dtsi b/arch/arm64/boot/dts/freescale/imx8mm-venice-gw72xx.dtsi
index 1e7badb2a82e..f61e4847fa49 100644
--- a/arch/arm64/boot/dts/freescale/imx8mm-venice-gw72xx.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8mm-venice-gw72xx.dtsi
@@ -211,12 +211,14 @@
&usbotg1 {
dr_mode = "otg";
+ over-current-active-low;
vbus-supply = <&reg_usb_otg1_vbus>;
status = "okay";
};
&usbotg2 {
dr_mode = "host";
+ disable-over-current;
vbus-supply = <&reg_usb_otg2_vbus>;
status = "okay";
};
@@ -309,7 +311,7 @@
fsl,pins = <
MX8MM_IOMUXC_ECSPI2_SCLK_ECSPI2_SCLK 0xd6
MX8MM_IOMUXC_ECSPI2_MOSI_ECSPI2_MOSI 0xd6
- MX8MM_IOMUXC_ECSPI2_SCLK_ECSPI2_SCLK 0xd6
+ MX8MM_IOMUXC_ECSPI2_MISO_ECSPI2_MISO 0xd6
MX8MM_IOMUXC_ECSPI2_SS0_GPIO5_IO13 0xd6
>;
};
diff --git a/arch/arm64/boot/dts/freescale/imx8mm-venice-gw73xx.dtsi b/arch/arm64/boot/dts/freescale/imx8mm-venice-gw73xx.dtsi
index 426483ec1f88..023619648966 100644
--- a/arch/arm64/boot/dts/freescale/imx8mm-venice-gw73xx.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8mm-venice-gw73xx.dtsi
@@ -238,12 +238,14 @@
&usbotg1 {
dr_mode = "otg";
+ over-current-active-low;
vbus-supply = <&reg_usb_otg1_vbus>;
status = "okay";
};
&usbotg2 {
dr_mode = "host";
+ disable-over-current;
vbus-supply = <&reg_usb_otg2_vbus>;
status = "okay";
};
@@ -358,7 +360,7 @@
fsl,pins = <
MX8MM_IOMUXC_ECSPI2_SCLK_ECSPI2_SCLK 0xd6
MX8MM_IOMUXC_ECSPI2_MOSI_ECSPI2_MOSI 0xd6
- MX8MM_IOMUXC_ECSPI2_SCLK_ECSPI2_SCLK 0xd6
+ MX8MM_IOMUXC_ECSPI2_MISO_ECSPI2_MISO 0xd6
MX8MM_IOMUXC_ECSPI2_SS0_GPIO5_IO13 0xd6
>;
};
diff --git a/arch/arm64/boot/dts/freescale/imx8mn-ddr4-evk.dts b/arch/arm64/boot/dts/freescale/imx8mn-ddr4-evk.dts
index 7dfee715a2c4..d8ce217c6016 100644
--- a/arch/arm64/boot/dts/freescale/imx8mn-ddr4-evk.dts
+++ b/arch/arm64/boot/dts/freescale/imx8mn-ddr4-evk.dts
@@ -59,6 +59,10 @@
interrupts = <3 IRQ_TYPE_LEVEL_LOW>;
rohm,reset-snvs-powered;
+ #clock-cells = <0>;
+ clocks = <&osc_32k 0>;
+ clock-output-names = "clk-32k-out";
+
regulators {
buck1_reg: BUCK1 {
regulator-name = "buck1";
diff --git a/arch/arm64/boot/dts/freescale/imx8mn.dtsi b/arch/arm64/boot/dts/freescale/imx8mn.dtsi
index 99f0f5026674..5c0ca2490561 100644
--- a/arch/arm64/boot/dts/freescale/imx8mn.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8mn.dtsi
@@ -293,7 +293,7 @@
ranges;
sai2: sai@30020000 {
- compatible = "fsl,imx8mm-sai", "fsl,imx8mq-sai";
+ compatible = "fsl,imx8mn-sai", "fsl,imx8mq-sai";
reg = <0x30020000 0x10000>;
interrupts = <GIC_SPI 96 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&clk IMX8MN_CLK_SAI2_IPG>,
@@ -307,7 +307,7 @@
};
sai3: sai@30030000 {
- compatible = "fsl,imx8mm-sai", "fsl,imx8mq-sai";
+ compatible = "fsl,imx8mn-sai", "fsl,imx8mq-sai";
reg = <0x30030000 0x10000>;
interrupts = <GIC_SPI 50 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&clk IMX8MN_CLK_SAI3_IPG>,
@@ -321,7 +321,7 @@
};
sai5: sai@30050000 {
- compatible = "fsl,imx8mm-sai", "fsl,imx8mq-sai";
+ compatible = "fsl,imx8mn-sai", "fsl,imx8mq-sai";
reg = <0x30050000 0x10000>;
interrupts = <GIC_SPI 90 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&clk IMX8MN_CLK_SAI5_IPG>,
@@ -337,7 +337,7 @@
};
sai6: sai@30060000 {
- compatible = "fsl,imx8mm-sai", "fsl,imx8mq-sai";
+ compatible = "fsl,imx8mn-sai", "fsl,imx8mq-sai";
reg = <0x30060000 0x10000>;
interrupts = <GIC_SPI 90 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&clk IMX8MN_CLK_SAI6_IPG>,
@@ -394,7 +394,7 @@
};
sai7: sai@300b0000 {
- compatible = "fsl,imx8mm-sai", "fsl,imx8mq-sai";
+ compatible = "fsl,imx8mn-sai", "fsl,imx8mq-sai";
reg = <0x300b0000 0x10000>;
interrupts = <GIC_SPI 111 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&clk IMX8MN_CLK_SAI7_IPG>,
diff --git a/arch/arm64/boot/dts/freescale/imx8mq-tqma8mq.dtsi b/arch/arm64/boot/dts/freescale/imx8mq-tqma8mq.dtsi
index 38ffcd145b33..899e8e7dbc24 100644
--- a/arch/arm64/boot/dts/freescale/imx8mq-tqma8mq.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8mq-tqma8mq.dtsi
@@ -253,7 +253,7 @@
#address-cells = <1>;
#size-cells = <1>;
spi-max-frequency = <84000000>;
- spi-tx-bus-width = <4>;
+ spi-tx-bus-width = <1>;
spi-rx-bus-width = <4>;
};
};
diff --git a/arch/arm64/boot/dts/freescale/imx8qm.dtsi b/arch/arm64/boot/dts/freescale/imx8qm.dtsi
index be8c76a0554c..4f767012f1f5 100644
--- a/arch/arm64/boot/dts/freescale/imx8qm.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8qm.dtsi
@@ -196,7 +196,7 @@
};
clk: clock-controller {
- compatible = "fsl,imx8qxp-clk", "fsl,scu-clk";
+ compatible = "fsl,imx8qm-clk", "fsl,scu-clk";
#clock-cells = <2>;
};
diff --git a/arch/arm64/boot/dts/nvidia/tegra186-p3310.dtsi b/arch/arm64/boot/dts/nvidia/tegra186-p3310.dtsi
index 1df84335925b..aff857df25cf 100644
--- a/arch/arm64/boot/dts/nvidia/tegra186-p3310.dtsi
+++ b/arch/arm64/boot/dts/nvidia/tegra186-p3310.dtsi
@@ -262,25 +262,25 @@
gpio4 {
pins = "gpio4";
function = "32k-out1";
- drive-push-pull;
+ drive-push-pull = <1>;
};
gpio5 {
pins = "gpio5";
function = "gpio";
- drive-push-pull;
+ drive-push-pull = <0>;
};
gpio6 {
pins = "gpio6";
function = "gpio";
- drive-push-pull;
+ drive-push-pull = <1>;
};
gpio7 {
pins = "gpio7";
function = "gpio";
- drive-push-pull;
+ drive-push-pull = <0>;
};
};
diff --git a/arch/arm64/boot/dts/nvidia/tegra186-p3509-0000+p3636-0001.dts b/arch/arm64/boot/dts/nvidia/tegra186-p3509-0000+p3636-0001.dts
index 1ab132c152bb..4631504c3c7a 100644
--- a/arch/arm64/boot/dts/nvidia/tegra186-p3509-0000+p3636-0001.dts
+++ b/arch/arm64/boot/dts/nvidia/tegra186-p3509-0000+p3636-0001.dts
@@ -462,25 +462,25 @@
gpio4 {
pins = "gpio4";
function = "32k-out1";
- drive-push-pull;
+ drive-push-pull = <1>;
};
gpio5 {
pins = "gpio5";
function = "gpio";
- drive-push-pull;
+ drive-push-pull = <0>;
};
gpio6 {
pins = "gpio6";
function = "gpio";
- drive-push-pull;
+ drive-push-pull = <1>;
};
gpio7 {
pins = "gpio7";
function = "gpio";
- drive-push-pull;
+ drive-push-pull = <1>;
};
};
diff --git a/arch/arm64/boot/dts/nvidia/tegra194-p2888.dtsi b/arch/arm64/boot/dts/nvidia/tegra194-p2888.dtsi
index 634d0f493c2e..a7d7cfd66379 100644
--- a/arch/arm64/boot/dts/nvidia/tegra194-p2888.dtsi
+++ b/arch/arm64/boot/dts/nvidia/tegra194-p2888.dtsi
@@ -174,19 +174,19 @@
gpio4 {
pins = "gpio4";
function = "32k-out1";
- drive-push-pull;
+ drive-push-pull = <1>;
};
gpio6 {
pins = "gpio6";
function = "gpio";
- drive-push-pull;
+ drive-push-pull = <1>;
};
gpio7 {
pins = "gpio7";
function = "gpio";
- drive-push-pull;
+ drive-push-pull = <0>;
};
};
diff --git a/arch/arm64/boot/dts/nvidia/tegra194-p3668.dtsi b/arch/arm64/boot/dts/nvidia/tegra194-p3668.dtsi
index 0b219e72765e..0bd66f9c620b 100644
--- a/arch/arm64/boot/dts/nvidia/tegra194-p3668.dtsi
+++ b/arch/arm64/boot/dts/nvidia/tegra194-p3668.dtsi
@@ -148,19 +148,19 @@
gpio4 {
pins = "gpio4";
function = "32k-out1";
- drive-push-pull;
+ drive-push-pull = <1>;
};
gpio6 {
pins = "gpio6";
function = "gpio";
- drive-push-pull;
+ drive-push-pull = <1>;
};
gpio7 {
pins = "gpio7";
function = "gpio";
- drive-push-pull;
+ drive-push-pull = <0>;
};
};
diff --git a/arch/arm64/boot/dts/nvidia/tegra210-p2180.dtsi b/arch/arm64/boot/dts/nvidia/tegra210-p2180.dtsi
index 0fe772b04bd0..75eb743a7242 100644
--- a/arch/arm64/boot/dts/nvidia/tegra210-p2180.dtsi
+++ b/arch/arm64/boot/dts/nvidia/tegra210-p2180.dtsi
@@ -59,7 +59,7 @@
gpio1 {
pins = "gpio1";
function = "fps-out";
- drive-push-pull;
+ drive-push-pull = <1>;
maxim,active-fps-source = <MAX77620_FPS_SRC_0>;
maxim,active-fps-power-up-slot = <7>;
maxim,active-fps-power-down-slot = <0>;
@@ -68,7 +68,7 @@
gpio2_3 {
pins = "gpio2", "gpio3";
function = "fps-out";
- drive-open-drain;
+ drive-open-drain = <1>;
maxim,active-fps-source = <MAX77620_FPS_SRC_0>;
};
@@ -80,7 +80,7 @@
gpio5_6_7 {
pins = "gpio5", "gpio6", "gpio7";
function = "gpio";
- drive-push-pull;
+ drive-push-pull = <1>;
};
};
diff --git a/arch/arm64/boot/dts/nvidia/tegra210-p2894.dtsi b/arch/arm64/boot/dts/nvidia/tegra210-p2894.dtsi
index 936a309e288c..10347b6e6e84 100644
--- a/arch/arm64/boot/dts/nvidia/tegra210-p2894.dtsi
+++ b/arch/arm64/boot/dts/nvidia/tegra210-p2894.dtsi
@@ -1351,7 +1351,7 @@
gpio1 {
pins = "gpio1";
function = "fps-out";
- drive-push-pull;
+ drive-push-pull = <1>;
maxim,active-fps-source = <MAX77620_FPS_SRC_0>;
maxim,active-fps-power-up-slot = <7>;
maxim,active-fps-power-down-slot = <0>;
@@ -1360,14 +1360,14 @@
gpio2 {
pins = "gpio2";
function = "fps-out";
- drive-open-drain;
+ drive-open-drain = <1>;
maxim,active-fps-source = <MAX77620_FPS_SRC_0>;
};
gpio3 {
pins = "gpio3";
function = "fps-out";
- drive-open-drain;
+ drive-open-drain = <1>;
maxim,active-fps-source = <MAX77620_FPS_SRC_0>;
};
@@ -1379,7 +1379,7 @@
gpio5_6_7 {
pins = "gpio5", "gpio6", "gpio7";
function = "gpio";
- drive-push-pull;
+ drive-push-pull = <1>;
};
};
diff --git a/arch/arm64/boot/dts/nvidia/tegra210-p3450-0000.dts b/arch/arm64/boot/dts/nvidia/tegra210-p3450-0000.dts
index f6446120c267..72c2dc3c14ea 100644
--- a/arch/arm64/boot/dts/nvidia/tegra210-p3450-0000.dts
+++ b/arch/arm64/boot/dts/nvidia/tegra210-p3450-0000.dts
@@ -195,7 +195,7 @@
gpio1 {
pins = "gpio1";
function = "fps-out";
- drive-push-pull;
+ drive-push-pull = <1>;
maxim,active-fps-source = <MAX77620_FPS_SRC_NONE>;
maxim,active-fps-power-up-slot = <0>;
maxim,active-fps-power-down-slot = <7>;
@@ -204,7 +204,7 @@
gpio2 {
pins = "gpio2";
function = "fps-out";
- drive-open-drain;
+ drive-open-drain = <1>;
maxim,active-fps-source = <MAX77620_FPS_SRC_0>;
maxim,active-fps-power-up-slot = <0>;
maxim,active-fps-power-down-slot = <7>;
@@ -213,7 +213,7 @@
gpio3 {
pins = "gpio3";
function = "fps-out";
- drive-open-drain;
+ drive-open-drain = <1>;
maxim,active-fps-source = <MAX77620_FPS_SRC_0>;
maxim,active-fps-power-up-slot = <4>;
maxim,active-fps-power-down-slot = <3>;
@@ -227,7 +227,7 @@
gpio5_6_7 {
pins = "gpio5", "gpio6", "gpio7";
function = "gpio";
- drive-push-pull;
+ drive-push-pull = <1>;
};
};
diff --git a/arch/arm64/boot/dts/nvidia/tegra210-smaug.dts b/arch/arm64/boot/dts/nvidia/tegra210-smaug.dts
index e42384f097d6..a263d51882ee 100644
--- a/arch/arm64/boot/dts/nvidia/tegra210-smaug.dts
+++ b/arch/arm64/boot/dts/nvidia/tegra210-smaug.dts
@@ -1386,7 +1386,7 @@
gpio3 {
pins = "gpio3";
function = "fps-out";
- drive-open-drain;
+ drive-open-drain = <1>;
maxim,active-fps-source = <MAX77620_FPS_SRC_0>;
maxim,active-fps-power-up-slot = <4>;
maxim,active-fps-power-down-slot = <2>;
@@ -1395,7 +1395,7 @@
gpio5_6 {
pins = "gpio5", "gpio6";
function = "gpio";
- drive-push-pull;
+ drive-push-pull = <1>;
};
gpio4 {
diff --git a/arch/arm64/boot/dts/qcom/sm8250-mtp.dts b/arch/arm64/boot/dts/qcom/sm8250-mtp.dts
index fb99cc2827c7..7ab3627cc347 100644
--- a/arch/arm64/boot/dts/qcom/sm8250-mtp.dts
+++ b/arch/arm64/boot/dts/qcom/sm8250-mtp.dts
@@ -622,6 +622,10 @@
status = "okay";
};
+&rxmacro {
+ status = "okay";
+};
+
&slpi {
status = "okay";
firmware-name = "qcom/sm8250/slpi.mbn";
@@ -773,6 +777,8 @@
};
&swr1 {
+ status = "okay";
+
wcd_rx: wcd9380-rx@0,4 {
compatible = "sdw20217010d00";
reg = <0 4>;
@@ -781,6 +787,8 @@
};
&swr2 {
+ status = "okay";
+
wcd_tx: wcd9380-tx@0,3 {
compatible = "sdw20217010d00";
reg = <0 3>;
@@ -819,6 +827,10 @@
};
};
+&txmacro {
+ status = "okay";
+};
+
&uart12 {
status = "okay";
};
diff --git a/arch/arm64/boot/dts/qcom/sm8250.dtsi b/arch/arm64/boot/dts/qcom/sm8250.dtsi
index af8f22636436..1304b86af1a0 100644
--- a/arch/arm64/boot/dts/qcom/sm8250.dtsi
+++ b/arch/arm64/boot/dts/qcom/sm8250.dtsi
@@ -2255,6 +2255,7 @@
pinctrl-0 = <&rx_swr_active>;
compatible = "qcom,sm8250-lpass-rx-macro";
reg = <0 0x3200000 0 0x1000>;
+ status = "disabled";
clocks = <&q6afecc LPASS_CLK_ID_TX_CORE_MCLK LPASS_CLK_ATTRIBUTE_COUPLE_NO>,
<&q6afecc LPASS_CLK_ID_TX_CORE_NPL_MCLK LPASS_CLK_ATTRIBUTE_COUPLE_NO>,
@@ -2273,6 +2274,7 @@
swr1: soundwire-controller@3210000 {
reg = <0 0x3210000 0 0x2000>;
compatible = "qcom,soundwire-v1.5.1";
+ status = "disabled";
interrupts = <GIC_SPI 298 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&rxmacro>;
clock-names = "iface";
@@ -2300,6 +2302,7 @@
pinctrl-0 = <&tx_swr_active>;
compatible = "qcom,sm8250-lpass-tx-macro";
reg = <0 0x3220000 0 0x1000>;
+ status = "disabled";
clocks = <&q6afecc LPASS_CLK_ID_TX_CORE_MCLK LPASS_CLK_ATTRIBUTE_COUPLE_NO>,
<&q6afecc LPASS_CLK_ID_TX_CORE_NPL_MCLK LPASS_CLK_ATTRIBUTE_COUPLE_NO>,
@@ -2323,6 +2326,7 @@
compatible = "qcom,soundwire-v1.5.1";
interrupts-extended = <&intc GIC_SPI 297 IRQ_TYPE_LEVEL_HIGH>;
interrupt-names = "core";
+ status = "disabled";
clocks = <&txmacro>;
clock-names = "iface";
diff --git a/arch/arm64/boot/dts/rockchip/rk3568-bpi-r2-pro.dts b/arch/arm64/boot/dts/rockchip/rk3568-bpi-r2-pro.dts
index a01886b467ed..067fe4a6b178 100644
--- a/arch/arm64/boot/dts/rockchip/rk3568-bpi-r2-pro.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3568-bpi-r2-pro.dts
@@ -16,6 +16,7 @@
aliases {
ethernet0 = &gmac0;
+ ethernet1 = &gmac1;
mmc0 = &sdmmc0;
mmc1 = &sdhci;
};
@@ -78,7 +79,6 @@
assigned-clocks = <&cru SCLK_GMAC0_RX_TX>, <&cru SCLK_GMAC0>;
assigned-clock-parents = <&cru SCLK_GMAC0_RGMII_SPEED>, <&cru CLK_MAC0_2TOP>;
clock_in_out = "input";
- phy-handle = <&rgmii_phy0>;
phy-mode = "rgmii";
pinctrl-names = "default";
pinctrl-0 = <&gmac0_miim
@@ -90,8 +90,38 @@
snps,reset-active-low;
/* Reset time is 20ms, 100ms for rtl8211f */
snps,reset-delays-us = <0 20000 100000>;
+ tx_delay = <0x4f>;
+ rx_delay = <0x0f>;
+ status = "okay";
+
+ fixed-link {
+ speed = <1000>;
+ full-duplex;
+ pause;
+ };
+};
+
+&gmac1 {
+ assigned-clocks = <&cru SCLK_GMAC1_RX_TX>, <&cru SCLK_GMAC1>;
+ assigned-clock-parents = <&cru SCLK_GMAC1_RGMII_SPEED>, <&cru CLK_MAC1_2TOP>;
+ clock_in_out = "output";
+ phy-handle = <&rgmii_phy1>;
+ phy-mode = "rgmii";
+ pinctrl-names = "default";
+ pinctrl-0 = <&gmac1m1_miim
+ &gmac1m1_tx_bus2
+ &gmac1m1_rx_bus2
+ &gmac1m1_rgmii_clk
+ &gmac1m1_rgmii_bus>;
+
+ snps,reset-gpio = <&gpio3 RK_PB0 GPIO_ACTIVE_LOW>;
+ snps,reset-active-low;
+ /* Reset time is 20ms, 100ms for rtl8211f */
+ snps,reset-delays-us = <0 20000 100000>;
+
tx_delay = <0x3c>;
rx_delay = <0x2f>;
+
status = "okay";
};
@@ -315,8 +345,8 @@
status = "disabled";
};
-&mdio0 {
- rgmii_phy0: ethernet-phy@0 {
+&mdio1 {
+ rgmii_phy1: ethernet-phy@0 {
compatible = "ethernet-phy-ieee802.3-c22";
reg = <0x0>;
};
@@ -345,9 +375,9 @@
pmuio2-supply = <&vcc3v3_pmu>;
vccio1-supply = <&vccio_acodec>;
vccio3-supply = <&vccio_sd>;
- vccio4-supply = <&vcc_1v8>;
+ vccio4-supply = <&vcc_3v3>;
vccio5-supply = <&vcc_3v3>;
- vccio6-supply = <&vcc_3v3>;
+ vccio6-supply = <&vcc_1v8>;
vccio7-supply = <&vcc_3v3>;
status = "okay";
};
diff --git a/arch/arm64/include/asm/io.h b/arch/arm64/include/asm/io.h
index 7fd836bea7eb..3995652daf81 100644
--- a/arch/arm64/include/asm/io.h
+++ b/arch/arm64/include/asm/io.h
@@ -192,4 +192,8 @@ extern void __iomem *ioremap_cache(phys_addr_t phys_addr, size_t size);
extern int valid_phys_addr_range(phys_addr_t addr, size_t size);
extern int valid_mmap_phys_addr_range(unsigned long pfn, size_t size);
+extern bool arch_memremap_can_ram_remap(resource_size_t offset, size_t size,
+ unsigned long flags);
+#define arch_memremap_can_ram_remap arch_memremap_can_ram_remap
+
#endif /* __ASM_IO_H */
diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index 7496deab025a..f71358271b71 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -40,6 +40,7 @@ void kvm_inject_undefined(struct kvm_vcpu *vcpu);
void kvm_inject_vabt(struct kvm_vcpu *vcpu);
void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr);
void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr);
+void kvm_inject_size_fault(struct kvm_vcpu *vcpu);
void kvm_vcpu_wfi(struct kvm_vcpu *vcpu);
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 94e147e5456c..dff2b483ea50 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -535,7 +535,7 @@ extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
PMD_TYPE_TABLE)
#define pmd_sect(pmd) ((pmd_val(pmd) & PMD_TYPE_MASK) == \
PMD_TYPE_SECT)
-#define pmd_leaf(pmd) pmd_sect(pmd)
+#define pmd_leaf(pmd) (pmd_present(pmd) && !pmd_table(pmd))
#define pmd_bad(pmd) (!pmd_table(pmd))
#define pmd_leaf_size(pmd) (pmd_cont(pmd) ? CONT_PMD_SIZE : PMD_SIZE)
@@ -625,7 +625,7 @@ static inline unsigned long pmd_page_vaddr(pmd_t pmd)
#define pud_none(pud) (!pud_val(pud))
#define pud_bad(pud) (!pud_table(pud))
#define pud_present(pud) pte_present(pud_pte(pud))
-#define pud_leaf(pud) pud_sect(pud)
+#define pud_leaf(pud) (pud_present(pud) && !pud_table(pud))
#define pud_valid(pud) pte_valid(pud_pte(pud))
static inline void set_pud(pud_t *pudp, pud_t pud)
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 986837d7ec82..fa7981d0d917 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -75,6 +75,10 @@ obj-$(CONFIG_ARM64_MTE) += mte.o
obj-y += vdso-wrap.o
obj-$(CONFIG_COMPAT_VDSO) += vdso32-wrap.o
+# Force dependency (vdso*-wrap.S includes vdso.so through incbin)
+$(obj)/vdso-wrap.o: $(obj)/vdso/vdso.so
+$(obj)/vdso32-wrap.o: $(obj)/vdso32/vdso.so
+
obj-y += probes/
head-y := head.o
extra-y += $(head-y) vmlinux.lds
diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index 4c9b5b4b7a0b..a0f3d0aaa3c5 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -208,6 +208,8 @@ static const struct arm64_cpu_capabilities arm64_repeat_tlbi_list[] = {
#ifdef CONFIG_ARM64_ERRATUM_1286807
{
ERRATA_MIDR_RANGE(MIDR_CORTEX_A76, 0, 0, 3, 0),
+ /* Kryo4xx Gold (rcpe to rfpe) => (r0p0 to r3p0) */
+ ERRATA_MIDR_RANGE(MIDR_QCOM_KRYO_4XX_GOLD, 0xc, 0xe, 0xf, 0xe),
},
#endif
{},
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index d72c4b4d389c..2cb9cc9e0eff 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -654,7 +654,6 @@ static const struct __ftr_reg_entry {
ARM64_FTR_REG(SYS_ID_AA64ISAR0_EL1, ftr_id_aa64isar0),
ARM64_FTR_REG_OVERRIDE(SYS_ID_AA64ISAR1_EL1, ftr_id_aa64isar1,
&id_aa64isar1_override),
- ARM64_FTR_REG(SYS_ID_AA64ISAR2_EL1, ftr_id_aa64isar2),
ARM64_FTR_REG_OVERRIDE(SYS_ID_AA64ISAR2_EL1, ftr_id_aa64isar2,
&id_aa64isar2_override),
@@ -810,7 +809,7 @@ static void __init sort_ftr_regs(void)
* to sys_id for subsequent binary search in get_arm64_ftr_reg()
* to work correctly.
*/
- BUG_ON(arm64_ftr_regs[i].sys_id < arm64_ftr_regs[i - 1].sys_id);
+ BUG_ON(arm64_ftr_regs[i].sys_id <= arm64_ftr_regs[i - 1].sys_id);
}
}
diff --git a/arch/arm64/kernel/elfcore.c b/arch/arm64/kernel/elfcore.c
index 2b3f3d0544b9..98d67444a5b6 100644
--- a/arch/arm64/kernel/elfcore.c
+++ b/arch/arm64/kernel/elfcore.c
@@ -95,7 +95,7 @@ int elf_core_write_extra_phdrs(struct coredump_params *cprm, loff_t offset)
for_each_mte_vma(current, vma) {
struct elf_phdr phdr;
- phdr.p_type = PT_ARM_MEMTAG_MTE;
+ phdr.p_type = PT_AARCH64_MEMTAG_MTE;
phdr.p_offset = offset;
phdr.p_vaddr = vma->vm_start;
phdr.p_paddr = 0;
diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c
index 78b3e0f8e997..d502703e8373 100644
--- a/arch/arm64/kernel/mte.c
+++ b/arch/arm64/kernel/mte.c
@@ -76,6 +76,9 @@ void mte_sync_tags(pte_t old_pte, pte_t pte)
mte_sync_page_tags(page, old_pte, check_swap,
pte_is_tagged);
}
+
+ /* ensure the tags are visible before the PTE is set */
+ smp_wmb();
}
int memcmp_pages(struct page *page1, struct page *page2)
diff --git a/arch/arm64/kernel/paravirt.c b/arch/arm64/kernel/paravirt.c
index 75fed4460407..57c7c211f8c7 100644
--- a/arch/arm64/kernel/paravirt.c
+++ b/arch/arm64/kernel/paravirt.c
@@ -35,7 +35,7 @@ static u64 native_steal_clock(int cpu)
DEFINE_STATIC_CALL(pv_steal_clock, native_steal_clock);
struct pv_time_stolen_time_region {
- struct pvclock_vcpu_stolen_time *kaddr;
+ struct pvclock_vcpu_stolen_time __rcu *kaddr;
};
static DEFINE_PER_CPU(struct pv_time_stolen_time_region, stolen_time_region);
@@ -52,7 +52,9 @@ early_param("no-steal-acc", parse_no_stealacc);
/* return stolen time in ns by asking the hypervisor */
static u64 para_steal_clock(int cpu)
{
+ struct pvclock_vcpu_stolen_time *kaddr = NULL;
struct pv_time_stolen_time_region *reg;
+ u64 ret = 0;
reg = per_cpu_ptr(&stolen_time_region, cpu);
@@ -61,28 +63,37 @@ static u64 para_steal_clock(int cpu)
* online notification callback runs. Until the callback
* has run we just return zero.
*/
- if (!reg->kaddr)
+ rcu_read_lock();
+ kaddr = rcu_dereference(reg->kaddr);
+ if (!kaddr) {
+ rcu_read_unlock();
return 0;
+ }
- return le64_to_cpu(READ_ONCE(reg->kaddr->stolen_time));
+ ret = le64_to_cpu(READ_ONCE(kaddr->stolen_time));
+ rcu_read_unlock();
+ return ret;
}
static int stolen_time_cpu_down_prepare(unsigned int cpu)
{
+ struct pvclock_vcpu_stolen_time *kaddr = NULL;
struct pv_time_stolen_time_region *reg;
reg = this_cpu_ptr(&stolen_time_region);
if (!reg->kaddr)
return 0;
- memunmap(reg->kaddr);
- memset(reg, 0, sizeof(*reg));
+ kaddr = rcu_replace_pointer(reg->kaddr, NULL, true);
+ synchronize_rcu();
+ memunmap(kaddr);
return 0;
}
static int stolen_time_cpu_online(unsigned int cpu)
{
+ struct pvclock_vcpu_stolen_time *kaddr = NULL;
struct pv_time_stolen_time_region *reg;
struct arm_smccc_res res;
@@ -93,17 +104,19 @@ static int stolen_time_cpu_online(unsigned int cpu)
if (res.a0 == SMCCC_RET_NOT_SUPPORTED)
return -EINVAL;
- reg->kaddr = memremap(res.a0,
+ kaddr = memremap(res.a0,
sizeof(struct pvclock_vcpu_stolen_time),
MEMREMAP_WB);
+ rcu_assign_pointer(reg->kaddr, kaddr);
+
if (!reg->kaddr) {
pr_warn("Failed to map stolen time data structure\n");
return -ENOMEM;
}
- if (le32_to_cpu(reg->kaddr->revision) != 0 ||
- le32_to_cpu(reg->kaddr->attributes) != 0) {
+ if (le32_to_cpu(kaddr->revision) != 0 ||
+ le32_to_cpu(kaddr->attributes) != 0) {
pr_warn_once("Unexpected revision or attributes in stolen time data\n");
return -ENXIO;
}
diff --git a/arch/arm64/kernel/relocate_kernel.S b/arch/arm64/kernel/relocate_kernel.S
index f0a3df9e18a3..413f899e4ac6 100644
--- a/arch/arm64/kernel/relocate_kernel.S
+++ b/arch/arm64/kernel/relocate_kernel.S
@@ -37,6 +37,15 @@
* safe memory that has been set up to be preserved during the copy operation.
*/
SYM_CODE_START(arm64_relocate_new_kernel)
+ /*
+ * The kimage structure isn't allocated specially and may be clobbered
+ * during relocation. We must load any values we need from it prior to
+ * any relocation occurring.
+ */
+ ldr x28, [x0, #KIMAGE_START]
+ ldr x27, [x0, #KIMAGE_ARCH_EL2_VECTORS]
+ ldr x26, [x0, #KIMAGE_ARCH_DTB_MEM]
+
/* Setup the list loop variables. */
ldr x18, [x0, #KIMAGE_ARCH_ZERO_PAGE] /* x18 = zero page for BBM */
ldr x17, [x0, #KIMAGE_ARCH_TTBR1] /* x17 = linear map copy */
@@ -72,21 +81,20 @@ SYM_CODE_START(arm64_relocate_new_kernel)
ic iallu
dsb nsh
isb
- ldr x4, [x0, #KIMAGE_START] /* relocation start */
- ldr x1, [x0, #KIMAGE_ARCH_EL2_VECTORS] /* relocation start */
- ldr x0, [x0, #KIMAGE_ARCH_DTB_MEM] /* dtb address */
turn_off_mmu x12, x13
/* Start new image. */
- cbz x1, .Lel1
- mov x1, x4 /* relocation start */
- mov x2, x0 /* dtb address */
+ cbz x27, .Lel1
+ mov x1, x28 /* kernel entry point */
+ mov x2, x26 /* dtb address */
mov x3, xzr
mov x4, xzr
mov x0, #HVC_SOFT_RESTART
hvc #0 /* Jumps from el2 */
.Lel1:
+ mov x0, x26 /* dtb address */
+ mov x1, xzr
mov x2, xzr
mov x3, xzr
- br x4 /* Jumps from el1 */
+ br x28 /* Jumps from el1 */
SYM_CODE_END(arm64_relocate_new_kernel)
diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile
index 172452f79e46..ac1964ebed1e 100644
--- a/arch/arm64/kernel/vdso/Makefile
+++ b/arch/arm64/kernel/vdso/Makefile
@@ -52,9 +52,6 @@ GCOV_PROFILE := n
targets += vdso.lds
CPPFLAGS_vdso.lds += -P -C -U$(ARCH)
-# Force dependency (incbin is bad)
-$(obj)/vdso.o : $(obj)/vdso.so
-
# Link rule for the .so file, .lds has to be first
$(obj)/vdso.so.dbg: $(obj)/vdso.lds $(obj-vdso) FORCE
$(call if_changed,vdsold_and_vdso_check)
diff --git a/arch/arm64/kernel/vdso32/Makefile b/arch/arm64/kernel/vdso32/Makefile
index ed181bedbffc..05ba1aae1b6f 100644
--- a/arch/arm64/kernel/vdso32/Makefile
+++ b/arch/arm64/kernel/vdso32/Makefile
@@ -131,9 +131,6 @@ obj-vdso := $(c-obj-vdso) $(c-obj-vdso-gettimeofday) $(asm-obj-vdso)
targets += vdso.lds
CPPFLAGS_vdso.lds += -P -C -U$(ARCH)
-# Force dependency (vdso.s includes vdso.so through incbin)
-$(obj)/vdso.o: $(obj)/vdso.so
-
include/generated/vdso32-offsets.h: $(obj)/vdso.so.dbg FORCE
$(call if_changed,vdsosym)
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index 523bc934fe2f..a66d83540c15 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -1436,7 +1436,8 @@ static int kvm_init_vector_slots(void)
base = kern_hyp_va(kvm_ksym_ref(__bp_harden_hyp_vecs));
kvm_init_vector_slot(base, HYP_VECTOR_SPECTRE_DIRECT);
- if (kvm_system_needs_idmapped_vectors() && !has_vhe()) {
+ if (kvm_system_needs_idmapped_vectors() &&
+ !is_protected_kvm_enabled()) {
err = create_hyp_exec_mappings(__pa_symbol(__bp_harden_hyp_vecs),
__BP_HARDEN_HYP_VECS_SZ, &base);
if (err)
diff --git a/arch/arm64/kvm/hyp/nvhe/host.S b/arch/arm64/kvm/hyp/nvhe/host.S
index 3d613e721a75..727c979b2b69 100644
--- a/arch/arm64/kvm/hyp/nvhe/host.S
+++ b/arch/arm64/kvm/hyp/nvhe/host.S
@@ -198,15 +198,15 @@ SYM_CODE_START(__kvm_hyp_host_vector)
invalid_host_el2_vect // FIQ EL2h
invalid_host_el2_vect // Error EL2h
- host_el1_sync_vect // Synchronous 64-bit EL1
- invalid_host_el1_vect // IRQ 64-bit EL1
- invalid_host_el1_vect // FIQ 64-bit EL1
- invalid_host_el1_vect // Error 64-bit EL1
-
- invalid_host_el1_vect // Synchronous 32-bit EL1
- invalid_host_el1_vect // IRQ 32-bit EL1
- invalid_host_el1_vect // FIQ 32-bit EL1
- invalid_host_el1_vect // Error 32-bit EL1
+ host_el1_sync_vect // Synchronous 64-bit EL1/EL0
+ invalid_host_el1_vect // IRQ 64-bit EL1/EL0
+ invalid_host_el1_vect // FIQ 64-bit EL1/EL0
+ invalid_host_el1_vect // Error 64-bit EL1/EL0
+
+ host_el1_sync_vect // Synchronous 32-bit EL1/EL0
+ invalid_host_el1_vect // IRQ 32-bit EL1/EL0
+ invalid_host_el1_vect // FIQ 32-bit EL1/EL0
+ invalid_host_el1_vect // Error 32-bit EL1/EL0
SYM_CODE_END(__kvm_hyp_host_vector)
/*
diff --git a/arch/arm64/kvm/inject_fault.c b/arch/arm64/kvm/inject_fault.c
index b47df73e98d7..ba20405d2dc2 100644
--- a/arch/arm64/kvm/inject_fault.c
+++ b/arch/arm64/kvm/inject_fault.c
@@ -145,6 +145,34 @@ void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr)
inject_abt64(vcpu, true, addr);
}
+void kvm_inject_size_fault(struct kvm_vcpu *vcpu)
+{
+ unsigned long addr, esr;
+
+ addr = kvm_vcpu_get_fault_ipa(vcpu);
+ addr |= kvm_vcpu_get_hfar(vcpu) & GENMASK(11, 0);
+
+ if (kvm_vcpu_trap_is_iabt(vcpu))
+ kvm_inject_pabt(vcpu, addr);
+ else
+ kvm_inject_dabt(vcpu, addr);
+
+ /*
+ * If AArch64 or LPAE, set FSC to 0 to indicate an Address
+ * Size Fault at level 0, as if exceeding PARange.
+ *
+ * Non-LPAE guests will only get the external abort, as there
+ * is no way to to describe the ASF.
+ */
+ if (vcpu_el1_is_32bit(vcpu) &&
+ !(vcpu_read_sys_reg(vcpu, TCR_EL1) & TTBCR_EAE))
+ return;
+
+ esr = vcpu_read_sys_reg(vcpu, ESR_EL1);
+ esr &= ~GENMASK_ULL(5, 0);
+ vcpu_write_sys_reg(vcpu, esr, ESR_EL1);
+}
+
/**
* kvm_inject_undefined - inject an undefined instruction into the guest
* @vcpu: The vCPU in which to inject the exception
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index 53ae2c0640bc..5400fc020164 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -1337,6 +1337,25 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
fault_ipa = kvm_vcpu_get_fault_ipa(vcpu);
is_iabt = kvm_vcpu_trap_is_iabt(vcpu);
+ if (fault_status == FSC_FAULT) {
+ /* Beyond sanitised PARange (which is the IPA limit) */
+ if (fault_ipa >= BIT_ULL(get_kvm_ipa_limit())) {
+ kvm_inject_size_fault(vcpu);
+ return 1;
+ }
+
+ /* Falls between the IPA range and the PARange? */
+ if (fault_ipa >= BIT_ULL(vcpu->arch.hw_mmu->pgt->ia_bits)) {
+ fault_ipa |= kvm_vcpu_get_hfar(vcpu) & GENMASK(11, 0);
+
+ if (is_iabt)
+ kvm_inject_pabt(vcpu, fault_ipa);
+ else
+ kvm_inject_dabt(vcpu, fault_ipa);
+ return 1;
+ }
+ }
+
/* Synchronous External Abort? */
if (kvm_vcpu_abt_issea(vcpu)) {
/*
diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c
index 78fdc443adc7..3dc990ac4f44 100644
--- a/arch/arm64/kvm/pmu-emul.c
+++ b/arch/arm64/kvm/pmu-emul.c
@@ -177,6 +177,9 @@ u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx)
struct kvm_pmu *pmu = &vcpu->arch.pmu;
struct kvm_pmc *pmc = &pmu->pmc[select_idx];
+ if (!kvm_vcpu_has_pmu(vcpu))
+ return 0;
+
counter = kvm_pmu_get_pair_counter_value(vcpu, pmc);
if (kvm_pmu_pmc_is_chained(pmc) &&
@@ -198,6 +201,9 @@ void kvm_pmu_set_counter_value(struct kvm_vcpu *vcpu, u64 select_idx, u64 val)
{
u64 reg;
+ if (!kvm_vcpu_has_pmu(vcpu))
+ return;
+
reg = (select_idx == ARMV8_PMU_CYCLE_IDX)
? PMCCNTR_EL0 : PMEVCNTR0_EL0 + select_idx;
__vcpu_sys_reg(vcpu, reg) += (s64)val - kvm_pmu_get_counter_value(vcpu, select_idx);
@@ -322,6 +328,9 @@ void kvm_pmu_enable_counter_mask(struct kvm_vcpu *vcpu, u64 val)
struct kvm_pmu *pmu = &vcpu->arch.pmu;
struct kvm_pmc *pmc;
+ if (!kvm_vcpu_has_pmu(vcpu))
+ return;
+
if (!(__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E) || !val)
return;
@@ -357,7 +366,7 @@ void kvm_pmu_disable_counter_mask(struct kvm_vcpu *vcpu, u64 val)
struct kvm_pmu *pmu = &vcpu->arch.pmu;
struct kvm_pmc *pmc;
- if (!val)
+ if (!kvm_vcpu_has_pmu(vcpu) || !val)
return;
for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) {
@@ -527,6 +536,9 @@ void kvm_pmu_software_increment(struct kvm_vcpu *vcpu, u64 val)
struct kvm_pmu *pmu = &vcpu->arch.pmu;
int i;
+ if (!kvm_vcpu_has_pmu(vcpu))
+ return;
+
if (!(__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E))
return;
@@ -576,6 +588,9 @@ void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val)
{
int i;
+ if (!kvm_vcpu_has_pmu(vcpu))
+ return;
+
if (val & ARMV8_PMU_PMCR_E) {
kvm_pmu_enable_counter_mask(vcpu,
__vcpu_sys_reg(vcpu, PMCNTENSET_EL0));
@@ -739,6 +754,9 @@ void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data,
{
u64 reg, mask;
+ if (!kvm_vcpu_has_pmu(vcpu))
+ return;
+
mask = ARMV8_PMU_EVTYPE_MASK;
mask &= ~ARMV8_PMU_EVTYPE_EVENT;
mask |= kvm_pmu_event_mask(vcpu->kvm);
@@ -827,6 +845,9 @@ u64 kvm_pmu_get_pmceid(struct kvm_vcpu *vcpu, bool pmceid1)
u64 val, mask = 0;
int base, i, nr_events;
+ if (!kvm_vcpu_has_pmu(vcpu))
+ return 0;
+
if (!pmceid1) {
val = read_sysreg(pmceid0_el0);
base = 0;
diff --git a/arch/arm64/kvm/psci.c b/arch/arm64/kvm/psci.c
index baac2b405f23..708d80e8e60d 100644
--- a/arch/arm64/kvm/psci.c
+++ b/arch/arm64/kvm/psci.c
@@ -181,7 +181,8 @@ static void kvm_prepare_system_event(struct kvm_vcpu *vcpu, u32 type, u64 flags)
memset(&vcpu->run->system_event, 0, sizeof(vcpu->run->system_event));
vcpu->run->system_event.type = type;
- vcpu->run->system_event.flags = flags;
+ vcpu->run->system_event.ndata = 1;
+ vcpu->run->system_event.data[0] = flags;
vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT;
}
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 7b45c040cc27..adf408c09cdb 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -1123,8 +1123,7 @@ static u64 read_id_reg(const struct kvm_vcpu *vcpu,
val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_CSV2), (u64)vcpu->kvm->arch.pfr0_csv2);
val &= ~ARM64_FEATURE_MASK(ID_AA64PFR0_CSV3);
val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_CSV3), (u64)vcpu->kvm->arch.pfr0_csv3);
- if (irqchip_in_kernel(vcpu->kvm) &&
- vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) {
+ if (kvm_vgic_global_state.type == VGIC_V3) {
val &= ~ARM64_FEATURE_MASK(ID_AA64PFR0_GIC);
val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_GIC), 1);
}
diff --git a/arch/arm64/mm/ioremap.c b/arch/arm64/mm/ioremap.c
index b7c81dacabf0..b21f91cd830d 100644
--- a/arch/arm64/mm/ioremap.c
+++ b/arch/arm64/mm/ioremap.c
@@ -99,3 +99,11 @@ void __init early_ioremap_init(void)
{
early_ioremap_setup();
}
+
+bool arch_memremap_can_ram_remap(resource_size_t offset, size_t size,
+ unsigned long flags)
+{
+ unsigned long pfn = PHYS_PFN(offset);
+
+ return pfn_is_map_memory(pfn);
+}
diff --git a/arch/mips/include/asm/timex.h b/arch/mips/include/asm/timex.h
index b05bb70a2e46..8026baf46e72 100644
--- a/arch/mips/include/asm/timex.h
+++ b/arch/mips/include/asm/timex.h
@@ -40,9 +40,9 @@
typedef unsigned int cycles_t;
/*
- * On R4000/R4400 before version 5.0 an erratum exists such that if the
- * cycle counter is read in the exact moment that it is matching the
- * compare register, no interrupt will be generated.
+ * On R4000/R4400 an erratum exists such that if the cycle counter is
+ * read in the exact moment that it is matching the compare register,
+ * no interrupt will be generated.
*
* There is a suggested workaround and also the erratum can't strike if
* the compare interrupt isn't being used as the clock source device.
@@ -63,7 +63,7 @@ static inline int can_use_mips_counter(unsigned int prid)
if (!__builtin_constant_p(cpu_has_counter))
asm volatile("" : "=m" (cpu_data[0].options));
if (likely(cpu_has_counter &&
- prid >= (PRID_IMP_R4000 | PRID_REV_ENCODE_44(5, 0))))
+ prid > (PRID_IMP_R4000 | PRID_REV_ENCODE_44(15, 15))))
return 1;
else
return 0;
diff --git a/arch/mips/kernel/time.c b/arch/mips/kernel/time.c
index caa01457dce6..ed339d7979f3 100644
--- a/arch/mips/kernel/time.c
+++ b/arch/mips/kernel/time.c
@@ -141,15 +141,10 @@ static __init int cpu_has_mfc0_count_bug(void)
case CPU_R4400MC:
/*
* The published errata for the R4400 up to 3.0 say the CPU
- * has the mfc0 from count bug.
+ * has the mfc0 from count bug. This seems the last version
+ * produced.
*/
- if ((current_cpu_data.processor_id & 0xff) <= 0x30)
- return 1;
-
- /*
- * we assume newer revisions are ok
- */
- return 0;
+ return 1;
}
return 0;
diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig
index 52e550b45692..bd22578859d0 100644
--- a/arch/parisc/Kconfig
+++ b/arch/parisc/Kconfig
@@ -38,6 +38,7 @@ config PARISC
select ARCH_HAVE_NMI_SAFE_CMPXCHG
select GENERIC_SMP_IDLE_THREAD
select GENERIC_ARCH_TOPOLOGY if SMP
+ select GENERIC_CPU_DEVICES if !SMP
select GENERIC_LIB_DEVMEM_IS_ALLOWED
select SYSCTL_ARCH_UNALIGN_ALLOW
select SYSCTL_EXCEPTION_TRACE
diff --git a/arch/parisc/configs/generic-32bit_defconfig b/arch/parisc/configs/generic-32bit_defconfig
index a5fee10d76ee..8ce0ae370680 100644
--- a/arch/parisc/configs/generic-32bit_defconfig
+++ b/arch/parisc/configs/generic-32bit_defconfig
@@ -6,6 +6,9 @@ CONFIG_BSD_PROCESS_ACCT=y
CONFIG_IKCONFIG=y
CONFIG_IKCONFIG_PROC=y
CONFIG_LOG_BUF_SHIFT=16
+CONFIG_CGROUPS=y
+CONFIG_NAMESPACES=y
+CONFIG_USER_NS=y
CONFIG_BLK_DEV_INITRD=y
CONFIG_EXPERT=y
CONFIG_PERF_EVENTS=y
@@ -47,7 +50,6 @@ CONFIG_PARPORT=y
CONFIG_PARPORT_PC=m
CONFIG_PARPORT_1284=y
CONFIG_BLK_DEV_LOOP=y
-CONFIG_BLK_DEV_CRYPTOLOOP=y
CONFIG_BLK_DEV_RAM=y
CONFIG_BLK_DEV_RAM_SIZE=6144
CONFIG_BLK_DEV_SD=y
diff --git a/arch/parisc/configs/generic-64bit_defconfig b/arch/parisc/configs/generic-64bit_defconfig
index 1b8fd80cbe7f..57501b0aed92 100644
--- a/arch/parisc/configs/generic-64bit_defconfig
+++ b/arch/parisc/configs/generic-64bit_defconfig
@@ -16,6 +16,7 @@ CONFIG_CGROUPS=y
CONFIG_MEMCG=y
CONFIG_CGROUP_PIDS=y
CONFIG_CPUSETS=y
+CONFIG_USER_NS=y
CONFIG_RELAY=y
CONFIG_BLK_DEV_INITRD=y
CONFIG_CC_OPTIMIZE_FOR_SIZE=y
@@ -267,9 +268,9 @@ CONFIG_CRYPTO_DEFLATE=m
CONFIG_CRC_CCITT=m
CONFIG_LIBCRC32C=y
CONFIG_PRINTK_TIME=y
+CONFIG_DEBUG_KERNEL=y
CONFIG_STRIP_ASM_SYMS=y
CONFIG_MAGIC_SYSRQ=y
CONFIG_DEBUG_FS=y
-CONFIG_DEBUG_KERNEL=y
CONFIG_DEBUG_STACKOVERFLOW=y
# CONFIG_SCHED_DEBUG is not set
diff --git a/arch/parisc/include/asm/cacheflush.h b/arch/parisc/include/asm/cacheflush.h
index e8b4a03343d3..8d03b3b26229 100644
--- a/arch/parisc/include/asm/cacheflush.h
+++ b/arch/parisc/include/asm/cacheflush.h
@@ -59,20 +59,12 @@ void flush_dcache_page(struct page *page);
flush_kernel_icache_range_asm(s,e); \
} while (0)
-#define copy_to_user_page(vma, page, vaddr, dst, src, len) \
-do { \
- flush_cache_page(vma, vaddr, page_to_pfn(page)); \
- memcpy(dst, src, len); \
- flush_kernel_dcache_range_asm((unsigned long)dst, (unsigned long)dst + len); \
-} while (0)
-
-#define copy_from_user_page(vma, page, vaddr, dst, src, len) \
-do { \
- flush_cache_page(vma, vaddr, page_to_pfn(page)); \
- memcpy(dst, src, len); \
-} while (0)
-
-void flush_cache_page(struct vm_area_struct *vma, unsigned long vmaddr, unsigned long pfn);
+void copy_to_user_page(struct vm_area_struct *vma, struct page *page,
+ unsigned long user_vaddr, void *dst, void *src, int len);
+void copy_from_user_page(struct vm_area_struct *vma, struct page *page,
+ unsigned long user_vaddr, void *dst, void *src, int len);
+void flush_cache_page(struct vm_area_struct *vma, unsigned long vmaddr,
+ unsigned long pfn);
void flush_cache_range(struct vm_area_struct *vma,
unsigned long start, unsigned long end);
@@ -80,16 +72,7 @@ void flush_cache_range(struct vm_area_struct *vma,
void flush_dcache_page_asm(unsigned long phys_addr, unsigned long vaddr);
#define ARCH_HAS_FLUSH_ANON_PAGE
-static inline void
-flush_anon_page(struct vm_area_struct *vma, struct page *page, unsigned long vmaddr)
-{
- if (PageAnon(page)) {
- flush_tlb_page(vma, vmaddr);
- preempt_disable();
- flush_dcache_page_asm(page_to_phys(page), vmaddr);
- preempt_enable();
- }
-}
+void flush_anon_page(struct vm_area_struct *vma, struct page *page, unsigned long vmaddr);
#define ARCH_HAS_FLUSH_ON_KUNMAP
static inline void kunmap_flush_on_unmap(void *addr)
diff --git a/arch/parisc/include/asm/page.h b/arch/parisc/include/asm/page.h
index 0561568f7b48..6faaaa3ebe9b 100644
--- a/arch/parisc/include/asm/page.h
+++ b/arch/parisc/include/asm/page.h
@@ -26,12 +26,14 @@
#define copy_page(to, from) copy_page_asm((void *)(to), (void *)(from))
struct page;
+struct vm_area_struct;
void clear_page_asm(void *page);
void copy_page_asm(void *to, void *from);
#define clear_user_page(vto, vaddr, page) clear_page_asm(vto)
-void copy_user_page(void *vto, void *vfrom, unsigned long vaddr,
- struct page *pg);
+void copy_user_highpage(struct page *to, struct page *from, unsigned long vaddr,
+ struct vm_area_struct *vma);
+#define __HAVE_ARCH_COPY_USER_HIGHPAGE
/*
* These are used to make use of C type-checking..
diff --git a/arch/parisc/include/asm/pgtable.h b/arch/parisc/include/asm/pgtable.h
index 939db6fe620b..69765a6dbe89 100644
--- a/arch/parisc/include/asm/pgtable.h
+++ b/arch/parisc/include/asm/pgtable.h
@@ -160,7 +160,7 @@ extern void __update_cache(pte_t pte);
#define SPACEID_SHIFT (MAX_ADDRBITS - 32)
#else
#define MAX_ADDRBITS (BITS_PER_LONG)
-#define MAX_ADDRESS (1UL << MAX_ADDRBITS)
+#define MAX_ADDRESS (1ULL << MAX_ADDRBITS)
#define SPACEID_SHIFT 0
#endif
diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c
index 23348199f3f8..0fd04073d4b6 100644
--- a/arch/parisc/kernel/cache.c
+++ b/arch/parisc/kernel/cache.c
@@ -27,6 +27,7 @@
#include <asm/processor.h>
#include <asm/sections.h>
#include <asm/shmparam.h>
+#include <asm/mmu_context.h>
int split_tlb __ro_after_init;
int dcache_stride __ro_after_init;
@@ -91,7 +92,7 @@ static inline void flush_data_cache(void)
}
-/* Virtual address of pfn. */
+/* Kernel virtual address of pfn. */
#define pfn_va(pfn) __va(PFN_PHYS(pfn))
void
@@ -124,11 +125,13 @@ show_cache_info(struct seq_file *m)
cache_info.ic_size/1024 );
if (cache_info.dc_loop != 1)
snprintf(buf, 32, "%lu-way associative", cache_info.dc_loop);
- seq_printf(m, "D-cache\t\t: %ld KB (%s%s, %s)\n",
+ seq_printf(m, "D-cache\t\t: %ld KB (%s%s, %s, alias=%d)\n",
cache_info.dc_size/1024,
(cache_info.dc_conf.cc_wt ? "WT":"WB"),
(cache_info.dc_conf.cc_sh ? ", shared I/D":""),
- ((cache_info.dc_loop == 1) ? "direct mapped" : buf));
+ ((cache_info.dc_loop == 1) ? "direct mapped" : buf),
+ cache_info.dc_conf.cc_alias
+ );
seq_printf(m, "ITLB entries\t: %ld\n" "DTLB entries\t: %ld%s\n",
cache_info.it_size,
cache_info.dt_size,
@@ -324,25 +327,81 @@ __flush_cache_page(struct vm_area_struct *vma, unsigned long vmaddr,
preempt_enable();
}
-static inline void
-__purge_cache_page(struct vm_area_struct *vma, unsigned long vmaddr,
- unsigned long physaddr)
+static void flush_user_cache_page(struct vm_area_struct *vma, unsigned long vmaddr)
{
- if (!static_branch_likely(&parisc_has_cache))
- return;
+ unsigned long flags, space, pgd, prot;
+#ifdef CONFIG_TLB_PTLOCK
+ unsigned long pgd_lock;
+#endif
+
+ vmaddr &= PAGE_MASK;
+
preempt_disable();
- purge_dcache_page_asm(physaddr, vmaddr);
+
+ /* Set context for flush */
+ local_irq_save(flags);
+ prot = mfctl(8);
+ space = mfsp(SR_USER);
+ pgd = mfctl(25);
+#ifdef CONFIG_TLB_PTLOCK
+ pgd_lock = mfctl(28);
+#endif
+ switch_mm_irqs_off(NULL, vma->vm_mm, NULL);
+ local_irq_restore(flags);
+
+ flush_user_dcache_range_asm(vmaddr, vmaddr + PAGE_SIZE);
if (vma->vm_flags & VM_EXEC)
- flush_icache_page_asm(physaddr, vmaddr);
+ flush_user_icache_range_asm(vmaddr, vmaddr + PAGE_SIZE);
+ flush_tlb_page(vma, vmaddr);
+
+ /* Restore previous context */
+ local_irq_save(flags);
+#ifdef CONFIG_TLB_PTLOCK
+ mtctl(pgd_lock, 28);
+#endif
+ mtctl(pgd, 25);
+ mtsp(space, SR_USER);
+ mtctl(prot, 8);
+ local_irq_restore(flags);
+
preempt_enable();
}
+static inline pte_t *get_ptep(struct mm_struct *mm, unsigned long addr)
+{
+ pte_t *ptep = NULL;
+ pgd_t *pgd = mm->pgd;
+ p4d_t *p4d;
+ pud_t *pud;
+ pmd_t *pmd;
+
+ if (!pgd_none(*pgd)) {
+ p4d = p4d_offset(pgd, addr);
+ if (!p4d_none(*p4d)) {
+ pud = pud_offset(p4d, addr);
+ if (!pud_none(*pud)) {
+ pmd = pmd_offset(pud, addr);
+ if (!pmd_none(*pmd))
+ ptep = pte_offset_map(pmd, addr);
+ }
+ }
+ }
+ return ptep;
+}
+
+static inline bool pte_needs_flush(pte_t pte)
+{
+ return (pte_val(pte) & (_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_NO_CACHE))
+ == (_PAGE_PRESENT | _PAGE_ACCESSED);
+}
+
void flush_dcache_page(struct page *page)
{
struct address_space *mapping = page_mapping_file(page);
struct vm_area_struct *mpnt;
unsigned long offset;
unsigned long addr, old_addr = 0;
+ unsigned long count = 0;
pgoff_t pgoff;
if (mapping && !mapping_mapped(mapping)) {
@@ -357,33 +416,52 @@ void flush_dcache_page(struct page *page)
pgoff = page->index;
- /* We have carefully arranged in arch_get_unmapped_area() that
+ /*
+ * We have carefully arranged in arch_get_unmapped_area() that
* *any* mappings of a file are always congruently mapped (whether
* declared as MAP_PRIVATE or MAP_SHARED), so we only need
- * to flush one address here for them all to become coherent */
-
+ * to flush one address here for them all to become coherent
+ * on machines that support equivalent aliasing
+ */
flush_dcache_mmap_lock(mapping);
vma_interval_tree_foreach(mpnt, &mapping->i_mmap, pgoff, pgoff) {
offset = (pgoff - mpnt->vm_pgoff) << PAGE_SHIFT;
addr = mpnt->vm_start + offset;
+ if (parisc_requires_coherency()) {
+ pte_t *ptep;
- /* The TLB is the engine of coherence on parisc: The
- * CPU is entitled to speculate any page with a TLB
- * mapping, so here we kill the mapping then flush the
- * page along a special flush only alias mapping.
- * This guarantees that the page is no-longer in the
- * cache for any process and nor may it be
- * speculatively read in (until the user or kernel
- * specifically accesses it, of course) */
-
- flush_tlb_page(mpnt, addr);
- if (old_addr == 0 || (old_addr & (SHM_COLOUR - 1))
- != (addr & (SHM_COLOUR - 1))) {
- __flush_cache_page(mpnt, addr, page_to_phys(page));
- if (parisc_requires_coherency() && old_addr)
- printk(KERN_ERR "INEQUIVALENT ALIASES 0x%lx and 0x%lx in file %pD\n", old_addr, addr, mpnt->vm_file);
- old_addr = addr;
+ ptep = get_ptep(mpnt->vm_mm, addr);
+ if (ptep && pte_needs_flush(*ptep))
+ flush_user_cache_page(mpnt, addr);
+ } else {
+ /*
+ * The TLB is the engine of coherence on parisc:
+ * The CPU is entitled to speculate any page
+ * with a TLB mapping, so here we kill the
+ * mapping then flush the page along a special
+ * flush only alias mapping. This guarantees that
+ * the page is no-longer in the cache for any
+ * process and nor may it be speculatively read
+ * in (until the user or kernel specifically
+ * accesses it, of course)
+ */
+ flush_tlb_page(mpnt, addr);
+ if (old_addr == 0 || (old_addr & (SHM_COLOUR - 1))
+ != (addr & (SHM_COLOUR - 1))) {
+ __flush_cache_page(mpnt, addr, page_to_phys(page));
+ /*
+ * Software is allowed to have any number
+ * of private mappings to a page.
+ */
+ if (!(mpnt->vm_flags & VM_SHARED))
+ continue;
+ if (old_addr)
+ pr_err("INEQUIVALENT ALIASES 0x%lx and 0x%lx in file %pD\n",
+ old_addr, addr, mpnt->vm_file);
+ old_addr = addr;
+ }
}
+ WARN_ON(++count == 4096);
}
flush_dcache_mmap_unlock(mapping);
}
@@ -417,23 +495,16 @@ void __init parisc_setup_cache_timing(void)
printk(KERN_DEBUG "Whole cache flush %lu cycles, flushing %lu bytes %lu cycles\n",
alltime, size, rangetime);
- threshold = L1_CACHE_ALIGN(size * alltime / rangetime);
+ threshold = L1_CACHE_ALIGN((unsigned long)((uint64_t)size * alltime / rangetime));
+ pr_info("Calculated flush threshold is %lu KiB\n",
+ threshold/1024);
/*
- * The threshold computed above isn't very reliable since the
- * flush times depend greatly on the percentage of dirty lines
- * in the flush range. Further, the whole cache time doesn't
- * include the time to refill lines that aren't in the mm/vma
- * being flushed. By timing glibc build and checks on mako cpus,
- * the following formula seems to work reasonably well. The
- * value from the timing calculation is too small, and increases
- * build and check times by almost a factor two.
+ * The threshold computed above isn't very reliable. The following
+ * heuristic works reasonably well on c8000/rp3440.
*/
threshold2 = cache_info.dc_size * num_online_cpus();
- if (threshold2 > threshold)
- threshold = threshold2;
- if (threshold)
- parisc_cache_flush_threshold = threshold;
+ parisc_cache_flush_threshold = threshold2;
printk(KERN_INFO "Cache flush threshold set to %lu KiB\n",
parisc_cache_flush_threshold/1024);
@@ -489,19 +560,47 @@ void flush_kernel_dcache_page_addr(void *addr)
}
EXPORT_SYMBOL(flush_kernel_dcache_page_addr);
-void copy_user_page(void *vto, void *vfrom, unsigned long vaddr,
- struct page *pg)
+static void flush_cache_page_if_present(struct vm_area_struct *vma,
+ unsigned long vmaddr, unsigned long pfn)
{
- /* Copy using kernel mapping. No coherency is needed (all in
- kunmap) for the `to' page. However, the `from' page needs to
- be flushed through a mapping equivalent to the user mapping
- before it can be accessed through the kernel mapping. */
- preempt_disable();
- flush_dcache_page_asm(__pa(vfrom), vaddr);
- copy_page_asm(vto, vfrom);
- preempt_enable();
+ pte_t *ptep = get_ptep(vma->vm_mm, vmaddr);
+
+ /*
+ * The pte check is racy and sometimes the flush will trigger
+ * a non-access TLB miss. Hopefully, the page has already been
+ * flushed.
+ */
+ if (ptep && pte_needs_flush(*ptep))
+ flush_cache_page(vma, vmaddr, pfn);
+}
+
+void copy_user_highpage(struct page *to, struct page *from,
+ unsigned long vaddr, struct vm_area_struct *vma)
+{
+ void *kto, *kfrom;
+
+ kfrom = kmap_local_page(from);
+ kto = kmap_local_page(to);
+ flush_cache_page_if_present(vma, vaddr, page_to_pfn(from));
+ copy_page_asm(kto, kfrom);
+ kunmap_local(kto);
+ kunmap_local(kfrom);
+}
+
+void copy_to_user_page(struct vm_area_struct *vma, struct page *page,
+ unsigned long user_vaddr, void *dst, void *src, int len)
+{
+ flush_cache_page_if_present(vma, user_vaddr, page_to_pfn(page));
+ memcpy(dst, src, len);
+ flush_kernel_dcache_range_asm((unsigned long)dst, (unsigned long)dst + len);
+}
+
+void copy_from_user_page(struct vm_area_struct *vma, struct page *page,
+ unsigned long user_vaddr, void *dst, void *src, int len)
+{
+ flush_cache_page_if_present(vma, user_vaddr, page_to_pfn(page));
+ memcpy(dst, src, len);
}
-EXPORT_SYMBOL(copy_user_page);
/* __flush_tlb_range()
*
@@ -532,92 +631,105 @@ int __flush_tlb_range(unsigned long sid, unsigned long start,
return 0;
}
-static inline unsigned long mm_total_size(struct mm_struct *mm)
+static void flush_cache_pages(struct vm_area_struct *vma, unsigned long start, unsigned long end)
{
- struct vm_area_struct *vma;
- unsigned long usize = 0;
-
- for (vma = mm->mmap; vma; vma = vma->vm_next)
- usize += vma->vm_end - vma->vm_start;
- return usize;
-}
-
-static inline pte_t *get_ptep(pgd_t *pgd, unsigned long addr)
-{
- pte_t *ptep = NULL;
+ unsigned long addr, pfn;
+ pte_t *ptep;
- if (!pgd_none(*pgd)) {
- p4d_t *p4d = p4d_offset(pgd, addr);
- if (!p4d_none(*p4d)) {
- pud_t *pud = pud_offset(p4d, addr);
- if (!pud_none(*pud)) {
- pmd_t *pmd = pmd_offset(pud, addr);
- if (!pmd_none(*pmd))
- ptep = pte_offset_map(pmd, addr);
+ for (addr = start; addr < end; addr += PAGE_SIZE) {
+ /*
+ * The vma can contain pages that aren't present. Although
+ * the pte search is expensive, we need the pte to find the
+ * page pfn and to check whether the page should be flushed.
+ */
+ ptep = get_ptep(vma->vm_mm, addr);
+ if (ptep && pte_needs_flush(*ptep)) {
+ if (parisc_requires_coherency()) {
+ flush_user_cache_page(vma, addr);
+ } else {
+ pfn = pte_pfn(*ptep);
+ if (WARN_ON(!pfn_valid(pfn)))
+ return;
+ __flush_cache_page(vma, addr, PFN_PHYS(pfn));
}
}
}
- return ptep;
}
-static void flush_cache_pages(struct vm_area_struct *vma, struct mm_struct *mm,
- unsigned long start, unsigned long end)
+static inline unsigned long mm_total_size(struct mm_struct *mm)
{
- unsigned long addr, pfn;
- pte_t *ptep;
+ struct vm_area_struct *vma;
+ unsigned long usize = 0;
- for (addr = start; addr < end; addr += PAGE_SIZE) {
- ptep = get_ptep(mm->pgd, addr);
- if (ptep) {
- pfn = pte_pfn(*ptep);
- flush_cache_page(vma, addr, pfn);
- }
- }
+ for (vma = mm->mmap; vma && usize < parisc_cache_flush_threshold; vma = vma->vm_next)
+ usize += vma->vm_end - vma->vm_start;
+ return usize;
}
void flush_cache_mm(struct mm_struct *mm)
{
struct vm_area_struct *vma;
- /* Flushing the whole cache on each cpu takes forever on
- rp3440, etc. So, avoid it if the mm isn't too big. */
- if ((!IS_ENABLED(CONFIG_SMP) || !arch_irqs_disabled()) &&
- mm_total_size(mm) >= parisc_cache_flush_threshold) {
- if (mm->context.space_id)
- flush_tlb_all();
+ /*
+ * Flushing the whole cache on each cpu takes forever on
+ * rp3440, etc. So, avoid it if the mm isn't too big.
+ *
+ * Note that we must flush the entire cache on machines
+ * with aliasing caches to prevent random segmentation
+ * faults.
+ */
+ if (!parisc_requires_coherency()
+ || mm_total_size(mm) >= parisc_cache_flush_threshold) {
+ if (WARN_ON(IS_ENABLED(CONFIG_SMP) && arch_irqs_disabled()))
+ return;
+ flush_tlb_all();
flush_cache_all();
return;
}
+ /* Flush mm */
for (vma = mm->mmap; vma; vma = vma->vm_next)
- flush_cache_pages(vma, mm, vma->vm_start, vma->vm_end);
+ flush_cache_pages(vma, vma->vm_start, vma->vm_end);
}
-void flush_cache_range(struct vm_area_struct *vma,
- unsigned long start, unsigned long end)
+void flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned long end)
{
- if ((!IS_ENABLED(CONFIG_SMP) || !arch_irqs_disabled()) &&
- end - start >= parisc_cache_flush_threshold) {
- if (vma->vm_mm->context.space_id)
- flush_tlb_range(vma, start, end);
+ if (!parisc_requires_coherency()
+ || end - start >= parisc_cache_flush_threshold) {
+ if (WARN_ON(IS_ENABLED(CONFIG_SMP) && arch_irqs_disabled()))
+ return;
+ flush_tlb_range(vma, start, end);
flush_cache_all();
return;
}
- flush_cache_pages(vma, vma->vm_mm, start, end);
+ flush_cache_pages(vma, start, end);
}
-void
-flush_cache_page(struct vm_area_struct *vma, unsigned long vmaddr, unsigned long pfn)
+void flush_cache_page(struct vm_area_struct *vma, unsigned long vmaddr, unsigned long pfn)
{
- if (pfn_valid(pfn)) {
- if (likely(vma->vm_mm->context.space_id)) {
- flush_tlb_page(vma, vmaddr);
- __flush_cache_page(vma, vmaddr, PFN_PHYS(pfn));
- } else {
- __purge_cache_page(vma, vmaddr, PFN_PHYS(pfn));
- }
+ if (WARN_ON(!pfn_valid(pfn)))
+ return;
+ if (parisc_requires_coherency())
+ flush_user_cache_page(vma, vmaddr);
+ else
+ __flush_cache_page(vma, vmaddr, PFN_PHYS(pfn));
+}
+
+void flush_anon_page(struct vm_area_struct *vma, struct page *page, unsigned long vmaddr)
+{
+ if (!PageAnon(page))
+ return;
+
+ if (parisc_requires_coherency()) {
+ flush_user_cache_page(vma, vmaddr);
+ return;
}
+
+ flush_tlb_page(vma, vmaddr);
+ preempt_disable();
+ flush_dcache_page_asm(page_to_phys(page), vmaddr);
+ preempt_enable();
}
void flush_kernel_vmap_range(void *vaddr, int size)
diff --git a/arch/parisc/kernel/kprobes.c b/arch/parisc/kernel/kprobes.c
index 3343d2fb7889..6e0b86652f30 100644
--- a/arch/parisc/kernel/kprobes.c
+++ b/arch/parisc/kernel/kprobes.c
@@ -152,7 +152,7 @@ int __kprobes parisc_kprobe_ss_handler(struct pt_regs *regs)
/* for absolute branch instructions we can copy iaoq_b. for relative
* branch instructions we need to calculate the new address based on the
* difference between iaoq_f and iaoq_b. We cannot use iaoq_b without
- * modificationt because it's based on our ainsn.insn address.
+ * modifications because it's based on our ainsn.insn address.
*/
if (p->post_handler)
diff --git a/arch/parisc/kernel/processor.c b/arch/parisc/kernel/processor.c
index d98692115221..26eb568f8b96 100644
--- a/arch/parisc/kernel/processor.c
+++ b/arch/parisc/kernel/processor.c
@@ -171,6 +171,7 @@ static int __init processor_probe(struct parisc_device *dev)
p->cpu_num = cpu_info.cpu_num;
p->cpu_loc = cpu_info.cpu_loc;
+ set_cpu_possible(cpuid, true);
store_cpu_topology(cpuid);
#ifdef CONFIG_SMP
@@ -419,8 +420,7 @@ show_cpuinfo (struct seq_file *m, void *v)
}
seq_printf(m, " (0x%02lx)\n", boot_cpu_data.pdc.capabilities);
- seq_printf(m, "model\t\t: %s\n"
- "model name\t: %s\n",
+ seq_printf(m, "model\t\t: %s - %s\n",
boot_cpu_data.pdc.sys_model_name,
cpuinfo->dev ?
cpuinfo->dev->name : "Unknown");
@@ -461,6 +461,13 @@ static struct parisc_driver cpu_driver __refdata = {
*/
void __init processor_init(void)
{
+ unsigned int cpu;
+
reset_cpu_topology();
+
+ /* reset possible mask. We will mark those which are possible. */
+ for_each_possible_cpu(cpu)
+ set_cpu_possible(cpu, false);
+
register_parisc_driver(&cpu_driver);
}
diff --git a/arch/parisc/kernel/setup.c b/arch/parisc/kernel/setup.c
index b91cb45ffd4e..f005ddedb50e 100644
--- a/arch/parisc/kernel/setup.c
+++ b/arch/parisc/kernel/setup.c
@@ -161,6 +161,8 @@ void __init setup_arch(char **cmdline_p)
#ifdef CONFIG_PA11
dma_ops_init();
#endif
+
+ clear_sched_clock_stable();
}
/*
diff --git a/arch/parisc/kernel/time.c b/arch/parisc/kernel/time.c
index bb27dfeeddfc..9714fbd7c42d 100644
--- a/arch/parisc/kernel/time.c
+++ b/arch/parisc/kernel/time.c
@@ -251,13 +251,9 @@ void __init time_init(void)
static int __init init_cr16_clocksource(void)
{
/*
- * The cr16 interval timers are not syncronized across CPUs, even if
- * they share the same socket.
+ * The cr16 interval timers are not synchronized across CPUs.
*/
if (num_online_cpus() > 1 && !running_on_qemu) {
- /* mark sched_clock unstable */
- clear_sched_clock_stable();
-
clocksource_cr16.name = "cr16_unstable";
clocksource_cr16.flags = CLOCK_SOURCE_UNSTABLE;
clocksource_cr16.rating = 0;
diff --git a/arch/parisc/kernel/traps.c b/arch/parisc/kernel/traps.c
index a6e61cf2cad0..b78f1b9d45c1 100644
--- a/arch/parisc/kernel/traps.c
+++ b/arch/parisc/kernel/traps.c
@@ -469,7 +469,7 @@ void parisc_terminate(char *msg, struct pt_regs *regs, int code, unsigned long o
* panic notifiers, and we should call panic
* directly from the location that we wish.
* e.g. We should not call panic from
- * parisc_terminate, but rather the oter way around.
+ * parisc_terminate, but rather the other way around.
* This hack works, prints the panic message twice,
* and it enables reboot timers!
*/
diff --git a/arch/parisc/math-emu/dfadd.c b/arch/parisc/math-emu/dfadd.c
index ec487e07f004..00e561d4aa55 100644
--- a/arch/parisc/math-emu/dfadd.c
+++ b/arch/parisc/math-emu/dfadd.c
@@ -253,7 +253,7 @@ dbl_fadd(
return(NOEXCEPTION);
}
right_exponent = 1; /* Set exponent to reflect different bias
- * with denomalized numbers. */
+ * with denormalized numbers. */
}
else
{
diff --git a/arch/parisc/math-emu/dfsub.c b/arch/parisc/math-emu/dfsub.c
index c4f30acf2d48..4f03782284bd 100644
--- a/arch/parisc/math-emu/dfsub.c
+++ b/arch/parisc/math-emu/dfsub.c
@@ -256,7 +256,7 @@ dbl_fsub(
return(NOEXCEPTION);
}
right_exponent = 1; /* Set exponent to reflect different bias
- * with denomalized numbers. */
+ * with denormalized numbers. */
}
else
{
diff --git a/arch/parisc/math-emu/sfadd.c b/arch/parisc/math-emu/sfadd.c
index 838758279d5b..9b98c874dfac 100644
--- a/arch/parisc/math-emu/sfadd.c
+++ b/arch/parisc/math-emu/sfadd.c
@@ -249,7 +249,7 @@ sgl_fadd(
return(NOEXCEPTION);
}
right_exponent = 1; /* Set exponent to reflect different bias
- * with denomalized numbers. */
+ * with denormalized numbers. */
}
else
{
diff --git a/arch/parisc/math-emu/sfsub.c b/arch/parisc/math-emu/sfsub.c
index 583d3ace4634..29d9eed09d12 100644
--- a/arch/parisc/math-emu/sfsub.c
+++ b/arch/parisc/math-emu/sfsub.c
@@ -252,7 +252,7 @@ sgl_fsub(
return(NOEXCEPTION);
}
right_exponent = 1; /* Set exponent to reflect different bias
- * with denomalized numbers. */
+ * with denormalized numbers. */
}
else
{
diff --git a/arch/parisc/mm/fault.c b/arch/parisc/mm/fault.c
index f114e102aaf2..84bc437be5cd 100644
--- a/arch/parisc/mm/fault.c
+++ b/arch/parisc/mm/fault.c
@@ -22,6 +22,8 @@
#include <asm/traps.h>
+#define DEBUG_NATLB 0
+
/* Various important other fields */
#define bit22set(x) (x & 0x00000200)
#define bits23_25set(x) (x & 0x000001c0)
@@ -450,8 +452,8 @@ handle_nadtlb_fault(struct pt_regs *regs)
fallthrough;
case 0x380:
/* PDC and FIC instructions */
- if (printk_ratelimit()) {
- pr_warn("BUG: nullifying cache flush/purge instruction\n");
+ if (DEBUG_NATLB && printk_ratelimit()) {
+ pr_warn("WARNING: nullifying cache flush/purge instruction\n");
show_regs(regs);
}
if (insn & 0x20) {
diff --git a/arch/powerpc/kernel/module.c b/arch/powerpc/kernel/module.c
index 40a583e9d3c7..97a76a8619fb 100644
--- a/arch/powerpc/kernel/module.c
+++ b/arch/powerpc/kernel/module.c
@@ -101,7 +101,7 @@ __module_alloc(unsigned long size, unsigned long start, unsigned long end, bool
* too.
*/
return __vmalloc_node_range(size, 1, start, end, gfp, prot,
- VM_FLUSH_RESET_PERMS | VM_NO_HUGE_VMAP,
+ VM_FLUSH_RESET_PERMS,
NUMA_NO_NODE, __builtin_return_address(0));
}
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index f5cbfe5efd25..f80cce0e3899 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -615,23 +615,22 @@ DEFINE_INTERRUPT_HANDLER_ASYNC(timer_interrupt)
return;
}
- /* Conditionally hard-enable interrupts. */
- if (should_hard_irq_enable()) {
- /*
- * Ensure a positive value is written to the decrementer, or
- * else some CPUs will continue to take decrementer exceptions.
- * When the PPC_WATCHDOG (decrementer based) is configured,
- * keep this at most 31 bits, which is about 4 seconds on most
- * systems, which gives the watchdog a chance of catching timer
- * interrupt hard lockups.
- */
- if (IS_ENABLED(CONFIG_PPC_WATCHDOG))
- set_dec(0x7fffffff);
- else
- set_dec(decrementer_max);
+ /*
+ * Ensure a positive value is written to the decrementer, or
+ * else some CPUs will continue to take decrementer exceptions.
+ * When the PPC_WATCHDOG (decrementer based) is configured,
+ * keep this at most 31 bits, which is about 4 seconds on most
+ * systems, which gives the watchdog a chance of catching timer
+ * interrupt hard lockups.
+ */
+ if (IS_ENABLED(CONFIG_PPC_WATCHDOG))
+ set_dec(0x7fffffff);
+ else
+ set_dec(decrementer_max);
+ /* Conditionally hard-enable interrupts. */
+ if (should_hard_irq_enable())
do_hard_irq_enable();
- }
#if defined(CONFIG_PPC32) && defined(CONFIG_PPC_PMAC)
if (atomic_read(&ppc_n_lost_interrupts) != 0)
diff --git a/arch/powerpc/kernel/vdso/gettimeofday.S b/arch/powerpc/kernel/vdso/gettimeofday.S
index eb9c81e1c218..0c4ecc8fec5a 100644
--- a/arch/powerpc/kernel/vdso/gettimeofday.S
+++ b/arch/powerpc/kernel/vdso/gettimeofday.S
@@ -22,12 +22,15 @@
.macro cvdso_call funct call_time=0
.cfi_startproc
PPC_STLU r1, -PPC_MIN_STKFRM(r1)
+ .cfi_adjust_cfa_offset PPC_MIN_STKFRM
mflr r0
- .cfi_register lr, r0
PPC_STLU r1, -PPC_MIN_STKFRM(r1)
+ .cfi_adjust_cfa_offset PPC_MIN_STKFRM
PPC_STL r0, PPC_MIN_STKFRM + PPC_LR_STKOFF(r1)
+ .cfi_rel_offset lr, PPC_MIN_STKFRM + PPC_LR_STKOFF
#ifdef __powerpc64__
PPC_STL r2, PPC_MIN_STKFRM + STK_GOT(r1)
+ .cfi_rel_offset r2, PPC_MIN_STKFRM + STK_GOT
#endif
get_datapage r5
.ifeq \call_time
@@ -39,13 +42,15 @@
PPC_LL r0, PPC_MIN_STKFRM + PPC_LR_STKOFF(r1)
#ifdef __powerpc64__
PPC_LL r2, PPC_MIN_STKFRM + STK_GOT(r1)
+ .cfi_restore r2
#endif
.ifeq \call_time
cmpwi r3, 0
.endif
mtlr r0
- .cfi_restore lr
addi r1, r1, 2 * PPC_MIN_STKFRM
+ .cfi_restore lr
+ .cfi_def_cfa_offset 0
crclr so
.ifeq \call_time
beqlr+
diff --git a/arch/powerpc/kvm/book3s_32_sr.S b/arch/powerpc/kvm/book3s_32_sr.S
index e3ab9df6cf19..6cfcd20d4668 100644
--- a/arch/powerpc/kvm/book3s_32_sr.S
+++ b/arch/powerpc/kvm/book3s_32_sr.S
@@ -122,11 +122,27 @@
/* 0x0 - 0xb */
- /* 'current->mm' needs to be in r4 */
- tophys(r4, r2)
- lwz r4, MM(r4)
- tophys(r4, r4)
- /* This only clobbers r0, r3, r4 and r5 */
+ /* switch_mmu_context() needs paging, let's enable it */
+ mfmsr r9
+ ori r11, r9, MSR_DR
+ mtmsr r11
+ sync
+
+ /* switch_mmu_context() clobbers r12, rescue it */
+ SAVE_GPR(12, r1)
+
+ /* Calling switch_mmu_context(<inv>, current->mm, <inv>); */
+ lwz r4, MM(r2)
bl switch_mmu_context
+ /* restore r12 */
+ REST_GPR(12, r1)
+
+ /* Disable paging again */
+ mfmsr r9
+ li r6, MSR_DR
+ andc r9, r9, r6
+ mtmsr r9
+ sync
+
.endm
diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c
index e4ce2a35483f..42851c32ff3b 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
@@ -168,9 +168,10 @@ int kvmppc_mmu_walk_radix_tree(struct kvm_vcpu *vcpu, gva_t eaddr,
return -EINVAL;
/* Read the entry from guest memory */
addr = base + (index * sizeof(rpte));
- vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
+
+ kvm_vcpu_srcu_read_lock(vcpu);
ret = kvm_read_guest(kvm, addr, &rpte, sizeof(rpte));
- srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
+ kvm_vcpu_srcu_read_unlock(vcpu);
if (ret) {
if (pte_ret_p)
*pte_ret_p = addr;
@@ -246,9 +247,9 @@ int kvmppc_mmu_radix_translate_table(struct kvm_vcpu *vcpu, gva_t eaddr,
/* Read the table to find the root of the radix tree */
ptbl = (table & PRTB_MASK) + (table_index * sizeof(entry));
- vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
+ kvm_vcpu_srcu_read_lock(vcpu);
ret = kvm_read_guest(kvm, ptbl, &entry, sizeof(entry));
- srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
+ kvm_vcpu_srcu_read_unlock(vcpu);
if (ret)
return ret;
diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c
index d42b4b6d4a79..85cfa6328222 100644
--- a/arch/powerpc/kvm/book3s_64_vio.c
+++ b/arch/powerpc/kvm/book3s_64_vio.c
@@ -420,13 +420,19 @@ static void kvmppc_tce_put(struct kvmppc_spapr_tce_table *stt,
tbl[idx % TCES_PER_PAGE] = tce;
}
-static void kvmppc_clear_tce(struct mm_struct *mm, struct iommu_table *tbl,
- unsigned long entry)
+static void kvmppc_clear_tce(struct mm_struct *mm, struct kvmppc_spapr_tce_table *stt,
+ struct iommu_table *tbl, unsigned long entry)
{
- unsigned long hpa = 0;
- enum dma_data_direction dir = DMA_NONE;
+ unsigned long i;
+ unsigned long subpages = 1ULL << (stt->page_shift - tbl->it_page_shift);
+ unsigned long io_entry = entry << (stt->page_shift - tbl->it_page_shift);
+
+ for (i = 0; i < subpages; ++i) {
+ unsigned long hpa = 0;
+ enum dma_data_direction dir = DMA_NONE;
- iommu_tce_xchg_no_kill(mm, tbl, entry, &hpa, &dir);
+ iommu_tce_xchg_no_kill(mm, tbl, io_entry + i, &hpa, &dir);
+ }
}
static long kvmppc_tce_iommu_mapped_dec(struct kvm *kvm,
@@ -485,6 +491,8 @@ static long kvmppc_tce_iommu_unmap(struct kvm *kvm,
break;
}
+ iommu_tce_kill(tbl, io_entry, subpages);
+
return ret;
}
@@ -544,6 +552,8 @@ static long kvmppc_tce_iommu_map(struct kvm *kvm,
break;
}
+ iommu_tce_kill(tbl, io_entry, subpages);
+
return ret;
}
@@ -590,10 +600,9 @@ long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
ret = kvmppc_tce_iommu_map(vcpu->kvm, stt, stit->tbl,
entry, ua, dir);
- iommu_tce_kill(stit->tbl, entry, 1);
if (ret != H_SUCCESS) {
- kvmppc_clear_tce(vcpu->kvm->mm, stit->tbl, entry);
+ kvmppc_clear_tce(vcpu->kvm->mm, stt, stit->tbl, entry);
goto unlock_exit;
}
}
@@ -669,13 +678,13 @@ long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu,
*/
if (get_user(tce, tces + i)) {
ret = H_TOO_HARD;
- goto invalidate_exit;
+ goto unlock_exit;
}
tce = be64_to_cpu(tce);
if (kvmppc_tce_to_ua(vcpu->kvm, tce, &ua)) {
ret = H_PARAMETER;
- goto invalidate_exit;
+ goto unlock_exit;
}
list_for_each_entry_lockless(stit, &stt->iommu_tables, next) {
@@ -684,19 +693,15 @@ long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu,
iommu_tce_direction(tce));
if (ret != H_SUCCESS) {
- kvmppc_clear_tce(vcpu->kvm->mm, stit->tbl,
- entry);
- goto invalidate_exit;
+ kvmppc_clear_tce(vcpu->kvm->mm, stt, stit->tbl,
+ entry + i);
+ goto unlock_exit;
}
}
kvmppc_tce_put(stt, entry + i, tce);
}
-invalidate_exit:
- list_for_each_entry_lockless(stit, &stt->iommu_tables, next)
- iommu_tce_kill(stit->tbl, entry, npages);
-
unlock_exit:
srcu_read_unlock(&vcpu->kvm->srcu, idx);
@@ -735,20 +740,16 @@ long kvmppc_h_stuff_tce(struct kvm_vcpu *vcpu,
continue;
if (ret == H_TOO_HARD)
- goto invalidate_exit;
+ return ret;
WARN_ON_ONCE(1);
- kvmppc_clear_tce(vcpu->kvm->mm, stit->tbl, entry);
+ kvmppc_clear_tce(vcpu->kvm->mm, stt, stit->tbl, entry + i);
}
}
for (i = 0; i < npages; ++i, ioba += (1ULL << stt->page_shift))
kvmppc_tce_put(stt, ioba >> stt->page_shift, tce_value);
-invalidate_exit:
- list_for_each_entry_lockless(stit, &stt->iommu_tables, next)
- iommu_tce_kill(stit->tbl, ioba >> stt->page_shift, npages);
-
return ret;
}
EXPORT_SYMBOL_GPL(kvmppc_h_stuff_tce);
diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c
index 870b7f0c7ea5..fdeda6a9cff4 100644
--- a/arch/powerpc/kvm/book3s_64_vio_hv.c
+++ b/arch/powerpc/kvm/book3s_64_vio_hv.c
@@ -247,13 +247,19 @@ static void iommu_tce_kill_rm(struct iommu_table *tbl,
tbl->it_ops->tce_kill(tbl, entry, pages, true);
}
-static void kvmppc_rm_clear_tce(struct kvm *kvm, struct iommu_table *tbl,
- unsigned long entry)
+static void kvmppc_rm_clear_tce(struct kvm *kvm, struct kvmppc_spapr_tce_table *stt,
+ struct iommu_table *tbl, unsigned long entry)
{
- unsigned long hpa = 0;
- enum dma_data_direction dir = DMA_NONE;
+ unsigned long i;
+ unsigned long subpages = 1ULL << (stt->page_shift - tbl->it_page_shift);
+ unsigned long io_entry = entry << (stt->page_shift - tbl->it_page_shift);
+
+ for (i = 0; i < subpages; ++i) {
+ unsigned long hpa = 0;
+ enum dma_data_direction dir = DMA_NONE;
- iommu_tce_xchg_no_kill_rm(kvm->mm, tbl, entry, &hpa, &dir);
+ iommu_tce_xchg_no_kill_rm(kvm->mm, tbl, io_entry + i, &hpa, &dir);
+ }
}
static long kvmppc_rm_tce_iommu_mapped_dec(struct kvm *kvm,
@@ -316,6 +322,8 @@ static long kvmppc_rm_tce_iommu_unmap(struct kvm *kvm,
break;
}
+ iommu_tce_kill_rm(tbl, io_entry, subpages);
+
return ret;
}
@@ -379,6 +387,8 @@ static long kvmppc_rm_tce_iommu_map(struct kvm *kvm,
break;
}
+ iommu_tce_kill_rm(tbl, io_entry, subpages);
+
return ret;
}
@@ -420,10 +430,8 @@ long kvmppc_rm_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
ret = kvmppc_rm_tce_iommu_map(vcpu->kvm, stt,
stit->tbl, entry, ua, dir);
- iommu_tce_kill_rm(stit->tbl, entry, 1);
-
if (ret != H_SUCCESS) {
- kvmppc_rm_clear_tce(vcpu->kvm, stit->tbl, entry);
+ kvmppc_rm_clear_tce(vcpu->kvm, stt, stit->tbl, entry);
return ret;
}
}
@@ -561,7 +569,7 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu,
ua = 0;
if (kvmppc_rm_tce_to_ua(vcpu->kvm, tce, &ua)) {
ret = H_PARAMETER;
- goto invalidate_exit;
+ goto unlock_exit;
}
list_for_each_entry_lockless(stit, &stt->iommu_tables, next) {
@@ -570,19 +578,15 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu,
iommu_tce_direction(tce));
if (ret != H_SUCCESS) {
- kvmppc_rm_clear_tce(vcpu->kvm, stit->tbl,
- entry);
- goto invalidate_exit;
+ kvmppc_rm_clear_tce(vcpu->kvm, stt, stit->tbl,
+ entry + i);
+ goto unlock_exit;
}
}
kvmppc_rm_tce_put(stt, entry + i, tce);
}
-invalidate_exit:
- list_for_each_entry_lockless(stit, &stt->iommu_tables, next)
- iommu_tce_kill_rm(stit->tbl, entry, npages);
-
unlock_exit:
if (!prereg)
arch_spin_unlock(&kvm->mmu_lock.rlock.raw_lock);
@@ -620,20 +624,16 @@ long kvmppc_rm_h_stuff_tce(struct kvm_vcpu *vcpu,
continue;
if (ret == H_TOO_HARD)
- goto invalidate_exit;
+ return ret;
WARN_ON_ONCE_RM(1);
- kvmppc_rm_clear_tce(vcpu->kvm, stit->tbl, entry);
+ kvmppc_rm_clear_tce(vcpu->kvm, stt, stit->tbl, entry + i);
}
}
for (i = 0; i < npages; ++i, ioba += (1ULL << stt->page_shift))
kvmppc_rm_tce_put(stt, ioba >> stt->page_shift, tce_value);
-invalidate_exit:
- list_for_each_entry_lockless(stit, &stt->iommu_tables, next)
- iommu_tce_kill_rm(stit->tbl, ioba >> stt->page_shift, npages);
-
return ret;
}
diff --git a/arch/powerpc/kvm/book3s_hv_nested.c b/arch/powerpc/kvm/book3s_hv_nested.c
index 9d373f8963ee..c943a051c6e7 100644
--- a/arch/powerpc/kvm/book3s_hv_nested.c
+++ b/arch/powerpc/kvm/book3s_hv_nested.c
@@ -306,10 +306,10 @@ long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu)
/* copy parameters in */
hv_ptr = kvmppc_get_gpr(vcpu, 4);
regs_ptr = kvmppc_get_gpr(vcpu, 5);
- vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+ kvm_vcpu_srcu_read_lock(vcpu);
err = kvmhv_read_guest_state_and_regs(vcpu, &l2_hv, &l2_regs,
hv_ptr, regs_ptr);
- srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
+ kvm_vcpu_srcu_read_unlock(vcpu);
if (err)
return H_PARAMETER;
@@ -410,10 +410,10 @@ long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu)
byteswap_hv_regs(&l2_hv);
byteswap_pt_regs(&l2_regs);
}
- vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+ kvm_vcpu_srcu_read_lock(vcpu);
err = kvmhv_write_guest_state_and_regs(vcpu, &l2_hv, &l2_regs,
hv_ptr, regs_ptr);
- srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
+ kvm_vcpu_srcu_read_unlock(vcpu);
if (err)
return H_AUTHORITY;
@@ -600,16 +600,16 @@ long kvmhv_copy_tofrom_guest_nested(struct kvm_vcpu *vcpu)
goto not_found;
/* Write what was loaded into our buffer back to the L1 guest */
- vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+ kvm_vcpu_srcu_read_lock(vcpu);
rc = kvm_vcpu_write_guest(vcpu, gp_to, buf, n);
- srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
+ kvm_vcpu_srcu_read_unlock(vcpu);
if (rc)
goto not_found;
} else {
/* Load the data to be stored from the L1 guest into our buf */
- vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+ kvm_vcpu_srcu_read_lock(vcpu);
rc = kvm_vcpu_read_guest(vcpu, gp_from, buf, n);
- srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
+ kvm_vcpu_srcu_read_unlock(vcpu);
if (rc)
goto not_found;
diff --git a/arch/powerpc/kvm/book3s_rtas.c b/arch/powerpc/kvm/book3s_rtas.c
index 0f847f1e5ddd..6808bda0dbc1 100644
--- a/arch/powerpc/kvm/book3s_rtas.c
+++ b/arch/powerpc/kvm/book3s_rtas.c
@@ -229,9 +229,9 @@ int kvmppc_rtas_hcall(struct kvm_vcpu *vcpu)
*/
args_phys = kvmppc_get_gpr(vcpu, 4) & KVM_PAM;
- vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+ kvm_vcpu_srcu_read_lock(vcpu);
rc = kvm_read_guest(vcpu->kvm, args_phys, &args, sizeof(args));
- srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
+ kvm_vcpu_srcu_read_unlock(vcpu);
if (rc)
goto fail;
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 875c30c12db0..533c4232e5ab 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -425,9 +425,9 @@ int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr,
return EMULATE_DONE;
}
- vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+ kvm_vcpu_srcu_read_lock(vcpu);
rc = kvm_read_guest(vcpu->kvm, pte.raddr, ptr, size);
- srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
+ kvm_vcpu_srcu_read_unlock(vcpu);
if (rc)
return EMULATE_DO_MMIO;
diff --git a/arch/powerpc/perf/Makefile b/arch/powerpc/perf/Makefile
index 2f46e31c7612..4f53d0b97539 100644
--- a/arch/powerpc/perf/Makefile
+++ b/arch/powerpc/perf/Makefile
@@ -3,11 +3,11 @@
obj-y += callchain.o callchain_$(BITS).o perf_regs.o
obj-$(CONFIG_COMPAT) += callchain_32.o
-obj-$(CONFIG_PPC_PERF_CTRS) += core-book3s.o bhrb.o
+obj-$(CONFIG_PPC_PERF_CTRS) += core-book3s.o
obj64-$(CONFIG_PPC_PERF_CTRS) += ppc970-pmu.o power5-pmu.o \
power5+-pmu.o power6-pmu.o power7-pmu.o \
isa207-common.o power8-pmu.o power9-pmu.o \
- generic-compat-pmu.o power10-pmu.o
+ generic-compat-pmu.o power10-pmu.o bhrb.o
obj32-$(CONFIG_PPC_PERF_CTRS) += mpc7450-pmu.o
obj-$(CONFIG_PPC_POWERNV) += imc-pmu.o
diff --git a/arch/powerpc/perf/power10-pmu.c b/arch/powerpc/perf/power10-pmu.c
index d3398100a60f..c6d51e7093cf 100644
--- a/arch/powerpc/perf/power10-pmu.c
+++ b/arch/powerpc/perf/power10-pmu.c
@@ -91,8 +91,8 @@ extern u64 PERF_REG_EXTENDED_MASK;
/* Table of alternatives, sorted by column 0 */
static const unsigned int power10_event_alternatives[][MAX_ALT] = {
- { PM_CYC_ALT, PM_CYC },
{ PM_INST_CMPL_ALT, PM_INST_CMPL },
+ { PM_CYC_ALT, PM_CYC },
};
static int power10_get_alternatives(u64 event, unsigned int flags, u64 alt[])
diff --git a/arch/powerpc/perf/power9-pmu.c b/arch/powerpc/perf/power9-pmu.c
index c9eb5232e68b..c393e837648e 100644
--- a/arch/powerpc/perf/power9-pmu.c
+++ b/arch/powerpc/perf/power9-pmu.c
@@ -133,11 +133,11 @@ int p9_dd22_bl_ev[] = {
/* Table of alternatives, sorted by column 0 */
static const unsigned int power9_event_alternatives[][MAX_ALT] = {
- { PM_INST_DISP, PM_INST_DISP_ALT },
- { PM_RUN_CYC_ALT, PM_RUN_CYC },
- { PM_RUN_INST_CMPL_ALT, PM_RUN_INST_CMPL },
- { PM_LD_MISS_L1, PM_LD_MISS_L1_ALT },
{ PM_BR_2PATH, PM_BR_2PATH_ALT },
+ { PM_INST_DISP, PM_INST_DISP_ALT },
+ { PM_RUN_CYC_ALT, PM_RUN_CYC },
+ { PM_LD_MISS_L1, PM_LD_MISS_L1_ALT },
+ { PM_RUN_INST_CMPL_ALT, PM_RUN_INST_CMPL },
};
static int power9_get_alternatives(u64 event, unsigned int flags, u64 alt[])
diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c
index f58728d5f10d..39962c905542 100644
--- a/arch/powerpc/platforms/pseries/papr_scm.c
+++ b/arch/powerpc/platforms/pseries/papr_scm.c
@@ -462,7 +462,6 @@ static int papr_scm_pmu_check_events(struct papr_scm_priv *p, struct nvdimm_pmu
{
struct papr_scm_perf_stat *stat;
struct papr_scm_perf_stats *stats;
- char *statid;
int index, rc, count;
u32 available_events;
@@ -493,14 +492,12 @@ static int papr_scm_pmu_check_events(struct papr_scm_priv *p, struct nvdimm_pmu
for (index = 0, stat = stats->scm_statistic, count = 0;
index < available_events; index++, ++stat) {
- statid = kzalloc(strlen(stat->stat_id) + 1, GFP_KERNEL);
- if (!statid) {
+ p->nvdimm_events_map[count] = kmemdup_nul(stat->stat_id, 8, GFP_KERNEL);
+ if (!p->nvdimm_events_map[count]) {
rc = -ENOMEM;
goto out_nvdimm_events_map;
}
- strcpy(statid, stat->stat_id);
- p->nvdimm_events_map[count] = statid;
count++;
}
p->nvdimm_events_map[count] = NULL;
diff --git a/arch/powerpc/platforms/pseries/vas-sysfs.c b/arch/powerpc/platforms/pseries/vas-sysfs.c
index 909535ca513a..ec65586cbeb3 100644
--- a/arch/powerpc/platforms/pseries/vas-sysfs.c
+++ b/arch/powerpc/platforms/pseries/vas-sysfs.c
@@ -27,22 +27,31 @@ struct vas_caps_entry {
/*
* This function is used to get the notification from the drmgr when
- * QoS credits are changed. Though receiving the target total QoS
- * credits here, get the official QoS capabilities from the hypervisor.
+ * QoS credits are changed.
*/
-static ssize_t update_total_credits_trigger(struct vas_cop_feat_caps *caps,
+static ssize_t update_total_credits_store(struct vas_cop_feat_caps *caps,
const char *buf, size_t count)
{
int err;
u16 creds;
err = kstrtou16(buf, 0, &creds);
+ /*
+ * The user space interface from the management console
+ * notifies OS with the new QoS credits and then the
+ * hypervisor. So OS has to use this new credits value
+ * and reconfigure VAS windows (close or reopen depends
+ * on the credits available) instead of depending on VAS
+ * QoS capabilities from the hypervisor.
+ */
if (!err)
- err = vas_reconfig_capabilties(caps->win_type);
+ err = vas_reconfig_capabilties(caps->win_type, creds);
if (err)
return -EINVAL;
+ pr_info("Set QoS total credits %u\n", creds);
+
return count;
}
@@ -92,7 +101,7 @@ VAS_ATTR_RO(nr_total_credits);
VAS_ATTR_RO(nr_used_credits);
static struct vas_sysfs_entry update_total_credits_attribute =
- __ATTR(update_total_credits, 0200, NULL, update_total_credits_trigger);
+ __ATTR(update_total_credits, 0200, NULL, update_total_credits_store);
static struct attribute *vas_def_capab_attrs[] = {
&nr_total_credits_attribute.attr,
diff --git a/arch/powerpc/platforms/pseries/vas.c b/arch/powerpc/platforms/pseries/vas.c
index 1f59d78c77a1..ec643bbdb67f 100644
--- a/arch/powerpc/platforms/pseries/vas.c
+++ b/arch/powerpc/platforms/pseries/vas.c
@@ -779,10 +779,10 @@ static int reconfig_close_windows(struct vas_caps *vcap, int excess_creds,
* changes. Reconfig window configurations based on the credits
* availability from this new capabilities.
*/
-int vas_reconfig_capabilties(u8 type)
+int vas_reconfig_capabilties(u8 type, int new_nr_creds)
{
struct vas_cop_feat_caps *caps;
- int old_nr_creds, new_nr_creds;
+ int old_nr_creds;
struct vas_caps *vcaps;
int rc = 0, nr_active_wins;
@@ -795,12 +795,6 @@ int vas_reconfig_capabilties(u8 type)
caps = &vcaps->caps;
mutex_lock(&vas_pseries_mutex);
- rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES, vcaps->feat,
- (u64)virt_to_phys(&hv_cop_caps));
- if (rc)
- goto out;
-
- new_nr_creds = be16_to_cpu(hv_cop_caps.target_lpar_creds);
old_nr_creds = atomic_read(&caps->nr_total_credits);
@@ -832,7 +826,6 @@ int vas_reconfig_capabilties(u8 type)
false);
}
-out:
mutex_unlock(&vas_pseries_mutex);
return rc;
}
@@ -850,7 +843,7 @@ static int pseries_vas_notifier(struct notifier_block *nb,
struct of_reconfig_data *rd = data;
struct device_node *dn = rd->dn;
const __be32 *intserv = NULL;
- int len, rc = 0;
+ int new_nr_creds, len, rc = 0;
if ((action == OF_RECONFIG_ATTACH_NODE) ||
(action == OF_RECONFIG_DETACH_NODE))
@@ -862,7 +855,15 @@ static int pseries_vas_notifier(struct notifier_block *nb,
if (!intserv)
return NOTIFY_OK;
- rc = vas_reconfig_capabilties(VAS_GZIP_DEF_FEAT_TYPE);
+ rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES,
+ vascaps[VAS_GZIP_DEF_FEAT_TYPE].feat,
+ (u64)virt_to_phys(&hv_cop_caps));
+ if (!rc) {
+ new_nr_creds = be16_to_cpu(hv_cop_caps.target_lpar_creds);
+ rc = vas_reconfig_capabilties(VAS_GZIP_DEF_FEAT_TYPE,
+ new_nr_creds);
+ }
+
if (rc)
pr_err("Failed reconfig VAS capabilities with DLPAR\n");
diff --git a/arch/powerpc/platforms/pseries/vas.h b/arch/powerpc/platforms/pseries/vas.h
index 34177881e998..333ffa2f9f42 100644
--- a/arch/powerpc/platforms/pseries/vas.h
+++ b/arch/powerpc/platforms/pseries/vas.h
@@ -135,7 +135,7 @@ struct pseries_vas_window {
};
int sysfs_add_vas_caps(struct vas_cop_feat_caps *caps);
-int vas_reconfig_capabilties(u8 type);
+int vas_reconfig_capabilties(u8 type, int new_nr_creds);
int __init sysfs_pseries_vas_init(struct vas_all_caps *vas_caps);
#ifdef CONFIG_PPC_VAS
diff --git a/arch/riscv/Kconfig.socs b/arch/riscv/Kconfig.socs
index 34592d00dde8..f6ef358d8a2c 100644
--- a/arch/riscv/Kconfig.socs
+++ b/arch/riscv/Kconfig.socs
@@ -38,7 +38,7 @@ config SOC_VIRT
select SIFIVE_PLIC
select PM_GENERIC_DOMAINS if PM
select PM_GENERIC_DOMAINS_OF if PM && OF
- select RISCV_SBI_CPUIDLE if CPU_IDLE
+ select RISCV_SBI_CPUIDLE if CPU_IDLE && RISCV_SBI
help
This enables support for QEMU Virt Machine.
diff --git a/arch/riscv/boot/dts/microchip/microchip-mpfs-fabric.dtsi b/arch/riscv/boot/dts/microchip/microchip-mpfs-fabric.dtsi
index 854320e17b28..ccaac3371cf9 100644
--- a/arch/riscv/boot/dts/microchip/microchip-mpfs-fabric.dtsi
+++ b/arch/riscv/boot/dts/microchip/microchip-mpfs-fabric.dtsi
@@ -7,7 +7,7 @@
reg = <0x0 0x41000000 0x0 0xF0>;
microchip,sync-update-mask = /bits/ 32 <0>;
#pwm-cells = <2>;
- clocks = <&clkcfg CLK_FIC3>;
+ clocks = <&fabric_clk3>;
status = "disabled";
};
@@ -16,10 +16,22 @@
reg = <0x0 0x44000000 0x0 0x1000>;
#address-cells = <1>;
#size-cells = <0>;
- clocks = <&clkcfg CLK_FIC3>;
+ clocks = <&fabric_clk3>;
interrupt-parent = <&plic>;
interrupts = <122>;
clock-frequency = <100000>;
status = "disabled";
};
+
+ fabric_clk3: fabric-clk3 {
+ compatible = "fixed-clock";
+ #clock-cells = <0>;
+ clock-frequency = <62500000>;
+ };
+
+ fabric_clk1: fabric-clk1 {
+ compatible = "fixed-clock";
+ #clock-cells = <0>;
+ clock-frequency = <125000000>;
+ };
};
diff --git a/arch/riscv/boot/dts/microchip/microchip-mpfs-icicle-kit.dts b/arch/riscv/boot/dts/microchip/microchip-mpfs-icicle-kit.dts
index cd2fe80fa81a..3392153dd0f1 100644
--- a/arch/riscv/boot/dts/microchip/microchip-mpfs-icicle-kit.dts
+++ b/arch/riscv/boot/dts/microchip/microchip-mpfs-icicle-kit.dts
@@ -45,7 +45,7 @@
};
&refclk {
- clock-frequency = <600000000>;
+ clock-frequency = <125000000>;
};
&mmuart1 {
diff --git a/arch/riscv/boot/dts/microchip/microchip-mpfs.dtsi b/arch/riscv/boot/dts/microchip/microchip-mpfs.dtsi
index c5c9d1360de0..cf2f55e1dcb6 100644
--- a/arch/riscv/boot/dts/microchip/microchip-mpfs.dtsi
+++ b/arch/riscv/boot/dts/microchip/microchip-mpfs.dtsi
@@ -141,7 +141,7 @@
};
};
- refclk: msspllclk {
+ refclk: mssrefclk {
compatible = "fixed-clock";
#clock-cells = <0>;
};
@@ -190,7 +190,7 @@
clkcfg: clkcfg@20002000 {
compatible = "microchip,mpfs-clkcfg";
- reg = <0x0 0x20002000 0x0 0x1000>;
+ reg = <0x0 0x20002000 0x0 0x1000>, <0x0 0x3E001000 0x0 0x1000>;
clocks = <&refclk>;
#clock-cells = <1>;
};
@@ -366,7 +366,7 @@
gpio1: gpio@20121000 {
compatible = "microchip,mpfs-gpio";
- reg = <000 0x20121000 0x0 0x1000>;
+ reg = <0x0 0x20121000 0x0 0x1000>;
interrupt-parent = <&plic>;
interrupt-controller;
#interrupt-cells = <1>;
@@ -393,8 +393,8 @@
reg = <0x0 0x20124000 0x0 0x1000>;
interrupt-parent = <&plic>;
interrupts = <80>, <81>;
- clocks = <&clkcfg CLK_RTC>;
- clock-names = "rtc";
+ clocks = <&clkcfg CLK_RTC>, <&clkcfg CLK_RTCREF>;
+ clock-names = "rtc", "rtcref";
status = "disabled";
};
@@ -424,7 +424,7 @@
<0 0 0 3 &pcie_intc 2>,
<0 0 0 4 &pcie_intc 3>;
interrupt-map-mask = <0 0 0 7>;
- clocks = <&clkcfg CLK_FIC0>, <&clkcfg CLK_FIC1>, <&clkcfg CLK_FIC3>;
+ clocks = <&fabric_clk1>, <&fabric_clk1>, <&fabric_clk3>;
clock-names = "fic0", "fic1", "fic3";
ranges = <0x3000000 0x0 0x8000000 0x20 0x8000000 0x0 0x80000000>;
msi-parent = <&pcie>;
diff --git a/arch/riscv/boot/dts/sifive/fu540-c000.dtsi b/arch/riscv/boot/dts/sifive/fu540-c000.dtsi
index aad45d7f498f..5c638fd5b35c 100644
--- a/arch/riscv/boot/dts/sifive/fu540-c000.dtsi
+++ b/arch/riscv/boot/dts/sifive/fu540-c000.dtsi
@@ -167,7 +167,7 @@
clocks = <&prci FU540_PRCI_CLK_TLCLK>;
status = "disabled";
};
- dma: dma@3000000 {
+ dma: dma-controller@3000000 {
compatible = "sifive,fu540-c000-pdma";
reg = <0x0 0x3000000 0x0 0x8000>;
interrupt-parent = <&plic0>;
diff --git a/arch/riscv/configs/defconfig b/arch/riscv/configs/defconfig
index 30e3017f22bc..0cc17db8aaba 100644
--- a/arch/riscv/configs/defconfig
+++ b/arch/riscv/configs/defconfig
@@ -101,6 +101,7 @@ CONFIG_VIRTIO_BALLOON=y
CONFIG_VIRTIO_INPUT=y
CONFIG_VIRTIO_MMIO=y
CONFIG_RPMSG_CHAR=y
+CONFIG_RPMSG_CTRL=y
CONFIG_RPMSG_VIRTIO=y
CONFIG_EXT4_FS=y
CONFIG_EXT4_FS_POSIX_ACL=y
diff --git a/arch/riscv/configs/rv32_defconfig b/arch/riscv/configs/rv32_defconfig
index 7e5efdc3829d..6cd9d84d3e13 100644
--- a/arch/riscv/configs/rv32_defconfig
+++ b/arch/riscv/configs/rv32_defconfig
@@ -93,6 +93,7 @@ CONFIG_VIRTIO_BALLOON=y
CONFIG_VIRTIO_INPUT=y
CONFIG_VIRTIO_MMIO=y
CONFIG_RPMSG_CHAR=y
+CONFIG_RPMSG_CTRL=y
CONFIG_RPMSG_VIRTIO=y
CONFIG_EXT4_FS=y
CONFIG_EXT4_FS_POSIX_ACL=y
diff --git a/arch/riscv/include/asm/kvm_host.h b/arch/riscv/include/asm/kvm_host.h
index 78da839657e5..cd4bbcecb0fb 100644
--- a/arch/riscv/include/asm/kvm_host.h
+++ b/arch/riscv/include/asm/kvm_host.h
@@ -193,9 +193,6 @@ struct kvm_vcpu_arch {
/* Don't run the VCPU (blocked) */
bool pause;
-
- /* SRCU lock index for in-kernel run loop */
- int srcu_idx;
};
static inline void kvm_arch_hardware_unsetup(void) {}
diff --git a/arch/riscv/kernel/patch.c b/arch/riscv/kernel/patch.c
index 0b552873a577..765004b60513 100644
--- a/arch/riscv/kernel/patch.c
+++ b/arch/riscv/kernel/patch.c
@@ -104,7 +104,7 @@ static int patch_text_cb(void *data)
struct patch_insn *patch = data;
int ret = 0;
- if (atomic_inc_return(&patch->cpu_count) == 1) {
+ if (atomic_inc_return(&patch->cpu_count) == num_online_cpus()) {
ret =
patch_text_nosync(patch->addr, &patch->insn,
GET_INSN_LENGTH(patch->insn));
diff --git a/arch/riscv/kvm/vcpu.c b/arch/riscv/kvm/vcpu.c
index 6785aef4cbd4..7461f964d20a 100644
--- a/arch/riscv/kvm/vcpu.c
+++ b/arch/riscv/kvm/vcpu.c
@@ -38,14 +38,16 @@ const struct kvm_stats_header kvm_vcpu_stats_header = {
sizeof(kvm_vcpu_stats_desc),
};
-#define KVM_RISCV_ISA_ALLOWED (riscv_isa_extension_mask(a) | \
- riscv_isa_extension_mask(c) | \
- riscv_isa_extension_mask(d) | \
- riscv_isa_extension_mask(f) | \
- riscv_isa_extension_mask(i) | \
- riscv_isa_extension_mask(m) | \
- riscv_isa_extension_mask(s) | \
- riscv_isa_extension_mask(u))
+#define KVM_RISCV_ISA_DISABLE_ALLOWED (riscv_isa_extension_mask(d) | \
+ riscv_isa_extension_mask(f))
+
+#define KVM_RISCV_ISA_DISABLE_NOT_ALLOWED (riscv_isa_extension_mask(a) | \
+ riscv_isa_extension_mask(c) | \
+ riscv_isa_extension_mask(i) | \
+ riscv_isa_extension_mask(m))
+
+#define KVM_RISCV_ISA_ALLOWED (KVM_RISCV_ISA_DISABLE_ALLOWED | \
+ KVM_RISCV_ISA_DISABLE_NOT_ALLOWED)
static void kvm_riscv_reset_vcpu(struct kvm_vcpu *vcpu)
{
@@ -219,7 +221,8 @@ static int kvm_riscv_vcpu_set_reg_config(struct kvm_vcpu *vcpu,
switch (reg_num) {
case KVM_REG_RISCV_CONFIG_REG(isa):
if (!vcpu->arch.ran_atleast_once) {
- vcpu->arch.isa = reg_val;
+ /* Ignore the disable request for these extensions */
+ vcpu->arch.isa = reg_val | KVM_RISCV_ISA_DISABLE_NOT_ALLOWED;
vcpu->arch.isa &= riscv_isa_extension_base(NULL);
vcpu->arch.isa &= KVM_RISCV_ISA_ALLOWED;
kvm_riscv_vcpu_fp_reset(vcpu);
@@ -724,13 +727,13 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
/* Mark this VCPU ran at least once */
vcpu->arch.ran_atleast_once = true;
- vcpu->arch.srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+ kvm_vcpu_srcu_read_lock(vcpu);
/* Process MMIO value returned from user-space */
if (run->exit_reason == KVM_EXIT_MMIO) {
ret = kvm_riscv_vcpu_mmio_return(vcpu, vcpu->run);
if (ret) {
- srcu_read_unlock(&vcpu->kvm->srcu, vcpu->arch.srcu_idx);
+ kvm_vcpu_srcu_read_unlock(vcpu);
return ret;
}
}
@@ -739,13 +742,13 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
if (run->exit_reason == KVM_EXIT_RISCV_SBI) {
ret = kvm_riscv_vcpu_sbi_return(vcpu, vcpu->run);
if (ret) {
- srcu_read_unlock(&vcpu->kvm->srcu, vcpu->arch.srcu_idx);
+ kvm_vcpu_srcu_read_unlock(vcpu);
return ret;
}
}
if (run->immediate_exit) {
- srcu_read_unlock(&vcpu->kvm->srcu, vcpu->arch.srcu_idx);
+ kvm_vcpu_srcu_read_unlock(vcpu);
return -EINTR;
}
@@ -784,7 +787,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
*/
vcpu->mode = IN_GUEST_MODE;
- srcu_read_unlock(&vcpu->kvm->srcu, vcpu->arch.srcu_idx);
+ kvm_vcpu_srcu_read_unlock(vcpu);
smp_mb__after_srcu_read_unlock();
/*
@@ -802,7 +805,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
vcpu->mode = OUTSIDE_GUEST_MODE;
local_irq_enable();
preempt_enable();
- vcpu->arch.srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+ kvm_vcpu_srcu_read_lock(vcpu);
continue;
}
@@ -846,7 +849,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
preempt_enable();
- vcpu->arch.srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+ kvm_vcpu_srcu_read_lock(vcpu);
ret = kvm_riscv_vcpu_exit(vcpu, run, &trap);
}
@@ -855,7 +858,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
vcpu_put(vcpu);
- srcu_read_unlock(&vcpu->kvm->srcu, vcpu->arch.srcu_idx);
+ kvm_vcpu_srcu_read_unlock(vcpu);
return ret;
}
diff --git a/arch/riscv/kvm/vcpu_exit.c b/arch/riscv/kvm/vcpu_exit.c
index aa8af129e4bb..a72c15d4b42a 100644
--- a/arch/riscv/kvm/vcpu_exit.c
+++ b/arch/riscv/kvm/vcpu_exit.c
@@ -456,9 +456,9 @@ static int stage2_page_fault(struct kvm_vcpu *vcpu, struct kvm_run *run,
void kvm_riscv_vcpu_wfi(struct kvm_vcpu *vcpu)
{
if (!kvm_arch_vcpu_runnable(vcpu)) {
- srcu_read_unlock(&vcpu->kvm->srcu, vcpu->arch.srcu_idx);
+ kvm_vcpu_srcu_read_unlock(vcpu);
kvm_vcpu_halt(vcpu);
- vcpu->arch.srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+ kvm_vcpu_srcu_read_lock(vcpu);
kvm_clear_request(KVM_REQ_UNHALT, vcpu);
}
}
diff --git a/arch/riscv/kvm/vcpu_sbi.c b/arch/riscv/kvm/vcpu_sbi.c
index a09ecb97b890..d45e7da3f0d3 100644
--- a/arch/riscv/kvm/vcpu_sbi.c
+++ b/arch/riscv/kvm/vcpu_sbi.c
@@ -83,7 +83,7 @@ void kvm_riscv_vcpu_sbi_forward(struct kvm_vcpu *vcpu, struct kvm_run *run)
void kvm_riscv_vcpu_sbi_system_reset(struct kvm_vcpu *vcpu,
struct kvm_run *run,
- u32 type, u64 flags)
+ u32 type, u64 reason)
{
unsigned long i;
struct kvm_vcpu *tmp;
@@ -94,7 +94,8 @@ void kvm_riscv_vcpu_sbi_system_reset(struct kvm_vcpu *vcpu,
memset(&run->system_event, 0, sizeof(run->system_event));
run->system_event.type = type;
- run->system_event.flags = flags;
+ run->system_event.ndata = 1;
+ run->system_event.data[0] = reason;
run->exit_reason = KVM_EXIT_SYSTEM_EVENT;
}
diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
index 9535bea8688c..05ed641a1134 100644
--- a/arch/riscv/mm/init.c
+++ b/arch/riscv/mm/init.c
@@ -208,8 +208,25 @@ static void __init setup_bootmem(void)
* early_init_fdt_reserve_self() since __pa() does
* not work for DTB pointers that are fixmap addresses
*/
- if (!IS_ENABLED(CONFIG_BUILTIN_DTB))
- memblock_reserve(dtb_early_pa, fdt_totalsize(dtb_early_va));
+ if (!IS_ENABLED(CONFIG_BUILTIN_DTB)) {
+ /*
+ * In case the DTB is not located in a memory region we won't
+ * be able to locate it later on via the linear mapping and
+ * get a segfault when accessing it via __va(dtb_early_pa).
+ * To avoid this situation copy DTB to a memory region.
+ * Note that memblock_phys_alloc will also reserve DTB region.
+ */
+ if (!memblock_is_memory(dtb_early_pa)) {
+ size_t fdt_size = fdt_totalsize(dtb_early_va);
+ phys_addr_t new_dtb_early_pa = memblock_phys_alloc(fdt_size, PAGE_SIZE);
+ void *new_dtb_early_va = early_memremap(new_dtb_early_pa, fdt_size);
+
+ memcpy(new_dtb_early_va, dtb_early_va, fdt_size);
+ early_memunmap(new_dtb_early_va, fdt_size);
+ _dtb_early_pa = new_dtb_early_pa;
+ } else
+ memblock_reserve(dtb_early_pa, fdt_totalsize(dtb_early_va));
+ }
early_init_fdt_scan_reserved_mem();
dma_contiguous_reserve(dma32_phys_limit);
@@ -718,6 +735,7 @@ retry:
if (!check_l4) {
disable_pgtable_l5();
check_l4 = true;
+ memset(early_pg_dir, 0, PAGE_SIZE);
goto retry;
}
disable_pgtable_l4();
diff --git a/arch/s390/Makefile b/arch/s390/Makefile
index e441b60b1812..df325eacf62d 100644
--- a/arch/s390/Makefile
+++ b/arch/s390/Makefile
@@ -30,6 +30,16 @@ KBUILD_CFLAGS_DECOMPRESSOR += -fno-stack-protector
KBUILD_CFLAGS_DECOMPRESSOR += $(call cc-disable-warning, address-of-packed-member)
KBUILD_CFLAGS_DECOMPRESSOR += $(if $(CONFIG_DEBUG_INFO),-g)
KBUILD_CFLAGS_DECOMPRESSOR += $(if $(CONFIG_DEBUG_INFO_DWARF4), $(call cc-option, -gdwarf-4,))
+
+ifdef CONFIG_CC_IS_GCC
+ ifeq ($(call cc-ifversion, -ge, 1200, y), y)
+ ifeq ($(call cc-ifversion, -lt, 1300, y), y)
+ KBUILD_CFLAGS += $(call cc-disable-warning, array-bounds)
+ KBUILD_CFLAGS_DECOMPRESSOR += $(call cc-disable-warning, array-bounds)
+ endif
+ endif
+endif
+
UTS_MACHINE := s390x
STACK_SIZE := $(if $(CONFIG_KASAN),65536,16384)
CHECKFLAGS += -D__s390__ -D__s390x__
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index 9b30beac904d..af96dc0549a4 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -1334,11 +1334,11 @@ int kvm_s390_handle_wait(struct kvm_vcpu *vcpu)
hrtimer_start(&vcpu->arch.ckc_timer, sltime, HRTIMER_MODE_REL);
VCPU_EVENT(vcpu, 4, "enabled wait: %llu ns", sltime);
no_timer:
- srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
+ kvm_vcpu_srcu_read_unlock(vcpu);
kvm_vcpu_halt(vcpu);
vcpu->valid_wakeup = false;
__unset_cpu_idle(vcpu);
- vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+ kvm_vcpu_srcu_read_lock(vcpu);
hrtimer_cancel(&vcpu->arch.ckc_timer);
return 0;
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 156d1c25a3c1..76ad6408cb2c 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -2384,7 +2384,16 @@ static int kvm_s390_vm_mem_op(struct kvm *kvm, struct kvm_s390_mem_op *mop)
return -EINVAL;
if (mop->size > MEM_OP_MAX_SIZE)
return -E2BIG;
- if (kvm_s390_pv_is_protected(kvm))
+ /*
+ * This is technically a heuristic only, if the kvm->lock is not
+ * taken, it is not guaranteed that the vm is/remains non-protected.
+ * This is ok from a kernel perspective, wrongdoing is detected
+ * on the access, -EFAULT is returned and the vm may crash the
+ * next time it accesses the memory in question.
+ * There is no sane usecase to do switching and a memop on two
+ * different CPUs at the same time.
+ */
+ if (kvm_s390_pv_get_handle(kvm))
return -EINVAL;
if (mop->flags & KVM_S390_MEMOP_F_SKEY_PROTECTION) {
if (access_key_invalid(mop->key))
@@ -4237,14 +4246,14 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
* We try to hold kvm->srcu during most of vcpu_run (except when run-
* ning the guest), so that memslots (and other stuff) are protected
*/
- vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+ kvm_vcpu_srcu_read_lock(vcpu);
do {
rc = vcpu_pre_run(vcpu);
if (rc)
break;
- srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
+ kvm_vcpu_srcu_read_unlock(vcpu);
/*
* As PF_VCPU will be used in fault handler, between
* guest_enter and guest_exit should be no uaccess.
@@ -4281,12 +4290,12 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
__enable_cpu_timer_accounting(vcpu);
guest_exit_irqoff();
local_irq_enable();
- vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+ kvm_vcpu_srcu_read_lock(vcpu);
rc = vcpu_post_run(vcpu, exit_reason);
} while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
- srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
+ kvm_vcpu_srcu_read_unlock(vcpu);
return rc;
}
diff --git a/arch/s390/kvm/pv.c b/arch/s390/kvm/pv.c
index 7f7c0d6af2ce..cc7c9599f43e 100644
--- a/arch/s390/kvm/pv.c
+++ b/arch/s390/kvm/pv.c
@@ -137,12 +137,7 @@ static int kvm_s390_pv_alloc_vm(struct kvm *kvm)
/* Allocate variable storage */
vlen = ALIGN(virt * ((npages * PAGE_SIZE) / HPAGE_SIZE), PAGE_SIZE);
vlen += uv_info.guest_virt_base_stor_len;
- /*
- * The Create Secure Configuration Ultravisor Call does not support
- * using large pages for the virtual memory area.
- * This is a hardware limitation.
- */
- kvm->arch.pv.stor_var = vmalloc_no_huge(vlen);
+ kvm->arch.pv.stor_var = vzalloc(vlen);
if (!kvm->arch.pv.stor_var)
goto out_err;
return 0;
diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c
index acda4b6fc851..dada78b92691 100644
--- a/arch/s390/kvm/vsie.c
+++ b/arch/s390/kvm/vsie.c
@@ -1091,7 +1091,7 @@ static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
handle_last_fault(vcpu, vsie_page);
- srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
+ kvm_vcpu_srcu_read_unlock(vcpu);
/* save current guest state of bp isolation override */
guest_bp_isolation = test_thread_flag(TIF_ISOLATE_BP_GUEST);
@@ -1133,7 +1133,7 @@ static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
if (!guest_bp_isolation)
clear_thread_flag(TIF_ISOLATE_BP_GUEST);
- vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+ kvm_vcpu_srcu_read_lock(vcpu);
if (rc == -EINTR) {
VCPU_EVENT(vcpu, 3, "%s", "machine check");
diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c
index af03cacf34ec..1ac73917a8d3 100644
--- a/arch/s390/mm/gmap.c
+++ b/arch/s390/mm/gmap.c
@@ -1183,6 +1183,7 @@ EXPORT_SYMBOL_GPL(gmap_read_table);
static inline void gmap_insert_rmap(struct gmap *sg, unsigned long vmaddr,
struct gmap_rmap *rmap)
{
+ struct gmap_rmap *temp;
void __rcu **slot;
BUG_ON(!gmap_is_shadow(sg));
@@ -1190,6 +1191,12 @@ static inline void gmap_insert_rmap(struct gmap *sg, unsigned long vmaddr,
if (slot) {
rmap->next = radix_tree_deref_slot_protected(slot,
&sg->guest_table_lock);
+ for (temp = rmap->next; temp; temp = temp->next) {
+ if (temp->raddr == rmap->raddr) {
+ kfree(rmap);
+ return;
+ }
+ }
radix_tree_replace_slot(&sg->host_to_rmap, slot, rmap);
} else {
rmap->next = NULL;
diff --git a/arch/sparc/include/asm/cacheflush_32.h b/arch/sparc/include/asm/cacheflush_32.h
index 41c6d734a474..adb6991d0455 100644
--- a/arch/sparc/include/asm/cacheflush_32.h
+++ b/arch/sparc/include/asm/cacheflush_32.h
@@ -35,6 +35,7 @@
#define flush_page_for_dma(addr) \
sparc32_cachetlb_ops->page_for_dma(addr)
+struct page;
void sparc_flush_page_to_ram(struct page *page);
#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c
index b03269faef71..c4344b67628d 100644
--- a/arch/um/drivers/ubd_kern.c
+++ b/arch/um/drivers/ubd_kern.c
@@ -483,7 +483,6 @@ static void ubd_handler(void)
if ((io_req->error == BLK_STS_NOTSUPP) && (req_op(io_req->req) == REQ_OP_DISCARD)) {
blk_queue_max_discard_sectors(io_req->req->q, 0);
blk_queue_max_write_zeroes_sectors(io_req->req->q, 0);
- blk_queue_flag_clear(QUEUE_FLAG_DISCARD, io_req->req->q);
}
blk_mq_end_request(io_req->req, io_req->error);
kfree(io_req);
@@ -800,10 +799,8 @@ static int ubd_open_dev(struct ubd *ubd_dev)
}
if (ubd_dev->no_trim == 0) {
ubd_dev->queue->limits.discard_granularity = SECTOR_SIZE;
- ubd_dev->queue->limits.discard_alignment = SECTOR_SIZE;
blk_queue_max_discard_sectors(ubd_dev->queue, UBD_MAX_REQUEST);
blk_queue_max_write_zeroes_sectors(ubd_dev->queue, UBD_MAX_REQUEST);
- blk_queue_flag_set(QUEUE_FLAG_DISCARD, ubd_dev->queue);
}
blk_queue_flag_set(QUEUE_FLAG_NONROT, ubd_dev->queue);
return 0;
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index b0142e01002e..4bed3abf444d 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1866,7 +1866,7 @@ config X86_KERNEL_IBT
code with them to make this happen.
In addition to building the kernel with IBT, seal all functions that
- are not indirect call targets, avoiding them ever becomming one.
+ are not indirect call targets, avoiding them ever becoming one.
This requires LTO like objtool runs and will slow down the build. It
does significantly reduce the number of ENDBR instructions in the
diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S
index 6dbd7e9f74c9..0352e4589efa 100644
--- a/arch/x86/boot/header.S
+++ b/arch/x86/boot/header.S
@@ -163,7 +163,11 @@ extra_header_fields:
.long 0x200 # SizeOfHeaders
.long 0 # CheckSum
.word IMAGE_SUBSYSTEM_EFI_APPLICATION # Subsystem (EFI application)
+#ifdef CONFIG_DXE_MEM_ATTRIBUTES
+ .word IMAGE_DLL_CHARACTERISTICS_NX_COMPAT # DllCharacteristics
+#else
.word 0 # DllCharacteristics
+#endif
#ifdef CONFIG_X86_32
.long 0 # SizeOfStackReserve
.long 0 # SizeOfStackCommit
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 4faac48ebec5..73d958522b6a 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -337,6 +337,9 @@ SYM_CODE_END(ret_from_fork)
call \cfunc
+ /* For some configurations \cfunc ends up being a noreturn. */
+ REACHABLE
+
jmp error_return
.endm
diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c
index 5d7762288a24..48e5db21142c 100644
--- a/arch/x86/events/intel/cstate.c
+++ b/arch/x86/events/intel/cstate.c
@@ -51,7 +51,7 @@
* perf code: 0x02
* Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW,
* SKL,KNL,GLM,CNL,KBL,CML,ICL,ICX,
- * TGL,TNT,RKL,ADL,RPL
+ * TGL,TNT,RKL,ADL,RPL,SPR
* Scope: Core
* MSR_CORE_C7_RESIDENCY: CORE C7 Residency Counter
* perf code: 0x03
@@ -62,7 +62,7 @@
* perf code: 0x00
* Available model: SNB,IVB,HSW,BDW,SKL,KNL,GLM,CNL,
* KBL,CML,ICL,ICX,TGL,TNT,RKL,ADL,
- * RPL
+ * RPL,SPR
* Scope: Package (physical package)
* MSR_PKG_C3_RESIDENCY: Package C3 Residency Counter.
* perf code: 0x01
@@ -74,7 +74,7 @@
* perf code: 0x02
* Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW,
* SKL,KNL,GLM,CNL,KBL,CML,ICL,ICX,
- * TGL,TNT,RKL,ADL,RPL
+ * TGL,TNT,RKL,ADL,RPL,SPR
* Scope: Package (physical package)
* MSR_PKG_C7_RESIDENCY: Package C7 Residency Counter.
* perf code: 0x03
@@ -675,6 +675,7 @@ static const struct x86_cpu_id intel_cstates_match[] __initconst = {
X86_MATCH_INTEL_FAM6_MODEL(ICELAKE, &icl_cstates),
X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X, &icx_cstates),
X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D, &icx_cstates),
+ X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, &icx_cstates),
X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE_L, &icl_cstates),
X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE, &icl_cstates),
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index 98938a68251c..bed74a0f2932 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -357,6 +357,11 @@ static inline u32 efi64_convert_status(efi_status_t status)
runtime), \
func, __VA_ARGS__))
+#define efi_dxe_call(func, ...) \
+ (efi_is_native() \
+ ? efi_dxe_table->func(__VA_ARGS__) \
+ : __efi64_thunk_map(efi_dxe_table, func, __VA_ARGS__))
+
#else /* CONFIG_EFI_MIXED */
static inline bool efi_is_64bit(void)
diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h
index 048b6d5aff50..def6ca121111 100644
--- a/arch/x86/include/asm/intel-family.h
+++ b/arch/x86/include/asm/intel-family.h
@@ -26,6 +26,7 @@
* _G - parts with extra graphics on
* _X - regular server parts
* _D - micro server parts
+ * _N,_P - other mobile parts
*
* Historical OPTDIFFs:
*
@@ -107,8 +108,10 @@
#define INTEL_FAM6_ALDERLAKE 0x97 /* Golden Cove / Gracemont */
#define INTEL_FAM6_ALDERLAKE_L 0x9A /* Golden Cove / Gracemont */
+#define INTEL_FAM6_ALDERLAKE_N 0xBE
#define INTEL_FAM6_RAPTORLAKE 0xB7
+#define INTEL_FAM6_RAPTORLAKE_P 0xBA
/* "Small Core" Processors (Atom) */
diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h
index 3c368b639c04..1a6d7e3f6c32 100644
--- a/arch/x86/include/asm/kvm-x86-ops.h
+++ b/arch/x86/include/asm/kvm-x86-ops.h
@@ -118,6 +118,7 @@ KVM_X86_OP_OPTIONAL(mem_enc_register_region)
KVM_X86_OP_OPTIONAL(mem_enc_unregister_region)
KVM_X86_OP_OPTIONAL(vm_copy_enc_context_from)
KVM_X86_OP_OPTIONAL(vm_move_enc_context_from)
+KVM_X86_OP_OPTIONAL(guest_memory_reclaimed)
KVM_X86_OP(get_msr_feature)
KVM_X86_OP(can_emulate_instruction)
KVM_X86_OP(apic_init_signal_blocked)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index e0c0f0e1f754..4ff36610af6a 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1484,6 +1484,7 @@ struct kvm_x86_ops {
int (*mem_enc_unregister_region)(struct kvm *kvm, struct kvm_enc_region *argp);
int (*vm_copy_enc_context_from)(struct kvm *kvm, unsigned int source_fd);
int (*vm_move_enc_context_from)(struct kvm *kvm, unsigned int source_fd);
+ void (*guest_memory_reclaimed)(struct kvm *kvm);
int (*get_msr_feature)(struct kvm_msr_entry *entry);
diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h
index d6bfdfb0f0af..0c3d3440fe27 100644
--- a/arch/x86/include/asm/microcode.h
+++ b/arch/x86/include/asm/microcode.h
@@ -131,10 +131,12 @@ extern void __init load_ucode_bsp(void);
extern void load_ucode_ap(void);
void reload_early_microcode(void);
extern bool initrd_gone;
+void microcode_bsp_resume(void);
#else
static inline void __init load_ucode_bsp(void) { }
static inline void load_ucode_ap(void) { }
static inline void reload_early_microcode(void) { }
+static inline void microcode_bsp_resume(void) { }
#endif
#endif /* _ASM_X86_MICROCODE_H */
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index 40497a9020c6..407084d9fd99 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -559,10 +559,6 @@ static inline void update_page_count(int level, unsigned long pages) { }
extern pte_t *lookup_address(unsigned long address, unsigned int *level);
extern pte_t *lookup_address_in_pgd(pgd_t *pgd, unsigned long address,
unsigned int *level);
-
-struct mm_struct;
-extern pte_t *lookup_address_in_mm(struct mm_struct *mm, unsigned long address,
- unsigned int *level);
extern pmd_t *lookup_pmd_address(unsigned long address);
extern phys_addr_t slow_virt_to_phys(void *__address);
extern int __init kernel_map_pages_in_pgd(pgd_t *pgd, u64 pfn,
diff --git a/arch/x86/include/asm/static_call.h b/arch/x86/include/asm/static_call.h
index 2455d721503e..2d8dacd02643 100644
--- a/arch/x86/include/asm/static_call.h
+++ b/arch/x86/include/asm/static_call.h
@@ -26,6 +26,7 @@
".align 4 \n" \
".globl " STATIC_CALL_TRAMP_STR(name) " \n" \
STATIC_CALL_TRAMP_STR(name) ": \n" \
+ ANNOTATE_NOENDBR \
insns " \n" \
".byte 0x53, 0x43, 0x54 \n" \
".type " STATIC_CALL_TRAMP_STR(name) ", @function \n" \
diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c
index f955d25076ba..239ff5fcec6a 100644
--- a/arch/x86/kernel/cpu/microcode/core.c
+++ b/arch/x86/kernel/cpu/microcode/core.c
@@ -758,9 +758,9 @@ static struct subsys_interface mc_cpu_interface = {
};
/**
- * mc_bp_resume - Update boot CPU microcode during resume.
+ * microcode_bsp_resume - Update boot CPU microcode during resume.
*/
-static void mc_bp_resume(void)
+void microcode_bsp_resume(void)
{
int cpu = smp_processor_id();
struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
@@ -772,7 +772,7 @@ static void mc_bp_resume(void)
}
static struct syscore_ops mc_syscore_ops = {
- .resume = mc_bp_resume,
+ .resume = microcode_bsp_resume,
};
static int mc_cpu_starting(unsigned int cpu)
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index c049561f373a..e28ab0ecc537 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -41,17 +41,7 @@ struct fpu_state_config fpu_user_cfg __ro_after_init;
*/
struct fpstate init_fpstate __ro_after_init;
-/*
- * Track whether the kernel is using the FPU state
- * currently.
- *
- * This flag is used:
- *
- * - by IRQ context code to potentially use the FPU
- * if it's unused.
- *
- * - to debug kernel_fpu_begin()/end() correctness
- */
+/* Track in-kernel FPU usage */
static DEFINE_PER_CPU(bool, in_kernel_fpu);
/*
@@ -59,42 +49,37 @@ static DEFINE_PER_CPU(bool, in_kernel_fpu);
*/
DEFINE_PER_CPU(struct fpu *, fpu_fpregs_owner_ctx);
-static bool kernel_fpu_disabled(void)
-{
- return this_cpu_read(in_kernel_fpu);
-}
-
-static bool interrupted_kernel_fpu_idle(void)
-{
- return !kernel_fpu_disabled();
-}
-
-/*
- * Were we in user mode (or vm86 mode) when we were
- * interrupted?
- *
- * Doing kernel_fpu_begin/end() is ok if we are running
- * in an interrupt context from user mode - we'll just
- * save the FPU state as required.
- */
-static bool interrupted_user_mode(void)
-{
- struct pt_regs *regs = get_irq_regs();
- return regs && user_mode(regs);
-}
-
/*
* Can we use the FPU in kernel mode with the
* whole "kernel_fpu_begin/end()" sequence?
- *
- * It's always ok in process context (ie "not interrupt")
- * but it is sometimes ok even from an irq.
*/
bool irq_fpu_usable(void)
{
- return !in_interrupt() ||
- interrupted_user_mode() ||
- interrupted_kernel_fpu_idle();
+ if (WARN_ON_ONCE(in_nmi()))
+ return false;
+
+ /* In kernel FPU usage already active? */
+ if (this_cpu_read(in_kernel_fpu))
+ return false;
+
+ /*
+ * When not in NMI or hard interrupt context, FPU can be used in:
+ *
+ * - Task context except from within fpregs_lock()'ed critical
+ * regions.
+ *
+ * - Soft interrupt processing context which cannot happen
+ * while in a fpregs_lock()'ed critical region.
+ */
+ if (!in_hardirq())
+ return true;
+
+ /*
+ * In hard interrupt context it's safe when soft interrupts
+ * are enabled, which means the interrupt did not hit in
+ * a fpregs_lock()'ed critical region.
+ */
+ return !softirq_count();
}
EXPORT_SYMBOL(irq_fpu_usable);
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index a22deb58f86d..8b1c45c9cda8 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -69,6 +69,7 @@ static DEFINE_PER_CPU_DECRYPTED(struct kvm_vcpu_pv_apf_data, apf_reason) __align
DEFINE_PER_CPU_DECRYPTED(struct kvm_steal_time, steal_time) __aligned(64) __visible;
static int has_steal_clock = 0;
+static int has_guest_poll = 0;
/*
* No need for any "IO delay" on KVM
*/
@@ -706,14 +707,26 @@ static int kvm_cpu_down_prepare(unsigned int cpu)
static int kvm_suspend(void)
{
+ u64 val = 0;
+
kvm_guest_cpu_offline(false);
+#ifdef CONFIG_ARCH_CPUIDLE_HALTPOLL
+ if (kvm_para_has_feature(KVM_FEATURE_POLL_CONTROL))
+ rdmsrl(MSR_KVM_POLL_CONTROL, val);
+ has_guest_poll = !(val & 1);
+#endif
return 0;
}
static void kvm_resume(void)
{
kvm_cpu_online(raw_smp_processor_id());
+
+#ifdef CONFIG_ARCH_CPUIDLE_HALTPOLL
+ if (kvm_para_has_feature(KVM_FEATURE_POLL_CONTROL) && has_guest_poll)
+ wrmsrl(MSR_KVM_POLL_CONTROL, 0);
+#endif
}
static struct syscore_ops kvm_syscore_ops = {
diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c
index 794fdef2501a..38185aedf7d1 100644
--- a/arch/x86/kernel/unwind_orc.c
+++ b/arch/x86/kernel/unwind_orc.c
@@ -339,11 +339,11 @@ static bool stack_access_ok(struct unwind_state *state, unsigned long _addr,
struct stack_info *info = &state->stack_info;
void *addr = (void *)_addr;
- if (!on_stack(info, addr, len) &&
- (get_stack_info(addr, state->task, info, &state->stack_mask)))
- return false;
+ if (on_stack(info, addr, len))
+ return true;
- return true;
+ return !get_stack_info(addr, state->task, info, &state->stack_mask) &&
+ on_stack(info, addr, len);
}
static bool deref_stack_reg(struct unwind_state *state, unsigned long addr,
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index b24ca7f4ed7c..0c1ba6aa0765 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -887,6 +887,11 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
union cpuid10_eax eax;
union cpuid10_edx edx;
+ if (!static_cpu_has(X86_FEATURE_ARCH_PERFMON)) {
+ entry->eax = entry->ebx = entry->ecx = entry->edx = 0;
+ break;
+ }
+
perf_get_x86_pmu_capability(&cap);
/*
@@ -1085,12 +1090,21 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
case 0x80000000:
entry->eax = min(entry->eax, 0x80000021);
/*
- * Serializing LFENCE is reported in a multitude of ways,
- * and NullSegClearsBase is not reported in CPUID on Zen2;
- * help userspace by providing the CPUID leaf ourselves.
+ * Serializing LFENCE is reported in a multitude of ways, and
+ * NullSegClearsBase is not reported in CPUID on Zen2; help
+ * userspace by providing the CPUID leaf ourselves.
+ *
+ * However, only do it if the host has CPUID leaf 0x8000001d.
+ * QEMU thinks that it can query the host blindly for that
+ * CPUID leaf if KVM reports that it supports 0x8000001d or
+ * above. The processor merrily returns values from the
+ * highest Intel leaf which QEMU tries to use as the guest's
+ * 0x8000001d. Even worse, this can result in an infinite
+ * loop if said highest leaf has no subleaves indexed by ECX.
*/
- if (static_cpu_has(X86_FEATURE_LFENCE_RDTSC)
- || !static_cpu_has_bug(X86_BUG_NULL_SEG))
+ if (entry->eax >= 0x8000001d &&
+ (static_cpu_has(X86_FEATURE_LFENCE_RDTSC)
+ || !static_cpu_has_bug(X86_BUG_NULL_SEG)))
entry->eax = max(entry->eax, 0x80000021);
break;
case 0x80000001:
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index 46f9dfb60469..a0702b6be3e8 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -1914,7 +1914,7 @@ static u64 kvm_hv_send_ipi(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc)
struct hv_send_ipi_ex send_ipi_ex;
struct hv_send_ipi send_ipi;
DECLARE_BITMAP(vcpu_mask, KVM_MAX_VCPUS);
- unsigned long valid_bank_mask;
+ u64 valid_bank_mask;
u64 sparse_banks[KVM_HV_MAX_SPARSE_VCPU_SET_BITS];
u32 vector;
bool all_cpus;
@@ -1956,7 +1956,7 @@ static u64 kvm_hv_send_ipi(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc)
valid_bank_mask = send_ipi_ex.vp_set.valid_bank_mask;
all_cpus = send_ipi_ex.vp_set.format == HV_GENERIC_SET_ALL;
- if (hc->var_cnt != bitmap_weight(&valid_bank_mask, 64))
+ if (hc->var_cnt != bitmap_weight((unsigned long *)&valid_bank_mask, 64))
return HV_STATUS_INVALID_HYPERCALL_INPUT;
if (all_cpus)
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index e6cae6f22683..a335e7f1f69e 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -65,6 +65,30 @@ static __always_inline u64 rsvd_bits(int s, int e)
return ((2ULL << (e - s)) - 1) << s;
}
+/*
+ * The number of non-reserved physical address bits irrespective of features
+ * that repurpose legal bits, e.g. MKTME.
+ */
+extern u8 __read_mostly shadow_phys_bits;
+
+static inline gfn_t kvm_mmu_max_gfn(void)
+{
+ /*
+ * Note that this uses the host MAXPHYADDR, not the guest's.
+ * EPT/NPT cannot support GPAs that would exceed host.MAXPHYADDR;
+ * assuming KVM is running on bare metal, guest accesses beyond
+ * host.MAXPHYADDR will hit a #PF(RSVD) and never cause a vmexit
+ * (either EPT Violation/Misconfig or #NPF), and so KVM will never
+ * install a SPTE for such addresses. If KVM is running as a VM
+ * itself, on the other hand, it might see a MAXPHYADDR that is less
+ * than hardware's real MAXPHYADDR. Using the host MAXPHYADDR
+ * disallows such SPTEs entirely and simplifies the TDP MMU.
+ */
+ int max_gpa_bits = likely(tdp_enabled) ? shadow_phys_bits : 52;
+
+ return (1ULL << (max_gpa_bits - PAGE_SHIFT)) - 1;
+}
+
void kvm_mmu_set_mmio_spte_mask(u64 mmio_value, u64 mmio_mask, u64 access_mask);
void kvm_mmu_set_ept_masks(bool has_ad_bits, bool has_exec_only);
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index f9080ee50ffa..45e1573f8f1d 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -473,30 +473,6 @@ retry:
}
#endif
-static bool spte_has_volatile_bits(u64 spte)
-{
- if (!is_shadow_present_pte(spte))
- return false;
-
- /*
- * Always atomically update spte if it can be updated
- * out of mmu-lock, it can ensure dirty bit is not lost,
- * also, it can help us to get a stable is_writable_pte()
- * to ensure tlb flush is not missed.
- */
- if (spte_can_locklessly_be_made_writable(spte) ||
- is_access_track_spte(spte))
- return true;
-
- if (spte_ad_enabled(spte)) {
- if ((spte & shadow_accessed_mask) == 0 ||
- (is_writable_pte(spte) && (spte & shadow_dirty_mask) == 0))
- return true;
- }
-
- return false;
-}
-
/* Rules for using mmu_spte_set:
* Set the sptep from nonpresent to present.
* Note: the sptep being assigned *must* be either not present
@@ -557,7 +533,7 @@ static bool mmu_spte_update(u64 *sptep, u64 new_spte)
* we always atomically update it, see the comments in
* spte_has_volatile_bits().
*/
- if (spte_can_locklessly_be_made_writable(old_spte) &&
+ if (is_mmu_writable_spte(old_spte) &&
!is_writable_pte(new_spte))
flush = true;
@@ -591,7 +567,8 @@ static int mmu_spte_clear_track_bits(struct kvm *kvm, u64 *sptep)
u64 old_spte = *sptep;
int level = sptep_to_sp(sptep)->role.level;
- if (!spte_has_volatile_bits(old_spte))
+ if (!is_shadow_present_pte(old_spte) ||
+ !spte_has_volatile_bits(old_spte))
__update_clear_spte_fast(sptep, 0ull);
else
old_spte = __update_clear_spte_slow(sptep, 0ull);
@@ -1187,7 +1164,7 @@ static bool spte_write_protect(u64 *sptep, bool pt_protect)
u64 spte = *sptep;
if (!is_writable_pte(spte) &&
- !(pt_protect && spte_can_locklessly_be_made_writable(spte)))
+ !(pt_protect && is_mmu_writable_spte(spte)))
return false;
rmap_printk("spte %p %llx\n", sptep, *sptep);
@@ -2804,8 +2781,12 @@ static int host_pfn_mapping_level(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn,
const struct kvm_memory_slot *slot)
{
unsigned long hva;
- pte_t *pte;
- int level;
+ unsigned long flags;
+ int level = PG_LEVEL_4K;
+ pgd_t pgd;
+ p4d_t p4d;
+ pud_t pud;
+ pmd_t pmd;
if (!PageCompound(pfn_to_page(pfn)) && !kvm_is_zone_device_pfn(pfn))
return PG_LEVEL_4K;
@@ -2820,10 +2801,43 @@ static int host_pfn_mapping_level(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn,
*/
hva = __gfn_to_hva_memslot(slot, gfn);
- pte = lookup_address_in_mm(kvm->mm, hva, &level);
- if (unlikely(!pte))
- return PG_LEVEL_4K;
+ /*
+ * Lookup the mapping level in the current mm. The information
+ * may become stale soon, but it is safe to use as long as
+ * 1) mmu_notifier_retry was checked after taking mmu_lock, and
+ * 2) mmu_lock is taken now.
+ *
+ * We still need to disable IRQs to prevent concurrent tear down
+ * of page tables.
+ */
+ local_irq_save(flags);
+ pgd = READ_ONCE(*pgd_offset(kvm->mm, hva));
+ if (pgd_none(pgd))
+ goto out;
+
+ p4d = READ_ONCE(*p4d_offset(&pgd, hva));
+ if (p4d_none(p4d) || !p4d_present(p4d))
+ goto out;
+
+ pud = READ_ONCE(*pud_offset(&p4d, hva));
+ if (pud_none(pud) || !pud_present(pud))
+ goto out;
+
+ if (pud_large(pud)) {
+ level = PG_LEVEL_1G;
+ goto out;
+ }
+
+ pmd = READ_ONCE(*pmd_offset(&pud, hva));
+ if (pmd_none(pmd) || !pmd_present(pmd))
+ goto out;
+
+ if (pmd_large(pmd))
+ level = PG_LEVEL_2M;
+
+out:
+ local_irq_restore(flags);
return level;
}
@@ -2992,9 +3006,15 @@ static bool handle_abnormal_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fa
/*
* If MMIO caching is disabled, emulate immediately without
* touching the shadow page tables as attempting to install an
- * MMIO SPTE will just be an expensive nop.
+ * MMIO SPTE will just be an expensive nop. Do not cache MMIO
+ * whose gfn is greater than host.MAXPHYADDR, any guest that
+ * generates such gfns is running nested and is being tricked
+ * by L0 userspace (you can observe gfn > L1.MAXPHYADDR if
+ * and only if L1's MAXPHYADDR is inaccurate with respect to
+ * the hardware's).
*/
- if (unlikely(!shadow_mmio_value)) {
+ if (unlikely(!shadow_mmio_value) ||
+ unlikely(fault->gfn > kvm_mmu_max_gfn())) {
*ret_val = RET_PF_EMULATE;
return true;
}
@@ -3153,8 +3173,7 @@ static int fast_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
* be removed in the fast path only if the SPTE was
* write-protected for dirty-logging or access tracking.
*/
- if (fault->write &&
- spte_can_locklessly_be_made_writable(spte)) {
+ if (fault->write && is_mmu_writable_spte(spte)) {
new_spte |= PT_WRITABLE_MASK;
/*
@@ -5451,14 +5470,16 @@ void kvm_mmu_invpcid_gva(struct kvm_vcpu *vcpu, gva_t gva, unsigned long pcid)
uint i;
if (pcid == kvm_get_active_pcid(vcpu)) {
- mmu->invlpg(vcpu, gva, mmu->root.hpa);
+ if (mmu->invlpg)
+ mmu->invlpg(vcpu, gva, mmu->root.hpa);
tlb_flush = true;
}
for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) {
if (VALID_PAGE(mmu->prev_roots[i].hpa) &&
pcid == kvm_get_pcid(vcpu, mmu->prev_roots[i].pgd)) {
- mmu->invlpg(vcpu, gva, mmu->prev_roots[i].hpa);
+ if (mmu->invlpg)
+ mmu->invlpg(vcpu, gva, mmu->prev_roots[i].hpa);
tlb_flush = true;
}
}
@@ -5646,6 +5667,7 @@ static void kvm_zap_obsolete_pages(struct kvm *kvm)
{
struct kvm_mmu_page *sp, *node;
int nr_zapped, batch = 0;
+ bool unstable;
restart:
list_for_each_entry_safe_reverse(sp, node,
@@ -5677,11 +5699,12 @@ restart:
goto restart;
}
- if (__kvm_mmu_prepare_zap_page(kvm, sp,
- &kvm->arch.zapped_obsolete_pages, &nr_zapped)) {
- batch += nr_zapped;
+ unstable = __kvm_mmu_prepare_zap_page(kvm, sp,
+ &kvm->arch.zapped_obsolete_pages, &nr_zapped);
+ batch += nr_zapped;
+
+ if (unstable)
goto restart;
- }
}
/*
diff --git a/arch/x86/kvm/mmu/spte.c b/arch/x86/kvm/mmu/spte.c
index 4739b53c9734..e5c0b6db6f2c 100644
--- a/arch/x86/kvm/mmu/spte.c
+++ b/arch/x86/kvm/mmu/spte.c
@@ -90,6 +90,34 @@ static bool kvm_is_mmio_pfn(kvm_pfn_t pfn)
E820_TYPE_RAM);
}
+/*
+ * Returns true if the SPTE has bits that may be set without holding mmu_lock.
+ * The caller is responsible for checking if the SPTE is shadow-present, and
+ * for determining whether or not the caller cares about non-leaf SPTEs.
+ */
+bool spte_has_volatile_bits(u64 spte)
+{
+ /*
+ * Always atomically update spte if it can be updated
+ * out of mmu-lock, it can ensure dirty bit is not lost,
+ * also, it can help us to get a stable is_writable_pte()
+ * to ensure tlb flush is not missed.
+ */
+ if (!is_writable_pte(spte) && is_mmu_writable_spte(spte))
+ return true;
+
+ if (is_access_track_spte(spte))
+ return true;
+
+ if (spte_ad_enabled(spte)) {
+ if (!(spte & shadow_accessed_mask) ||
+ (is_writable_pte(spte) && !(spte & shadow_dirty_mask)))
+ return true;
+ }
+
+ return false;
+}
+
bool make_spte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
const struct kvm_memory_slot *slot,
unsigned int pte_access, gfn_t gfn, kvm_pfn_t pfn,
diff --git a/arch/x86/kvm/mmu/spte.h b/arch/x86/kvm/mmu/spte.h
index 73f12615416f..80ab0f5cff01 100644
--- a/arch/x86/kvm/mmu/spte.h
+++ b/arch/x86/kvm/mmu/spte.h
@@ -201,12 +201,6 @@ static inline bool is_removed_spte(u64 spte)
*/
extern u64 __read_mostly shadow_nonpresent_or_rsvd_lower_gfn_mask;
-/*
- * The number of non-reserved physical address bits irrespective of features
- * that repurpose legal bits, e.g. MKTME.
- */
-extern u8 __read_mostly shadow_phys_bits;
-
static inline bool is_mmio_spte(u64 spte)
{
return (spte & shadow_mmio_mask) == shadow_mmio_value &&
@@ -396,7 +390,7 @@ static inline void check_spte_writable_invariants(u64 spte)
"kvm: Writable SPTE is not MMU-writable: %llx", spte);
}
-static inline bool spte_can_locklessly_be_made_writable(u64 spte)
+static inline bool is_mmu_writable_spte(u64 spte)
{
return spte & shadow_mmu_writable_mask;
}
@@ -410,6 +404,8 @@ static inline u64 get_mmio_spte_generation(u64 spte)
return gen;
}
+bool spte_has_volatile_bits(u64 spte);
+
bool make_spte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
const struct kvm_memory_slot *slot,
unsigned int pte_access, gfn_t gfn, kvm_pfn_t pfn,
diff --git a/arch/x86/kvm/mmu/tdp_iter.h b/arch/x86/kvm/mmu/tdp_iter.h
index b1eaf6ec0e0b..f0af385c56e0 100644
--- a/arch/x86/kvm/mmu/tdp_iter.h
+++ b/arch/x86/kvm/mmu/tdp_iter.h
@@ -6,6 +6,7 @@
#include <linux/kvm_host.h>
#include "mmu.h"
+#include "spte.h"
/*
* TDP MMU SPTEs are RCU protected to allow paging structures (non-leaf SPTEs)
@@ -17,9 +18,38 @@ static inline u64 kvm_tdp_mmu_read_spte(tdp_ptep_t sptep)
{
return READ_ONCE(*rcu_dereference(sptep));
}
-static inline void kvm_tdp_mmu_write_spte(tdp_ptep_t sptep, u64 val)
+
+static inline u64 kvm_tdp_mmu_write_spte_atomic(tdp_ptep_t sptep, u64 new_spte)
+{
+ return xchg(rcu_dereference(sptep), new_spte);
+}
+
+static inline void __kvm_tdp_mmu_write_spte(tdp_ptep_t sptep, u64 new_spte)
+{
+ WRITE_ONCE(*rcu_dereference(sptep), new_spte);
+}
+
+static inline u64 kvm_tdp_mmu_write_spte(tdp_ptep_t sptep, u64 old_spte,
+ u64 new_spte, int level)
{
- WRITE_ONCE(*rcu_dereference(sptep), val);
+ /*
+ * Atomically write the SPTE if it is a shadow-present, leaf SPTE with
+ * volatile bits, i.e. has bits that can be set outside of mmu_lock.
+ * The Writable bit can be set by KVM's fast page fault handler, and
+ * Accessed and Dirty bits can be set by the CPU.
+ *
+ * Note, non-leaf SPTEs do have Accessed bits and those bits are
+ * technically volatile, but KVM doesn't consume the Accessed bit of
+ * non-leaf SPTEs, i.e. KVM doesn't care if it clobbers the bit. This
+ * logic needs to be reassessed if KVM were to use non-leaf Accessed
+ * bits, e.g. to skip stepping down into child SPTEs when aging SPTEs.
+ */
+ if (is_shadow_present_pte(old_spte) && is_last_spte(old_spte, level) &&
+ spte_has_volatile_bits(old_spte))
+ return kvm_tdp_mmu_write_spte_atomic(sptep, new_spte);
+
+ __kvm_tdp_mmu_write_spte(sptep, new_spte);
+ return old_spte;
}
/*
diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index c472769e0300..922b06bf4b94 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -426,9 +426,9 @@ static void handle_removed_pt(struct kvm *kvm, tdp_ptep_t pt, bool shared)
tdp_mmu_unlink_sp(kvm, sp, shared);
for (i = 0; i < PT64_ENT_PER_PAGE; i++) {
- u64 *sptep = rcu_dereference(pt) + i;
+ tdp_ptep_t sptep = pt + i;
gfn_t gfn = base_gfn + i * KVM_PAGES_PER_HPAGE(level);
- u64 old_child_spte;
+ u64 old_spte;
if (shared) {
/*
@@ -440,8 +440,8 @@ static void handle_removed_pt(struct kvm *kvm, tdp_ptep_t pt, bool shared)
* value to the removed SPTE value.
*/
for (;;) {
- old_child_spte = xchg(sptep, REMOVED_SPTE);
- if (!is_removed_spte(old_child_spte))
+ old_spte = kvm_tdp_mmu_write_spte_atomic(sptep, REMOVED_SPTE);
+ if (!is_removed_spte(old_spte))
break;
cpu_relax();
}
@@ -455,23 +455,43 @@ static void handle_removed_pt(struct kvm *kvm, tdp_ptep_t pt, bool shared)
* are guarded by the memslots generation, not by being
* unreachable.
*/
- old_child_spte = READ_ONCE(*sptep);
- if (!is_shadow_present_pte(old_child_spte))
+ old_spte = kvm_tdp_mmu_read_spte(sptep);
+ if (!is_shadow_present_pte(old_spte))
continue;
/*
- * Marking the SPTE as a removed SPTE is not
- * strictly necessary here as the MMU lock will
- * stop other threads from concurrently modifying
- * this SPTE. Using the removed SPTE value keeps
- * the two branches consistent and simplifies
- * the function.
+ * Use the common helper instead of a raw WRITE_ONCE as
+ * the SPTE needs to be updated atomically if it can be
+ * modified by a different vCPU outside of mmu_lock.
+ * Even though the parent SPTE is !PRESENT, the TLB
+ * hasn't yet been flushed, and both Intel and AMD
+ * document that A/D assists can use upper-level PxE
+ * entries that are cached in the TLB, i.e. the CPU can
+ * still access the page and mark it dirty.
+ *
+ * No retry is needed in the atomic update path as the
+ * sole concern is dropping a Dirty bit, i.e. no other
+ * task can zap/remove the SPTE as mmu_lock is held for
+ * write. Marking the SPTE as a removed SPTE is not
+ * strictly necessary for the same reason, but using
+ * the remove SPTE value keeps the shared/exclusive
+ * paths consistent and allows the handle_changed_spte()
+ * call below to hardcode the new value to REMOVED_SPTE.
+ *
+ * Note, even though dropping a Dirty bit is the only
+ * scenario where a non-atomic update could result in a
+ * functional bug, simply checking the Dirty bit isn't
+ * sufficient as a fast page fault could read the upper
+ * level SPTE before it is zapped, and then make this
+ * target SPTE writable, resume the guest, and set the
+ * Dirty bit between reading the SPTE above and writing
+ * it here.
*/
- WRITE_ONCE(*sptep, REMOVED_SPTE);
+ old_spte = kvm_tdp_mmu_write_spte(sptep, old_spte,
+ REMOVED_SPTE, level);
}
handle_changed_spte(kvm, kvm_mmu_page_as_id(sp), gfn,
- old_child_spte, REMOVED_SPTE, level,
- shared);
+ old_spte, REMOVED_SPTE, level, shared);
}
call_rcu(&sp->rcu_head, tdp_mmu_free_sp_rcu_callback);
@@ -667,14 +687,13 @@ static inline int tdp_mmu_zap_spte_atomic(struct kvm *kvm,
KVM_PAGES_PER_HPAGE(iter->level));
/*
- * No other thread can overwrite the removed SPTE as they
- * must either wait on the MMU lock or use
- * tdp_mmu_set_spte_atomic which will not overwrite the
- * special removed SPTE value. No bookkeeping is needed
- * here since the SPTE is going from non-present
- * to non-present.
+ * No other thread can overwrite the removed SPTE as they must either
+ * wait on the MMU lock or use tdp_mmu_set_spte_atomic() which will not
+ * overwrite the special removed SPTE value. No bookkeeping is needed
+ * here since the SPTE is going from non-present to non-present. Use
+ * the raw write helper to avoid an unnecessary check on volatile bits.
*/
- kvm_tdp_mmu_write_spte(iter->sptep, 0);
+ __kvm_tdp_mmu_write_spte(iter->sptep, 0);
return 0;
}
@@ -699,10 +718,13 @@ static inline int tdp_mmu_zap_spte_atomic(struct kvm *kvm,
* unless performing certain dirty logging operations.
* Leaving record_dirty_log unset in that case prevents page
* writes from being double counted.
+ *
+ * Returns the old SPTE value, which _may_ be different than @old_spte if the
+ * SPTE had voldatile bits.
*/
-static void __tdp_mmu_set_spte(struct kvm *kvm, int as_id, tdp_ptep_t sptep,
- u64 old_spte, u64 new_spte, gfn_t gfn, int level,
- bool record_acc_track, bool record_dirty_log)
+static u64 __tdp_mmu_set_spte(struct kvm *kvm, int as_id, tdp_ptep_t sptep,
+ u64 old_spte, u64 new_spte, gfn_t gfn, int level,
+ bool record_acc_track, bool record_dirty_log)
{
lockdep_assert_held_write(&kvm->mmu_lock);
@@ -715,7 +737,7 @@ static void __tdp_mmu_set_spte(struct kvm *kvm, int as_id, tdp_ptep_t sptep,
*/
WARN_ON(is_removed_spte(old_spte) || is_removed_spte(new_spte));
- kvm_tdp_mmu_write_spte(sptep, new_spte);
+ old_spte = kvm_tdp_mmu_write_spte(sptep, old_spte, new_spte, level);
__handle_changed_spte(kvm, as_id, gfn, old_spte, new_spte, level, false);
@@ -724,6 +746,7 @@ static void __tdp_mmu_set_spte(struct kvm *kvm, int as_id, tdp_ptep_t sptep,
if (record_dirty_log)
handle_changed_spte_dirty_log(kvm, as_id, gfn, old_spte,
new_spte, level);
+ return old_spte;
}
static inline void _tdp_mmu_set_spte(struct kvm *kvm, struct tdp_iter *iter,
@@ -732,9 +755,10 @@ static inline void _tdp_mmu_set_spte(struct kvm *kvm, struct tdp_iter *iter,
{
WARN_ON_ONCE(iter->yielded);
- __tdp_mmu_set_spte(kvm, iter->as_id, iter->sptep, iter->old_spte,
- new_spte, iter->gfn, iter->level,
- record_acc_track, record_dirty_log);
+ iter->old_spte = __tdp_mmu_set_spte(kvm, iter->as_id, iter->sptep,
+ iter->old_spte, new_spte,
+ iter->gfn, iter->level,
+ record_acc_track, record_dirty_log);
}
static inline void tdp_mmu_set_spte(struct kvm *kvm, struct tdp_iter *iter,
@@ -815,14 +839,15 @@ static inline bool __must_check tdp_mmu_iter_cond_resched(struct kvm *kvm,
return iter->yielded;
}
-static inline gfn_t tdp_mmu_max_gfn_host(void)
+static inline gfn_t tdp_mmu_max_gfn_exclusive(void)
{
/*
- * Bound TDP MMU walks at host.MAXPHYADDR, guest accesses beyond that
- * will hit a #PF(RSVD) and never hit an EPT Violation/Misconfig / #NPF,
- * and so KVM will never install a SPTE for such addresses.
+ * Bound TDP MMU walks at host.MAXPHYADDR. KVM disallows memslots with
+ * a gpa range that would exceed the max gfn, and KVM does not create
+ * MMIO SPTEs for "impossible" gfns, instead sending such accesses down
+ * the slow emulation path every time.
*/
- return 1ULL << (shadow_phys_bits - PAGE_SHIFT);
+ return kvm_mmu_max_gfn() + 1;
}
static void __tdp_mmu_zap_root(struct kvm *kvm, struct kvm_mmu_page *root,
@@ -830,7 +855,7 @@ static void __tdp_mmu_zap_root(struct kvm *kvm, struct kvm_mmu_page *root,
{
struct tdp_iter iter;
- gfn_t end = tdp_mmu_max_gfn_host();
+ gfn_t end = tdp_mmu_max_gfn_exclusive();
gfn_t start = 0;
for_each_tdp_pte_min_level(iter, root, zap_level, start, end) {
@@ -923,7 +948,7 @@ static bool tdp_mmu_zap_leafs(struct kvm *kvm, struct kvm_mmu_page *root,
{
struct tdp_iter iter;
- end = min(end, tdp_mmu_max_gfn_host());
+ end = min(end, tdp_mmu_max_gfn_exclusive());
lockdep_assert_held_write(&kvm->mmu_lock);
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index eca39f56c231..0604bc29f0b8 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -171,9 +171,12 @@ static bool pmc_resume_counter(struct kvm_pmc *pmc)
return true;
}
-static int cmp_u64(const void *a, const void *b)
+static int cmp_u64(const void *pa, const void *pb)
{
- return *(__u64 *)a - *(__u64 *)b;
+ u64 a = *(u64 *)pa;
+ u64 b = *(u64 *)pb;
+
+ return (a > b) - (a < b);
}
void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel)
diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h
index 9e66fba1d6a3..22992b049d38 100644
--- a/arch/x86/kvm/pmu.h
+++ b/arch/x86/kvm/pmu.h
@@ -138,6 +138,15 @@ static inline u64 get_sample_period(struct kvm_pmc *pmc, u64 counter_value)
return sample_period;
}
+static inline void pmc_update_sample_period(struct kvm_pmc *pmc)
+{
+ if (!pmc->perf_event || pmc->is_paused)
+ return;
+
+ perf_event_period(pmc->perf_event,
+ get_sample_period(pmc, pmc->counter));
+}
+
void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel);
void reprogram_fixed_counter(struct kvm_pmc *pmc, u8 ctrl, int fixed_idx);
void reprogram_counter(struct kvm_pmu *pmu, int pmc_idx);
diff --git a/arch/x86/kvm/svm/pmu.c b/arch/x86/kvm/svm/pmu.c
index 24eb935b6f85..16a5ebb420cf 100644
--- a/arch/x86/kvm/svm/pmu.c
+++ b/arch/x86/kvm/svm/pmu.c
@@ -45,6 +45,22 @@ static struct kvm_event_hw_type_mapping amd_event_mapping[] = {
[7] = { 0xd1, 0x00, PERF_COUNT_HW_STALLED_CYCLES_BACKEND },
};
+/* duplicated from amd_f17h_perfmon_event_map. */
+static struct kvm_event_hw_type_mapping amd_f17h_event_mapping[] = {
+ [0] = { 0x76, 0x00, PERF_COUNT_HW_CPU_CYCLES },
+ [1] = { 0xc0, 0x00, PERF_COUNT_HW_INSTRUCTIONS },
+ [2] = { 0x60, 0xff, PERF_COUNT_HW_CACHE_REFERENCES },
+ [3] = { 0x64, 0x09, PERF_COUNT_HW_CACHE_MISSES },
+ [4] = { 0xc2, 0x00, PERF_COUNT_HW_BRANCH_INSTRUCTIONS },
+ [5] = { 0xc3, 0x00, PERF_COUNT_HW_BRANCH_MISSES },
+ [6] = { 0x87, 0x02, PERF_COUNT_HW_STALLED_CYCLES_FRONTEND },
+ [7] = { 0x87, 0x01, PERF_COUNT_HW_STALLED_CYCLES_BACKEND },
+};
+
+/* amd_pmc_perf_hw_id depends on these being the same size */
+static_assert(ARRAY_SIZE(amd_event_mapping) ==
+ ARRAY_SIZE(amd_f17h_event_mapping));
+
static unsigned int get_msr_base(struct kvm_pmu *pmu, enum pmu_type type)
{
struct kvm_vcpu *vcpu = pmu_to_vcpu(pmu);
@@ -140,6 +156,7 @@ static inline struct kvm_pmc *get_gp_pmc_amd(struct kvm_pmu *pmu, u32 msr,
static unsigned int amd_pmc_perf_hw_id(struct kvm_pmc *pmc)
{
+ struct kvm_event_hw_type_mapping *event_mapping;
u8 event_select = pmc->eventsel & ARCH_PERFMON_EVENTSEL_EVENT;
u8 unit_mask = (pmc->eventsel & ARCH_PERFMON_EVENTSEL_UMASK) >> 8;
int i;
@@ -148,15 +165,20 @@ static unsigned int amd_pmc_perf_hw_id(struct kvm_pmc *pmc)
if (WARN_ON(pmc_is_fixed(pmc)))
return PERF_COUNT_HW_MAX;
+ if (guest_cpuid_family(pmc->vcpu) >= 0x17)
+ event_mapping = amd_f17h_event_mapping;
+ else
+ event_mapping = amd_event_mapping;
+
for (i = 0; i < ARRAY_SIZE(amd_event_mapping); i++)
- if (amd_event_mapping[i].eventsel == event_select
- && amd_event_mapping[i].unit_mask == unit_mask)
+ if (event_mapping[i].eventsel == event_select
+ && event_mapping[i].unit_mask == unit_mask)
break;
if (i == ARRAY_SIZE(amd_event_mapping))
return PERF_COUNT_HW_MAX;
- return amd_event_mapping[i].event_type;
+ return event_mapping[i].event_type;
}
/* check if a PMC is enabled by comparing it against global_ctrl bits. Because
@@ -257,6 +279,7 @@ static int amd_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
pmc = get_gp_pmc_amd(pmu, msr, PMU_TYPE_COUNTER);
if (pmc) {
pmc->counter += data - pmc_read_counter(pmc);
+ pmc_update_sample_period(pmc);
return 0;
}
/* MSR_EVNTSELn */
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index 537aaddc852f..7c392873626f 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -1594,24 +1594,51 @@ static void sev_unlock_two_vms(struct kvm *dst_kvm, struct kvm *src_kvm)
atomic_set_release(&src_sev->migration_in_progress, 0);
}
+/* vCPU mutex subclasses. */
+enum sev_migration_role {
+ SEV_MIGRATION_SOURCE = 0,
+ SEV_MIGRATION_TARGET,
+ SEV_NR_MIGRATION_ROLES,
+};
-static int sev_lock_vcpus_for_migration(struct kvm *kvm)
+static int sev_lock_vcpus_for_migration(struct kvm *kvm,
+ enum sev_migration_role role)
{
struct kvm_vcpu *vcpu;
unsigned long i, j;
+ bool first = true;
kvm_for_each_vcpu(i, vcpu, kvm) {
- if (mutex_lock_killable(&vcpu->mutex))
+ if (mutex_lock_killable_nested(&vcpu->mutex, role))
goto out_unlock;
+
+ if (first) {
+ /*
+ * Reset the role to one that avoids colliding with
+ * the role used for the first vcpu mutex.
+ */
+ role = SEV_NR_MIGRATION_ROLES;
+ first = false;
+ } else {
+ mutex_release(&vcpu->mutex.dep_map, _THIS_IP_);
+ }
}
return 0;
out_unlock:
+
+ first = true;
kvm_for_each_vcpu(j, vcpu, kvm) {
if (i == j)
break;
+ if (first)
+ first = false;
+ else
+ mutex_acquire(&vcpu->mutex.dep_map, role, 0, _THIS_IP_);
+
+
mutex_unlock(&vcpu->mutex);
}
return -EINTR;
@@ -1621,8 +1648,15 @@ static void sev_unlock_vcpus_for_migration(struct kvm *kvm)
{
struct kvm_vcpu *vcpu;
unsigned long i;
+ bool first = true;
kvm_for_each_vcpu(i, vcpu, kvm) {
+ if (first)
+ first = false;
+ else
+ mutex_acquire(&vcpu->mutex.dep_map,
+ SEV_NR_MIGRATION_ROLES, 0, _THIS_IP_);
+
mutex_unlock(&vcpu->mutex);
}
}
@@ -1748,10 +1782,10 @@ int sev_vm_move_enc_context_from(struct kvm *kvm, unsigned int source_fd)
charged = true;
}
- ret = sev_lock_vcpus_for_migration(kvm);
+ ret = sev_lock_vcpus_for_migration(kvm, SEV_MIGRATION_SOURCE);
if (ret)
goto out_dst_cgroup;
- ret = sev_lock_vcpus_for_migration(source_kvm);
+ ret = sev_lock_vcpus_for_migration(source_kvm, SEV_MIGRATION_TARGET);
if (ret)
goto out_dst_vcpu;
@@ -2226,51 +2260,47 @@ int sev_cpu_init(struct svm_cpu_data *sd)
* Pages used by hardware to hold guest encrypted state must be flushed before
* returning them to the system.
*/
-static void sev_flush_guest_memory(struct vcpu_svm *svm, void *va,
- unsigned long len)
+static void sev_flush_encrypted_page(struct kvm_vcpu *vcpu, void *va)
{
+ int asid = to_kvm_svm(vcpu->kvm)->sev_info.asid;
+
/*
- * If hardware enforced cache coherency for encrypted mappings of the
- * same physical page is supported, nothing to do.
+ * Note! The address must be a kernel address, as regular page walk
+ * checks are performed by VM_PAGE_FLUSH, i.e. operating on a user
+ * address is non-deterministic and unsafe. This function deliberately
+ * takes a pointer to deter passing in a user address.
*/
- if (boot_cpu_has(X86_FEATURE_SME_COHERENT))
- return;
+ unsigned long addr = (unsigned long)va;
/*
- * If the VM Page Flush MSR is supported, use it to flush the page
- * (using the page virtual address and the guest ASID).
+ * If CPU enforced cache coherency for encrypted mappings of the
+ * same physical page is supported, use CLFLUSHOPT instead. NOTE: cache
+ * flush is still needed in order to work properly with DMA devices.
*/
- if (boot_cpu_has(X86_FEATURE_VM_PAGE_FLUSH)) {
- struct kvm_sev_info *sev;
- unsigned long va_start;
- u64 start, stop;
-
- /* Align start and stop to page boundaries. */
- va_start = (unsigned long)va;
- start = (u64)va_start & PAGE_MASK;
- stop = PAGE_ALIGN((u64)va_start + len);
-
- if (start < stop) {
- sev = &to_kvm_svm(svm->vcpu.kvm)->sev_info;
+ if (boot_cpu_has(X86_FEATURE_SME_COHERENT)) {
+ clflush_cache_range(va, PAGE_SIZE);
+ return;
+ }
- while (start < stop) {
- wrmsrl(MSR_AMD64_VM_PAGE_FLUSH,
- start | sev->asid);
+ /*
+ * VM Page Flush takes a host virtual address and a guest ASID. Fall
+ * back to WBINVD if this faults so as not to make any problems worse
+ * by leaving stale encrypted data in the cache.
+ */
+ if (WARN_ON_ONCE(wrmsrl_safe(MSR_AMD64_VM_PAGE_FLUSH, addr | asid)))
+ goto do_wbinvd;
- start += PAGE_SIZE;
- }
+ return;
- return;
- }
+do_wbinvd:
+ wbinvd_on_all_cpus();
+}
- WARN(1, "Address overflow, using WBINVD\n");
- }
+void sev_guest_memory_reclaimed(struct kvm *kvm)
+{
+ if (!sev_guest(kvm))
+ return;
- /*
- * Hardware should always have one of the above features,
- * but if not, use WBINVD and issue a warning.
- */
- WARN_ONCE(1, "Using WBINVD to flush guest memory\n");
wbinvd_on_all_cpus();
}
@@ -2284,7 +2314,8 @@ void sev_free_vcpu(struct kvm_vcpu *vcpu)
svm = to_svm(vcpu);
if (vcpu->arch.guest_state_protected)
- sev_flush_guest_memory(svm, svm->sev_es.vmsa, PAGE_SIZE);
+ sev_flush_encrypted_page(vcpu, svm->sev_es.vmsa);
+
__free_page(virt_to_page(svm->sev_es.vmsa));
if (svm->sev_es.ghcb_sa_free)
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index bd4c64b362d2..7e45d03cd018 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -4620,6 +4620,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
.mem_enc_ioctl = sev_mem_enc_ioctl,
.mem_enc_register_region = sev_mem_enc_register_region,
.mem_enc_unregister_region = sev_mem_enc_unregister_region,
+ .guest_memory_reclaimed = sev_guest_memory_reclaimed,
.vm_copy_enc_context_from = sev_vm_copy_enc_context_from,
.vm_move_enc_context_from = sev_vm_move_enc_context_from,
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index f77a7d2d39dd..f76deff71002 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -609,6 +609,8 @@ int sev_mem_enc_unregister_region(struct kvm *kvm,
struct kvm_enc_region *range);
int sev_vm_copy_enc_context_from(struct kvm *kvm, unsigned int source_fd);
int sev_vm_move_enc_context_from(struct kvm *kvm, unsigned int source_fd);
+void sev_guest_memory_reclaimed(struct kvm *kvm);
+
void pre_sev_run(struct vcpu_svm *svm, int cpu);
void __init sev_set_cpu_caps(void);
void __init sev_hardware_setup(void);
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index f18744f7ff82..856c87563883 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -4618,6 +4618,11 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason,
kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu);
}
+ if (vmx->nested.update_vmcs01_apicv_status) {
+ vmx->nested.update_vmcs01_apicv_status = false;
+ kvm_make_request(KVM_REQ_APICV_UPDATE, vcpu);
+ }
+
if ((vm_exit_reason != -1) &&
(enable_shadow_vmcs || evmptr_is_valid(vmx->nested.hv_evmcs_vmptr)))
vmx->nested.need_vmcs12_to_shadow_sync = true;
diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index bc3f8512bb64..b82b6709d7a8 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -431,15 +431,11 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
!(msr & MSR_PMC_FULL_WIDTH_BIT))
data = (s64)(s32)data;
pmc->counter += data - pmc_read_counter(pmc);
- if (pmc->perf_event && !pmc->is_paused)
- perf_event_period(pmc->perf_event,
- get_sample_period(pmc, data));
+ pmc_update_sample_period(pmc);
return 0;
} else if ((pmc = get_fixed_pmc(pmu, msr))) {
pmc->counter += data - pmc_read_counter(pmc);
- if (pmc->perf_event && !pmc->is_paused)
- perf_event_period(pmc->perf_event,
- get_sample_period(pmc, data));
+ pmc_update_sample_period(pmc);
return 0;
} else if ((pmc = get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0))) {
if (data == pmc->eventsel)
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 04d170c4b61e..610355b9ccce 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -4174,6 +4174,11 @@ static void vmx_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
+ if (is_guest_mode(vcpu)) {
+ vmx->nested.update_vmcs01_apicv_status = true;
+ return;
+ }
+
pin_controls_set(vmx, vmx_pin_based_exec_ctrl(vmx));
if (cpu_has_secondary_exec_ctrls()) {
if (kvm_vcpu_apicv_active(vcpu))
@@ -5467,7 +5472,7 @@ static bool vmx_emulation_required_with_pending_exception(struct kvm_vcpu *vcpu)
struct vcpu_vmx *vmx = to_vmx(vcpu);
return vmx->emulation_required && !vmx->rmode.vm86_active &&
- vcpu->arch.exception.pending;
+ (vcpu->arch.exception.pending || vcpu->arch.exception.injected);
}
static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index 9c6bfcd84008..b98c7e96697a 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -183,6 +183,7 @@ struct nested_vmx {
bool change_vmcs01_virtual_apic_mode;
bool reload_vmcs01_apic_access_page;
bool update_vmcs01_cpu_dirty_logging;
+ bool update_vmcs01_apicv_status;
/*
* Enlightened VMCS has been enabled. It does not mean that L1 has to
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 547ba00ef64f..4790f0d7d40b 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -9111,7 +9111,7 @@ static void kvm_apicv_init(struct kvm *kvm)
if (!enable_apicv)
set_or_clear_apicv_inhibit(inhibits,
- APICV_INHIBIT_REASON_ABSENT, true);
+ APICV_INHIBIT_REASON_DISABLE, true);
}
static void kvm_sched_yield(struct kvm_vcpu *vcpu, unsigned long dest_id)
@@ -9889,6 +9889,11 @@ void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
kvm_make_all_cpus_request(kvm, KVM_REQ_APIC_PAGE_RELOAD);
}
+void kvm_arch_guest_memory_reclaimed(struct kvm *kvm)
+{
+ static_call_cond(kvm_x86_guest_memory_reclaimed)(kvm);
+}
+
static void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu)
{
if (!lapic_in_kernel(vcpu))
@@ -10015,12 +10020,14 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
if (kvm_check_request(KVM_REQ_HV_CRASH, vcpu)) {
vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT;
vcpu->run->system_event.type = KVM_SYSTEM_EVENT_CRASH;
+ vcpu->run->system_event.ndata = 0;
r = 0;
goto out;
}
if (kvm_check_request(KVM_REQ_HV_RESET, vcpu)) {
vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT;
vcpu->run->system_event.type = KVM_SYSTEM_EVENT_RESET;
+ vcpu->run->system_event.ndata = 0;
r = 0;
goto out;
}
@@ -10097,7 +10104,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
/* Store vcpu->apicv_active before vcpu->mode. */
smp_store_release(&vcpu->mode, IN_GUEST_MODE);
- srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
+ kvm_vcpu_srcu_read_unlock(vcpu);
/*
* 1) We should set ->mode before checking ->requests. Please see
@@ -10128,7 +10135,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
smp_wmb();
local_irq_enable();
preempt_enable();
- vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+ kvm_vcpu_srcu_read_lock(vcpu);
r = 1;
goto cancel_injection;
}
@@ -10254,7 +10261,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
local_irq_enable();
preempt_enable();
- vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+ kvm_vcpu_srcu_read_lock(vcpu);
/*
* Profile KVM exit RIPs:
@@ -10284,7 +10291,7 @@ out:
}
/* Called within kvm->srcu read side. */
-static inline int vcpu_block(struct kvm *kvm, struct kvm_vcpu *vcpu)
+static inline int vcpu_block(struct kvm_vcpu *vcpu)
{
bool hv_timer;
@@ -10300,12 +10307,12 @@ static inline int vcpu_block(struct kvm *kvm, struct kvm_vcpu *vcpu)
if (hv_timer)
kvm_lapic_switch_to_sw_timer(vcpu);
- srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
+ kvm_vcpu_srcu_read_unlock(vcpu);
if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED)
kvm_vcpu_halt(vcpu);
else
kvm_vcpu_block(vcpu);
- vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
+ kvm_vcpu_srcu_read_lock(vcpu);
if (hv_timer)
kvm_lapic_switch_to_hv_timer(vcpu);
@@ -10347,7 +10354,6 @@ static inline bool kvm_vcpu_running(struct kvm_vcpu *vcpu)
static int vcpu_run(struct kvm_vcpu *vcpu)
{
int r;
- struct kvm *kvm = vcpu->kvm;
vcpu->arch.l1tf_flush_l1d = true;
@@ -10355,7 +10361,7 @@ static int vcpu_run(struct kvm_vcpu *vcpu)
if (kvm_vcpu_running(vcpu)) {
r = vcpu_enter_guest(vcpu);
} else {
- r = vcpu_block(kvm, vcpu);
+ r = vcpu_block(vcpu);
}
if (r <= 0)
@@ -10374,9 +10380,9 @@ static int vcpu_run(struct kvm_vcpu *vcpu)
}
if (__xfer_to_guest_mode_work_pending()) {
- srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
+ kvm_vcpu_srcu_read_unlock(vcpu);
r = xfer_to_guest_mode_handle_work(vcpu);
- vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
+ kvm_vcpu_srcu_read_lock(vcpu);
if (r)
return r;
}
@@ -10387,12 +10393,7 @@ static int vcpu_run(struct kvm_vcpu *vcpu)
static inline int complete_emulated_io(struct kvm_vcpu *vcpu)
{
- int r;
-
- vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
- r = kvm_emulate_instruction(vcpu, EMULTYPE_NO_DECODE);
- srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
- return r;
+ return kvm_emulate_instruction(vcpu, EMULTYPE_NO_DECODE);
}
static int complete_emulated_pio(struct kvm_vcpu *vcpu)
@@ -10484,7 +10485,6 @@ static void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
{
struct kvm_run *kvm_run = vcpu->run;
- struct kvm *kvm = vcpu->kvm;
int r;
vcpu_load(vcpu);
@@ -10492,7 +10492,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
kvm_run->flags = 0;
kvm_load_guest_fpu(vcpu);
- vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+ kvm_vcpu_srcu_read_lock(vcpu);
if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) {
if (kvm_run->immediate_exit) {
r = -EINTR;
@@ -10504,9 +10504,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
*/
WARN_ON_ONCE(kvm_lapic_hv_timer_in_use(vcpu));
- srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
+ kvm_vcpu_srcu_read_unlock(vcpu);
kvm_vcpu_block(vcpu);
- vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
+ kvm_vcpu_srcu_read_lock(vcpu);
if (kvm_apic_accept_events(vcpu) < 0) {
r = 0;
@@ -10567,7 +10567,7 @@ out:
if (kvm_run->kvm_valid_regs)
store_regs(vcpu);
post_kvm_run_save(vcpu);
- srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
+ kvm_vcpu_srcu_read_unlock(vcpu);
kvm_sigset_deactivate(vcpu);
vcpu_put(vcpu);
@@ -10985,6 +10985,9 @@ static void kvm_arch_vcpu_guestdbg_update_apicv_inhibit(struct kvm *kvm)
struct kvm_vcpu *vcpu;
unsigned long i;
+ if (!enable_apicv)
+ return;
+
down_write(&kvm->arch.apicv_update_lock);
kvm_for_each_vcpu(i, vcpu, kvm) {
@@ -11196,8 +11199,21 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
r = kvm_create_lapic(vcpu, lapic_timer_advance_ns);
if (r < 0)
goto fail_mmu_destroy;
- if (kvm_apicv_activated(vcpu->kvm))
+
+ /*
+ * Defer evaluating inhibits until the vCPU is first run, as
+ * this vCPU will not get notified of any changes until this
+ * vCPU is visible to other vCPUs (marked online and added to
+ * the set of vCPUs). Opportunistically mark APICv active as
+ * VMX in particularly is highly unlikely to have inhibits.
+ * Ignore the current per-VM APICv state so that vCPU creation
+ * is guaranteed to run with a deterministic value, the request
+ * will ensure the vCPU gets the correct state before VM-Entry.
+ */
+ if (enable_apicv) {
vcpu->arch.apicv_active = true;
+ kvm_make_request(KVM_REQ_APICV_UPDATE, vcpu);
+ }
} else
static_branch_inc(&kvm_has_noapic_vcpu);
@@ -11995,8 +12011,12 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
struct kvm_memory_slot *new,
enum kvm_mr_change change)
{
- if (change == KVM_MR_CREATE || change == KVM_MR_MOVE)
+ if (change == KVM_MR_CREATE || change == KVM_MR_MOVE) {
+ if ((new->base_gfn + new->npages - 1) > kvm_mmu_max_gfn())
+ return -EINVAL;
+
return kvm_alloc_memslot_metadata(kvm, new);
+ }
if (change == KVM_MR_FLAGS_ONLY)
memcpy(&new->arch, &old->arch, sizeof(old->arch));
diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S
index 8ca5ecf16dc4..9dec1b38a98f 100644
--- a/arch/x86/lib/copy_user_64.S
+++ b/arch/x86/lib/copy_user_64.S
@@ -53,12 +53,12 @@
SYM_FUNC_START(copy_user_generic_unrolled)
ASM_STAC
cmpl $8,%edx
- jb 20f /* less then 8 bytes, go to byte copy loop */
+ jb .Lcopy_user_short_string_bytes
ALIGN_DESTINATION
movl %edx,%ecx
andl $63,%edx
shrl $6,%ecx
- jz .L_copy_short_string
+ jz copy_user_short_string
1: movq (%rsi),%r8
2: movq 1*8(%rsi),%r9
3: movq 2*8(%rsi),%r10
@@ -79,37 +79,11 @@ SYM_FUNC_START(copy_user_generic_unrolled)
leaq 64(%rdi),%rdi
decl %ecx
jnz 1b
-.L_copy_short_string:
- movl %edx,%ecx
- andl $7,%edx
- shrl $3,%ecx
- jz 20f
-18: movq (%rsi),%r8
-19: movq %r8,(%rdi)
- leaq 8(%rsi),%rsi
- leaq 8(%rdi),%rdi
- decl %ecx
- jnz 18b
-20: andl %edx,%edx
- jz 23f
- movl %edx,%ecx
-21: movb (%rsi),%al
-22: movb %al,(%rdi)
- incq %rsi
- incq %rdi
- decl %ecx
- jnz 21b
-23: xor %eax,%eax
- ASM_CLAC
- RET
+ jmp copy_user_short_string
30: shll $6,%ecx
addl %ecx,%edx
- jmp 60f
-40: leal (%rdx,%rcx,8),%edx
- jmp 60f
-50: movl %ecx,%edx
-60: jmp .Lcopy_user_handle_tail /* ecx is zerorest also */
+ jmp .Lcopy_user_handle_tail
_ASM_EXTABLE_CPY(1b, 30b)
_ASM_EXTABLE_CPY(2b, 30b)
@@ -127,10 +101,6 @@ SYM_FUNC_START(copy_user_generic_unrolled)
_ASM_EXTABLE_CPY(14b, 30b)
_ASM_EXTABLE_CPY(15b, 30b)
_ASM_EXTABLE_CPY(16b, 30b)
- _ASM_EXTABLE_CPY(18b, 40b)
- _ASM_EXTABLE_CPY(19b, 40b)
- _ASM_EXTABLE_CPY(21b, 50b)
- _ASM_EXTABLE_CPY(22b, 50b)
SYM_FUNC_END(copy_user_generic_unrolled)
EXPORT_SYMBOL(copy_user_generic_unrolled)
@@ -191,7 +161,7 @@ EXPORT_SYMBOL(copy_user_generic_string)
SYM_FUNC_START(copy_user_enhanced_fast_string)
ASM_STAC
/* CPUs without FSRM should avoid rep movsb for short copies */
- ALTERNATIVE "cmpl $64, %edx; jb .L_copy_short_string", "", X86_FEATURE_FSRM
+ ALTERNATIVE "cmpl $64, %edx; jb copy_user_short_string", "", X86_FEATURE_FSRM
movl %edx,%ecx
1: rep movsb
xorl %eax,%eax
@@ -244,6 +214,53 @@ SYM_CODE_START_LOCAL(.Lcopy_user_handle_tail)
SYM_CODE_END(.Lcopy_user_handle_tail)
/*
+ * Finish memcpy of less than 64 bytes. #AC should already be set.
+ *
+ * Input:
+ * rdi destination
+ * rsi source
+ * rdx count (< 64)
+ *
+ * Output:
+ * eax uncopied bytes or 0 if successful.
+ */
+SYM_CODE_START_LOCAL(copy_user_short_string)
+ movl %edx,%ecx
+ andl $7,%edx
+ shrl $3,%ecx
+ jz .Lcopy_user_short_string_bytes
+18: movq (%rsi),%r8
+19: movq %r8,(%rdi)
+ leaq 8(%rsi),%rsi
+ leaq 8(%rdi),%rdi
+ decl %ecx
+ jnz 18b
+.Lcopy_user_short_string_bytes:
+ andl %edx,%edx
+ jz 23f
+ movl %edx,%ecx
+21: movb (%rsi),%al
+22: movb %al,(%rdi)
+ incq %rsi
+ incq %rdi
+ decl %ecx
+ jnz 21b
+23: xor %eax,%eax
+ ASM_CLAC
+ RET
+
+40: leal (%rdx,%rcx,8),%edx
+ jmp 60f
+50: movl %ecx,%edx /* ecx is zerorest also */
+60: jmp .Lcopy_user_handle_tail
+
+ _ASM_EXTABLE_CPY(18b, 40b)
+ _ASM_EXTABLE_CPY(19b, 40b)
+ _ASM_EXTABLE_CPY(21b, 50b)
+ _ASM_EXTABLE_CPY(22b, 50b)
+SYM_CODE_END(copy_user_short_string)
+
+/*
* copy_user_nocache - Uncached memory copy with exception handling
* This will force destination out of cache for more performance.
*
diff --git a/arch/x86/lib/putuser.S b/arch/x86/lib/putuser.S
index ecb2049c1273..b7dfd60243b7 100644
--- a/arch/x86/lib/putuser.S
+++ b/arch/x86/lib/putuser.S
@@ -48,6 +48,7 @@ SYM_FUNC_START(__put_user_1)
cmp %_ASM_BX,%_ASM_CX
jae .Lbad_put_user
SYM_INNER_LABEL(__put_user_nocheck_1, SYM_L_GLOBAL)
+ ENDBR
ASM_STAC
1: movb %al,(%_ASM_CX)
xor %ecx,%ecx
@@ -62,6 +63,7 @@ SYM_FUNC_START(__put_user_2)
cmp %_ASM_BX,%_ASM_CX
jae .Lbad_put_user
SYM_INNER_LABEL(__put_user_nocheck_2, SYM_L_GLOBAL)
+ ENDBR
ASM_STAC
2: movw %ax,(%_ASM_CX)
xor %ecx,%ecx
@@ -76,6 +78,7 @@ SYM_FUNC_START(__put_user_4)
cmp %_ASM_BX,%_ASM_CX
jae .Lbad_put_user
SYM_INNER_LABEL(__put_user_nocheck_4, SYM_L_GLOBAL)
+ ENDBR
ASM_STAC
3: movl %eax,(%_ASM_CX)
xor %ecx,%ecx
@@ -90,6 +93,7 @@ SYM_FUNC_START(__put_user_8)
cmp %_ASM_BX,%_ASM_CX
jae .Lbad_put_user
SYM_INNER_LABEL(__put_user_nocheck_8, SYM_L_GLOBAL)
+ ENDBR
ASM_STAC
4: mov %_ASM_AX,(%_ASM_CX)
#ifdef CONFIG_X86_32
diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S
index 5f87bab4fb8d..b2b2366885a2 100644
--- a/arch/x86/lib/retpoline.S
+++ b/arch/x86/lib/retpoline.S
@@ -31,6 +31,7 @@
.align RETPOLINE_THUNK_SIZE
SYM_INNER_LABEL(__x86_indirect_thunk_\reg, SYM_L_GLOBAL)
UNWIND_HINT_EMPTY
+ ANNOTATE_NOENDBR
ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), \
__stringify(RETPOLINE \reg), X86_FEATURE_RETPOLINE, \
@@ -55,7 +56,6 @@ SYM_INNER_LABEL(__x86_indirect_thunk_\reg, SYM_L_GLOBAL)
.align RETPOLINE_THUNK_SIZE
SYM_CODE_START(__x86_indirect_thunk_array)
- ANNOTATE_NOENDBR // apply_retpolines
#define GEN(reg) THUNK reg
#include <asm/GEN-for-each-reg.h>
diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c
index 0402a749f3a0..0ae6cf804197 100644
--- a/arch/x86/lib/usercopy_64.c
+++ b/arch/x86/lib/usercopy_64.c
@@ -119,7 +119,7 @@ void __memcpy_flushcache(void *_dst, const void *_src, size_t size)
/* cache copy and flush to align dest */
if (!IS_ALIGNED(dest, 8)) {
- unsigned len = min_t(unsigned, size, ALIGN(dest, 8) - dest);
+ size_t len = min_t(size_t, size, ALIGN(dest, 8) - dest);
memcpy((void *) dest, (void *) source, len);
clean_cache_range((void *) dest, len);
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 96d34ebb20a9..e2942335d143 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -902,6 +902,8 @@ static void __meminit vmemmap_use_sub_pmd(unsigned long start, unsigned long end
static void __meminit vmemmap_use_new_sub_pmd(unsigned long start, unsigned long end)
{
+ const unsigned long page = ALIGN_DOWN(start, PMD_SIZE);
+
vmemmap_flush_unused_pmd();
/*
@@ -914,8 +916,7 @@ static void __meminit vmemmap_use_new_sub_pmd(unsigned long start, unsigned long
* Mark with PAGE_UNUSED the unused parts of the new memmap range
*/
if (!IS_ALIGNED(start, PMD_SIZE))
- memset((void *)start, PAGE_UNUSED,
- start - ALIGN_DOWN(start, PMD_SIZE));
+ memset((void *)page, PAGE_UNUSED, start - page);
/*
* We want to avoid memset(PAGE_UNUSED) when populating the vmemmap of
diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c
index abf5ed76e4b7..0656db33574d 100644
--- a/arch/x86/mm/pat/set_memory.c
+++ b/arch/x86/mm/pat/set_memory.c
@@ -638,17 +638,6 @@ pte_t *lookup_address(unsigned long address, unsigned int *level)
}
EXPORT_SYMBOL_GPL(lookup_address);
-/*
- * Lookup the page table entry for a virtual address in a given mm. Return a
- * pointer to the entry and the level of the mapping.
- */
-pte_t *lookup_address_in_mm(struct mm_struct *mm, unsigned long address,
- unsigned int *level)
-{
- return lookup_address_in_pgd(pgd_offset(mm, address), address, level);
-}
-EXPORT_SYMBOL_GPL(lookup_address_in_mm);
-
static pte_t *_lookup_address_cpa(struct cpa_data *cpa, unsigned long address,
unsigned int *level)
{
diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c
index 9bb1e2941179..b94f727251b6 100644
--- a/arch/x86/pci/xen.c
+++ b/arch/x86/pci/xen.c
@@ -467,7 +467,6 @@ static __init void xen_setup_pci_msi(void)
else
xen_msi_ops.setup_msi_irqs = xen_setup_msi_irqs;
xen_msi_ops.teardown_msi_irqs = xen_pv_teardown_msi_irqs;
- pci_msi_ignore_mask = 1;
} else if (xen_hvm_domain()) {
xen_msi_ops.setup_msi_irqs = xen_hvm_setup_msi_irqs;
xen_msi_ops.teardown_msi_irqs = xen_teardown_msi_irqs;
@@ -481,6 +480,11 @@ static __init void xen_setup_pci_msi(void)
* in allocating the native domain and never use it.
*/
x86_init.irqs.create_pci_msi_domain = xen_create_pci_msi_domain;
+ /*
+ * With XEN PIRQ/Eventchannels in use PCI/MSI[-X] masking is solely
+ * controlled by the hypervisor.
+ */
+ pci_msi_ignore_mask = 1;
}
#else /* CONFIG_PCI_MSI */
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index 147c30a81f15..1591d67e0bcd 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -93,6 +93,9 @@ static const unsigned long * const efi_tables[] = {
#ifdef CONFIG_LOAD_UEFI_KEYS
&efi.mokvar_table,
#endif
+#ifdef CONFIG_EFI_COCO_SECRET
+ &efi.coco_secret,
+#endif
};
u64 efi_setup; /* efi setup_data physical address */
diff --git a/arch/x86/platform/pvh/head.S b/arch/x86/platform/pvh/head.S
index 72c1e42d121d..7fe564eaf228 100644
--- a/arch/x86/platform/pvh/head.S
+++ b/arch/x86/platform/pvh/head.S
@@ -50,6 +50,7 @@
#define PVH_DS_SEL (PVH_GDT_ENTRY_DS * 8)
SYM_CODE_START_LOCAL(pvh_start_xen)
+ UNWIND_HINT_EMPTY
cld
lgdt (_pa(gdt))
diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c
index 3822666fb73d..bb176c72891c 100644
--- a/arch/x86/power/cpu.c
+++ b/arch/x86/power/cpu.c
@@ -25,6 +25,7 @@
#include <asm/cpu.h>
#include <asm/mmu_context.h>
#include <asm/cpu_device_id.h>
+#include <asm/microcode.h>
#ifdef CONFIG_X86_32
__visible unsigned long saved_context_ebx;
@@ -262,11 +263,18 @@ static void notrace __restore_processor_state(struct saved_context *ctxt)
x86_platform.restore_sched_clock_state();
mtrr_bp_restore();
perf_restore_debug_store();
- msr_restore_context(ctxt);
c = &cpu_data(smp_processor_id());
if (cpu_has(c, X86_FEATURE_MSR_IA32_FEAT_CTL))
init_ia32_feat_ctl(c);
+
+ microcode_bsp_resume();
+
+ /*
+ * This needs to happen after the microcode has been updated upon resume
+ * because some of the MSRs are "emulated" in microcode.
+ */
+ msr_restore_context(ctxt);
}
/* Needed by apm.c */
diff --git a/arch/x86/xen/smp_pv.c b/arch/x86/xen/smp_pv.c
index 688aa8b6ae29..ba7af2eca755 100644
--- a/arch/x86/xen/smp_pv.c
+++ b/arch/x86/xen/smp_pv.c
@@ -260,8 +260,11 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
return 0;
ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL);
- if (ctxt == NULL)
+ if (ctxt == NULL) {
+ cpumask_clear_cpu(cpu, xen_cpu_initialized_map);
+ cpumask_clear_cpu(cpu, cpu_callout_mask);
return -ENOMEM;
+ }
gdt = get_cpu_gdt_rw(cpu);
diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S
index ac17196e2518..3a2cd93bf059 100644
--- a/arch/x86/xen/xen-head.S
+++ b/arch/x86/xen/xen-head.S
@@ -45,6 +45,7 @@ SYM_CODE_END(hypercall_page)
__INIT
SYM_CODE_START(startup_xen)
UNWIND_HINT_EMPTY
+ ANNOTATE_NOENDBR
cld
/* Clear .bss */
diff --git a/arch/xtensa/kernel/coprocessor.S b/arch/xtensa/kernel/coprocessor.S
index 45cc0ae0af6f..c7b9f12896f2 100644
--- a/arch/xtensa/kernel/coprocessor.S
+++ b/arch/xtensa/kernel/coprocessor.S
@@ -29,7 +29,7 @@
.if XTENSA_HAVE_COPROCESSOR(x); \
.align 4; \
.Lsave_cp_regs_cp##x: \
- xchal_cp##x##_store a2 a4 a5 a6 a7; \
+ xchal_cp##x##_store a2 a3 a4 a5 a6; \
jx a0; \
.endif
@@ -46,7 +46,7 @@
.if XTENSA_HAVE_COPROCESSOR(x); \
.align 4; \
.Lload_cp_regs_cp##x: \
- xchal_cp##x##_load a2 a4 a5 a6 a7; \
+ xchal_cp##x##_load a2 a3 a4 a5 a6; \
jx a0; \
.endif
diff --git a/arch/xtensa/kernel/jump_label.c b/arch/xtensa/kernel/jump_label.c
index 0dde21e0d3de..ad1841cecdfb 100644
--- a/arch/xtensa/kernel/jump_label.c
+++ b/arch/xtensa/kernel/jump_label.c
@@ -40,7 +40,7 @@ static int patch_text_stop_machine(void *data)
{
struct patch *patch = data;
- if (atomic_inc_return(&patch->cpu_count) == 1) {
+ if (atomic_inc_return(&patch->cpu_count) == num_online_cpus()) {
local_patch_text(patch->addr, patch->data, patch->sz);
atomic_inc(&patch->cpu_count);
} else {
diff --git a/arch/xtensa/platforms/iss/console.c b/arch/xtensa/platforms/iss/console.c
index 81d7c7e8f7e9..10b79d3c74e0 100644
--- a/arch/xtensa/platforms/iss/console.c
+++ b/arch/xtensa/platforms/iss/console.c
@@ -36,24 +36,19 @@ static void rs_poll(struct timer_list *);
static struct tty_driver *serial_driver;
static struct tty_port serial_port;
static DEFINE_TIMER(serial_timer, rs_poll);
-static DEFINE_SPINLOCK(timer_lock);
static int rs_open(struct tty_struct *tty, struct file * filp)
{
- spin_lock_bh(&timer_lock);
if (tty->count == 1)
mod_timer(&serial_timer, jiffies + SERIAL_TIMER_VALUE);
- spin_unlock_bh(&timer_lock);
return 0;
}
static void rs_close(struct tty_struct *tty, struct file * filp)
{
- spin_lock_bh(&timer_lock);
if (tty->count == 1)
del_timer_sync(&serial_timer);
- spin_unlock_bh(&timer_lock);
}
@@ -73,8 +68,6 @@ static void rs_poll(struct timer_list *unused)
int rd = 1;
unsigned char c;
- spin_lock(&timer_lock);
-
while (simc_poll(0)) {
rd = simc_read(0, &c, 1);
if (rd <= 0)
@@ -87,7 +80,6 @@ static void rs_poll(struct timer_list *unused)
tty_flip_buffer_push(port);
if (rd)
mod_timer(&serial_timer, jiffies + SERIAL_TIMER_VALUE);
- spin_unlock(&timer_lock);
}
diff --git a/block/Makefile b/block/Makefile
index 3950ecbc5c26..4e01bb71ad6e 100644
--- a/block/Makefile
+++ b/block/Makefile
@@ -16,6 +16,7 @@ obj-$(CONFIG_BLK_DEV_BSG_COMMON) += bsg.o
obj-$(CONFIG_BLK_DEV_BSGLIB) += bsg-lib.o
obj-$(CONFIG_BLK_CGROUP) += blk-cgroup.o
obj-$(CONFIG_BLK_CGROUP_RWSTAT) += blk-cgroup-rwstat.o
+obj-$(CONFIG_BLK_CGROUP_FC_APPID) += blk-cgroup-fc-appid.o
obj-$(CONFIG_BLK_DEV_THROTTLING) += blk-throttle.o
obj-$(CONFIG_BLK_CGROUP_IOPRIO) += blk-ioprio.o
obj-$(CONFIG_BLK_CGROUP_IOLATENCY) += blk-iolatency.o
diff --git a/block/badblocks.c b/block/badblocks.c
index d39056630d9c..3afb550c0f7b 100644
--- a/block/badblocks.c
+++ b/block/badblocks.c
@@ -65,7 +65,6 @@ int badblocks_check(struct badblocks *bb, sector_t s, int sectors,
s >>= bb->shift;
target += (1<<bb->shift) - 1;
target >>= bb->shift;
- sectors = target - s;
}
/* 'target' is now the first block after the bad range */
@@ -345,7 +344,6 @@ int badblocks_clear(struct badblocks *bb, sector_t s, int sectors)
s += (1<<bb->shift) - 1;
s >>= bb->shift;
target >>= bb->shift;
- sectors = target - s;
}
write_seqlock_irq(&bb->lock);
diff --git a/block/bdev.c b/block/bdev.c
index 13de871fa816..7bf88e591aaf 100644
--- a/block/bdev.c
+++ b/block/bdev.c
@@ -673,17 +673,17 @@ static int blkdev_get_whole(struct block_device *bdev, fmode_t mode)
}
}
- if (!bdev->bd_openers)
+ if (!atomic_read(&bdev->bd_openers))
set_init_blocksize(bdev);
if (test_bit(GD_NEED_PART_SCAN, &disk->state))
bdev_disk_changed(disk, false);
- bdev->bd_openers++;
+ atomic_inc(&bdev->bd_openers);
return 0;
}
static void blkdev_put_whole(struct block_device *bdev, fmode_t mode)
{
- if (!--bdev->bd_openers)
+ if (atomic_dec_and_test(&bdev->bd_openers))
blkdev_flush_mapping(bdev);
if (bdev->bd_disk->fops->release)
bdev->bd_disk->fops->release(bdev->bd_disk, mode);
@@ -694,7 +694,7 @@ static int blkdev_get_part(struct block_device *part, fmode_t mode)
struct gendisk *disk = part->bd_disk;
int ret;
- if (part->bd_openers)
+ if (atomic_read(&part->bd_openers))
goto done;
ret = blkdev_get_whole(bdev_whole(part), mode);
@@ -708,7 +708,7 @@ static int blkdev_get_part(struct block_device *part, fmode_t mode)
disk->open_partitions++;
set_init_blocksize(part);
done:
- part->bd_openers++;
+ atomic_inc(&part->bd_openers);
return 0;
out_blkdev_put:
@@ -720,7 +720,7 @@ static void blkdev_put_part(struct block_device *part, fmode_t mode)
{
struct block_device *whole = bdev_whole(part);
- if (--part->bd_openers)
+ if (!atomic_dec_and_test(&part->bd_openers))
return;
blkdev_flush_mapping(part);
whole->bd_disk->open_partitions--;
@@ -899,7 +899,7 @@ void blkdev_put(struct block_device *bdev, fmode_t mode)
* of the world and we want to avoid long (could be several minute)
* syncs while holding the mutex.
*/
- if (bdev->bd_openers == 1)
+ if (atomic_read(&bdev->bd_openers) == 1)
sync_blockdev(bdev);
mutex_lock(&disk->open_mutex);
@@ -1044,7 +1044,7 @@ void sync_bdevs(bool wait)
bdev = I_BDEV(inode);
mutex_lock(&bdev->bd_disk->open_mutex);
- if (!bdev->bd_openers) {
+ if (!atomic_read(&bdev->bd_openers)) {
; /* skip */
} else if (wait) {
/*
diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c
index 420eda2589c0..09574af83566 100644
--- a/block/bfq-cgroup.c
+++ b/block/bfq-cgroup.c
@@ -557,6 +557,7 @@ static void bfq_pd_init(struct blkg_policy_data *pd)
*/
bfqg->bfqd = bfqd;
bfqg->active_entities = 0;
+ bfqg->online = true;
bfqg->rq_pos_tree = RB_ROOT;
}
@@ -585,28 +586,11 @@ static void bfq_group_set_parent(struct bfq_group *bfqg,
entity->sched_data = &parent->sched_data;
}
-static struct bfq_group *bfq_lookup_bfqg(struct bfq_data *bfqd,
- struct blkcg *blkcg)
+static void bfq_link_bfqg(struct bfq_data *bfqd, struct bfq_group *bfqg)
{
- struct blkcg_gq *blkg;
-
- blkg = blkg_lookup(blkcg, bfqd->queue);
- if (likely(blkg))
- return blkg_to_bfqg(blkg);
- return NULL;
-}
-
-struct bfq_group *bfq_find_set_group(struct bfq_data *bfqd,
- struct blkcg *blkcg)
-{
- struct bfq_group *bfqg, *parent;
+ struct bfq_group *parent;
struct bfq_entity *entity;
- bfqg = bfq_lookup_bfqg(bfqd, blkcg);
-
- if (unlikely(!bfqg))
- return NULL;
-
/*
* Update chain of bfq_groups as we might be handling a leaf group
* which, along with some of its relatives, has not been hooked yet
@@ -623,8 +607,24 @@ struct bfq_group *bfq_find_set_group(struct bfq_data *bfqd,
bfq_group_set_parent(curr_bfqg, parent);
}
}
+}
- return bfqg;
+struct bfq_group *bfq_bio_bfqg(struct bfq_data *bfqd, struct bio *bio)
+{
+ struct blkcg_gq *blkg = bio->bi_blkg;
+ struct bfq_group *bfqg;
+
+ while (blkg) {
+ bfqg = blkg_to_bfqg(blkg);
+ if (bfqg->online) {
+ bio_associate_blkg_from_css(bio, &blkg->blkcg->css);
+ return bfqg;
+ }
+ blkg = blkg->parent;
+ }
+ bio_associate_blkg_from_css(bio,
+ &bfqg_to_blkg(bfqd->root_group)->blkcg->css);
+ return bfqd->root_group;
}
/**
@@ -714,25 +714,15 @@ void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq,
* Move bic to blkcg, assuming that bfqd->lock is held; which makes
* sure that the reference to cgroup is valid across the call (see
* comments in bfq_bic_update_cgroup on this issue)
- *
- * NOTE: an alternative approach might have been to store the current
- * cgroup in bfqq and getting a reference to it, reducing the lookup
- * time here, at the price of slightly more complex code.
*/
-static struct bfq_group *__bfq_bic_change_cgroup(struct bfq_data *bfqd,
- struct bfq_io_cq *bic,
- struct blkcg *blkcg)
+static void *__bfq_bic_change_cgroup(struct bfq_data *bfqd,
+ struct bfq_io_cq *bic,
+ struct bfq_group *bfqg)
{
struct bfq_queue *async_bfqq = bic_to_bfqq(bic, 0);
struct bfq_queue *sync_bfqq = bic_to_bfqq(bic, 1);
- struct bfq_group *bfqg;
struct bfq_entity *entity;
- bfqg = bfq_find_set_group(bfqd, blkcg);
-
- if (unlikely(!bfqg))
- bfqg = bfqd->root_group;
-
if (async_bfqq) {
entity = &async_bfqq->entity;
@@ -743,9 +733,39 @@ static struct bfq_group *__bfq_bic_change_cgroup(struct bfq_data *bfqd,
}
if (sync_bfqq) {
- entity = &sync_bfqq->entity;
- if (entity->sched_data != &bfqg->sched_data)
- bfq_bfqq_move(bfqd, sync_bfqq, bfqg);
+ if (!sync_bfqq->new_bfqq && !bfq_bfqq_coop(sync_bfqq)) {
+ /* We are the only user of this bfqq, just move it */
+ if (sync_bfqq->entity.sched_data != &bfqg->sched_data)
+ bfq_bfqq_move(bfqd, sync_bfqq, bfqg);
+ } else {
+ struct bfq_queue *bfqq;
+
+ /*
+ * The queue was merged to a different queue. Check
+ * that the merge chain still belongs to the same
+ * cgroup.
+ */
+ for (bfqq = sync_bfqq; bfqq; bfqq = bfqq->new_bfqq)
+ if (bfqq->entity.sched_data !=
+ &bfqg->sched_data)
+ break;
+ if (bfqq) {
+ /*
+ * Some queue changed cgroup so the merge is
+ * not valid anymore. We cannot easily just
+ * cancel the merge (by clearing new_bfqq) as
+ * there may be other processes using this
+ * queue and holding refs to all queues below
+ * sync_bfqq->new_bfqq. Similarly if the merge
+ * already happened, we need to detach from
+ * bfqq now so that we cannot merge bio to a
+ * request from the old cgroup.
+ */
+ bfq_put_cooperator(sync_bfqq);
+ bfq_release_process_ref(bfqd, sync_bfqq);
+ bic_set_bfqq(bic, NULL, 1);
+ }
+ }
}
return bfqg;
@@ -754,20 +774,24 @@ static struct bfq_group *__bfq_bic_change_cgroup(struct bfq_data *bfqd,
void bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio)
{
struct bfq_data *bfqd = bic_to_bfqd(bic);
- struct bfq_group *bfqg = NULL;
+ struct bfq_group *bfqg = bfq_bio_bfqg(bfqd, bio);
uint64_t serial_nr;
- rcu_read_lock();
- serial_nr = __bio_blkcg(bio)->css.serial_nr;
+ serial_nr = bfqg_to_blkg(bfqg)->blkcg->css.serial_nr;
/*
* Check whether blkcg has changed. The condition may trigger
* spuriously on a newly created cic but there's no harm.
*/
if (unlikely(!bfqd) || likely(bic->blkcg_serial_nr == serial_nr))
- goto out;
+ return;
- bfqg = __bfq_bic_change_cgroup(bfqd, bic, __bio_blkcg(bio));
+ /*
+ * New cgroup for this process. Make sure it is linked to bfq internal
+ * cgroup hierarchy.
+ */
+ bfq_link_bfqg(bfqd, bfqg);
+ __bfq_bic_change_cgroup(bfqd, bic, bfqg);
/*
* Update blkg_path for bfq_log_* functions. We cache this
* path, and update it here, for the following
@@ -820,8 +844,6 @@ void bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio)
*/
blkg_path(bfqg_to_blkg(bfqg), bfqg->blkg_path, sizeof(bfqg->blkg_path));
bic->blkcg_serial_nr = serial_nr;
-out:
- rcu_read_unlock();
}
/**
@@ -949,6 +971,7 @@ static void bfq_pd_offline(struct blkg_policy_data *pd)
put_async_queues:
bfq_put_async_queues(bfqd, bfqg);
+ bfqg->online = false;
spin_unlock_irqrestore(&bfqd->lock, flags);
/*
@@ -1438,7 +1461,7 @@ void bfq_end_wr_async(struct bfq_data *bfqd)
bfq_end_wr_async_queues(bfqd, bfqd->root_group);
}
-struct bfq_group *bfq_find_set_group(struct bfq_data *bfqd, struct blkcg *blkcg)
+struct bfq_group *bfq_bio_bfqg(struct bfq_data *bfqd, struct bio *bio)
{
return bfqd->root_group;
}
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index 2e0dd68a3cbe..0d46cb728bbf 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -374,7 +374,7 @@ static const unsigned long bfq_activation_stable_merging = 600;
*/
static const unsigned long bfq_late_stable_merging = 600;
-#define RQ_BIC(rq) icq_to_bic((rq)->elv.priv[0])
+#define RQ_BIC(rq) ((struct bfq_io_cq *)((rq)->elv.priv[0]))
#define RQ_BFQQ(rq) ((rq)->elv.priv[1])
struct bfq_queue *bic_to_bfqq(struct bfq_io_cq *bic, bool is_sync)
@@ -456,6 +456,8 @@ static struct bfq_io_cq *bfq_bic_lookup(struct request_queue *q)
*/
void bfq_schedule_dispatch(struct bfq_data *bfqd)
{
+ lockdep_assert_held(&bfqd->lock);
+
if (bfqd->queued != 0) {
bfq_log(bfqd, "schedule dispatch");
blk_mq_run_hw_queues(bfqd->queue, true);
@@ -569,7 +571,7 @@ static bool bfqq_request_over_limit(struct bfq_queue *bfqq, int limit)
struct bfq_entity *entity = &bfqq->entity;
struct bfq_entity *inline_entities[BFQ_LIMIT_INLINE_DEPTH];
struct bfq_entity **entities = inline_entities;
- int depth, level;
+ int depth, level, alloc_depth = BFQ_LIMIT_INLINE_DEPTH;
int class_idx = bfqq->ioprio_class - 1;
struct bfq_sched_data *sched_data;
unsigned long wsum;
@@ -578,15 +580,21 @@ static bool bfqq_request_over_limit(struct bfq_queue *bfqq, int limit)
if (!entity->on_st_or_in_serv)
return false;
+retry:
+ spin_lock_irq(&bfqd->lock);
/* +1 for bfqq entity, root cgroup not included */
depth = bfqg_to_blkg(bfqq_group(bfqq))->blkcg->css.cgroup->level + 1;
- if (depth > BFQ_LIMIT_INLINE_DEPTH) {
+ if (depth > alloc_depth) {
+ spin_unlock_irq(&bfqd->lock);
+ if (entities != inline_entities)
+ kfree(entities);
entities = kmalloc_array(depth, sizeof(*entities), GFP_NOIO);
if (!entities)
return false;
+ alloc_depth = depth;
+ goto retry;
}
- spin_lock_irq(&bfqd->lock);
sched_data = entity->sched_data;
/* Gather our ancestors as we need to traverse them in reverse order */
level = 0;
@@ -2127,9 +2135,7 @@ static void bfq_check_waker(struct bfq_data *bfqd, struct bfq_queue *bfqq,
if (!bfqd->last_completed_rq_bfqq ||
bfqd->last_completed_rq_bfqq == bfqq ||
bfq_bfqq_has_short_ttime(bfqq) ||
- bfqq->dispatched > 0 ||
- now_ns - bfqd->last_completion >= 4 * NSEC_PER_MSEC ||
- bfqd->last_completed_rq_bfqq == bfqq->waker_bfqq)
+ now_ns - bfqd->last_completion >= 4 * NSEC_PER_MSEC)
return;
/*
@@ -2202,9 +2208,13 @@ static void bfq_add_request(struct request *rq)
bfq_log_bfqq(bfqd, bfqq, "add_request %d", rq_is_sync(rq));
bfqq->queued[rq_is_sync(rq)]++;
- bfqd->queued++;
+ /*
+ * Updating of 'bfqd->queued' is protected by 'bfqd->lock', however, it
+ * may be read without holding the lock in bfq_has_work().
+ */
+ WRITE_ONCE(bfqd->queued, bfqd->queued + 1);
- if (RB_EMPTY_ROOT(&bfqq->sort_list) && bfq_bfqq_sync(bfqq)) {
+ if (bfq_bfqq_sync(bfqq) && RQ_BIC(rq)->requests <= 1) {
bfq_check_waker(bfqd, bfqq, now_ns);
/*
@@ -2394,7 +2404,11 @@ static void bfq_remove_request(struct request_queue *q,
if (rq->queuelist.prev != &rq->queuelist)
list_del_init(&rq->queuelist);
bfqq->queued[sync]--;
- bfqd->queued--;
+ /*
+ * Updating of 'bfqd->queued' is protected by 'bfqd->lock', however, it
+ * may be read without holding the lock in bfq_has_work().
+ */
+ WRITE_ONCE(bfqd->queued, bfqd->queued - 1);
elv_rb_del(&bfqq->sort_list, rq);
elv_rqhash_del(q, rq);
@@ -2457,10 +2471,17 @@ static bool bfq_bio_merge(struct request_queue *q, struct bio *bio,
spin_lock_irq(&bfqd->lock);
- if (bic)
+ if (bic) {
+ /*
+ * Make sure cgroup info is uptodate for current process before
+ * considering the merge.
+ */
+ bfq_bic_update_cgroup(bic, bio);
+
bfqd->bio_bfqq = bic_to_bfqq(bic, op_is_sync(bio->bi_opf));
- else
+ } else {
bfqd->bio_bfqq = NULL;
+ }
bfqd->bio_bic = bic;
ret = blk_mq_sched_try_merge(q, bio, nr_segs, &free);
@@ -2490,8 +2511,6 @@ static int bfq_request_merge(struct request_queue *q, struct request **req,
return ELEVATOR_NO_MERGE;
}
-static struct bfq_queue *bfq_init_rq(struct request *rq);
-
static void bfq_request_merged(struct request_queue *q, struct request *req,
enum elv_merge type)
{
@@ -2500,7 +2519,7 @@ static void bfq_request_merged(struct request_queue *q, struct request *req,
blk_rq_pos(req) <
blk_rq_pos(container_of(rb_prev(&req->rb_node),
struct request, rb_node))) {
- struct bfq_queue *bfqq = bfq_init_rq(req);
+ struct bfq_queue *bfqq = RQ_BFQQ(req);
struct bfq_data *bfqd;
struct request *prev, *next_rq;
@@ -2552,8 +2571,8 @@ static void bfq_request_merged(struct request_queue *q, struct request *req,
static void bfq_requests_merged(struct request_queue *q, struct request *rq,
struct request *next)
{
- struct bfq_queue *bfqq = bfq_init_rq(rq),
- *next_bfqq = bfq_init_rq(next);
+ struct bfq_queue *bfqq = RQ_BFQQ(rq),
+ *next_bfqq = RQ_BFQQ(next);
if (!bfqq)
goto remove;
@@ -2758,6 +2777,14 @@ bfq_setup_merge(struct bfq_queue *bfqq, struct bfq_queue *new_bfqq)
if (process_refs == 0 || new_process_refs == 0)
return NULL;
+ /*
+ * Make sure merged queues belong to the same parent. Parents could
+ * have changed since the time we decided the two queues are suitable
+ * for merging.
+ */
+ if (new_bfqq->entity.parent != bfqq->entity.parent)
+ return NULL;
+
bfq_log_bfqq(bfqq->bfqd, bfqq, "scheduling merge with queue %d",
new_bfqq->pid);
@@ -2895,9 +2922,12 @@ bfq_setup_cooperator(struct bfq_data *bfqd, struct bfq_queue *bfqq,
struct bfq_queue *new_bfqq =
bfq_setup_merge(bfqq, stable_merge_bfqq);
- bic->stably_merged = true;
- if (new_bfqq && new_bfqq->bic)
- new_bfqq->bic->stably_merged = true;
+ if (new_bfqq) {
+ bic->stably_merged = true;
+ if (new_bfqq->bic)
+ new_bfqq->bic->stably_merged =
+ true;
+ }
return new_bfqq;
} else
return NULL;
@@ -5039,11 +5069,11 @@ static bool bfq_has_work(struct blk_mq_hw_ctx *hctx)
struct bfq_data *bfqd = hctx->queue->elevator->elevator_data;
/*
- * Avoiding lock: a race on bfqd->busy_queues should cause at
+ * Avoiding lock: a race on bfqd->queued should cause at
* most a call to dispatch for nothing
*/
return !list_empty_careful(&bfqd->dispatch) ||
- bfq_tot_busy_queues(bfqd) > 0;
+ READ_ONCE(bfqd->queued);
}
static struct request *__bfq_dispatch_request(struct blk_mq_hw_ctx *hctx)
@@ -5304,7 +5334,7 @@ static void bfq_put_stable_ref(struct bfq_queue *bfqq)
bfq_put_queue(bfqq);
}
-static void bfq_put_cooperator(struct bfq_queue *bfqq)
+void bfq_put_cooperator(struct bfq_queue *bfqq)
{
struct bfq_queue *__bfqq, *next;
@@ -5710,14 +5740,7 @@ static struct bfq_queue *bfq_get_queue(struct bfq_data *bfqd,
struct bfq_queue *bfqq;
struct bfq_group *bfqg;
- rcu_read_lock();
-
- bfqg = bfq_find_set_group(bfqd, __bio_blkcg(bio));
- if (!bfqg) {
- bfqq = &bfqd->oom_bfqq;
- goto out;
- }
-
+ bfqg = bfq_bio_bfqg(bfqd, bio);
if (!is_sync) {
async_bfqq = bfq_async_queue_prio(bfqd, bfqg, ioprio_class,
ioprio);
@@ -5763,8 +5786,6 @@ out:
if (bfqq != &bfqd->oom_bfqq && is_sync && !respawn)
bfqq = bfq_do_or_sched_stable_merge(bfqd, bfqq, bic);
-
- rcu_read_unlock();
return bfqq;
}
@@ -6111,6 +6132,8 @@ static inline void bfq_update_insert_stats(struct request_queue *q,
unsigned int cmd_flags) {}
#endif /* CONFIG_BFQ_CGROUP_DEBUG */
+static struct bfq_queue *bfq_init_rq(struct request *rq);
+
static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
bool at_head)
{
@@ -6126,18 +6149,15 @@ static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
bfqg_stats_update_legacy_io(q, rq);
#endif
spin_lock_irq(&bfqd->lock);
+ bfqq = bfq_init_rq(rq);
if (blk_mq_sched_try_insert_merge(q, rq, &free)) {
spin_unlock_irq(&bfqd->lock);
blk_mq_free_requests(&free);
return;
}
- spin_unlock_irq(&bfqd->lock);
-
trace_block_rq_insert(rq);
- spin_lock_irq(&bfqd->lock);
- bfqq = bfq_init_rq(rq);
if (!bfqq || at_head) {
if (at_head)
list_add(&rq->queuelist, &bfqd->dispatch);
@@ -6354,12 +6374,6 @@ static void bfq_completed_request(struct bfq_queue *bfqq, struct bfq_data *bfqd)
bfq_schedule_dispatch(bfqd);
}
-static void bfq_finish_requeue_request_body(struct bfq_queue *bfqq)
-{
- bfqq_request_freed(bfqq);
- bfq_put_queue(bfqq);
-}
-
/*
* The processes associated with bfqq may happen to generate their
* cumulative I/O at a lower rate than the rate at which the device
@@ -6556,7 +6570,9 @@ static void bfq_finish_requeue_request(struct request *rq)
bfq_completed_request(bfqq, bfqd);
}
- bfq_finish_requeue_request_body(bfqq);
+ bfqq_request_freed(bfqq);
+ bfq_put_queue(bfqq);
+ RQ_BIC(rq)->requests--;
spin_unlock_irqrestore(&bfqd->lock, flags);
/*
@@ -6790,6 +6806,7 @@ static struct bfq_queue *bfq_init_rq(struct request *rq)
bfqq_request_allocated(bfqq);
bfqq->ref++;
+ bic->requests++;
bfq_log_bfqq(bfqd, bfqq, "get_request %p: bfqq %p, %d",
rq, bfqq, bfqq->ref);
@@ -6886,8 +6903,8 @@ bfq_idle_slice_timer_body(struct bfq_data *bfqd, struct bfq_queue *bfqq)
bfq_bfqq_expire(bfqd, bfqq, true, reason);
schedule_dispatch:
- spin_unlock_irqrestore(&bfqd->lock, flags);
bfq_schedule_dispatch(bfqd);
+ spin_unlock_irqrestore(&bfqd->lock, flags);
}
/*
diff --git a/block/bfq-iosched.h b/block/bfq-iosched.h
index 3b83e3d1c2e5..ca8177d7bf7c 100644
--- a/block/bfq-iosched.h
+++ b/block/bfq-iosched.h
@@ -468,6 +468,7 @@ struct bfq_io_cq {
struct bfq_queue *stable_merge_bfqq;
bool stably_merged; /* non splittable if true */
+ unsigned int requests; /* Number of requests this process has in flight */
};
/**
@@ -928,6 +929,8 @@ struct bfq_group {
/* reference counter (see comments in bfq_bic_update_cgroup) */
int ref;
+ /* Is bfq_group still online? */
+ bool online;
struct bfq_entity entity;
struct bfq_sched_data sched_data;
@@ -979,6 +982,7 @@ void bfq_weights_tree_remove(struct bfq_data *bfqd,
void bfq_bfqq_expire(struct bfq_data *bfqd, struct bfq_queue *bfqq,
bool compensate, enum bfqq_expiration reason);
void bfq_put_queue(struct bfq_queue *bfqq);
+void bfq_put_cooperator(struct bfq_queue *bfqq);
void bfq_end_wr_async_queues(struct bfq_data *bfqd, struct bfq_group *bfqg);
void bfq_release_process_ref(struct bfq_data *bfqd, struct bfq_queue *bfqq);
void bfq_schedule_dispatch(struct bfq_data *bfqd);
@@ -1006,8 +1010,7 @@ void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq,
void bfq_init_entity(struct bfq_entity *entity, struct bfq_group *bfqg);
void bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio);
void bfq_end_wr_async(struct bfq_data *bfqd);
-struct bfq_group *bfq_find_set_group(struct bfq_data *bfqd,
- struct blkcg *blkcg);
+struct bfq_group *bfq_bio_bfqg(struct bfq_data *bfqd, struct bio *bio);
struct blkcg_gq *bfqg_to_blkg(struct bfq_group *bfqg);
struct bfq_group *bfqq_group(struct bfq_queue *bfqq);
struct bfq_group *bfq_create_group_hierarchy(struct bfq_data *bfqd, int node);
@@ -1100,13 +1103,13 @@ struct bfq_group *bfqq_group(struct bfq_queue *bfqq);
break; \
bfq_bfqq_name((bfqq), pid_str, MAX_BFQQ_NAME_LENGTH); \
blk_add_cgroup_trace_msg((bfqd)->queue, \
- bfqg_to_blkg(bfqq_group(bfqq))->blkcg, \
+ &bfqg_to_blkg(bfqq_group(bfqq))->blkcg->css, \
"%s " fmt, pid_str, ##args); \
} while (0)
#define bfq_log_bfqg(bfqd, bfqg, fmt, args...) do { \
blk_add_cgroup_trace_msg((bfqd)->queue, \
- bfqg_to_blkg(bfqg)->blkcg, fmt, ##args); \
+ &bfqg_to_blkg(bfqg)->blkcg->css, fmt, ##args); \
} while (0)
#else /* CONFIG_BFQ_GROUP_IOSCHED */
diff --git a/block/bio.c b/block/bio.c
index 4259125e16ab..a3893d80dccc 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -224,24 +224,13 @@ EXPORT_SYMBOL(bio_uninit);
static void bio_free(struct bio *bio)
{
struct bio_set *bs = bio->bi_pool;
- void *p;
-
- bio_uninit(bio);
+ void *p = bio;
- if (bs) {
- bvec_free(&bs->bvec_pool, bio->bi_io_vec, bio->bi_max_vecs);
+ WARN_ON_ONCE(!bs);
- /*
- * If we have front padding, adjust the bio pointer before freeing
- */
- p = bio;
- p -= bs->front_pad;
-
- mempool_free(p, &bs->bio_pool);
- } else {
- /* Bio was allocated by bio_kmalloc() */
- kfree(bio);
- }
+ bio_uninit(bio);
+ bvec_free(&bs->bvec_pool, bio->bi_io_vec, bio->bi_max_vecs);
+ mempool_free(p - bs->front_pad, &bs->bio_pool);
}
/*
@@ -419,6 +408,28 @@ static void punt_bios_to_rescuer(struct bio_set *bs)
queue_work(bs->rescue_workqueue, &bs->rescue_work);
}
+static struct bio *bio_alloc_percpu_cache(struct block_device *bdev,
+ unsigned short nr_vecs, unsigned int opf, gfp_t gfp,
+ struct bio_set *bs)
+{
+ struct bio_alloc_cache *cache;
+ struct bio *bio;
+
+ cache = per_cpu_ptr(bs->cache, get_cpu());
+ if (!cache->free_list) {
+ put_cpu();
+ return NULL;
+ }
+ bio = cache->free_list;
+ cache->free_list = bio->bi_next;
+ cache->nr--;
+ put_cpu();
+
+ bio_init(bio, bdev, nr_vecs ? bio->bi_inline_vecs : NULL, nr_vecs, opf);
+ bio->bi_pool = bs;
+ return bio;
+}
+
/**
* bio_alloc_bioset - allocate a bio for I/O
* @bdev: block device to allocate the bio for (can be %NULL)
@@ -451,6 +462,9 @@ static void punt_bios_to_rescuer(struct bio_set *bs)
* submit_bio_noacct() should be avoided - instead, use bio_set's front_pad
* for per bio allocations.
*
+ * If REQ_ALLOC_CACHE is set, the final put of the bio MUST be done from process
+ * context, not hard/soft IRQ.
+ *
* Returns: Pointer to new bio on success, NULL on failure.
*/
struct bio *bio_alloc_bioset(struct block_device *bdev, unsigned short nr_vecs,
@@ -465,6 +479,21 @@ struct bio *bio_alloc_bioset(struct block_device *bdev, unsigned short nr_vecs,
if (WARN_ON_ONCE(!mempool_initialized(&bs->bvec_pool) && nr_vecs > 0))
return NULL;
+ if (opf & REQ_ALLOC_CACHE) {
+ if (bs->cache && nr_vecs <= BIO_INLINE_VECS) {
+ bio = bio_alloc_percpu_cache(bdev, nr_vecs, opf,
+ gfp_mask, bs);
+ if (bio)
+ return bio;
+ /*
+ * No cached bio available, bio returned below marked with
+ * REQ_ALLOC_CACHE to particpate in per-cpu alloc cache.
+ */
+ } else {
+ opf &= ~REQ_ALLOC_CACHE;
+ }
+ }
+
/*
* submit_bio_noacct() converts recursion to iteration; this means if
* we're running beneath it, any bios we allocate and submit will not be
@@ -528,28 +557,28 @@ err_free:
EXPORT_SYMBOL(bio_alloc_bioset);
/**
- * bio_kmalloc - kmalloc a bio for I/O
+ * bio_kmalloc - kmalloc a bio
+ * @nr_vecs: number of bio_vecs to allocate
* @gfp_mask: the GFP_* mask given to the slab allocator
- * @nr_iovecs: number of iovecs to pre-allocate
*
- * Use kmalloc to allocate and initialize a bio.
+ * Use kmalloc to allocate a bio (including bvecs). The bio must be initialized
+ * using bio_init() before use. To free a bio returned from this function use
+ * kfree() after calling bio_uninit(). A bio returned from this function can
+ * be reused by calling bio_uninit() before calling bio_init() again.
+ *
+ * Note that unlike bio_alloc() or bio_alloc_bioset() allocations from this
+ * function are not backed by a mempool can can fail. Do not use this function
+ * for allocations in the file system I/O path.
*
* Returns: Pointer to new bio on success, NULL on failure.
*/
-struct bio *bio_kmalloc(gfp_t gfp_mask, unsigned short nr_iovecs)
+struct bio *bio_kmalloc(unsigned short nr_vecs, gfp_t gfp_mask)
{
struct bio *bio;
- if (nr_iovecs > UIO_MAXIOV)
- return NULL;
-
- bio = kmalloc(struct_size(bio, bi_inline_vecs, nr_iovecs), gfp_mask);
- if (unlikely(!bio))
+ if (nr_vecs > UIO_MAXIOV)
return NULL;
- bio_init(bio, NULL, nr_iovecs ? bio->bi_inline_vecs : NULL, nr_iovecs,
- 0);
- bio->bi_pool = NULL;
- return bio;
+ return kmalloc(struct_size(bio, bi_inline_vecs, nr_vecs), gfp_mask);
}
EXPORT_SYMBOL(bio_kmalloc);
@@ -711,7 +740,7 @@ void bio_put(struct bio *bio)
return;
}
- if (bio_flagged(bio, BIO_PERCPU_CACHE)) {
+ if (bio->bi_opf & REQ_ALLOC_CACHE) {
struct bio_alloc_cache *cache;
bio_uninit(bio);
@@ -732,14 +761,15 @@ static int __bio_clone(struct bio *bio, struct bio *bio_src, gfp_t gfp)
bio_set_flag(bio, BIO_CLONED);
if (bio_flagged(bio_src, BIO_THROTTLED))
bio_set_flag(bio, BIO_THROTTLED);
- if (bio->bi_bdev == bio_src->bi_bdev &&
- bio_flagged(bio_src, BIO_REMAPPED))
- bio_set_flag(bio, BIO_REMAPPED);
bio->bi_ioprio = bio_src->bi_ioprio;
bio->bi_iter = bio_src->bi_iter;
- bio_clone_blkg_association(bio, bio_src);
- blkcg_bio_issue_init(bio);
+ if (bio->bi_bdev) {
+ if (bio->bi_bdev == bio_src->bi_bdev &&
+ bio_flagged(bio_src, BIO_REMAPPED))
+ bio_set_flag(bio, BIO_REMAPPED);
+ bio_clone_blkg_association(bio, bio_src);
+ }
if (bio_crypt_clone(bio, bio_src, gfp) < 0)
return -ENOMEM;
@@ -1727,55 +1757,13 @@ int bioset_init_from_src(struct bio_set *bs, struct bio_set *src)
flags |= BIOSET_NEED_BVECS;
if (src->rescue_workqueue)
flags |= BIOSET_NEED_RESCUER;
+ if (src->cache)
+ flags |= BIOSET_PERCPU_CACHE;
return bioset_init(bs, src->bio_pool.min_nr, src->front_pad, flags);
}
EXPORT_SYMBOL(bioset_init_from_src);
-/**
- * bio_alloc_kiocb - Allocate a bio from bio_set based on kiocb
- * @kiocb: kiocb describing the IO
- * @bdev: block device to allocate the bio for (can be %NULL)
- * @nr_vecs: number of iovecs to pre-allocate
- * @opf: operation and flags for bio
- * @bs: bio_set to allocate from
- *
- * Description:
- * Like @bio_alloc_bioset, but pass in the kiocb. The kiocb is only
- * used to check if we should dip into the per-cpu bio_set allocation
- * cache. The allocation uses GFP_KERNEL internally. On return, the
- * bio is marked BIO_PERCPU_CACHEABLE, and the final put of the bio
- * MUST be done from process context, not hard/soft IRQ.
- *
- */
-struct bio *bio_alloc_kiocb(struct kiocb *kiocb, struct block_device *bdev,
- unsigned short nr_vecs, unsigned int opf, struct bio_set *bs)
-{
- struct bio_alloc_cache *cache;
- struct bio *bio;
-
- if (!(kiocb->ki_flags & IOCB_ALLOC_CACHE) || nr_vecs > BIO_INLINE_VECS)
- return bio_alloc_bioset(bdev, nr_vecs, opf, GFP_KERNEL, bs);
-
- cache = per_cpu_ptr(bs->cache, get_cpu());
- if (cache->free_list) {
- bio = cache->free_list;
- cache->free_list = bio->bi_next;
- cache->nr--;
- put_cpu();
- bio_init(bio, bdev, nr_vecs ? bio->bi_inline_vecs : NULL,
- nr_vecs, opf);
- bio->bi_pool = bs;
- bio_set_flag(bio, BIO_PERCPU_CACHE);
- return bio;
- }
- put_cpu();
- bio = bio_alloc_bioset(bdev, nr_vecs, opf, GFP_KERNEL, bs);
- bio_set_flag(bio, BIO_PERCPU_CACHE);
- return bio;
-}
-EXPORT_SYMBOL_GPL(bio_alloc_kiocb);
-
static int __init init_bio(void)
{
int i;
diff --git a/block/blk-cgroup-fc-appid.c b/block/blk-cgroup-fc-appid.c
new file mode 100644
index 000000000000..760a2e1878dd
--- /dev/null
+++ b/block/blk-cgroup-fc-appid.c
@@ -0,0 +1,57 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "blk-cgroup.h"
+
+/**
+ * blkcg_set_fc_appid - set the fc_app_id field associted to blkcg
+ * @app_id: application identifier
+ * @cgrp_id: cgroup id
+ * @app_id_len: size of application identifier
+ */
+int blkcg_set_fc_appid(char *app_id, u64 cgrp_id, size_t app_id_len)
+{
+ struct cgroup *cgrp;
+ struct cgroup_subsys_state *css;
+ struct blkcg *blkcg;
+ int ret = 0;
+
+ if (app_id_len > FC_APPID_LEN)
+ return -EINVAL;
+
+ cgrp = cgroup_get_from_id(cgrp_id);
+ if (!cgrp)
+ return -ENOENT;
+ css = cgroup_get_e_css(cgrp, &io_cgrp_subsys);
+ if (!css) {
+ ret = -ENOENT;
+ goto out_cgrp_put;
+ }
+ blkcg = css_to_blkcg(css);
+ /*
+ * There is a slight race condition on setting the appid.
+ * Worst case an I/O may not find the right id.
+ * This is no different from the I/O we let pass while obtaining
+ * the vmid from the fabric.
+ * Adding the overhead of a lock is not necessary.
+ */
+ strlcpy(blkcg->fc_app_id, app_id, app_id_len);
+ css_put(css);
+out_cgrp_put:
+ cgroup_put(cgrp);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(blkcg_set_fc_appid);
+
+/**
+ * blkcg_get_fc_appid - get the fc app identifier associated with a bio
+ * @bio: target bio
+ *
+ * On success return the fc_app_id, on failure return NULL
+ */
+char *blkcg_get_fc_appid(struct bio *bio)
+{
+ if (!bio->bi_blkg || bio->bi_blkg->blkcg->fc_app_id[0] == '\0')
+ return NULL;
+ return bio->bi_blkg->blkcg->fc_app_id;
+}
+EXPORT_SYMBOL_GPL(blkcg_get_fc_appid);
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 8dfe62786cd5..40161a3f68d0 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -59,6 +59,23 @@ static struct workqueue_struct *blkcg_punt_bio_wq;
#define BLKG_DESTROY_BATCH_SIZE 64
+/**
+ * blkcg_css - find the current css
+ *
+ * Find the css associated with either the kthread or the current task.
+ * This may return a dying css, so it is up to the caller to use tryget logic
+ * to confirm it is alive and well.
+ */
+static struct cgroup_subsys_state *blkcg_css(void)
+{
+ struct cgroup_subsys_state *css;
+
+ css = kthread_blkcg();
+ if (css)
+ return css;
+ return task_css(current, io_cgrp_id);
+}
+
static bool blkcg_policy_enabled(struct request_queue *q,
const struct blkcg_policy *pol)
{
@@ -156,6 +173,33 @@ static void blkg_async_bio_workfn(struct work_struct *work)
}
/**
+ * bio_blkcg_css - return the blkcg CSS associated with a bio
+ * @bio: target bio
+ *
+ * This returns the CSS for the blkcg associated with a bio, or %NULL if not
+ * associated. Callers are expected to either handle %NULL or know association
+ * has been done prior to calling this.
+ */
+struct cgroup_subsys_state *bio_blkcg_css(struct bio *bio)
+{
+ if (!bio || !bio->bi_blkg)
+ return NULL;
+ return &bio->bi_blkg->blkcg->css;
+}
+EXPORT_SYMBOL_GPL(bio_blkcg_css);
+
+/**
+ * blkcg_parent - get the parent of a blkcg
+ * @blkcg: blkcg of interest
+ *
+ * Return the parent blkcg of @blkcg. Can be called anytime.
+ */
+static inline struct blkcg *blkcg_parent(struct blkcg *blkcg)
+{
+ return css_to_blkcg(blkcg->css.parent);
+}
+
+/**
* blkg_alloc - allocate a blkg
* @blkcg: block cgroup the new blkg is associated with
* @q: request_queue the new blkg is associated with
@@ -254,7 +298,6 @@ static struct blkcg_gq *blkg_create(struct blkcg *blkcg,
struct blkcg_gq *blkg;
int i, ret;
- WARN_ON_ONCE(!rcu_read_lock_held());
lockdep_assert_held(&q->queue_lock);
/* request_queue is dying, do not create/recreate a blkg */
@@ -905,7 +948,6 @@ static void blkcg_print_one_stat(struct blkcg_gq *blkg, struct seq_file *s)
{
struct blkg_iostat_set *bis = &blkg->iostat;
u64 rbytes, wbytes, rios, wios, dbytes, dios;
- bool has_stats = false;
const char *dname;
unsigned seq;
int i;
@@ -931,14 +973,12 @@ static void blkcg_print_one_stat(struct blkcg_gq *blkg, struct seq_file *s)
} while (u64_stats_fetch_retry(&bis->sync, seq));
if (rbytes || wbytes || rios || wios) {
- has_stats = true;
seq_printf(s, "rbytes=%llu wbytes=%llu rios=%llu wios=%llu dbytes=%llu dios=%llu",
rbytes, wbytes, rios, wios,
dbytes, dios);
}
if (blkcg_debug_stats && atomic_read(&blkg->use_delay)) {
- has_stats = true;
seq_printf(s, " use_delay=%d delay_nsec=%llu",
atomic_read(&blkg->use_delay),
atomic64_read(&blkg->delay_nsec));
@@ -950,12 +990,10 @@ static void blkcg_print_one_stat(struct blkcg_gq *blkg, struct seq_file *s)
if (!blkg->pd[i] || !pol->pd_stat_fn)
continue;
- if (pol->pd_stat_fn(blkg->pd[i], s))
- has_stats = true;
+ pol->pd_stat_fn(blkg->pd[i], s);
}
- if (has_stats)
- seq_printf(s, "\n");
+ seq_puts(s, "\n");
}
static int blkcg_print_stat(struct seq_file *sf, void *v)
@@ -994,6 +1032,13 @@ static struct cftype blkcg_legacy_files[] = {
{ } /* terminate */
};
+#ifdef CONFIG_CGROUP_WRITEBACK
+struct list_head *blkcg_get_cgwb_list(struct cgroup_subsys_state *css)
+{
+ return &css_to_blkcg(css)->cgwb_list;
+}
+#endif
+
/*
* blkcg destruction is a three-stage process.
*
@@ -1016,25 +1061,6 @@ static struct cftype blkcg_legacy_files[] = {
*/
/**
- * blkcg_css_offline - cgroup css_offline callback
- * @css: css of interest
- *
- * This function is called when @css is about to go away. Here the cgwbs are
- * offlined first and only once writeback associated with the blkcg has
- * finished do we start step 2 (see above).
- */
-static void blkcg_css_offline(struct cgroup_subsys_state *css)
-{
- struct blkcg *blkcg = css_to_blkcg(css);
-
- /* this prevents anyone from attaching or migrating to this blkcg */
- wb_blkcg_offline(blkcg);
-
- /* put the base online pin allowing step 2 to be triggered */
- blkcg_unpin_online(blkcg);
-}
-
-/**
* blkcg_destroy_blkgs - responsible for shooting down blkgs
* @blkcg: blkcg of interest
*
@@ -1045,7 +1071,7 @@ static void blkcg_css_offline(struct cgroup_subsys_state *css)
*
* This is the blkcg counterpart of ioc_release_fn().
*/
-void blkcg_destroy_blkgs(struct blkcg *blkcg)
+static void blkcg_destroy_blkgs(struct blkcg *blkcg)
{
might_sleep();
@@ -1075,6 +1101,57 @@ void blkcg_destroy_blkgs(struct blkcg *blkcg)
spin_unlock_irq(&blkcg->lock);
}
+/**
+ * blkcg_pin_online - pin online state
+ * @blkcg_css: blkcg of interest
+ *
+ * While pinned, a blkcg is kept online. This is primarily used to
+ * impedance-match blkg and cgwb lifetimes so that blkg doesn't go offline
+ * while an associated cgwb is still active.
+ */
+void blkcg_pin_online(struct cgroup_subsys_state *blkcg_css)
+{
+ refcount_inc(&css_to_blkcg(blkcg_css)->online_pin);
+}
+
+/**
+ * blkcg_unpin_online - unpin online state
+ * @blkcg_css: blkcg of interest
+ *
+ * This is primarily used to impedance-match blkg and cgwb lifetimes so
+ * that blkg doesn't go offline while an associated cgwb is still active.
+ * When this count goes to zero, all active cgwbs have finished so the
+ * blkcg can continue destruction by calling blkcg_destroy_blkgs().
+ */
+void blkcg_unpin_online(struct cgroup_subsys_state *blkcg_css)
+{
+ struct blkcg *blkcg = css_to_blkcg(blkcg_css);
+
+ do {
+ if (!refcount_dec_and_test(&blkcg->online_pin))
+ break;
+ blkcg_destroy_blkgs(blkcg);
+ blkcg = blkcg_parent(blkcg);
+ } while (blkcg);
+}
+
+/**
+ * blkcg_css_offline - cgroup css_offline callback
+ * @css: css of interest
+ *
+ * This function is called when @css is about to go away. Here the cgwbs are
+ * offlined first and only once writeback associated with the blkcg has
+ * finished do we start step 2 (see above).
+ */
+static void blkcg_css_offline(struct cgroup_subsys_state *css)
+{
+ /* this prevents anyone from attaching or migrating to this blkcg */
+ wb_blkcg_offline(css);
+
+ /* put the base online pin allowing step 2 to be triggered */
+ blkcg_unpin_online(css);
+}
+
static void blkcg_css_free(struct cgroup_subsys_state *css)
{
struct blkcg *blkcg = css_to_blkcg(css);
@@ -1163,8 +1240,7 @@ unlock:
static int blkcg_css_online(struct cgroup_subsys_state *css)
{
- struct blkcg *blkcg = css_to_blkcg(css);
- struct blkcg *parent = blkcg_parent(blkcg);
+ struct blkcg *parent = blkcg_parent(css_to_blkcg(css));
/*
* blkcg_pin_online() is used to delay blkcg offline so that blkgs
@@ -1172,7 +1248,7 @@ static int blkcg_css_online(struct cgroup_subsys_state *css)
* parent so that offline always happens towards the root.
*/
if (parent)
- blkcg_pin_online(parent);
+ blkcg_pin_online(css);
return 0;
}
@@ -1201,14 +1277,13 @@ int blkcg_init_queue(struct request_queue *q)
preloaded = !radix_tree_preload(GFP_KERNEL);
/* Make sure the root blkg exists. */
- rcu_read_lock();
+ /* spin_lock_irq can serve as RCU read-side critical section. */
spin_lock_irq(&q->queue_lock);
blkg = blkg_create(&blkcg_root, q, new_blkg);
if (IS_ERR(blkg))
goto err_unlock;
q->root_blkg = blkg;
spin_unlock_irq(&q->queue_lock);
- rcu_read_unlock();
if (preloaded)
radix_tree_preload_end();
@@ -1234,7 +1309,6 @@ err_destroy_all:
return ret;
err_unlock:
spin_unlock_irq(&q->queue_lock);
- rcu_read_unlock();
if (preloaded)
radix_tree_preload_end();
return PTR_ERR(blkg);
@@ -1726,7 +1800,6 @@ static void blkcg_maybe_throttle_blkg(struct blkcg_gq *blkg, bool use_memdelay)
void blkcg_maybe_throttle_current(void)
{
struct request_queue *q = current->throttle_queue;
- struct cgroup_subsys_state *css;
struct blkcg *blkcg;
struct blkcg_gq *blkg;
bool use_memdelay = current->use_memdelay;
@@ -1738,12 +1811,7 @@ void blkcg_maybe_throttle_current(void)
current->use_memdelay = false;
rcu_read_lock();
- css = kthread_blkcg();
- if (css)
- blkcg = css_to_blkcg(css);
- else
- blkcg = css_to_blkcg(task_css(current, io_cgrp_id));
-
+ blkcg = css_to_blkcg(blkcg_css());
if (!blkcg)
goto out;
blkg = blkg_lookup(blkcg, q);
@@ -1889,7 +1957,7 @@ void bio_associate_blkg(struct bio *bio)
rcu_read_lock();
if (bio->bi_blkg)
- css = &bio_blkcg(bio)->css;
+ css = bio_blkcg_css(bio);
else
css = blkcg_css();
@@ -1950,6 +2018,22 @@ void blk_cgroup_bio_start(struct bio *bio)
put_cpu();
}
+bool blk_cgroup_congested(void)
+{
+ struct cgroup_subsys_state *css;
+ bool ret = false;
+
+ rcu_read_lock();
+ for (css = blkcg_css(); css; css = css->parent) {
+ if (atomic_read(&css->cgroup->congestion_count)) {
+ ret = true;
+ break;
+ }
+ }
+ rcu_read_unlock();
+ return ret;
+}
+
static int __init blkcg_init(void)
{
blkcg_punt_bio_wq = alloc_workqueue("blkcg_punt_bio",
diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h
index 47e1e38390c9..d4de0a35e066 100644
--- a/block/blk-cgroup.h
+++ b/block/blk-cgroup.h
@@ -15,13 +15,101 @@
*/
#include <linux/blk-cgroup.h>
+#include <linux/cgroup.h>
+#include <linux/kthread.h>
#include <linux/blk-mq.h>
+struct blkcg_gq;
+struct blkg_policy_data;
+
+
/* percpu_counter batch for blkg_[rw]stats, per-cpu drift doesn't matter */
#define BLKG_STAT_CPU_BATCH (INT_MAX / 2)
#ifdef CONFIG_BLK_CGROUP
+enum blkg_iostat_type {
+ BLKG_IOSTAT_READ,
+ BLKG_IOSTAT_WRITE,
+ BLKG_IOSTAT_DISCARD,
+
+ BLKG_IOSTAT_NR,
+};
+
+struct blkg_iostat {
+ u64 bytes[BLKG_IOSTAT_NR];
+ u64 ios[BLKG_IOSTAT_NR];
+};
+
+struct blkg_iostat_set {
+ struct u64_stats_sync sync;
+ struct blkg_iostat cur;
+ struct blkg_iostat last;
+};
+
+/* association between a blk cgroup and a request queue */
+struct blkcg_gq {
+ /* Pointer to the associated request_queue */
+ struct request_queue *q;
+ struct list_head q_node;
+ struct hlist_node blkcg_node;
+ struct blkcg *blkcg;
+
+ /* all non-root blkcg_gq's are guaranteed to have access to parent */
+ struct blkcg_gq *parent;
+
+ /* reference count */
+ struct percpu_ref refcnt;
+
+ /* is this blkg online? protected by both blkcg and q locks */
+ bool online;
+
+ struct blkg_iostat_set __percpu *iostat_cpu;
+ struct blkg_iostat_set iostat;
+
+ struct blkg_policy_data *pd[BLKCG_MAX_POLS];
+
+ spinlock_t async_bio_lock;
+ struct bio_list async_bios;
+ union {
+ struct work_struct async_bio_work;
+ struct work_struct free_work;
+ };
+
+ atomic_t use_delay;
+ atomic64_t delay_nsec;
+ atomic64_t delay_start;
+ u64 last_delay;
+ int last_use;
+
+ struct rcu_head rcu_head;
+};
+
+struct blkcg {
+ struct cgroup_subsys_state css;
+ spinlock_t lock;
+ refcount_t online_pin;
+
+ struct radix_tree_root blkg_tree;
+ struct blkcg_gq __rcu *blkg_hint;
+ struct hlist_head blkg_list;
+
+ struct blkcg_policy_data *cpd[BLKCG_MAX_POLS];
+
+ struct list_head all_blkcgs_node;
+#ifdef CONFIG_BLK_CGROUP_FC_APPID
+ char fc_app_id[FC_APPID_LEN];
+#endif
+#ifdef CONFIG_CGROUP_WRITEBACK
+ struct list_head cgwb_list;
+#endif
+};
+
+static inline struct blkcg *css_to_blkcg(struct cgroup_subsys_state *css)
+{
+ return css ? container_of(css, struct blkcg, css) : NULL;
+}
+
/*
* A blkcg_gq (blkg) is association between a block cgroup (blkcg) and a
* request_queue (q). This is used by blkcg policies which need to track
@@ -63,7 +151,7 @@ typedef void (blkcg_pol_online_pd_fn)(struct blkg_policy_data *pd);
typedef void (blkcg_pol_offline_pd_fn)(struct blkg_policy_data *pd);
typedef void (blkcg_pol_free_pd_fn)(struct blkg_policy_data *pd);
typedef void (blkcg_pol_reset_pd_stats_fn)(struct blkg_policy_data *pd);
-typedef bool (blkcg_pol_stat_pd_fn)(struct blkg_policy_data *pd,
+typedef void (blkcg_pol_stat_pd_fn)(struct blkg_policy_data *pd,
struct seq_file *s);
struct blkcg_policy {
@@ -123,52 +211,14 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
void blkg_conf_finish(struct blkg_conf_ctx *ctx);
/**
- * blkcg_css - find the current css
- *
- * Find the css associated with either the kthread or the current task.
- * This may return a dying css, so it is up to the caller to use tryget logic
- * to confirm it is alive and well.
- */
-static inline struct cgroup_subsys_state *blkcg_css(void)
-{
- struct cgroup_subsys_state *css;
-
- css = kthread_blkcg();
- if (css)
- return css;
- return task_css(current, io_cgrp_id);
-}
-
-/**
- * __bio_blkcg - internal, inconsistent version to get blkcg
- *
- * DO NOT USE.
- * This function is inconsistent and consequently is dangerous to use. The
- * first part of the function returns a blkcg where a reference is owned by the
- * bio. This means it does not need to be rcu protected as it cannot go away
- * with the bio owning a reference to it. However, the latter potentially gets
- * it from task_css(). This can race against task migration and the cgroup
- * dying. It is also semantically different as it must be called rcu protected
- * and is susceptible to failure when trying to get a reference to it.
- * Therefore, it is not ok to assume that *_get() will always succeed on the
- * blkcg returned here.
- */
-static inline struct blkcg *__bio_blkcg(struct bio *bio)
-{
- if (bio && bio->bi_blkg)
- return bio->bi_blkg->blkcg;
- return css_to_blkcg(blkcg_css());
-}
-
-/**
* bio_issue_as_root_blkg - see if this bio needs to be issued as root blkg
* @return: true if this bio needs to be submitted with the root blkg context.
*
* In order to avoid priority inversions we sometimes need to issue a bio as if
* it were attached to the root blkg, and then backcharge to the actual owning
- * blkg. The idea is we do bio_blkcg() to look up the actual context for the
- * bio and attach the appropriate blkg to the bio. Then we call this helper and
- * if it is true run with the root blkg for that queue and then do any
+ * blkg. The idea is we do bio_blkcg_css() to look up the actual context for
+ * the bio and attach the appropriate blkg to the bio. Then we call this helper
+ * and if it is true run with the root blkg for that queue and then do any
* backcharging to the originating cgroup once the io is complete.
*/
static inline bool bio_issue_as_root_blkg(struct bio *bio)
@@ -457,7 +507,8 @@ struct blkcg_policy_data {
struct blkcg_policy {
};
-#ifdef CONFIG_BLOCK
+struct blkcg {
+};
static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, void *key) { return NULL; }
static inline struct blkcg_gq *blk_queue_root_blkg(struct request_queue *q)
@@ -471,8 +522,6 @@ static inline int blkcg_activate_policy(struct request_queue *q,
static inline void blkcg_deactivate_policy(struct request_queue *q,
const struct blkcg_policy *pol) { }
-static inline struct blkcg *__bio_blkcg(struct bio *bio) { return NULL; }
-
static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg,
struct blkcg_policy *pol) { return NULL; }
static inline struct blkcg_gq *pd_to_blkg(struct blkg_policy_data *pd) { return NULL; }
@@ -488,7 +537,6 @@ static inline bool blk_cgroup_mergeable(struct request *rq, struct bio *bio) { r
#define blk_queue_for_each_rl(rl, q) \
for ((rl) = &(q)->root_rl; (rl); (rl) = NULL)
-#endif /* CONFIG_BLOCK */
#endif /* CONFIG_BLK_CGROUP */
#endif /* _BLK_CGROUP_PRIVATE_H */
diff --git a/block/blk-core.c b/block/blk-core.c
index 937bb6b86331..80fa73c419a9 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -50,7 +50,6 @@
#include "blk-pm.h"
#include "blk-cgroup.h"
#include "blk-throttle.h"
-#include "blk-rq-qos.h"
struct dentry *blk_debugfs_root;
@@ -315,9 +314,6 @@ void blk_cleanup_queue(struct request_queue *q)
*/
blk_freeze_queue(q);
- /* cleanup rq qos structures for queue without disk */
- rq_qos_exit(q);
-
blk_queue_flag_set(QUEUE_FLAG_DEAD, q);
blk_sync_queue(q);
@@ -592,10 +588,9 @@ static inline int bio_check_eod(struct bio *bio)
(nr_sectors > maxsector ||
bio->bi_iter.bi_sector > maxsector - nr_sectors)) {
pr_info_ratelimited("%s: attempt to access beyond end of device\n"
- "%pg: rw=%d, want=%llu, limit=%llu\n",
- current->comm,
- bio->bi_bdev, bio->bi_opf,
- bio_end_sector(bio), maxsector);
+ "%pg: rw=%d, sector=%llu, nr_sectors = %u limit=%llu\n",
+ current->comm, bio->bi_bdev, bio->bi_opf,
+ bio->bi_iter.bi_sector, nr_sectors, maxsector);
return -EIO;
}
return 0;
@@ -820,11 +815,11 @@ void submit_bio_noacct(struct bio *bio)
switch (bio_op(bio)) {
case REQ_OP_DISCARD:
- if (!blk_queue_discard(q))
+ if (!bdev_max_discard_sectors(bdev))
goto not_supported;
break;
case REQ_OP_SECURE_ERASE:
- if (!blk_queue_secure_erase(q))
+ if (!bdev_max_secure_erase_sectors(bdev))
goto not_supported;
break;
case REQ_OP_ZONE_APPEND:
@@ -893,19 +888,11 @@ void submit_bio(struct bio *bio)
if (blkcg_punt_bio_submit(bio))
return;
- /*
- * If it's a regular read/write or a barrier with data attached,
- * go through the normal accounting stuff before submission.
- */
- if (bio_has_data(bio)) {
- unsigned int count = bio_sectors(bio);
-
- if (op_is_write(bio_op(bio))) {
- count_vm_events(PGPGOUT, count);
- } else {
- task_io_account_read(bio->bi_iter.bi_size);
- count_vm_events(PGPGIN, count);
- }
+ if (bio_op(bio) == REQ_OP_READ) {
+ task_io_account_read(bio->bi_iter.bi_size);
+ count_vm_events(PGPGIN, bio_sectors(bio));
+ } else if (bio_op(bio) == REQ_OP_WRITE) {
+ count_vm_events(PGPGOUT, bio_sectors(bio));
}
/*
@@ -1022,21 +1009,22 @@ again:
}
}
-static unsigned long __part_start_io_acct(struct block_device *part,
- unsigned int sectors, unsigned int op,
- unsigned long start_time)
+unsigned long bdev_start_io_acct(struct block_device *bdev,
+ unsigned int sectors, unsigned int op,
+ unsigned long start_time)
{
const int sgrp = op_stat_group(op);
part_stat_lock();
- update_io_ticks(part, start_time, false);
- part_stat_inc(part, ios[sgrp]);
- part_stat_add(part, sectors[sgrp], sectors);
- part_stat_local_inc(part, in_flight[op_is_write(op)]);
+ update_io_ticks(bdev, start_time, false);
+ part_stat_inc(bdev, ios[sgrp]);
+ part_stat_add(bdev, sectors[sgrp], sectors);
+ part_stat_local_inc(bdev, in_flight[op_is_write(op)]);
part_stat_unlock();
return start_time;
}
+EXPORT_SYMBOL(bdev_start_io_acct);
/**
* bio_start_io_acct_time - start I/O accounting for bio based drivers
@@ -1045,8 +1033,8 @@ static unsigned long __part_start_io_acct(struct block_device *part,
*/
void bio_start_io_acct_time(struct bio *bio, unsigned long start_time)
{
- __part_start_io_acct(bio->bi_bdev, bio_sectors(bio),
- bio_op(bio), start_time);
+ bdev_start_io_acct(bio->bi_bdev, bio_sectors(bio),
+ bio_op(bio), start_time);
}
EXPORT_SYMBOL_GPL(bio_start_io_acct_time);
@@ -1058,46 +1046,33 @@ EXPORT_SYMBOL_GPL(bio_start_io_acct_time);
*/
unsigned long bio_start_io_acct(struct bio *bio)
{
- return __part_start_io_acct(bio->bi_bdev, bio_sectors(bio),
- bio_op(bio), jiffies);
+ return bdev_start_io_acct(bio->bi_bdev, bio_sectors(bio),
+ bio_op(bio), jiffies);
}
EXPORT_SYMBOL_GPL(bio_start_io_acct);
-unsigned long disk_start_io_acct(struct gendisk *disk, unsigned int sectors,
- unsigned int op)
-{
- return __part_start_io_acct(disk->part0, sectors, op, jiffies);
-}
-EXPORT_SYMBOL(disk_start_io_acct);
-
-static void __part_end_io_acct(struct block_device *part, unsigned int op,
- unsigned long start_time)
+void bdev_end_io_acct(struct block_device *bdev, unsigned int op,
+ unsigned long start_time)
{
const int sgrp = op_stat_group(op);
unsigned long now = READ_ONCE(jiffies);
unsigned long duration = now - start_time;
part_stat_lock();
- update_io_ticks(part, now, true);
- part_stat_add(part, nsecs[sgrp], jiffies_to_nsecs(duration));
- part_stat_local_dec(part, in_flight[op_is_write(op)]);
+ update_io_ticks(bdev, now, true);
+ part_stat_add(bdev, nsecs[sgrp], jiffies_to_nsecs(duration));
+ part_stat_local_dec(bdev, in_flight[op_is_write(op)]);
part_stat_unlock();
}
+EXPORT_SYMBOL(bdev_end_io_acct);
void bio_end_io_acct_remapped(struct bio *bio, unsigned long start_time,
- struct block_device *orig_bdev)
+ struct block_device *orig_bdev)
{
- __part_end_io_acct(orig_bdev, bio_op(bio), start_time);
+ bdev_end_io_acct(orig_bdev, bio_op(bio), start_time);
}
EXPORT_SYMBOL_GPL(bio_end_io_acct_remapped);
-void disk_end_io_acct(struct gendisk *disk, unsigned int op,
- unsigned long start_time)
-{
- __part_end_io_acct(disk->part0, op, start_time);
-}
-EXPORT_SYMBOL(disk_end_io_acct);
-
/**
* blk_lld_busy - Check if underlying low-level drivers of a device are busy
* @q : the queue of the device being checked
diff --git a/block/blk-crypto-fallback.c b/block/blk-crypto-fallback.c
index 7c854584b52b..621abd1b0e4d 100644
--- a/block/blk-crypto-fallback.c
+++ b/block/blk-crypto-fallback.c
@@ -152,23 +152,25 @@ static void blk_crypto_fallback_encrypt_endio(struct bio *enc_bio)
src_bio->bi_status = enc_bio->bi_status;
- bio_put(enc_bio);
+ bio_uninit(enc_bio);
+ kfree(enc_bio);
bio_endio(src_bio);
}
static struct bio *blk_crypto_fallback_clone_bio(struct bio *bio_src)
{
+ unsigned int nr_segs = bio_segments(bio_src);
struct bvec_iter iter;
struct bio_vec bv;
struct bio *bio;
- bio = bio_kmalloc(GFP_NOIO, bio_segments(bio_src));
+ bio = bio_kmalloc(nr_segs, GFP_NOIO);
if (!bio)
return NULL;
- bio->bi_bdev = bio_src->bi_bdev;
+ bio_init(bio, bio_src->bi_bdev, bio->bi_inline_vecs, nr_segs,
+ bio_src->bi_opf);
if (bio_flagged(bio_src, BIO_REMAPPED))
bio_set_flag(bio, BIO_REMAPPED);
- bio->bi_opf = bio_src->bi_opf;
bio->bi_ioprio = bio_src->bi_ioprio;
bio->bi_iter.bi_sector = bio_src->bi_iter.bi_sector;
bio->bi_iter.bi_size = bio_src->bi_iter.bi_size;
@@ -177,7 +179,6 @@ static struct bio *blk_crypto_fallback_clone_bio(struct bio *bio_src)
bio->bi_io_vec[bio->bi_vcnt++] = bv;
bio_clone_blkg_association(bio, bio_src);
- blkcg_bio_issue_init(bio);
return bio;
}
@@ -363,8 +364,8 @@ out_release_keyslot:
blk_crypto_put_keyslot(slot);
out_put_enc_bio:
if (enc_bio)
- bio_put(enc_bio);
-
+ bio_uninit(enc_bio);
+ kfree(enc_bio);
return ret;
}
diff --git a/block/blk-iocost.c b/block/blk-iocost.c
index 70a0a3d680a3..33a11ba971ea 100644
--- a/block/blk-iocost.c
+++ b/block/blk-iocost.c
@@ -533,8 +533,7 @@ struct ioc_gq {
/* statistics */
struct iocg_pcpu_stat __percpu *pcpu_stat;
- struct iocg_stat local_stat;
- struct iocg_stat desc_stat;
+ struct iocg_stat stat;
struct iocg_stat last_stat;
u64 last_stat_abs_vusage;
u64 usage_delta_us;
@@ -1371,7 +1370,7 @@ static bool iocg_kick_delay(struct ioc_gq *iocg, struct ioc_now *now)
return true;
} else {
if (iocg->indelay_since) {
- iocg->local_stat.indelay_us += now->now - iocg->indelay_since;
+ iocg->stat.indelay_us += now->now - iocg->indelay_since;
iocg->indelay_since = 0;
}
iocg->delay = 0;
@@ -1419,7 +1418,7 @@ static void iocg_pay_debt(struct ioc_gq *iocg, u64 abs_vpay,
/* if debt is paid in full, restore inuse */
if (!iocg->abs_vdebt) {
- iocg->local_stat.indebt_us += now->now - iocg->indebt_since;
+ iocg->stat.indebt_us += now->now - iocg->indebt_since;
iocg->indebt_since = 0;
propagate_weights(iocg, iocg->active, iocg->last_inuse,
@@ -1513,7 +1512,7 @@ static void iocg_kick_waitq(struct ioc_gq *iocg, bool pay_debt,
if (!waitqueue_active(&iocg->waitq)) {
if (iocg->wait_since) {
- iocg->local_stat.wait_us += now->now - iocg->wait_since;
+ iocg->stat.wait_us += now->now - iocg->wait_since;
iocg->wait_since = 0;
}
return;
@@ -1641,11 +1640,30 @@ static void iocg_build_inner_walk(struct ioc_gq *iocg,
}
}
+/* propagate the deltas to the parent */
+static void iocg_flush_stat_upward(struct ioc_gq *iocg)
+{
+ if (iocg->level > 0) {
+ struct iocg_stat *parent_stat =
+ &iocg->ancestors[iocg->level - 1]->stat;
+
+ parent_stat->usage_us +=
+ iocg->stat.usage_us - iocg->last_stat.usage_us;
+ parent_stat->wait_us +=
+ iocg->stat.wait_us - iocg->last_stat.wait_us;
+ parent_stat->indebt_us +=
+ iocg->stat.indebt_us - iocg->last_stat.indebt_us;
+ parent_stat->indelay_us +=
+ iocg->stat.indelay_us - iocg->last_stat.indelay_us;
+ }
+
+ iocg->last_stat = iocg->stat;
+}
+
/* collect per-cpu counters and propagate the deltas to the parent */
-static void iocg_flush_stat_one(struct ioc_gq *iocg, struct ioc_now *now)
+static void iocg_flush_stat_leaf(struct ioc_gq *iocg, struct ioc_now *now)
{
struct ioc *ioc = iocg->ioc;
- struct iocg_stat new_stat;
u64 abs_vusage = 0;
u64 vusage_delta;
int cpu;
@@ -1661,34 +1679,9 @@ static void iocg_flush_stat_one(struct ioc_gq *iocg, struct ioc_now *now)
iocg->last_stat_abs_vusage = abs_vusage;
iocg->usage_delta_us = div64_u64(vusage_delta, ioc->vtime_base_rate);
- iocg->local_stat.usage_us += iocg->usage_delta_us;
-
- /* propagate upwards */
- new_stat.usage_us =
- iocg->local_stat.usage_us + iocg->desc_stat.usage_us;
- new_stat.wait_us =
- iocg->local_stat.wait_us + iocg->desc_stat.wait_us;
- new_stat.indebt_us =
- iocg->local_stat.indebt_us + iocg->desc_stat.indebt_us;
- new_stat.indelay_us =
- iocg->local_stat.indelay_us + iocg->desc_stat.indelay_us;
-
- /* propagate the deltas to the parent */
- if (iocg->level > 0) {
- struct iocg_stat *parent_stat =
- &iocg->ancestors[iocg->level - 1]->desc_stat;
+ iocg->stat.usage_us += iocg->usage_delta_us;
- parent_stat->usage_us +=
- new_stat.usage_us - iocg->last_stat.usage_us;
- parent_stat->wait_us +=
- new_stat.wait_us - iocg->last_stat.wait_us;
- parent_stat->indebt_us +=
- new_stat.indebt_us - iocg->last_stat.indebt_us;
- parent_stat->indelay_us +=
- new_stat.indelay_us - iocg->last_stat.indelay_us;
- }
-
- iocg->last_stat = new_stat;
+ iocg_flush_stat_upward(iocg);
}
/* get stat counters ready for reading on all active iocgs */
@@ -1699,13 +1692,13 @@ static void iocg_flush_stat(struct list_head *target_iocgs, struct ioc_now *now)
/* flush leaves and build inner node walk list */
list_for_each_entry(iocg, target_iocgs, active_list) {
- iocg_flush_stat_one(iocg, now);
+ iocg_flush_stat_leaf(iocg, now);
iocg_build_inner_walk(iocg, &inner_walk);
}
/* keep flushing upwards by walking the inner list backwards */
list_for_each_entry_safe_reverse(iocg, tiocg, &inner_walk, walk_list) {
- iocg_flush_stat_one(iocg, now);
+ iocg_flush_stat_upward(iocg);
list_del_init(&iocg->walk_list);
}
}
@@ -2152,16 +2145,16 @@ static int ioc_check_iocgs(struct ioc *ioc, struct ioc_now *now)
/* flush wait and indebt stat deltas */
if (iocg->wait_since) {
- iocg->local_stat.wait_us += now->now - iocg->wait_since;
+ iocg->stat.wait_us += now->now - iocg->wait_since;
iocg->wait_since = now->now;
}
if (iocg->indebt_since) {
- iocg->local_stat.indebt_us +=
+ iocg->stat.indebt_us +=
now->now - iocg->indebt_since;
iocg->indebt_since = now->now;
}
if (iocg->indelay_since) {
- iocg->local_stat.indelay_us +=
+ iocg->stat.indelay_us +=
now->now - iocg->indelay_since;
iocg->indelay_since = now->now;
}
@@ -2322,7 +2315,17 @@ static void ioc_timer_fn(struct timer_list *timer)
iocg->hweight_donating = hwa;
iocg->hweight_after_donation = new_hwi;
list_add(&iocg->surplus_list, &surpluses);
- } else {
+ } else if (!iocg->abs_vdebt) {
+ /*
+ * @iocg doesn't have enough to donate. Reset
+ * its inuse to active.
+ *
+ * Don't reset debtors as their inuse's are
+ * owned by debt handling. This shouldn't affect
+ * donation calculuation in any meaningful way
+ * as @iocg doesn't have a meaningful amount of
+ * share anyway.
+ */
TRACE_IOCG_PATH(inuse_shortage, iocg, &now,
iocg->inuse, iocg->active,
iocg->hweight_inuse, new_hwi);
@@ -2995,13 +2998,13 @@ static void ioc_pd_free(struct blkg_policy_data *pd)
kfree(iocg);
}
-static bool ioc_pd_stat(struct blkg_policy_data *pd, struct seq_file *s)
+static void ioc_pd_stat(struct blkg_policy_data *pd, struct seq_file *s)
{
struct ioc_gq *iocg = pd_to_iocg(pd);
struct ioc *ioc = iocg->ioc;
if (!ioc->enabled)
- return false;
+ return;
if (iocg->level == 0) {
unsigned vp10k = DIV64_U64_ROUND_CLOSEST(
@@ -3017,7 +3020,6 @@ static bool ioc_pd_stat(struct blkg_policy_data *pd, struct seq_file *s)
iocg->last_stat.wait_us,
iocg->last_stat.indebt_us,
iocg->last_stat.indelay_us);
- return true;
}
static u64 ioc_weight_prfill(struct seq_file *sf, struct blkg_policy_data *pd,
diff --git a/block/blk-iolatency.c b/block/blk-iolatency.c
index 2f33932e72e3..5b676c7cf2b6 100644
--- a/block/blk-iolatency.c
+++ b/block/blk-iolatency.c
@@ -891,7 +891,7 @@ static int iolatency_print_limit(struct seq_file *sf, void *v)
return 0;
}
-static bool iolatency_ssd_stat(struct iolatency_grp *iolat, struct seq_file *s)
+static void iolatency_ssd_stat(struct iolatency_grp *iolat, struct seq_file *s)
{
struct latency_stat stat;
int cpu;
@@ -914,17 +914,16 @@ static bool iolatency_ssd_stat(struct iolatency_grp *iolat, struct seq_file *s)
(unsigned long long)stat.ps.missed,
(unsigned long long)stat.ps.total,
iolat->rq_depth.max_depth);
- return true;
}
-static bool iolatency_pd_stat(struct blkg_policy_data *pd, struct seq_file *s)
+static void iolatency_pd_stat(struct blkg_policy_data *pd, struct seq_file *s)
{
struct iolatency_grp *iolat = pd_to_lat(pd);
unsigned long long avg_lat;
unsigned long long cur_win;
if (!blkcg_debug_stats)
- return false;
+ return;
if (iolat->ssd)
return iolatency_ssd_stat(iolat, s);
@@ -937,7 +936,6 @@ static bool iolatency_pd_stat(struct blkg_policy_data *pd, struct seq_file *s)
else
seq_printf(s, " depth=%u avg_lat=%llu win=%llu",
iolat->rq_depth.max_depth, avg_lat, cur_win);
- return true;
}
static struct blkg_policy_data *iolatency_pd_alloc(gfp_t gfp,
diff --git a/block/blk-lib.c b/block/blk-lib.c
index 237d60d8b585..09b7e1200c0f 100644
--- a/block/blk-lib.c
+++ b/block/blk-lib.c
@@ -10,30 +10,44 @@
#include "blk.h"
+static sector_t bio_discard_limit(struct block_device *bdev, sector_t sector)
+{
+ unsigned int discard_granularity = bdev_discard_granularity(bdev);
+ sector_t granularity_aligned_sector;
+
+ if (bdev_is_partition(bdev))
+ sector += bdev->bd_start_sect;
+
+ granularity_aligned_sector =
+ round_up(sector, discard_granularity >> SECTOR_SHIFT);
+
+ /*
+ * Make sure subsequent bios start aligned to the discard granularity if
+ * it needs to be split.
+ */
+ if (granularity_aligned_sector != sector)
+ return granularity_aligned_sector - sector;
+
+ /*
+ * Align the bio size to the discard granularity to make splitting the bio
+ * at discard granularity boundaries easier in the driver if needed.
+ */
+ return round_down(UINT_MAX, discard_granularity) >> SECTOR_SHIFT;
+}
+
int __blkdev_issue_discard(struct block_device *bdev, sector_t sector,
- sector_t nr_sects, gfp_t gfp_mask, int flags,
- struct bio **biop)
+ sector_t nr_sects, gfp_t gfp_mask, struct bio **biop)
{
- struct request_queue *q = bdev_get_queue(bdev);
struct bio *bio = *biop;
- unsigned int op;
- sector_t bs_mask, part_offset = 0;
+ sector_t bs_mask;
if (bdev_read_only(bdev))
return -EPERM;
-
- if (flags & BLKDEV_DISCARD_SECURE) {
- if (!blk_queue_secure_erase(q))
- return -EOPNOTSUPP;
- op = REQ_OP_SECURE_ERASE;
- } else {
- if (!blk_queue_discard(q))
- return -EOPNOTSUPP;
- op = REQ_OP_DISCARD;
- }
+ if (!bdev_max_discard_sectors(bdev))
+ return -EOPNOTSUPP;
/* In case the discard granularity isn't set by buggy device driver */
- if (WARN_ON_ONCE(!q->limits.discard_granularity)) {
+ if (WARN_ON_ONCE(!bdev_discard_granularity(bdev))) {
char dev_name[BDEVNAME_SIZE];
bdevname(bdev, dev_name);
@@ -48,38 +62,11 @@ int __blkdev_issue_discard(struct block_device *bdev, sector_t sector,
if (!nr_sects)
return -EINVAL;
- /* In case the discard request is in a partition */
- if (bdev_is_partition(bdev))
- part_offset = bdev->bd_start_sect;
-
while (nr_sects) {
- sector_t granularity_aligned_lba, req_sects;
- sector_t sector_mapped = sector + part_offset;
-
- granularity_aligned_lba = round_up(sector_mapped,
- q->limits.discard_granularity >> SECTOR_SHIFT);
-
- /*
- * Check whether the discard bio starts at a discard_granularity
- * aligned LBA,
- * - If no: set (granularity_aligned_lba - sector_mapped) to
- * bi_size of the first split bio, then the second bio will
- * start at a discard_granularity aligned LBA on the device.
- * - If yes: use bio_aligned_discard_max_sectors() as the max
- * possible bi_size of the first split bio. Then when this bio
- * is split in device drive, the split ones are very probably
- * to be aligned to discard_granularity of the device's queue.
- */
- if (granularity_aligned_lba == sector_mapped)
- req_sects = min_t(sector_t, nr_sects,
- bio_aligned_discard_max_sectors(q));
- else
- req_sects = min_t(sector_t, nr_sects,
- granularity_aligned_lba - sector_mapped);
-
- WARN_ON_ONCE((req_sects << 9) > UINT_MAX);
+ sector_t req_sects =
+ min(nr_sects, bio_discard_limit(bdev, sector));
- bio = blk_next_bio(bio, bdev, 0, op, gfp_mask);
+ bio = blk_next_bio(bio, bdev, 0, REQ_OP_DISCARD, gfp_mask);
bio->bi_iter.bi_sector = sector;
bio->bi_iter.bi_size = req_sects << 9;
sector += req_sects;
@@ -105,21 +92,19 @@ EXPORT_SYMBOL(__blkdev_issue_discard);
* @sector: start sector
* @nr_sects: number of sectors to discard
* @gfp_mask: memory allocation flags (for bio_alloc)
- * @flags: BLKDEV_DISCARD_* flags to control behaviour
*
* Description:
* Issue a discard request for the sectors in question.
*/
int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
- sector_t nr_sects, gfp_t gfp_mask, unsigned long flags)
+ sector_t nr_sects, gfp_t gfp_mask)
{
struct bio *bio = NULL;
struct blk_plug plug;
int ret;
blk_start_plug(&plug);
- ret = __blkdev_issue_discard(bdev, sector, nr_sects, gfp_mask, flags,
- &bio);
+ ret = __blkdev_issue_discard(bdev, sector, nr_sects, gfp_mask, &bio);
if (!ret && bio) {
ret = submit_bio_wait(bio);
if (ret == -EOPNOTSUPP)
@@ -316,3 +301,42 @@ retry:
return ret;
}
EXPORT_SYMBOL(blkdev_issue_zeroout);
+
+int blkdev_issue_secure_erase(struct block_device *bdev, sector_t sector,
+ sector_t nr_sects, gfp_t gfp)
+{
+ sector_t bs_mask = (bdev_logical_block_size(bdev) >> 9) - 1;
+ unsigned int max_sectors = bdev_max_secure_erase_sectors(bdev);
+ struct bio *bio = NULL;
+ struct blk_plug plug;
+ int ret = 0;
+
+ if (max_sectors == 0)
+ return -EOPNOTSUPP;
+ if ((sector | nr_sects) & bs_mask)
+ return -EINVAL;
+ if (bdev_read_only(bdev))
+ return -EPERM;
+
+ blk_start_plug(&plug);
+ for (;;) {
+ unsigned int len = min_t(sector_t, nr_sects, max_sectors);
+
+ bio = blk_next_bio(bio, bdev, 0, REQ_OP_SECURE_ERASE, gfp);
+ bio->bi_iter.bi_sector = sector;
+ bio->bi_iter.bi_size = len;
+
+ sector += len << SECTOR_SHIFT;
+ nr_sects -= len << SECTOR_SHIFT;
+ if (!nr_sects) {
+ ret = submit_bio_wait(bio);
+ bio_put(bio);
+ break;
+ }
+ cond_resched();
+ }
+ blk_finish_plug(&plug);
+
+ return ret;
+}
+EXPORT_SYMBOL(blkdev_issue_secure_erase);
diff --git a/block/blk-map.c b/block/blk-map.c
index c7f71d83eff1..df8b066cd548 100644
--- a/block/blk-map.c
+++ b/block/blk-map.c
@@ -152,10 +152,10 @@ static int bio_copy_user_iov(struct request *rq, struct rq_map_data *map_data,
nr_pages = bio_max_segs(DIV_ROUND_UP(offset + len, PAGE_SIZE));
ret = -ENOMEM;
- bio = bio_kmalloc(gfp_mask, nr_pages);
+ bio = bio_kmalloc(nr_pages, gfp_mask);
if (!bio)
goto out_bmd;
- bio->bi_opf |= req_op(rq);
+ bio_init(bio, NULL, bio->bi_inline_vecs, nr_pages, req_op(rq));
if (map_data) {
nr_pages = 1 << map_data->page_order;
@@ -224,7 +224,8 @@ static int bio_copy_user_iov(struct request *rq, struct rq_map_data *map_data,
cleanup:
if (!map_data)
bio_free_pages(bio);
- bio_put(bio);
+ bio_uninit(bio);
+ kfree(bio);
out_bmd:
kfree(bmd);
return ret;
@@ -234,6 +235,7 @@ static int bio_map_user_iov(struct request *rq, struct iov_iter *iter,
gfp_t gfp_mask)
{
unsigned int max_sectors = queue_max_hw_sectors(rq->q);
+ unsigned int nr_vecs = iov_iter_npages(iter, BIO_MAX_VECS);
struct bio *bio;
int ret;
int j;
@@ -241,10 +243,10 @@ static int bio_map_user_iov(struct request *rq, struct iov_iter *iter,
if (!iov_iter_count(iter))
return -EINVAL;
- bio = bio_kmalloc(gfp_mask, iov_iter_npages(iter, BIO_MAX_VECS));
+ bio = bio_kmalloc(nr_vecs, gfp_mask);
if (!bio)
return -ENOMEM;
- bio->bi_opf |= req_op(rq);
+ bio_init(bio, NULL, bio->bi_inline_vecs, nr_vecs, req_op(rq));
while (iov_iter_count(iter)) {
struct page **pages;
@@ -260,10 +262,9 @@ static int bio_map_user_iov(struct request *rq, struct iov_iter *iter,
npages = DIV_ROUND_UP(offs + bytes, PAGE_SIZE);
- if (unlikely(offs & queue_dma_alignment(rq->q))) {
- ret = -EINVAL;
+ if (unlikely(offs & queue_dma_alignment(rq->q)))
j = 0;
- } else {
+ else {
for (j = 0; j < npages; j++) {
struct page *page = pages[j];
unsigned int n = PAGE_SIZE - offs;
@@ -303,7 +304,8 @@ static int bio_map_user_iov(struct request *rq, struct iov_iter *iter,
out_unmap:
bio_release_pages(bio, false);
- bio_put(bio);
+ bio_uninit(bio);
+ kfree(bio);
return ret;
}
@@ -323,7 +325,8 @@ static void bio_invalidate_vmalloc_pages(struct bio *bio)
static void bio_map_kern_endio(struct bio *bio)
{
bio_invalidate_vmalloc_pages(bio);
- bio_put(bio);
+ bio_uninit(bio);
+ kfree(bio);
}
/**
@@ -348,9 +351,10 @@ static struct bio *bio_map_kern(struct request_queue *q, void *data,
int offset, i;
struct bio *bio;
- bio = bio_kmalloc(gfp_mask, nr_pages);
+ bio = bio_kmalloc(nr_pages, gfp_mask);
if (!bio)
return ERR_PTR(-ENOMEM);
+ bio_init(bio, NULL, bio->bi_inline_vecs, nr_pages, 0);
if (is_vmalloc) {
flush_kernel_vmap_range(data, len);
@@ -374,7 +378,8 @@ static struct bio *bio_map_kern(struct request_queue *q, void *data,
if (bio_add_pc_page(q, bio, page, bytes,
offset) < bytes) {
/* we don't support partial mappings */
- bio_put(bio);
+ bio_uninit(bio);
+ kfree(bio);
return ERR_PTR(-EINVAL);
}
@@ -390,7 +395,8 @@ static struct bio *bio_map_kern(struct request_queue *q, void *data,
static void bio_copy_kern_endio(struct bio *bio)
{
bio_free_pages(bio);
- bio_put(bio);
+ bio_uninit(bio);
+ kfree(bio);
}
static void bio_copy_kern_endio_read(struct bio *bio)
@@ -435,9 +441,10 @@ static struct bio *bio_copy_kern(struct request_queue *q, void *data,
return ERR_PTR(-EINVAL);
nr_pages = end - start;
- bio = bio_kmalloc(gfp_mask, nr_pages);
+ bio = bio_kmalloc(nr_pages, gfp_mask);
if (!bio)
return ERR_PTR(-ENOMEM);
+ bio_init(bio, NULL, bio->bi_inline_vecs, nr_pages, 0);
while (len) {
struct page *page;
@@ -471,7 +478,8 @@ static struct bio *bio_copy_kern(struct request_queue *q, void *data,
cleanup:
bio_free_pages(bio);
- bio_put(bio);
+ bio_uninit(bio);
+ kfree(bio);
return ERR_PTR(-ENOMEM);
}
@@ -602,7 +610,8 @@ int blk_rq_unmap_user(struct bio *bio)
next_bio = bio;
bio = bio->bi_next;
- bio_put(next_bio);
+ bio_uninit(next_bio);
+ kfree(next_bio);
}
return ret;
@@ -648,8 +657,10 @@ int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf,
bio->bi_opf |= req_op(rq);
ret = blk_rq_append_bio(rq, bio);
- if (unlikely(ret))
- bio_put(bio);
+ if (unlikely(ret)) {
+ bio_uninit(bio);
+ kfree(bio);
+ }
return ret;
}
EXPORT_SYMBOL(blk_rq_map_kern);
diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c
index aa0349e9f083..7e4136a60e1c 100644
--- a/block/blk-mq-debugfs.c
+++ b/block/blk-mq-debugfs.c
@@ -113,10 +113,8 @@ static const char *const blk_queue_flag_name[] = {
QUEUE_FLAG_NAME(FAIL_IO),
QUEUE_FLAG_NAME(NONROT),
QUEUE_FLAG_NAME(IO_STAT),
- QUEUE_FLAG_NAME(DISCARD),
QUEUE_FLAG_NAME(NOXMERGES),
QUEUE_FLAG_NAME(ADD_RANDOM),
- QUEUE_FLAG_NAME(SECERASE),
QUEUE_FLAG_NAME(SAME_FORCE),
QUEUE_FLAG_NAME(DEAD),
QUEUE_FLAG_NAME(INIT_DONE),
diff --git a/block/blk-mq.c b/block/blk-mq.c
index c4370d276170..ae116b755648 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1083,7 +1083,7 @@ bool blk_mq_complete_request_remote(struct request *rq)
WRITE_ONCE(rq->state, MQ_RQ_COMPLETE);
/*
- * For a polled request, always complete locallly, it's pointless
+ * For a polled request, always complete locally, it's pointless
* to redirect the completion.
*/
if (rq->cmd_flags & REQ_POLLED)
@@ -1131,14 +1131,7 @@ void blk_mq_start_request(struct request *rq)
trace_block_rq_issue(rq);
if (test_bit(QUEUE_FLAG_STATS, &q->queue_flags)) {
- u64 start_time;
-#ifdef CONFIG_BLK_CGROUP
- if (rq->bio)
- start_time = bio_issue_time(&rq->bio->bi_issue);
- else
-#endif
- start_time = ktime_get_ns();
- rq->io_start_time_ns = start_time;
+ rq->io_start_time_ns = ktime_get_ns();
rq->stats_sectors = blk_rq_sectors(rq);
rq->rq_flags |= RQF_STATS;
rq_qos_issue(q, rq);
@@ -1176,6 +1169,62 @@ static void blk_end_sync_rq(struct request *rq, blk_status_t error)
complete(waiting);
}
+/*
+ * Allow 2x BLK_MAX_REQUEST_COUNT requests on plug queue for multiple
+ * queues. This is important for md arrays to benefit from merging
+ * requests.
+ */
+static inline unsigned short blk_plug_max_rq_count(struct blk_plug *plug)
+{
+ if (plug->multiple_queues)
+ return BLK_MAX_REQUEST_COUNT * 2;
+ return BLK_MAX_REQUEST_COUNT;
+}
+
+static void blk_add_rq_to_plug(struct blk_plug *plug, struct request *rq)
+{
+ struct request *last = rq_list_peek(&plug->mq_list);
+
+ if (!plug->rq_count) {
+ trace_block_plug(rq->q);
+ } else if (plug->rq_count >= blk_plug_max_rq_count(plug) ||
+ (!blk_queue_nomerges(rq->q) &&
+ blk_rq_bytes(last) >= BLK_PLUG_FLUSH_SIZE)) {
+ blk_mq_flush_plug_list(plug, false);
+ trace_block_plug(rq->q);
+ }
+
+ if (!plug->multiple_queues && last && last->q != rq->q)
+ plug->multiple_queues = true;
+ if (!plug->has_elevator && (rq->rq_flags & RQF_ELV))
+ plug->has_elevator = true;
+ rq->rq_next = NULL;
+ rq_list_add(&plug->mq_list, rq);
+ plug->rq_count++;
+}
+
+static void __blk_execute_rq_nowait(struct request *rq, bool at_head,
+ rq_end_io_fn *done, bool use_plug)
+{
+ WARN_ON(irqs_disabled());
+ WARN_ON(!blk_rq_is_passthrough(rq));
+
+ rq->end_io = done;
+
+ blk_account_io_start(rq);
+
+ if (use_plug && current->plug) {
+ blk_add_rq_to_plug(current->plug, rq);
+ return;
+ }
+ /*
+ * don't check dying flag for MQ because the request won't
+ * be reused after dying flag is set
+ */
+ blk_mq_sched_insert_request(rq, at_head, true, false);
+}
+
+
/**
* blk_execute_rq_nowait - insert a request to I/O scheduler for execution
* @rq: request to insert
@@ -1191,18 +1240,8 @@ static void blk_end_sync_rq(struct request *rq, blk_status_t error)
*/
void blk_execute_rq_nowait(struct request *rq, bool at_head, rq_end_io_fn *done)
{
- WARN_ON(irqs_disabled());
- WARN_ON(!blk_rq_is_passthrough(rq));
-
- rq->end_io = done;
+ __blk_execute_rq_nowait(rq, at_head, done, true);
- blk_account_io_start(rq);
-
- /*
- * don't check dying flag for MQ because the request won't
- * be reused after dying flag is set
- */
- blk_mq_sched_insert_request(rq, at_head, true, false);
}
EXPORT_SYMBOL_GPL(blk_execute_rq_nowait);
@@ -1240,8 +1279,13 @@ blk_status_t blk_execute_rq(struct request *rq, bool at_head)
DECLARE_COMPLETION_ONSTACK(wait);
unsigned long hang_check;
+ /*
+ * iopoll requires request to be submitted to driver, so can't
+ * use plug
+ */
rq->end_io_data = &wait;
- blk_execute_rq_nowait(rq, at_head, blk_end_sync_rq);
+ __blk_execute_rq_nowait(rq, at_head, blk_end_sync_rq,
+ !blk_rq_is_poll(rq));
/* Prevent hang_check timer from firing at us during very long I/O */
hang_check = sysctl_hung_task_timeout_secs;
@@ -2683,40 +2727,6 @@ void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx,
hctx->queue->mq_ops->commit_rqs(hctx);
}
-/*
- * Allow 2x BLK_MAX_REQUEST_COUNT requests on plug queue for multiple
- * queues. This is important for md arrays to benefit from merging
- * requests.
- */
-static inline unsigned short blk_plug_max_rq_count(struct blk_plug *plug)
-{
- if (plug->multiple_queues)
- return BLK_MAX_REQUEST_COUNT * 2;
- return BLK_MAX_REQUEST_COUNT;
-}
-
-static void blk_add_rq_to_plug(struct blk_plug *plug, struct request *rq)
-{
- struct request *last = rq_list_peek(&plug->mq_list);
-
- if (!plug->rq_count) {
- trace_block_plug(rq->q);
- } else if (plug->rq_count >= blk_plug_max_rq_count(plug) ||
- (!blk_queue_nomerges(rq->q) &&
- blk_rq_bytes(last) >= BLK_PLUG_FLUSH_SIZE)) {
- blk_mq_flush_plug_list(plug, false);
- trace_block_plug(rq->q);
- }
-
- if (!plug->multiple_queues && last && last->q != rq->q)
- plug->multiple_queues = true;
- if (!plug->has_elevator && (rq->rq_flags & RQF_ELV))
- plug->has_elevator = true;
- rq->rq_next = NULL;
- rq_list_add(&plug->mq_list, rq);
- plug->rq_count++;
-}
-
static bool blk_mq_attempt_bio_merge(struct request_queue *q,
struct bio *bio, unsigned int nr_segs)
{
diff --git a/block/blk-settings.c b/block/blk-settings.c
index b83df3d2eebc..6ccceb421ed2 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -46,6 +46,7 @@ void blk_set_default_limits(struct queue_limits *lim)
lim->max_zone_append_sectors = 0;
lim->max_discard_sectors = 0;
lim->max_hw_discard_sectors = 0;
+ lim->max_secure_erase_sectors = 0;
lim->discard_granularity = 0;
lim->discard_alignment = 0;
lim->discard_misaligned = 0;
@@ -177,6 +178,18 @@ void blk_queue_max_discard_sectors(struct request_queue *q,
EXPORT_SYMBOL(blk_queue_max_discard_sectors);
/**
+ * blk_queue_max_secure_erase_sectors - set max sectors for a secure erase
+ * @q: the request queue for the device
+ * @max_sectors: maximum number of sectors to secure_erase
+ **/
+void blk_queue_max_secure_erase_sectors(struct request_queue *q,
+ unsigned int max_sectors)
+{
+ q->limits.max_secure_erase_sectors = max_sectors;
+}
+EXPORT_SYMBOL(blk_queue_max_secure_erase_sectors);
+
+/**
* blk_queue_max_write_zeroes_sectors - set max sectors for a single
* write zeroes
* @q: the request queue for the device
@@ -468,6 +481,40 @@ void blk_queue_io_opt(struct request_queue *q, unsigned int opt)
}
EXPORT_SYMBOL(blk_queue_io_opt);
+static int queue_limit_alignment_offset(struct queue_limits *lim,
+ sector_t sector)
+{
+ unsigned int granularity = max(lim->physical_block_size, lim->io_min);
+ unsigned int alignment = sector_div(sector, granularity >> SECTOR_SHIFT)
+ << SECTOR_SHIFT;
+
+ return (granularity + lim->alignment_offset - alignment) % granularity;
+}
+
+static unsigned int queue_limit_discard_alignment(struct queue_limits *lim,
+ sector_t sector)
+{
+ unsigned int alignment, granularity, offset;
+
+ if (!lim->max_discard_sectors)
+ return 0;
+
+ /* Why are these in bytes, not sectors? */
+ alignment = lim->discard_alignment >> SECTOR_SHIFT;
+ granularity = lim->discard_granularity >> SECTOR_SHIFT;
+ if (!granularity)
+ return 0;
+
+ /* Offset of the partition start in 'granularity' sectors */
+ offset = sector_div(sector, granularity);
+
+ /* And why do we do this modulus *again* in blkdev_issue_discard()? */
+ offset = (granularity + alignment - offset) % granularity;
+
+ /* Turn it back into bytes, gaah */
+ return offset << SECTOR_SHIFT;
+}
+
static unsigned int blk_round_down_sectors(unsigned int sectors, unsigned int lbs)
{
sectors = round_down(sectors, lbs >> SECTOR_SHIFT);
@@ -627,7 +674,8 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
t->discard_alignment = lcm_not_zero(t->discard_alignment, alignment) %
t->discard_granularity;
}
-
+ t->max_secure_erase_sectors = min_not_zero(t->max_secure_erase_sectors,
+ b->max_secure_erase_sectors);
t->zone_write_granularity = max(t->zone_write_granularity,
b->zone_write_granularity);
t->zoned = max(t->zoned, b->zoned);
@@ -901,3 +949,27 @@ void blk_queue_set_zoned(struct gendisk *disk, enum blk_zoned_model model)
}
}
EXPORT_SYMBOL_GPL(blk_queue_set_zoned);
+
+int bdev_alignment_offset(struct block_device *bdev)
+{
+ struct request_queue *q = bdev_get_queue(bdev);
+
+ if (q->limits.misaligned)
+ return -1;
+ if (bdev_is_partition(bdev))
+ return queue_limit_alignment_offset(&q->limits,
+ bdev->bd_start_sect);
+ return q->limits.alignment_offset;
+}
+EXPORT_SYMBOL_GPL(bdev_alignment_offset);
+
+unsigned int bdev_discard_alignment(struct block_device *bdev)
+{
+ struct request_queue *q = bdev_get_queue(bdev);
+
+ if (bdev_is_partition(bdev))
+ return queue_limit_discard_alignment(&q->limits,
+ bdev->bd_start_sect);
+ return q->limits.discard_alignment;
+}
+EXPORT_SYMBOL_GPL(bdev_discard_alignment);
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index 469c483719be..139b2d7a99e2 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -227,7 +227,7 @@ static unsigned int tg_iops_limit(struct throtl_grp *tg, int rw)
break; \
if ((__tg)) { \
blk_add_cgroup_trace_msg(__td->queue, \
- tg_to_blkg(__tg)->blkcg, "throtl " fmt, ##args);\
+ &tg_to_blkg(__tg)->blkcg->css, "throtl " fmt, ##args);\
} else { \
blk_add_trace_msg(__td->queue, "throtl " fmt, ##args); \
} \
@@ -2189,13 +2189,14 @@ again:
}
out_unlock:
- spin_unlock_irq(&q->queue_lock);
bio_set_flag(bio, BIO_THROTTLED);
#ifdef CONFIG_BLK_DEV_THROTTLING_LOW
if (throttled || !td->track_bio_latency)
bio->bi_issue.value |= BIO_ISSUE_THROTL_SKIP_LATENCY;
#endif
+ spin_unlock_irq(&q->queue_lock);
+
rcu_read_unlock();
return throttled;
}
diff --git a/block/blk.h b/block/blk.h
index 8ccbc6e07636..434017701403 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -347,20 +347,6 @@ static inline unsigned int bio_allowed_max_sectors(struct request_queue *q)
}
/*
- * The max bio size which is aligned to q->limits.discard_granularity. This
- * is a hint to split large discard bio in generic block layer, then if device
- * driver needs to split the discard bio into smaller ones, their bi_size can
- * be very probably and easily aligned to discard_granularity of the device's
- * queue.
- */
-static inline unsigned int bio_aligned_discard_max_sectors(
- struct request_queue *q)
-{
- return round_down(UINT_MAX, q->limits.discard_granularity) >>
- SECTOR_SHIFT;
-}
-
-/*
* Internal io_context interface
*/
struct io_cq *ioc_find_get_icq(struct request_queue *q);
@@ -450,13 +436,6 @@ extern struct device_attribute dev_attr_events;
extern struct device_attribute dev_attr_events_async;
extern struct device_attribute dev_attr_events_poll_msecs;
-static inline void bio_clear_polled(struct bio *bio)
-{
- /* can't support alloc cache if we turn off polling */
- bio_clear_flag(bio, BIO_PERCPU_CACHE);
- bio->bi_opf &= ~REQ_POLLED;
-}
-
long blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg);
long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg);
diff --git a/block/bounce.c b/block/bounce.c
index 467be46d0e65..8f7b6fe3b4db 100644
--- a/block/bounce.c
+++ b/block/bounce.c
@@ -191,7 +191,6 @@ static struct bio *bounce_clone_bio(struct bio *bio_src)
goto err_put;
bio_clone_blkg_association(bio, bio_src);
- blkcg_bio_issue_init(bio);
return bio;
diff --git a/block/fops.c b/block/fops.c
index 9f2ecec406b0..b9b83030e0df 100644
--- a/block/fops.c
+++ b/block/fops.c
@@ -44,14 +44,6 @@ static unsigned int dio_bio_write_op(struct kiocb *iocb)
#define DIO_INLINE_BIO_VECS 4
-static void blkdev_bio_end_io_simple(struct bio *bio)
-{
- struct task_struct *waiter = bio->bi_private;
-
- WRITE_ONCE(bio->bi_private, NULL);
- blk_wake_io_task(waiter);
-}
-
static ssize_t __blkdev_direct_IO_simple(struct kiocb *iocb,
struct iov_iter *iter, unsigned int nr_pages)
{
@@ -83,8 +75,6 @@ static ssize_t __blkdev_direct_IO_simple(struct kiocb *iocb,
bio_init(&bio, bdev, vecs, nr_pages, dio_bio_write_op(iocb));
}
bio.bi_iter.bi_sector = pos >> SECTOR_SHIFT;
- bio.bi_private = current;
- bio.bi_end_io = blkdev_bio_end_io_simple;
bio.bi_ioprio = iocb->ki_ioprio;
ret = bio_iov_iter_get_pages(&bio, iter);
@@ -97,18 +87,8 @@ static ssize_t __blkdev_direct_IO_simple(struct kiocb *iocb,
if (iocb->ki_flags & IOCB_NOWAIT)
bio.bi_opf |= REQ_NOWAIT;
- if (iocb->ki_flags & IOCB_HIPRI)
- bio_set_polled(&bio, iocb);
- submit_bio(&bio);
- for (;;) {
- set_current_state(TASK_UNINTERRUPTIBLE);
- if (!READ_ONCE(bio.bi_private))
- break;
- if (!(iocb->ki_flags & IOCB_HIPRI) || !bio_poll(&bio, NULL, 0))
- blk_io_schedule();
- }
- __set_current_state(TASK_RUNNING);
+ submit_bio_wait(&bio);
bio_release_pages(&bio, should_dirty);
if (unlikely(bio.bi_status))
@@ -197,8 +177,10 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
(bdev_logical_block_size(bdev) - 1))
return -EINVAL;
- bio = bio_alloc_kiocb(iocb, bdev, nr_pages, opf, &blkdev_dio_pool);
-
+ if (iocb->ki_flags & IOCB_ALLOC_CACHE)
+ opf |= REQ_ALLOC_CACHE;
+ bio = bio_alloc_bioset(bdev, nr_pages, opf, GFP_KERNEL,
+ &blkdev_dio_pool);
dio = container_of(bio, struct blkdev_dio, bio);
atomic_set(&dio->ref, 1);
/*
@@ -320,7 +302,10 @@ static ssize_t __blkdev_direct_IO_async(struct kiocb *iocb,
(bdev_logical_block_size(bdev) - 1))
return -EINVAL;
- bio = bio_alloc_kiocb(iocb, bdev, nr_pages, opf, &blkdev_dio_pool);
+ if (iocb->ki_flags & IOCB_ALLOC_CACHE)
+ opf |= REQ_ALLOC_CACHE;
+ bio = bio_alloc_bioset(bdev, nr_pages, opf, GFP_KERNEL,
+ &blkdev_dio_pool);
dio = container_of(bio, struct blkdev_dio, bio);
dio->flags = 0;
dio->iocb = iocb;
@@ -672,7 +657,7 @@ static long blkdev_fallocate(struct file *file, int mode, loff_t start,
break;
case FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE | FALLOC_FL_NO_HIDE_STALE:
error = blkdev_issue_discard(bdev, start >> SECTOR_SHIFT,
- len >> SECTOR_SHIFT, GFP_KERNEL, 0);
+ len >> SECTOR_SHIFT, GFP_KERNEL);
break;
default:
error = -EOPNOTSUPP;
diff --git a/block/genhd.c b/block/genhd.c
index b8b6759d670f..36532b931841 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -1010,7 +1010,7 @@ static ssize_t disk_alignment_offset_show(struct device *dev,
{
struct gendisk *disk = dev_to_disk(dev);
- return sprintf(buf, "%d\n", queue_alignment_offset(disk->queue));
+ return sprintf(buf, "%d\n", bdev_alignment_offset(disk->part0));
}
static ssize_t disk_discard_alignment_show(struct device *dev,
@@ -1019,7 +1019,7 @@ static ssize_t disk_discard_alignment_show(struct device *dev,
{
struct gendisk *disk = dev_to_disk(dev);
- return sprintf(buf, "%d\n", queue_discard_alignment(disk->queue));
+ return sprintf(buf, "%d\n", bdev_alignment_offset(disk->part0));
}
static ssize_t diskseq_show(struct device *dev,
diff --git a/block/ioctl.c b/block/ioctl.c
index f8703db99c73..46949f1b0dba 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -83,18 +83,17 @@ static int compat_blkpg_ioctl(struct block_device *bdev,
#endif
static int blk_ioctl_discard(struct block_device *bdev, fmode_t mode,
- unsigned long arg, unsigned long flags)
+ unsigned long arg)
{
uint64_t range[2];
uint64_t start, len;
- struct request_queue *q = bdev_get_queue(bdev);
struct inode *inode = bdev->bd_inode;
int err;
if (!(mode & FMODE_WRITE))
return -EBADF;
- if (!blk_queue_discard(q))
+ if (!bdev_max_discard_sectors(bdev))
return -EOPNOTSUPP;
if (copy_from_user(range, (void __user *)arg, sizeof(range)))
@@ -115,15 +114,43 @@ static int blk_ioctl_discard(struct block_device *bdev, fmode_t mode,
err = truncate_bdev_range(bdev, mode, start, start + len - 1);
if (err)
goto fail;
-
- err = blkdev_issue_discard(bdev, start >> 9, len >> 9,
- GFP_KERNEL, flags);
-
+ err = blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_KERNEL);
fail:
filemap_invalidate_unlock(inode->i_mapping);
return err;
}
+static int blk_ioctl_secure_erase(struct block_device *bdev, fmode_t mode,
+ void __user *argp)
+{
+ uint64_t start, len;
+ uint64_t range[2];
+ int err;
+
+ if (!(mode & FMODE_WRITE))
+ return -EBADF;
+ if (!bdev_max_secure_erase_sectors(bdev))
+ return -EOPNOTSUPP;
+ if (copy_from_user(range, argp, sizeof(range)))
+ return -EFAULT;
+
+ start = range[0];
+ len = range[1];
+ if ((start & 511) || (len & 511))
+ return -EINVAL;
+ if (start + len > bdev_nr_bytes(bdev))
+ return -EINVAL;
+
+ filemap_invalidate_lock(bdev->bd_inode->i_mapping);
+ err = truncate_bdev_range(bdev, mode, start, start + len - 1);
+ if (!err)
+ err = blkdev_issue_secure_erase(bdev, start >> 9, len >> 9,
+ GFP_KERNEL);
+ filemap_invalidate_unlock(bdev->bd_inode->i_mapping);
+ return err;
+}
+
+
static int blk_ioctl_zeroout(struct block_device *bdev, fmode_t mode,
unsigned long arg)
{
@@ -451,10 +478,9 @@ static int blkdev_common_ioctl(struct block_device *bdev, fmode_t mode,
case BLKROSET:
return blkdev_roset(bdev, mode, cmd, arg);
case BLKDISCARD:
- return blk_ioctl_discard(bdev, mode, arg, 0);
+ return blk_ioctl_discard(bdev, mode, arg);
case BLKSECDISCARD:
- return blk_ioctl_discard(bdev, mode, arg,
- BLKDEV_DISCARD_SECURE);
+ return blk_ioctl_secure_erase(bdev, mode, argp);
case BLKZEROOUT:
return blk_ioctl_zeroout(bdev, mode, arg);
case BLKGETDISKSEQ:
@@ -489,7 +515,7 @@ static int blkdev_common_ioctl(struct block_device *bdev, fmode_t mode,
queue_max_sectors(bdev_get_queue(bdev)));
return put_ushort(argp, max_sectors);
case BLKROTATIONAL:
- return put_ushort(argp, !blk_queue_nonrot(bdev_get_queue(bdev)));
+ return put_ushort(argp, !bdev_nonrot(bdev));
case BLKRASET:
case BLKFRASET:
if(!capable(CAP_SYS_ADMIN))
diff --git a/block/mq-deadline.c b/block/mq-deadline.c
index 3ed5eaf3446a..6ed602b2f80a 100644
--- a/block/mq-deadline.c
+++ b/block/mq-deadline.c
@@ -742,6 +742,7 @@ static void dd_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
if (at_head) {
list_add(&rq->queuelist, &per_prio->dispatch);
+ rq->fifo_time = jiffies;
} else {
deadline_add_rq_rb(per_prio, rq);
diff --git a/block/partitions/acorn.c b/block/partitions/acorn.c
index 2c381c694c57..d2fc122d7426 100644
--- a/block/partitions/acorn.c
+++ b/block/partitions/acorn.c
@@ -282,13 +282,13 @@ int adfspart_check_ADFS(struct parsed_partitions *state)
#ifdef CONFIG_ACORN_PARTITION_RISCIX
case PARTITION_RISCIX_SCSI:
case PARTITION_RISCIX_MFM:
- slot = riscix_partition(state, start_sect, slot,
+ riscix_partition(state, start_sect, slot,
nr_sects);
break;
#endif
case PARTITION_LINUX:
- slot = linux_partition(state, start_sect, slot,
+ linux_partition(state, start_sect, slot,
nr_sects);
break;
}
diff --git a/block/partitions/atari.c b/block/partitions/atari.c
index da5994175416..9655c728262a 100644
--- a/block/partitions/atari.c
+++ b/block/partitions/atari.c
@@ -140,7 +140,6 @@ int atari_partition(struct parsed_partitions *state)
/* accept only GEM,BGM,RAW,LNX,SWP partitions */
if (!((pi->flg & 1) && OK_id(pi->id)))
continue;
- part_fmt = 2;
put_partition (state, slot,
be32_to_cpu(pi->st),
be32_to_cpu(pi->siz));
diff --git a/block/partitions/core.c b/block/partitions/core.c
index 2ef8dfa1e5c8..8a0ec929023b 100644
--- a/block/partitions/core.c
+++ b/block/partitions/core.c
@@ -200,21 +200,13 @@ static ssize_t part_ro_show(struct device *dev,
static ssize_t part_alignment_offset_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
- struct block_device *bdev = dev_to_bdev(dev);
-
- return sprintf(buf, "%u\n",
- queue_limit_alignment_offset(&bdev_get_queue(bdev)->limits,
- bdev->bd_start_sect));
+ return sprintf(buf, "%u\n", bdev_alignment_offset(dev_to_bdev(dev)));
}
static ssize_t part_discard_alignment_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
- struct block_device *bdev = dev_to_bdev(dev);
-
- return sprintf(buf, "%u\n",
- queue_limit_discard_alignment(&bdev_get_queue(bdev)->limits,
- bdev->bd_start_sect));
+ return sprintf(buf, "%u\n", bdev_discard_alignment(dev_to_bdev(dev)));
}
static DEVICE_ATTR(partition, 0444, part_partition_show, NULL);
@@ -486,7 +478,7 @@ int bdev_del_partition(struct gendisk *disk, int partno)
goto out_unlock;
ret = -EBUSY;
- if (part->bd_openers)
+ if (atomic_read(&part->bd_openers))
goto out_unlock;
delete_partition(part);
diff --git a/block/partitions/ldm.c b/block/partitions/ldm.c
index 27f6c7d9c776..38e58960ae03 100644
--- a/block/partitions/ldm.c
+++ b/block/partitions/ldm.c
@@ -736,7 +736,6 @@ static bool ldm_parse_cmp3 (const u8 *buffer, int buflen, struct vblk *vb)
len = r_cols;
} else {
r_stripe = 0;
- r_cols = 0;
len = r_parent;
}
if (len < 0)
@@ -783,11 +782,8 @@ static int ldm_parse_dgr3 (const u8 *buffer, int buflen, struct vblk *vb)
r_id1 = ldm_relative (buffer, buflen, 0x24, r_diskid);
r_id2 = ldm_relative (buffer, buflen, 0x24, r_id1);
len = r_id2;
- } else {
- r_id1 = 0;
- r_id2 = 0;
+ } else
len = r_diskid;
- }
if (len < 0)
return false;
@@ -826,11 +822,8 @@ static bool ldm_parse_dgr4 (const u8 *buffer, int buflen, struct vblk *vb)
r_id1 = ldm_relative (buffer, buflen, 0x44, r_name);
r_id2 = ldm_relative (buffer, buflen, 0x44, r_id1);
len = r_id2;
- } else {
- r_id1 = 0;
- r_id2 = 0;
+ } else
len = r_name;
- }
if (len < 0)
return false;
@@ -963,10 +956,8 @@ static bool ldm_parse_prt3(const u8 *buffer, int buflen, struct vblk *vb)
return false;
}
len = r_index;
- } else {
- r_index = 0;
+ } else
len = r_diskid;
- }
if (len < 0) {
ldm_error("len %d < 0", len);
return false;
diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c
index 4556c86c3465..eb95e188d62b 100644
--- a/drivers/acpi/processor_idle.c
+++ b/drivers/acpi/processor_idle.c
@@ -96,11 +96,6 @@ static const struct dmi_system_id processor_power_dmi_table[] = {
DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK Computer Inc."),
DMI_MATCH(DMI_PRODUCT_NAME,"L8400B series Notebook PC")},
(void *)1},
- /* T40 can not handle C3 idle state */
- { set_max_cstate, "IBM ThinkPad T40", {
- DMI_MATCH(DMI_SYS_VENDOR, "IBM"),
- DMI_MATCH(DMI_PRODUCT_NAME, "23737CU")},
- (void *)2},
{},
};
@@ -795,7 +790,8 @@ static int acpi_processor_setup_cstates(struct acpi_processor *pr)
if (cx->type == ACPI_STATE_C1 || cx->type == ACPI_STATE_C2 ||
cx->type == ACPI_STATE_C3) {
state->enter_dead = acpi_idle_play_dead;
- drv->safe_state_index = count;
+ if (cx->type != ACPI_STATE_C3)
+ drv->safe_state_index = count;
}
/*
* Halt-induced C1 is not good for ->enter_s2idle, because it
diff --git a/drivers/android/binder.c b/drivers/android/binder.c
index 8351c5638880..f3b639e89dd8 100644
--- a/drivers/android/binder.c
+++ b/drivers/android/binder.c
@@ -2295,6 +2295,7 @@ static int binder_do_deferred_txn_copies(struct binder_alloc *alloc,
{
int ret = 0;
struct binder_sg_copy *sgc, *tmpsgc;
+ struct binder_ptr_fixup *tmppf;
struct binder_ptr_fixup *pf =
list_first_entry_or_null(pf_head, struct binder_ptr_fixup,
node);
@@ -2349,7 +2350,11 @@ static int binder_do_deferred_txn_copies(struct binder_alloc *alloc,
list_del(&sgc->node);
kfree(sgc);
}
- BUG_ON(!list_empty(pf_head));
+ list_for_each_entry_safe(pf, tmppf, pf_head, node) {
+ BUG_ON(pf->skip_size == 0);
+ list_del(&pf->node);
+ kfree(pf);
+ }
BUG_ON(!list_empty(sgc_head));
return ret > 0 ? -EINVAL : ret;
@@ -2486,6 +2491,9 @@ static int binder_translate_fd_array(struct list_head *pf_head,
struct binder_proc *proc = thread->proc;
int ret;
+ if (fda->num_fds == 0)
+ return 0;
+
fd_buf_size = sizeof(u32) * fda->num_fds;
if (fda->num_fds >= SIZE_MAX / sizeof(u32)) {
binder_user_error("%d:%d got transaction with invalid number of fds (%lld)\n",
diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c
index 397dfd27c90d..c1eca72b4575 100644
--- a/drivers/ata/ahci.c
+++ b/drivers/ata/ahci.c
@@ -324,7 +324,6 @@ static const struct pci_device_id ahci_pci_tbl[] = {
{ PCI_VDEVICE(INTEL, 0x1d02), board_ahci }, /* PBG AHCI */
{ PCI_VDEVICE(INTEL, 0x1d04), board_ahci }, /* PBG RAID */
{ PCI_VDEVICE(INTEL, 0x1d06), board_ahci }, /* PBG RAID */
- { PCI_VDEVICE(INTEL, 0x2826), board_ahci }, /* PBG/Lewisburg RAID*/
{ PCI_VDEVICE(INTEL, 0x2323), board_ahci }, /* DH89xxCC AHCI */
{ PCI_VDEVICE(INTEL, 0x1e02), board_ahci }, /* Panther Point AHCI */
{ PCI_VDEVICE(INTEL, 0x1e03), board_ahci_low_power }, /* Panther M AHCI */
@@ -367,7 +366,9 @@ static const struct pci_device_id ahci_pci_tbl[] = {
{ PCI_VDEVICE(INTEL, 0x1f3e), board_ahci_avn }, /* Avoton RAID */
{ PCI_VDEVICE(INTEL, 0x1f3f), board_ahci_avn }, /* Avoton RAID */
{ PCI_VDEVICE(INTEL, 0x2823), board_ahci }, /* Wellsburg/Lewisburg AHCI*/
- { PCI_VDEVICE(INTEL, 0x2827), board_ahci }, /* Wellsburg/Lewisburg RAID*/
+ { PCI_VDEVICE(INTEL, 0x2826), board_ahci }, /* *burg SATA0 'RAID' */
+ { PCI_VDEVICE(INTEL, 0x2827), board_ahci }, /* *burg SATA1 'RAID' */
+ { PCI_VDEVICE(INTEL, 0x282f), board_ahci }, /* *burg SATA2 'RAID' */
{ PCI_VDEVICE(INTEL, 0x43d4), board_ahci }, /* Rocket Lake PCH-H RAID */
{ PCI_VDEVICE(INTEL, 0x43d5), board_ahci }, /* Rocket Lake PCH-H RAID */
{ PCI_VDEVICE(INTEL, 0x43d6), board_ahci }, /* Rocket Lake PCH-H RAID */
diff --git a/drivers/ata/ahci_brcm.c b/drivers/ata/ahci_brcm.c
index ab8552b1ff2a..f61795c546cf 100644
--- a/drivers/ata/ahci_brcm.c
+++ b/drivers/ata/ahci_brcm.c
@@ -549,15 +549,10 @@ static int brcm_ahci_remove(struct platform_device *pdev)
struct ata_host *host = dev_get_drvdata(&pdev->dev);
struct ahci_host_priv *hpriv = host->private_data;
struct brcm_ahci_priv *priv = hpriv->plat_data;
- int ret;
brcm_sata_phys_disable(priv);
- ret = ata_platform_remove_one(pdev);
- if (ret)
- return ret;
-
- return 0;
+ return ata_platform_remove_one(pdev);
}
static void brcm_ahci_shutdown(struct platform_device *pdev)
diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index ca64837641be..40e816419f48 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -96,7 +96,8 @@ struct ata_force_param {
unsigned long xfer_mask;
unsigned int horkage_on;
unsigned int horkage_off;
- u16 lflags;
+ u16 lflags_on;
+ u16 lflags_off;
};
struct ata_force_ent {
@@ -386,11 +387,17 @@ static void ata_force_link_limits(struct ata_link *link)
}
/* let lflags stack */
- if (fe->param.lflags) {
- link->flags |= fe->param.lflags;
+ if (fe->param.lflags_on) {
+ link->flags |= fe->param.lflags_on;
ata_link_notice(link,
"FORCE: link flag 0x%x forced -> 0x%x\n",
- fe->param.lflags, link->flags);
+ fe->param.lflags_on, link->flags);
+ }
+ if (fe->param.lflags_off) {
+ link->flags &= ~fe->param.lflags_off;
+ ata_link_notice(link,
+ "FORCE: link flag 0x%x cleared -> 0x%x\n",
+ fe->param.lflags_off, link->flags);
}
}
}
@@ -898,7 +905,7 @@ EXPORT_SYMBOL_GPL(ata_xfer_mode2mask);
* RETURNS:
* Matching xfer_shift, -1 if no match found.
*/
-int ata_xfer_mode2shift(unsigned long xfer_mode)
+int ata_xfer_mode2shift(u8 xfer_mode)
{
const struct ata_xfer_ent *ent;
@@ -1398,7 +1405,7 @@ unsigned long ata_id_xfermask(const u16 *id)
/* But wait.. there's more. Design your standards by
* committee and you too can get a free iordy field to
- * process. However its the speeds not the modes that
+ * process. However it is the speeds not the modes that
* are supported... Note drivers using the timing API
* will get this right anyway
*/
@@ -3898,7 +3905,7 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = {
/* Devices where NCQ should be avoided */
/* NCQ is slow */
{ "WDC WD740ADFD-00", NULL, ATA_HORKAGE_NONCQ },
- { "WDC WD740ADFD-00NLR1", NULL, ATA_HORKAGE_NONCQ, },
+ { "WDC WD740ADFD-00NLR1", NULL, ATA_HORKAGE_NONCQ },
/* http://thread.gmane.org/gmane.linux.ide/14907 */
{ "FUJITSU MHT2060BH", NULL, ATA_HORKAGE_NONCQ },
/* NCQ is broken */
@@ -3924,23 +3931,23 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = {
/* drives which fail FPDMA_AA activation (some may freeze afterwards)
the ST disks also have LPM issues */
{ "ST1000LM024 HN-M101MBB", NULL, ATA_HORKAGE_BROKEN_FPDMA_AA |
- ATA_HORKAGE_NOLPM, },
+ ATA_HORKAGE_NOLPM },
{ "VB0250EAVER", "HPG7", ATA_HORKAGE_BROKEN_FPDMA_AA },
/* Blacklist entries taken from Silicon Image 3124/3132
Windows driver .inf file - also several Linux problem reports */
- { "HTS541060G9SA00", "MB3OC60D", ATA_HORKAGE_NONCQ, },
- { "HTS541080G9SA00", "MB4OC60D", ATA_HORKAGE_NONCQ, },
- { "HTS541010G9SA00", "MBZOC60D", ATA_HORKAGE_NONCQ, },
+ { "HTS541060G9SA00", "MB3OC60D", ATA_HORKAGE_NONCQ },
+ { "HTS541080G9SA00", "MB4OC60D", ATA_HORKAGE_NONCQ },
+ { "HTS541010G9SA00", "MBZOC60D", ATA_HORKAGE_NONCQ },
/* https://bugzilla.kernel.org/show_bug.cgi?id=15573 */
- { "C300-CTFDDAC128MAG", "0001", ATA_HORKAGE_NONCQ, },
+ { "C300-CTFDDAC128MAG", "0001", ATA_HORKAGE_NONCQ },
/* Sandisk SD7/8/9s lock up hard on large trims */
- { "SanDisk SD[789]*", NULL, ATA_HORKAGE_MAX_TRIM_128M, },
+ { "SanDisk SD[789]*", NULL, ATA_HORKAGE_MAX_TRIM_128M },
/* devices which puke on READ_NATIVE_MAX */
- { "HDS724040KLSA80", "KFAOA20N", ATA_HORKAGE_BROKEN_HPA, },
+ { "HDS724040KLSA80", "KFAOA20N", ATA_HORKAGE_BROKEN_HPA },
{ "WDC WD3200JD-00KLB0", "WD-WCAMR1130137", ATA_HORKAGE_BROKEN_HPA },
{ "WDC WD2500JD-00HBB0", "WD-WMAL71490727", ATA_HORKAGE_BROKEN_HPA },
{ "MAXTOR 6L080L4", "A93.0500", ATA_HORKAGE_BROKEN_HPA },
@@ -3949,22 +3956,22 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = {
{ "OCZ-VERTEX", "1.30", ATA_HORKAGE_BROKEN_HPA },
/* Devices which report 1 sector over size HPA */
- { "ST340823A", NULL, ATA_HORKAGE_HPA_SIZE, },
- { "ST320413A", NULL, ATA_HORKAGE_HPA_SIZE, },
- { "ST310211A", NULL, ATA_HORKAGE_HPA_SIZE, },
+ { "ST340823A", NULL, ATA_HORKAGE_HPA_SIZE },
+ { "ST320413A", NULL, ATA_HORKAGE_HPA_SIZE },
+ { "ST310211A", NULL, ATA_HORKAGE_HPA_SIZE },
/* Devices which get the IVB wrong */
- { "QUANTUM FIREBALLlct10 05", "A03.0900", ATA_HORKAGE_IVB, },
+ { "QUANTUM FIREBALLlct10 05", "A03.0900", ATA_HORKAGE_IVB },
/* Maybe we should just blacklist TSSTcorp... */
- { "TSSTcorp CDDVDW SH-S202[HJN]", "SB0[01]", ATA_HORKAGE_IVB, },
+ { "TSSTcorp CDDVDW SH-S202[HJN]", "SB0[01]", ATA_HORKAGE_IVB },
/* Devices that do not need bridging limits applied */
- { "MTRON MSP-SATA*", NULL, ATA_HORKAGE_BRIDGE_OK, },
- { "BUFFALO HD-QSU2/R5", NULL, ATA_HORKAGE_BRIDGE_OK, },
+ { "MTRON MSP-SATA*", NULL, ATA_HORKAGE_BRIDGE_OK },
+ { "BUFFALO HD-QSU2/R5", NULL, ATA_HORKAGE_BRIDGE_OK },
/* Devices which aren't very happy with higher link speeds */
- { "WD My Book", NULL, ATA_HORKAGE_1_5_GBPS, },
- { "Seagate FreeAgent GoFlex", NULL, ATA_HORKAGE_1_5_GBPS, },
+ { "WD My Book", NULL, ATA_HORKAGE_1_5_GBPS },
+ { "Seagate FreeAgent GoFlex", NULL, ATA_HORKAGE_1_5_GBPS },
/*
* Devices which choke on SETXFER. Applies only if both the
@@ -3982,57 +3989,57 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = {
/* 512GB MX100 with MU01 firmware has both queued TRIM and LPM issues */
{ "Crucial_CT512MX100*", "MU01", ATA_HORKAGE_NO_NCQ_TRIM |
ATA_HORKAGE_ZERO_AFTER_TRIM |
- ATA_HORKAGE_NOLPM, },
+ ATA_HORKAGE_NOLPM },
/* 512GB MX100 with newer firmware has only LPM issues */
{ "Crucial_CT512MX100*", NULL, ATA_HORKAGE_ZERO_AFTER_TRIM |
- ATA_HORKAGE_NOLPM, },
+ ATA_HORKAGE_NOLPM },
/* 480GB+ M500 SSDs have both queued TRIM and LPM issues */
{ "Crucial_CT480M500*", NULL, ATA_HORKAGE_NO_NCQ_TRIM |
ATA_HORKAGE_ZERO_AFTER_TRIM |
- ATA_HORKAGE_NOLPM, },
+ ATA_HORKAGE_NOLPM },
{ "Crucial_CT960M500*", NULL, ATA_HORKAGE_NO_NCQ_TRIM |
ATA_HORKAGE_ZERO_AFTER_TRIM |
- ATA_HORKAGE_NOLPM, },
+ ATA_HORKAGE_NOLPM },
/* These specific Samsung models/firmware-revs do not handle LPM well */
- { "SAMSUNG MZMPC128HBFU-000MV", "CXM14M1Q", ATA_HORKAGE_NOLPM, },
- { "SAMSUNG SSD PM830 mSATA *", "CXM13D1Q", ATA_HORKAGE_NOLPM, },
- { "SAMSUNG MZ7TD256HAFV-000L9", NULL, ATA_HORKAGE_NOLPM, },
- { "SAMSUNG MZ7TE512HMHP-000L1", "EXT06L0Q", ATA_HORKAGE_NOLPM, },
+ { "SAMSUNG MZMPC128HBFU-000MV", "CXM14M1Q", ATA_HORKAGE_NOLPM },
+ { "SAMSUNG SSD PM830 mSATA *", "CXM13D1Q", ATA_HORKAGE_NOLPM },
+ { "SAMSUNG MZ7TD256HAFV-000L9", NULL, ATA_HORKAGE_NOLPM },
+ { "SAMSUNG MZ7TE512HMHP-000L1", "EXT06L0Q", ATA_HORKAGE_NOLPM },
/* devices that don't properly handle queued TRIM commands */
{ "Micron_M500IT_*", "MU01", ATA_HORKAGE_NO_NCQ_TRIM |
- ATA_HORKAGE_ZERO_AFTER_TRIM, },
+ ATA_HORKAGE_ZERO_AFTER_TRIM },
{ "Micron_M500_*", NULL, ATA_HORKAGE_NO_NCQ_TRIM |
- ATA_HORKAGE_ZERO_AFTER_TRIM, },
+ ATA_HORKAGE_ZERO_AFTER_TRIM },
{ "Crucial_CT*M500*", NULL, ATA_HORKAGE_NO_NCQ_TRIM |
- ATA_HORKAGE_ZERO_AFTER_TRIM, },
+ ATA_HORKAGE_ZERO_AFTER_TRIM },
{ "Micron_M5[15]0_*", "MU01", ATA_HORKAGE_NO_NCQ_TRIM |
- ATA_HORKAGE_ZERO_AFTER_TRIM, },
+ ATA_HORKAGE_ZERO_AFTER_TRIM },
{ "Crucial_CT*M550*", "MU01", ATA_HORKAGE_NO_NCQ_TRIM |
- ATA_HORKAGE_ZERO_AFTER_TRIM, },
+ ATA_HORKAGE_ZERO_AFTER_TRIM },
{ "Crucial_CT*MX100*", "MU01", ATA_HORKAGE_NO_NCQ_TRIM |
- ATA_HORKAGE_ZERO_AFTER_TRIM, },
+ ATA_HORKAGE_ZERO_AFTER_TRIM },
{ "Samsung SSD 840 EVO*", NULL, ATA_HORKAGE_NO_NCQ_TRIM |
ATA_HORKAGE_NO_DMA_LOG |
- ATA_HORKAGE_ZERO_AFTER_TRIM, },
+ ATA_HORKAGE_ZERO_AFTER_TRIM },
{ "Samsung SSD 840*", NULL, ATA_HORKAGE_NO_NCQ_TRIM |
- ATA_HORKAGE_ZERO_AFTER_TRIM, },
+ ATA_HORKAGE_ZERO_AFTER_TRIM },
{ "Samsung SSD 850*", NULL, ATA_HORKAGE_NO_NCQ_TRIM |
- ATA_HORKAGE_ZERO_AFTER_TRIM, },
+ ATA_HORKAGE_ZERO_AFTER_TRIM },
{ "Samsung SSD 860*", NULL, ATA_HORKAGE_NO_NCQ_TRIM |
ATA_HORKAGE_ZERO_AFTER_TRIM |
- ATA_HORKAGE_NO_NCQ_ON_ATI, },
+ ATA_HORKAGE_NO_NCQ_ON_ATI },
{ "Samsung SSD 870*", NULL, ATA_HORKAGE_NO_NCQ_TRIM |
ATA_HORKAGE_ZERO_AFTER_TRIM |
- ATA_HORKAGE_NO_NCQ_ON_ATI, },
+ ATA_HORKAGE_NO_NCQ_ON_ATI },
{ "FCCT*M500*", NULL, ATA_HORKAGE_NO_NCQ_TRIM |
- ATA_HORKAGE_ZERO_AFTER_TRIM, },
+ ATA_HORKAGE_ZERO_AFTER_TRIM },
/* devices that don't properly handle TRIM commands */
- { "SuperSSpeed S238*", NULL, ATA_HORKAGE_NOTRIM, },
- { "M88V29*", NULL, ATA_HORKAGE_NOTRIM, },
+ { "SuperSSpeed S238*", NULL, ATA_HORKAGE_NOTRIM },
+ { "M88V29*", NULL, ATA_HORKAGE_NOTRIM },
/*
* As defined, the DRAT (Deterministic Read After Trim) and RZAT
@@ -4050,16 +4057,16 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = {
* The intel 510 drive has buggy DRAT/RZAT. Explicitly exclude
* that model before whitelisting all other intel SSDs.
*/
- { "INTEL*SSDSC2MH*", NULL, 0, },
+ { "INTEL*SSDSC2MH*", NULL, 0 },
- { "Micron*", NULL, ATA_HORKAGE_ZERO_AFTER_TRIM, },
- { "Crucial*", NULL, ATA_HORKAGE_ZERO_AFTER_TRIM, },
- { "INTEL*SSD*", NULL, ATA_HORKAGE_ZERO_AFTER_TRIM, },
- { "SSD*INTEL*", NULL, ATA_HORKAGE_ZERO_AFTER_TRIM, },
- { "Samsung*SSD*", NULL, ATA_HORKAGE_ZERO_AFTER_TRIM, },
- { "SAMSUNG*SSD*", NULL, ATA_HORKAGE_ZERO_AFTER_TRIM, },
- { "SAMSUNG*MZ7KM*", NULL, ATA_HORKAGE_ZERO_AFTER_TRIM, },
- { "ST[1248][0248]0[FH]*", NULL, ATA_HORKAGE_ZERO_AFTER_TRIM, },
+ { "Micron*", NULL, ATA_HORKAGE_ZERO_AFTER_TRIM },
+ { "Crucial*", NULL, ATA_HORKAGE_ZERO_AFTER_TRIM },
+ { "INTEL*SSD*", NULL, ATA_HORKAGE_ZERO_AFTER_TRIM },
+ { "SSD*INTEL*", NULL, ATA_HORKAGE_ZERO_AFTER_TRIM },
+ { "Samsung*SSD*", NULL, ATA_HORKAGE_ZERO_AFTER_TRIM },
+ { "SAMSUNG*SSD*", NULL, ATA_HORKAGE_ZERO_AFTER_TRIM },
+ { "SAMSUNG*MZ7KM*", NULL, ATA_HORKAGE_ZERO_AFTER_TRIM },
+ { "ST[1248][0248]0[FH]*", NULL, ATA_HORKAGE_ZERO_AFTER_TRIM },
/*
* Some WD SATA-I drives spin up and down erratically when the link
@@ -4567,42 +4574,6 @@ void swap_buf_le16(u16 *buf, unsigned int buf_words)
}
/**
- * ata_qc_new_init - Request an available ATA command, and initialize it
- * @dev: Device from whom we request an available command structure
- * @tag: tag
- *
- * LOCKING:
- * None.
- */
-
-struct ata_queued_cmd *ata_qc_new_init(struct ata_device *dev, int tag)
-{
- struct ata_port *ap = dev->link->ap;
- struct ata_queued_cmd *qc;
-
- /* no command while frozen */
- if (unlikely(ap->pflags & ATA_PFLAG_FROZEN))
- return NULL;
-
- /* libsas case */
- if (ap->flags & ATA_FLAG_SAS_HOST) {
- tag = ata_sas_allocate_tag(ap);
- if (tag < 0)
- return NULL;
- }
-
- qc = __ata_qc_from_tag(ap, tag);
- qc->tag = qc->hw_tag = tag;
- qc->scsicmd = NULL;
- qc->ap = ap;
- qc->dev = dev;
-
- ata_qc_reinit(qc);
-
- return qc;
-}
-
-/**
* ata_qc_free - free unused ata_queued_cmd
* @qc: Command to complete
*
@@ -4614,19 +4585,9 @@ struct ata_queued_cmd *ata_qc_new_init(struct ata_device *dev, int tag)
*/
void ata_qc_free(struct ata_queued_cmd *qc)
{
- struct ata_port *ap;
- unsigned int tag;
-
- WARN_ON_ONCE(qc == NULL); /* ata_qc_from_tag _might_ return NULL */
- ap = qc->ap;
-
qc->flags = 0;
- tag = qc->tag;
- if (ata_tag_valid(tag)) {
+ if (ata_tag_valid(qc->tag))
qc->tag = ATA_TAG_POISON;
- if (ap->flags & ATA_FLAG_SAS_HOST)
- ata_sas_free_tag(tag, ap);
- }
}
void __ata_qc_complete(struct ata_queued_cmd *qc)
@@ -5605,7 +5566,7 @@ static void ata_finalize_port_ops(struct ata_port_operations *ops)
* Start and then freeze ports of @host. Started status is
* recorded in host->flags, so this function can be called
* multiple times. Ports are guaranteed to get started only
- * once. If host->ops isn't initialized yet, its set to the
+ * once. If host->ops is not initialized yet, it is set to the
* first non-dummy port ops.
*
* LOCKING:
@@ -6146,67 +6107,113 @@ int ata_platform_remove_one(struct platform_device *pdev)
EXPORT_SYMBOL_GPL(ata_platform_remove_one);
#ifdef CONFIG_ATA_FORCE
+
+#define force_cbl(name, flag) \
+ { #name, .cbl = (flag) }
+
+#define force_spd_limit(spd, val) \
+ { #spd, .spd_limit = (val) }
+
+#define force_xfer(mode, shift) \
+ { #mode, .xfer_mask = (1UL << (shift)) }
+
+#define force_lflag_on(name, flags) \
+ { #name, .lflags_on = (flags) }
+
+#define force_lflag_onoff(name, flags) \
+ { "no" #name, .lflags_on = (flags) }, \
+ { #name, .lflags_off = (flags) }
+
+#define force_horkage_on(name, flag) \
+ { #name, .horkage_on = (flag) }
+
+#define force_horkage_onoff(name, flag) \
+ { "no" #name, .horkage_on = (flag) }, \
+ { #name, .horkage_off = (flag) }
+
+static const struct ata_force_param force_tbl[] __initconst = {
+ force_cbl(40c, ATA_CBL_PATA40),
+ force_cbl(80c, ATA_CBL_PATA80),
+ force_cbl(short40c, ATA_CBL_PATA40_SHORT),
+ force_cbl(unk, ATA_CBL_PATA_UNK),
+ force_cbl(ign, ATA_CBL_PATA_IGN),
+ force_cbl(sata, ATA_CBL_SATA),
+
+ force_spd_limit(1.5Gbps, 1),
+ force_spd_limit(3.0Gbps, 2),
+
+ force_xfer(pio0, ATA_SHIFT_PIO + 0),
+ force_xfer(pio1, ATA_SHIFT_PIO + 1),
+ force_xfer(pio2, ATA_SHIFT_PIO + 2),
+ force_xfer(pio3, ATA_SHIFT_PIO + 3),
+ force_xfer(pio4, ATA_SHIFT_PIO + 4),
+ force_xfer(pio5, ATA_SHIFT_PIO + 5),
+ force_xfer(pio6, ATA_SHIFT_PIO + 6),
+ force_xfer(mwdma0, ATA_SHIFT_MWDMA + 0),
+ force_xfer(mwdma1, ATA_SHIFT_MWDMA + 1),
+ force_xfer(mwdma2, ATA_SHIFT_MWDMA + 2),
+ force_xfer(mwdma3, ATA_SHIFT_MWDMA + 3),
+ force_xfer(mwdma4, ATA_SHIFT_MWDMA + 4),
+ force_xfer(udma0, ATA_SHIFT_UDMA + 0),
+ force_xfer(udma16, ATA_SHIFT_UDMA + 0),
+ force_xfer(udma/16, ATA_SHIFT_UDMA + 0),
+ force_xfer(udma1, ATA_SHIFT_UDMA + 1),
+ force_xfer(udma25, ATA_SHIFT_UDMA + 1),
+ force_xfer(udma/25, ATA_SHIFT_UDMA + 1),
+ force_xfer(udma2, ATA_SHIFT_UDMA + 2),
+ force_xfer(udma33, ATA_SHIFT_UDMA + 2),
+ force_xfer(udma/33, ATA_SHIFT_UDMA + 2),
+ force_xfer(udma3, ATA_SHIFT_UDMA + 3),
+ force_xfer(udma44, ATA_SHIFT_UDMA + 3),
+ force_xfer(udma/44, ATA_SHIFT_UDMA + 3),
+ force_xfer(udma4, ATA_SHIFT_UDMA + 4),
+ force_xfer(udma66, ATA_SHIFT_UDMA + 4),
+ force_xfer(udma/66, ATA_SHIFT_UDMA + 4),
+ force_xfer(udma5, ATA_SHIFT_UDMA + 5),
+ force_xfer(udma100, ATA_SHIFT_UDMA + 5),
+ force_xfer(udma/100, ATA_SHIFT_UDMA + 5),
+ force_xfer(udma6, ATA_SHIFT_UDMA + 6),
+ force_xfer(udma133, ATA_SHIFT_UDMA + 6),
+ force_xfer(udma/133, ATA_SHIFT_UDMA + 6),
+ force_xfer(udma7, ATA_SHIFT_UDMA + 7),
+
+ force_lflag_on(nohrst, ATA_LFLAG_NO_HRST),
+ force_lflag_on(nosrst, ATA_LFLAG_NO_SRST),
+ force_lflag_on(norst, ATA_LFLAG_NO_HRST | ATA_LFLAG_NO_SRST),
+ force_lflag_on(rstonce, ATA_LFLAG_RST_ONCE),
+ force_lflag_onoff(dbdelay, ATA_LFLAG_NO_DEBOUNCE_DELAY),
+
+ force_horkage_onoff(ncq, ATA_HORKAGE_NONCQ),
+ force_horkage_onoff(ncqtrim, ATA_HORKAGE_NO_NCQ_TRIM),
+ force_horkage_onoff(ncqati, ATA_HORKAGE_NO_NCQ_ON_ATI),
+
+ force_horkage_onoff(trim, ATA_HORKAGE_NOTRIM),
+ force_horkage_on(trim_zero, ATA_HORKAGE_ZERO_AFTER_TRIM),
+ force_horkage_on(max_trim_128m, ATA_HORKAGE_MAX_TRIM_128M),
+
+ force_horkage_onoff(dma, ATA_HORKAGE_NODMA),
+ force_horkage_on(atapi_dmadir, ATA_HORKAGE_ATAPI_DMADIR),
+ force_horkage_on(atapi_mod16_dma, ATA_HORKAGE_ATAPI_MOD16_DMA),
+
+ force_horkage_onoff(dmalog, ATA_HORKAGE_NO_DMA_LOG),
+ force_horkage_onoff(iddevlog, ATA_HORKAGE_NO_ID_DEV_LOG),
+ force_horkage_onoff(logdir, ATA_HORKAGE_NO_LOG_DIR),
+
+ force_horkage_on(max_sec_128, ATA_HORKAGE_MAX_SEC_128),
+ force_horkage_on(max_sec_1024, ATA_HORKAGE_MAX_SEC_1024),
+ force_horkage_on(max_sec_lba48, ATA_HORKAGE_MAX_SEC_LBA48),
+
+ force_horkage_onoff(lpm, ATA_HORKAGE_NOLPM),
+ force_horkage_onoff(setxfer, ATA_HORKAGE_NOSETXFER),
+ force_horkage_on(dump_id, ATA_HORKAGE_DUMP_ID),
+
+ force_horkage_on(disable, ATA_HORKAGE_DISABLE),
+};
+
static int __init ata_parse_force_one(char **cur,
struct ata_force_ent *force_ent,
const char **reason)
{
- static const struct ata_force_param force_tbl[] __initconst = {
- { "40c", .cbl = ATA_CBL_PATA40 },
- { "80c", .cbl = ATA_CBL_PATA80 },
- { "short40c", .cbl = ATA_CBL_PATA40_SHORT },
- { "unk", .cbl = ATA_CBL_PATA_UNK },
- { "ign", .cbl = ATA_CBL_PATA_IGN },
- { "sata", .cbl = ATA_CBL_SATA },
- { "1.5Gbps", .spd_limit = 1 },
- { "3.0Gbps", .spd_limit = 2 },
- { "noncq", .horkage_on = ATA_HORKAGE_NONCQ },
- { "ncq", .horkage_off = ATA_HORKAGE_NONCQ },
- { "noncqtrim", .horkage_on = ATA_HORKAGE_NO_NCQ_TRIM },
- { "ncqtrim", .horkage_off = ATA_HORKAGE_NO_NCQ_TRIM },
- { "noncqati", .horkage_on = ATA_HORKAGE_NO_NCQ_ON_ATI },
- { "ncqati", .horkage_off = ATA_HORKAGE_NO_NCQ_ON_ATI },
- { "dump_id", .horkage_on = ATA_HORKAGE_DUMP_ID },
- { "pio0", .xfer_mask = 1 << (ATA_SHIFT_PIO + 0) },
- { "pio1", .xfer_mask = 1 << (ATA_SHIFT_PIO + 1) },
- { "pio2", .xfer_mask = 1 << (ATA_SHIFT_PIO + 2) },
- { "pio3", .xfer_mask = 1 << (ATA_SHIFT_PIO + 3) },
- { "pio4", .xfer_mask = 1 << (ATA_SHIFT_PIO + 4) },
- { "pio5", .xfer_mask = 1 << (ATA_SHIFT_PIO + 5) },
- { "pio6", .xfer_mask = 1 << (ATA_SHIFT_PIO + 6) },
- { "mwdma0", .xfer_mask = 1 << (ATA_SHIFT_MWDMA + 0) },
- { "mwdma1", .xfer_mask = 1 << (ATA_SHIFT_MWDMA + 1) },
- { "mwdma2", .xfer_mask = 1 << (ATA_SHIFT_MWDMA + 2) },
- { "mwdma3", .xfer_mask = 1 << (ATA_SHIFT_MWDMA + 3) },
- { "mwdma4", .xfer_mask = 1 << (ATA_SHIFT_MWDMA + 4) },
- { "udma0", .xfer_mask = 1 << (ATA_SHIFT_UDMA + 0) },
- { "udma16", .xfer_mask = 1 << (ATA_SHIFT_UDMA + 0) },
- { "udma/16", .xfer_mask = 1 << (ATA_SHIFT_UDMA + 0) },
- { "udma1", .xfer_mask = 1 << (ATA_SHIFT_UDMA + 1) },
- { "udma25", .xfer_mask = 1 << (ATA_SHIFT_UDMA + 1) },
- { "udma/25", .xfer_mask = 1 << (ATA_SHIFT_UDMA + 1) },
- { "udma2", .xfer_mask = 1 << (ATA_SHIFT_UDMA + 2) },
- { "udma33", .xfer_mask = 1 << (ATA_SHIFT_UDMA + 2) },
- { "udma/33", .xfer_mask = 1 << (ATA_SHIFT_UDMA + 2) },
- { "udma3", .xfer_mask = 1 << (ATA_SHIFT_UDMA + 3) },
- { "udma44", .xfer_mask = 1 << (ATA_SHIFT_UDMA + 3) },
- { "udma/44", .xfer_mask = 1 << (ATA_SHIFT_UDMA + 3) },
- { "udma4", .xfer_mask = 1 << (ATA_SHIFT_UDMA + 4) },
- { "udma66", .xfer_mask = 1 << (ATA_SHIFT_UDMA + 4) },
- { "udma/66", .xfer_mask = 1 << (ATA_SHIFT_UDMA + 4) },
- { "udma5", .xfer_mask = 1 << (ATA_SHIFT_UDMA + 5) },
- { "udma100", .xfer_mask = 1 << (ATA_SHIFT_UDMA + 5) },
- { "udma/100", .xfer_mask = 1 << (ATA_SHIFT_UDMA + 5) },
- { "udma6", .xfer_mask = 1 << (ATA_SHIFT_UDMA + 6) },
- { "udma133", .xfer_mask = 1 << (ATA_SHIFT_UDMA + 6) },
- { "udma/133", .xfer_mask = 1 << (ATA_SHIFT_UDMA + 6) },
- { "udma7", .xfer_mask = 1 << (ATA_SHIFT_UDMA + 7) },
- { "nohrst", .lflags = ATA_LFLAG_NO_HRST },
- { "nosrst", .lflags = ATA_LFLAG_NO_SRST },
- { "norst", .lflags = ATA_LFLAG_NO_HRST | ATA_LFLAG_NO_SRST },
- { "rstonce", .lflags = ATA_LFLAG_RST_ONCE },
- { "atapi_dmadir", .horkage_on = ATA_HORKAGE_ATAPI_DMADIR },
- { "disable", .horkage_on = ATA_HORKAGE_DISABLE },
- };
char *start = *cur, *p = *cur;
char *id, *val, *endp;
const struct ata_force_param *match_fp = NULL;
@@ -6288,7 +6295,7 @@ static void __init ata_parse_force_param(void)
int last_port = -1, last_device = -1;
char *p, *cur, *next;
- /* calculate maximum number of params and allocate force_tbl */
+ /* Calculate maximum number of params and allocate ata_force_tbl */
for (p = ata_force_param_buf; *p; p++)
if (*p == ',')
size++;
diff --git a/drivers/ata/libata-sata.c b/drivers/ata/libata-sata.c
index 044a16daa2d4..7a5fe41aa5ae 100644
--- a/drivers/ata/libata-sata.c
+++ b/drivers/ata/libata-sata.c
@@ -1268,31 +1268,6 @@ int ata_sas_queuecmd(struct scsi_cmnd *cmd, struct ata_port *ap)
}
EXPORT_SYMBOL_GPL(ata_sas_queuecmd);
-int ata_sas_allocate_tag(struct ata_port *ap)
-{
- unsigned int max_queue = ap->host->n_tags;
- unsigned int i, tag;
-
- for (i = 0, tag = ap->sas_last_tag + 1; i < max_queue; i++, tag++) {
- tag = tag < max_queue ? tag : 0;
-
- /* the last tag is reserved for internal command. */
- if (ata_tag_internal(tag))
- continue;
-
- if (!test_and_set_bit(tag, &ap->sas_tag_allocated)) {
- ap->sas_last_tag = tag;
- return tag;
- }
- }
- return -1;
-}
-
-void ata_sas_free_tag(unsigned int tag, struct ata_port *ap)
-{
- clear_bit(tag, &ap->sas_tag_allocated);
-}
-
/**
* sata_async_notification - SATA async notification handler
* @ap: ATA port where async notification is received
diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c
index 06c9d90238d9..42cecf95a4e5 100644
--- a/drivers/ata/libata-scsi.c
+++ b/drivers/ata/libata-scsi.c
@@ -638,24 +638,48 @@ EXPORT_SYMBOL_GPL(ata_scsi_ioctl);
static struct ata_queued_cmd *ata_scsi_qc_new(struct ata_device *dev,
struct scsi_cmnd *cmd)
{
+ struct ata_port *ap = dev->link->ap;
struct ata_queued_cmd *qc;
+ int tag;
- qc = ata_qc_new_init(dev, scsi_cmd_to_rq(cmd)->tag);
- if (qc) {
- qc->scsicmd = cmd;
- qc->scsidone = scsi_done;
-
- qc->sg = scsi_sglist(cmd);
- qc->n_elem = scsi_sg_count(cmd);
+ if (unlikely(ap->pflags & ATA_PFLAG_FROZEN))
+ goto fail;
- if (scsi_cmd_to_rq(cmd)->rq_flags & RQF_QUIET)
- qc->flags |= ATA_QCFLAG_QUIET;
+ if (ap->flags & ATA_FLAG_SAS_HOST) {
+ /*
+ * SAS hosts may queue > ATA_MAX_QUEUE commands so use
+ * unique per-device budget token as a tag.
+ */
+ if (WARN_ON_ONCE(cmd->budget_token >= ATA_MAX_QUEUE))
+ goto fail;
+ tag = cmd->budget_token;
} else {
- cmd->result = (DID_OK << 16) | SAM_STAT_TASK_SET_FULL;
- scsi_done(cmd);
+ tag = scsi_cmd_to_rq(cmd)->tag;
}
+ qc = __ata_qc_from_tag(ap, tag);
+ qc->tag = qc->hw_tag = tag;
+ qc->ap = ap;
+ qc->dev = dev;
+
+ ata_qc_reinit(qc);
+
+ qc->scsicmd = cmd;
+ qc->scsidone = scsi_done;
+
+ qc->sg = scsi_sglist(cmd);
+ qc->n_elem = scsi_sg_count(cmd);
+
+ if (scsi_cmd_to_rq(cmd)->rq_flags & RQF_QUIET)
+ qc->flags |= ATA_QCFLAG_QUIET;
+
return qc;
+
+fail:
+ set_host_byte(cmd, DID_OK);
+ set_status_byte(cmd, SAM_STAT_TASK_SET_FULL);
+ scsi_done(cmd);
+ return NULL;
}
static void ata_qc_set_pc_nbytes(struct ata_queued_cmd *qc)
diff --git a/drivers/ata/libata.h b/drivers/ata/libata.h
index c9c2496d91ea..926a7f41303d 100644
--- a/drivers/ata/libata.h
+++ b/drivers/ata/libata.h
@@ -44,7 +44,6 @@ static inline void ata_force_cbl(struct ata_port *ap) { }
#endif
extern u64 ata_tf_to_lba(const struct ata_taskfile *tf);
extern u64 ata_tf_to_lba48(const struct ata_taskfile *tf);
-extern struct ata_queued_cmd *ata_qc_new_init(struct ata_device *dev, int tag);
extern int ata_build_rw_tf(struct ata_taskfile *tf, struct ata_device *dev,
u64 block, u32 n_block, unsigned int tf_flags,
unsigned int tag, int class);
@@ -91,18 +90,6 @@ extern unsigned int ata_read_log_page(struct ata_device *dev, u8 log,
#define to_ata_port(d) container_of(d, struct ata_port, tdev)
-/* libata-sata.c */
-#ifdef CONFIG_SATA_HOST
-int ata_sas_allocate_tag(struct ata_port *ap);
-void ata_sas_free_tag(unsigned int tag, struct ata_port *ap);
-#else
-static inline int ata_sas_allocate_tag(struct ata_port *ap)
-{
- return -EOPNOTSUPP;
-}
-static inline void ata_sas_free_tag(unsigned int tag, struct ata_port *ap) { }
-#endif
-
/* libata-acpi.c */
#ifdef CONFIG_ATA_ACPI
extern unsigned int ata_acpi_gtf_filter;
diff --git a/drivers/ata/pata_ftide010.c b/drivers/ata/pata_ftide010.c
index 2e35505b683c..0117df0fe3c5 100644
--- a/drivers/ata/pata_ftide010.c
+++ b/drivers/ata/pata_ftide010.c
@@ -536,8 +536,8 @@ static int pata_ftide010_probe(struct platform_device *pdev)
return 0;
err_dis_clk:
- if (!IS_ERR(ftide->pclk))
- clk_disable_unprepare(ftide->pclk);
+ clk_disable_unprepare(ftide->pclk);
+
return ret;
}
@@ -547,8 +547,7 @@ static int pata_ftide010_remove(struct platform_device *pdev)
struct ftide010 *ftide = host->private_data;
ata_host_detach(ftide->host);
- if (!IS_ERR(ftide->pclk))
- clk_disable_unprepare(ftide->pclk);
+ clk_disable_unprepare(ftide->pclk);
return 0;
}
diff --git a/drivers/ata/pata_marvell.c b/drivers/ata/pata_marvell.c
index 0c5a51970fbf..014ccb0f45dc 100644
--- a/drivers/ata/pata_marvell.c
+++ b/drivers/ata/pata_marvell.c
@@ -77,6 +77,8 @@ static int marvell_cable_detect(struct ata_port *ap)
switch(ap->port_no)
{
case 0:
+ if (!ap->ioaddr.bmdma_addr)
+ return ATA_CBL_PATA_UNK;
if (ioread8(ap->ioaddr.bmdma_addr + 1) & 1)
return ATA_CBL_PATA40;
return ATA_CBL_PATA80;
diff --git a/drivers/ata/pata_mpc52xx.c b/drivers/ata/pata_mpc52xx.c
index 3250ef317df6..03b6ae37a578 100644
--- a/drivers/ata/pata_mpc52xx.c
+++ b/drivers/ata/pata_mpc52xx.c
@@ -19,11 +19,12 @@
#include <linux/gfp.h>
#include <linux/delay.h>
#include <linux/libata.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
#include <linux/of_platform.h>
#include <linux/types.h>
#include <asm/cacheflush.h>
-#include <asm/prom.h>
#include <asm/mpc52xx.h>
#include <linux/fsl/bestcomm/bestcomm.h>
diff --git a/drivers/ata/pata_sil680.c b/drivers/ata/pata_sil680.c
index 0da58ce20d82..67ef2e26d7df 100644
--- a/drivers/ata/pata_sil680.c
+++ b/drivers/ata/pata_sil680.c
@@ -47,11 +47,9 @@
* criticial.
*/
-static unsigned long sil680_selreg(struct ata_port *ap, int r)
+static int sil680_selreg(struct ata_port *ap, int r)
{
- unsigned long base = 0xA0 + r;
- base += (ap->port_no << 4);
- return base;
+ return 0xA0 + (ap->port_no << 4) + r;
}
/**
@@ -65,12 +63,9 @@ static unsigned long sil680_selreg(struct ata_port *ap, int r)
* the unit shift.
*/
-static unsigned long sil680_seldev(struct ata_port *ap, struct ata_device *adev, int r)
+static int sil680_seldev(struct ata_port *ap, struct ata_device *adev, int r)
{
- unsigned long base = 0xA0 + r;
- base += (ap->port_no << 4);
- base |= adev->devno ? 2 : 0;
- return base;
+ return 0xA0 + (ap->port_no << 4) + r + (adev->devno << 1);
}
@@ -85,8 +80,9 @@ static unsigned long sil680_seldev(struct ata_port *ap, struct ata_device *adev,
static int sil680_cable_detect(struct ata_port *ap)
{
struct pci_dev *pdev = to_pci_dev(ap->host->dev);
- unsigned long addr = sil680_selreg(ap, 0);
+ int addr = sil680_selreg(ap, 0);
u8 ata66;
+
pci_read_config_byte(pdev, addr, &ata66);
if (ata66 & 1)
return ATA_CBL_PATA80;
@@ -113,9 +109,9 @@ static void sil680_set_piomode(struct ata_port *ap, struct ata_device *adev)
0x328A, 0x2283, 0x1281, 0x10C3, 0x10C1
};
- unsigned long tfaddr = sil680_selreg(ap, 0x02);
- unsigned long addr = sil680_seldev(ap, adev, 0x04);
- unsigned long addr_mask = 0x80 + 4 * ap->port_no;
+ int tfaddr = sil680_selreg(ap, 0x02);
+ int addr = sil680_seldev(ap, adev, 0x04);
+ int addr_mask = 0x80 + 4 * ap->port_no;
struct pci_dev *pdev = to_pci_dev(ap->host->dev);
int pio = adev->pio_mode - XFER_PIO_0;
int lowest_pio = pio;
@@ -165,9 +161,9 @@ static void sil680_set_dmamode(struct ata_port *ap, struct ata_device *adev)
static const u16 dma_table[3] = { 0x2208, 0x10C2, 0x10C1 };
struct pci_dev *pdev = to_pci_dev(ap->host->dev);
- unsigned long ma = sil680_seldev(ap, adev, 0x08);
- unsigned long ua = sil680_seldev(ap, adev, 0x0C);
- unsigned long addr_mask = 0x80 + 4 * ap->port_no;
+ int ma = sil680_seldev(ap, adev, 0x08);
+ int ua = sil680_seldev(ap, adev, 0x0C);
+ int addr_mask = 0x80 + 4 * ap->port_no;
int port_shift = adev->devno * 4;
u8 scsc, mode;
u16 multi, ultra;
@@ -219,7 +215,7 @@ static void sil680_sff_exec_command(struct ata_port *ap,
static bool sil680_sff_irq_check(struct ata_port *ap)
{
struct pci_dev *pdev = to_pci_dev(ap->host->dev);
- unsigned long addr = sil680_selreg(ap, 1);
+ int addr = sil680_selreg(ap, 1);
u8 val;
pci_read_config_byte(pdev, addr, &val);
diff --git a/drivers/ata/pata_via.c b/drivers/ata/pata_via.c
index 439ca882f73c..215c02d4056a 100644
--- a/drivers/ata/pata_via.c
+++ b/drivers/ata/pata_via.c
@@ -248,9 +248,9 @@ static void via_do_set_mode(struct ata_port *ap, struct ata_device *adev,
struct pci_dev *pdev = to_pci_dev(ap->host->dev);
struct ata_device *peer = ata_dev_pair(adev);
struct ata_timing t, p;
- static int via_clock = 33333; /* Bus clock in kHZ */
- unsigned long T = 1000000000 / via_clock;
- unsigned long UT = T;
+ const int via_clock = 33333; /* Bus clock in kHz */
+ const int T = 1000000000 / via_clock;
+ int UT = T;
int ut;
int offset = 3 - (2*ap->port_no) - adev->devno;
diff --git a/drivers/ata/sata_gemini.c b/drivers/ata/sata_gemini.c
index 00e1c7941d0e..b729e9919bb0 100644
--- a/drivers/ata/sata_gemini.c
+++ b/drivers/ata/sata_gemini.c
@@ -318,7 +318,6 @@ static int gemini_sata_probe(struct platform_device *pdev)
struct device_node *np = dev->of_node;
struct sata_gemini *sg;
struct regmap *map;
- struct resource *res;
enum gemini_muxmode muxmode;
u32 gmode;
u32 gmask;
@@ -329,11 +328,7 @@ static int gemini_sata_probe(struct platform_device *pdev)
return -ENOMEM;
sg->dev = dev;
- res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
- if (!res)
- return -ENODEV;
-
- sg->base = devm_ioremap_resource(dev, res);
+ sg->base = devm_platform_ioremap_resource(pdev, 0);
if (IS_ERR(sg->base))
return PTR_ERR(sg->base);
diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c
index 1d6636ebaac5..f73b836047cf 100644
--- a/drivers/base/arch_topology.c
+++ b/drivers/base/arch_topology.c
@@ -667,6 +667,15 @@ const struct cpumask *cpu_coregroup_mask(int cpu)
core_mask = &cpu_topology[cpu].llc_sibling;
}
+ /*
+ * For systems with no shared cpu-side LLC but with clusters defined,
+ * extend core_mask to cluster_siblings. The sched domain builder will
+ * then remove MC as redundant with CLS if SCHED_CLUSTER is enabled.
+ */
+ if (IS_ENABLED(CONFIG_SCHED_CLUSTER) &&
+ cpumask_subset(core_mask, &cpu_topology[cpu].cluster_sibling))
+ core_mask = &cpu_topology[cpu].cluster_sibling;
+
return core_mask;
}
@@ -684,7 +693,7 @@ void update_siblings_masks(unsigned int cpuid)
for_each_online_cpu(cpu) {
cpu_topo = &cpu_topology[cpu];
- if (cpuid_topo->llc_id == cpu_topo->llc_id) {
+ if (cpu_topo->llc_id != -1 && cpuid_topo->llc_id == cpu_topo->llc_id) {
cpumask_set_cpu(cpu, &cpuid_topo->llc_sibling);
cpumask_set_cpu(cpuid, &cpu_topo->llc_sibling);
}
diff --git a/drivers/base/firmware_loader/main.c b/drivers/base/firmware_loader/main.c
index 94d1789a233e..406a907a4cae 100644
--- a/drivers/base/firmware_loader/main.c
+++ b/drivers/base/firmware_loader/main.c
@@ -735,6 +735,8 @@ _request_firmware(const struct firmware **firmware_p, const char *name,
size_t offset, u32 opt_flags)
{
struct firmware *fw = NULL;
+ struct cred *kern_cred = NULL;
+ const struct cred *old_cred;
bool nondirect = false;
int ret;
@@ -751,6 +753,18 @@ _request_firmware(const struct firmware **firmware_p, const char *name,
if (ret <= 0) /* error or already assigned */
goto out;
+ /*
+ * We are about to try to access the firmware file. Because we may have been
+ * called by a driver when serving an unrelated request from userland, we use
+ * the kernel credentials to read the file.
+ */
+ kern_cred = prepare_kernel_cred(NULL);
+ if (!kern_cred) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ old_cred = override_creds(kern_cred);
+
ret = fw_get_filesystem_firmware(device, fw->priv, "", NULL);
/* Only full reads can support decompression, platform, and sysfs. */
@@ -776,6 +790,9 @@ _request_firmware(const struct firmware **firmware_p, const char *name,
} else
ret = assign_fw(fw, device);
+ revert_creds(old_cred);
+ put_cred(kern_cred);
+
out:
if (ret < 0) {
fw_abort_batch_reqs(fw);
diff --git a/drivers/base/topology.c b/drivers/base/topology.c
index e9d1efcda89b..ac6ad9ab67f9 100644
--- a/drivers/base/topology.c
+++ b/drivers/base/topology.c
@@ -152,9 +152,19 @@ static struct attribute *default_attrs[] = {
NULL
};
+static umode_t topology_is_visible(struct kobject *kobj,
+ struct attribute *attr, int unused)
+{
+ if (attr == &dev_attr_ppin.attr && !topology_ppin(kobj_to_dev(kobj)->id))
+ return 0;
+
+ return attr->mode;
+}
+
static const struct attribute_group topology_attr_group = {
.attrs = default_attrs,
.bin_attrs = bin_attrs,
+ .is_visible = topology_is_visible,
.name = "topology"
};
diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig
index 519b6d38d4df..fdb81f2794cd 100644
--- a/drivers/block/Kconfig
+++ b/drivers/block/Kconfig
@@ -33,6 +33,22 @@ config BLK_DEV_FD
To compile this driver as a module, choose M here: the
module will be called floppy.
+config BLK_DEV_FD_RAWCMD
+ bool "Support for raw floppy disk commands (DEPRECATED)"
+ depends on BLK_DEV_FD
+ help
+ If you want to use actual physical floppies and expect to do
+ special low-level hardware accesses to them (access and use
+ non-standard formats, for example), then enable this.
+
+ Note that the code enabled by this option is rarely used and
+ might be unstable or insecure, and distros should not enable it.
+
+ Note: FDRAWCMD is deprecated and will be removed from the kernel
+ in the near future.
+
+ If unsure, say N.
+
config AMIGA_FLOPPY
tristate "Amiga floppy support"
depends on AMIGA
diff --git a/drivers/block/aoe/aoe.h b/drivers/block/aoe/aoe.h
index 84d0fcebd6af..749ae1246f4c 100644
--- a/drivers/block/aoe/aoe.h
+++ b/drivers/block/aoe/aoe.h
@@ -244,3 +244,5 @@ void aoenet_exit(void);
void aoenet_xmit(struct sk_buff_head *);
int is_aoe_netif(struct net_device *ifp);
int set_aoe_iflist(const char __user *str, size_t size);
+
+extern struct workqueue_struct *aoe_wq;
diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c
index 8a91fcac6f82..348adf335217 100644
--- a/drivers/block/aoe/aoeblk.c
+++ b/drivers/block/aoe/aoeblk.c
@@ -435,7 +435,7 @@ err_mempool:
err:
spin_lock_irqsave(&d->lock, flags);
d->flags &= ~DEVFL_GD_NOW;
- schedule_work(&d->work);
+ queue_work(aoe_wq, &d->work);
spin_unlock_irqrestore(&d->lock, flags);
}
diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c
index 384073ef2323..d7317425be51 100644
--- a/drivers/block/aoe/aoecmd.c
+++ b/drivers/block/aoe/aoecmd.c
@@ -968,7 +968,7 @@ ataid_complete(struct aoedev *d, struct aoetgt *t, unsigned char *id)
d->flags |= DEVFL_NEWSIZE;
else
d->flags |= DEVFL_GDALLOC;
- schedule_work(&d->work);
+ queue_work(aoe_wq, &d->work);
}
static void
diff --git a/drivers/block/aoe/aoedev.c b/drivers/block/aoe/aoedev.c
index c5753c6bfe80..b381d1c3ef32 100644
--- a/drivers/block/aoe/aoedev.c
+++ b/drivers/block/aoe/aoedev.c
@@ -321,7 +321,7 @@ flush(const char __user *str, size_t cnt, int exiting)
specified = 1;
}
- flush_scheduled_work();
+ flush_workqueue(aoe_wq);
/* pass one: do aoedev_downdev, which might sleep */
restart1:
spin_lock_irqsave(&devlist_lock, flags);
@@ -520,7 +520,7 @@ freetgt(struct aoedev *d, struct aoetgt *t)
void
aoedev_exit(void)
{
- flush_scheduled_work();
+ flush_workqueue(aoe_wq);
flush(NULL, 0, EXITING);
}
diff --git a/drivers/block/aoe/aoemain.c b/drivers/block/aoe/aoemain.c
index 1e4e2971171c..6238c4c87cfc 100644
--- a/drivers/block/aoe/aoemain.c
+++ b/drivers/block/aoe/aoemain.c
@@ -16,6 +16,7 @@ MODULE_DESCRIPTION("AoE block/char driver for 2.6.2 and newer 2.6 kernels");
MODULE_VERSION(VERSION);
static struct timer_list timer;
+struct workqueue_struct *aoe_wq;
static void discover_timer(struct timer_list *t)
{
@@ -35,6 +36,7 @@ aoe_exit(void)
aoechr_exit();
aoedev_exit();
aoeblk_exit(); /* free cache after de-allocating bufs */
+ destroy_workqueue(aoe_wq);
}
static int __init
@@ -42,9 +44,13 @@ aoe_init(void)
{
int ret;
+ aoe_wq = alloc_workqueue("aoe_wq", 0, 0);
+ if (!aoe_wq)
+ return -ENOMEM;
+
ret = aoedev_init();
if (ret)
- return ret;
+ goto dev_fail;
ret = aoechr_init();
if (ret)
goto chr_fail;
@@ -77,6 +83,8 @@ aoe_init(void)
aoechr_exit();
chr_fail:
aoedev_exit();
+ dev_fail:
+ destroy_workqueue(aoe_wq);
printk(KERN_INFO "aoe: initialisation failure.\n");
return ret;
diff --git a/drivers/block/ataflop.c b/drivers/block/ataflop.c
index 5d819a466e2f..e232cc4fd444 100644
--- a/drivers/block/ataflop.c
+++ b/drivers/block/ataflop.c
@@ -303,6 +303,7 @@ static struct atari_floppy_struct {
int ref;
int type;
struct blk_mq_tag_set tag_set;
+ int error_count;
} unit[FD_MAX_UNITS];
#define UD unit[drive]
@@ -705,14 +706,14 @@ static void fd_error( void )
if (!fd_request)
return;
- fd_request->error_count++;
- if (fd_request->error_count >= MAX_ERRORS) {
+ unit[SelectedDrive].error_count++;
+ if (unit[SelectedDrive].error_count >= MAX_ERRORS) {
printk(KERN_ERR "fd%d: too many errors.\n", SelectedDrive );
fd_end_request_cur(BLK_STS_IOERR);
finish_fdc();
return;
}
- else if (fd_request->error_count == RECALIBRATE_ERRORS) {
+ else if (unit[SelectedDrive].error_count == RECALIBRATE_ERRORS) {
printk(KERN_WARNING "fd%d: recalibrating\n", SelectedDrive );
if (SelectedDrive != -1)
SUD.track = -1;
@@ -1491,7 +1492,7 @@ static void setup_req_params( int drive )
ReqData = ReqBuffer + 512 * ReqCnt;
if (UseTrackbuffer)
- read_track = (ReqCmd == READ && fd_request->error_count == 0);
+ read_track = (ReqCmd == READ && unit[drive].error_count == 0);
else
read_track = 0;
@@ -1520,6 +1521,7 @@ static blk_status_t ataflop_queue_rq(struct blk_mq_hw_ctx *hctx,
return BLK_STS_RESOURCE;
}
fd_request = bd->rq;
+ unit[drive].error_count = 0;
blk_mq_start_request(fd_request);
atari_disable_irq( IRQ_MFP_FDC );
diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c
index df25eecf80af..9e060e49b3f8 100644
--- a/drivers/block/drbd/drbd_bitmap.c
+++ b/drivers/block/drbd/drbd_bitmap.c
@@ -683,7 +683,7 @@ int drbd_bm_resize(struct drbd_device *device, sector_t capacity, int set_new_bi
}
}
- want = ALIGN(words*sizeof(long), PAGE_SIZE) >> PAGE_SHIFT;
+ want = PFN_UP(words*sizeof(long));
have = b->bm_number_of_pages;
if (want == have) {
D_ASSERT(device, b->bm_pages != NULL);
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 4b0b25cc916e..2887350ae010 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -903,31 +903,6 @@ void drbd_gen_and_send_sync_uuid(struct drbd_peer_device *peer_device)
}
}
-/* communicated if (agreed_features & DRBD_FF_WSAME) */
-static void
-assign_p_sizes_qlim(struct drbd_device *device, struct p_sizes *p,
- struct request_queue *q)
-{
- if (q) {
- p->qlim->physical_block_size = cpu_to_be32(queue_physical_block_size(q));
- p->qlim->logical_block_size = cpu_to_be32(queue_logical_block_size(q));
- p->qlim->alignment_offset = cpu_to_be32(queue_alignment_offset(q));
- p->qlim->io_min = cpu_to_be32(queue_io_min(q));
- p->qlim->io_opt = cpu_to_be32(queue_io_opt(q));
- p->qlim->discard_enabled = blk_queue_discard(q);
- p->qlim->write_same_capable = 0;
- } else {
- q = device->rq_queue;
- p->qlim->physical_block_size = cpu_to_be32(queue_physical_block_size(q));
- p->qlim->logical_block_size = cpu_to_be32(queue_logical_block_size(q));
- p->qlim->alignment_offset = 0;
- p->qlim->io_min = cpu_to_be32(queue_io_min(q));
- p->qlim->io_opt = cpu_to_be32(queue_io_opt(q));
- p->qlim->discard_enabled = 0;
- p->qlim->write_same_capable = 0;
- }
-}
-
int drbd_send_sizes(struct drbd_peer_device *peer_device, int trigger_reply, enum dds_flags flags)
{
struct drbd_device *device = peer_device->device;
@@ -949,7 +924,9 @@ int drbd_send_sizes(struct drbd_peer_device *peer_device, int trigger_reply, enu
memset(p, 0, packet_size);
if (get_ldev_if_state(device, D_NEGOTIATING)) {
- struct request_queue *q = bdev_get_queue(device->ldev->backing_bdev);
+ struct block_device *bdev = device->ldev->backing_bdev;
+ struct request_queue *q = bdev_get_queue(bdev);
+
d_size = drbd_get_max_capacity(device->ldev);
rcu_read_lock();
u_size = rcu_dereference(device->ldev->disk_conf)->disk_size;
@@ -957,14 +934,32 @@ int drbd_send_sizes(struct drbd_peer_device *peer_device, int trigger_reply, enu
q_order_type = drbd_queue_order_type(device);
max_bio_size = queue_max_hw_sectors(q) << 9;
max_bio_size = min(max_bio_size, DRBD_MAX_BIO_SIZE);
- assign_p_sizes_qlim(device, p, q);
+ p->qlim->physical_block_size =
+ cpu_to_be32(bdev_physical_block_size(bdev));
+ p->qlim->logical_block_size =
+ cpu_to_be32(bdev_logical_block_size(bdev));
+ p->qlim->alignment_offset =
+ cpu_to_be32(bdev_alignment_offset(bdev));
+ p->qlim->io_min = cpu_to_be32(bdev_io_min(bdev));
+ p->qlim->io_opt = cpu_to_be32(bdev_io_opt(bdev));
+ p->qlim->discard_enabled = !!bdev_max_discard_sectors(bdev);
put_ldev(device);
} else {
+ struct request_queue *q = device->rq_queue;
+
+ p->qlim->physical_block_size =
+ cpu_to_be32(queue_physical_block_size(q));
+ p->qlim->logical_block_size =
+ cpu_to_be32(queue_logical_block_size(q));
+ p->qlim->alignment_offset = 0;
+ p->qlim->io_min = cpu_to_be32(queue_io_min(q));
+ p->qlim->io_opt = cpu_to_be32(queue_io_opt(q));
+ p->qlim->discard_enabled = 0;
+
d_size = 0;
u_size = 0;
q_order_type = QUEUE_ORDERED_NONE;
max_bio_size = DRBD_MAX_BIO_SIZE; /* ... multiple BIOs per peer_request */
- assign_p_sizes_qlim(device, p, NULL);
}
if (peer_device->connection->agreed_pro_version <= 94)
@@ -3586,9 +3581,8 @@ const char *cmdname(enum drbd_packet cmd)
* when we want to support more than
* one PRO_VERSION */
static const char *cmdnames[] = {
+
[P_DATA] = "Data",
- [P_WSAME] = "WriteSame",
- [P_TRIM] = "Trim",
[P_DATA_REPLY] = "DataReply",
[P_RS_DATA_REPLY] = "RSDataReply",
[P_BARRIER] = "Barrier",
@@ -3599,7 +3593,6 @@ const char *cmdname(enum drbd_packet cmd)
[P_DATA_REQUEST] = "DataRequest",
[P_RS_DATA_REQUEST] = "RSDataRequest",
[P_SYNC_PARAM] = "SyncParam",
- [P_SYNC_PARAM89] = "SyncParam89",
[P_PROTOCOL] = "ReportProtocol",
[P_UUIDS] = "ReportUUIDs",
[P_SIZES] = "ReportSizes",
@@ -3607,6 +3600,7 @@ const char *cmdname(enum drbd_packet cmd)
[P_SYNC_UUID] = "ReportSyncUUID",
[P_AUTH_CHALLENGE] = "AuthChallenge",
[P_AUTH_RESPONSE] = "AuthResponse",
+ [P_STATE_CHG_REQ] = "StateChgRequest",
[P_PING] = "Ping",
[P_PING_ACK] = "PingAck",
[P_RECV_ACK] = "RecvAck",
@@ -3617,23 +3611,25 @@ const char *cmdname(enum drbd_packet cmd)
[P_NEG_DREPLY] = "NegDReply",
[P_NEG_RS_DREPLY] = "NegRSDReply",
[P_BARRIER_ACK] = "BarrierAck",
- [P_STATE_CHG_REQ] = "StateChgRequest",
[P_STATE_CHG_REPLY] = "StateChgReply",
[P_OV_REQUEST] = "OVRequest",
[P_OV_REPLY] = "OVReply",
[P_OV_RESULT] = "OVResult",
[P_CSUM_RS_REQUEST] = "CsumRSRequest",
[P_RS_IS_IN_SYNC] = "CsumRSIsInSync",
+ [P_SYNC_PARAM89] = "SyncParam89",
[P_COMPRESSED_BITMAP] = "CBitmap",
[P_DELAY_PROBE] = "DelayProbe",
[P_OUT_OF_SYNC] = "OutOfSync",
- [P_RETRY_WRITE] = "RetryWrite",
[P_RS_CANCEL] = "RSCancel",
[P_CONN_ST_CHG_REQ] = "conn_st_chg_req",
[P_CONN_ST_CHG_REPLY] = "conn_st_chg_reply",
[P_PROTOCOL_UPDATE] = "protocol_update",
+ [P_TRIM] = "Trim",
[P_RS_THIN_REQ] = "rs_thin_req",
[P_RS_DEALLOCATED] = "rs_deallocated",
+ [P_WSAME] = "WriteSame",
+ [P_ZEROES] = "Zeroes",
/* enum drbd_packet, but not commands - obsoleted flags:
* P_MAY_IGNORE
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index b7216c186ba4..013d355a2033 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -770,6 +770,7 @@ int drbd_adm_set_role(struct sk_buff *skb, struct genl_info *info)
struct set_role_parms parms;
int err;
enum drbd_ret_code retcode;
+ enum drbd_state_rv rv;
retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
if (!adm_ctx.reply_skb)
@@ -790,14 +791,14 @@ int drbd_adm_set_role(struct sk_buff *skb, struct genl_info *info)
mutex_lock(&adm_ctx.resource->adm_mutex);
if (info->genlhdr->cmd == DRBD_ADM_PRIMARY)
- retcode = (enum drbd_ret_code)drbd_set_role(adm_ctx.device,
- R_PRIMARY, parms.assume_uptodate);
+ rv = drbd_set_role(adm_ctx.device, R_PRIMARY, parms.assume_uptodate);
else
- retcode = (enum drbd_ret_code)drbd_set_role(adm_ctx.device,
- R_SECONDARY, 0);
+ rv = drbd_set_role(adm_ctx.device, R_SECONDARY, 0);
mutex_unlock(&adm_ctx.resource->adm_mutex);
genl_lock();
+ drbd_adm_finish(&adm_ctx, info, rv);
+ return 0;
out:
drbd_adm_finish(&adm_ctx, info, retcode);
return 0;
@@ -1204,50 +1205,40 @@ static unsigned int drbd_max_discard_sectors(struct drbd_connection *connection)
}
static void decide_on_discard_support(struct drbd_device *device,
- struct request_queue *q,
- struct request_queue *b,
- bool discard_zeroes_if_aligned)
+ struct drbd_backing_dev *bdev)
{
- /* q = drbd device queue (device->rq_queue)
- * b = backing device queue (device->ldev->backing_bdev->bd_disk->queue),
- * or NULL if diskless
- */
- struct drbd_connection *connection = first_peer_device(device)->connection;
- bool can_do = b ? blk_queue_discard(b) : true;
-
- if (can_do && connection->cstate >= C_CONNECTED && !(connection->agreed_features & DRBD_FF_TRIM)) {
- can_do = false;
- drbd_info(connection, "peer DRBD too old, does not support TRIM: disabling discards\n");
- }
- if (can_do) {
- /* We don't care for the granularity, really.
- * Stacking limits below should fix it for the local
- * device. Whether or not it is a suitable granularity
- * on the remote device is not our problem, really. If
- * you care, you need to use devices with similar
- * topology on all peers. */
- blk_queue_discard_granularity(q, 512);
- q->limits.max_discard_sectors = drbd_max_discard_sectors(connection);
- blk_queue_flag_set(QUEUE_FLAG_DISCARD, q);
- q->limits.max_write_zeroes_sectors = drbd_max_discard_sectors(connection);
- } else {
- blk_queue_flag_clear(QUEUE_FLAG_DISCARD, q);
- blk_queue_discard_granularity(q, 0);
- q->limits.max_discard_sectors = 0;
- q->limits.max_write_zeroes_sectors = 0;
- }
-}
+ struct drbd_connection *connection =
+ first_peer_device(device)->connection;
+ struct request_queue *q = device->rq_queue;
-static void fixup_discard_if_not_supported(struct request_queue *q)
-{
- /* To avoid confusion, if this queue does not support discard, clear
- * max_discard_sectors, which is what lsblk -D reports to the user.
- * Older kernels got this wrong in "stack limits".
- * */
- if (!blk_queue_discard(q)) {
- blk_queue_max_discard_sectors(q, 0);
- blk_queue_discard_granularity(q, 0);
+ if (bdev && !bdev_max_discard_sectors(bdev->backing_bdev))
+ goto not_supported;
+
+ if (connection->cstate >= C_CONNECTED &&
+ !(connection->agreed_features & DRBD_FF_TRIM)) {
+ drbd_info(connection,
+ "peer DRBD too old, does not support TRIM: disabling discards\n");
+ goto not_supported;
}
+
+ /*
+ * We don't care for the granularity, really.
+ *
+ * Stacking limits below should fix it for the local device. Whether or
+ * not it is a suitable granularity on the remote device is not our
+ * problem, really. If you care, you need to use devices with similar
+ * topology on all peers.
+ */
+ blk_queue_discard_granularity(q, 512);
+ q->limits.max_discard_sectors = drbd_max_discard_sectors(connection);
+ q->limits.max_write_zeroes_sectors =
+ drbd_max_discard_sectors(connection);
+ return;
+
+not_supported:
+ blk_queue_discard_granularity(q, 0);
+ q->limits.max_discard_sectors = 0;
+ q->limits.max_write_zeroes_sectors = 0;
}
static void fixup_write_zeroes(struct drbd_device *device, struct request_queue *q)
@@ -1273,7 +1264,6 @@ static void drbd_setup_queue_param(struct drbd_device *device, struct drbd_backi
unsigned int max_segments = 0;
struct request_queue *b = NULL;
struct disk_conf *dc;
- bool discard_zeroes_if_aligned = true;
if (bdev) {
b = bdev->backing_bdev->bd_disk->queue;
@@ -1282,7 +1272,6 @@ static void drbd_setup_queue_param(struct drbd_device *device, struct drbd_backi
rcu_read_lock();
dc = rcu_dereference(device->ldev->disk_conf);
max_segments = dc->max_bio_bvecs;
- discard_zeroes_if_aligned = dc->discard_zeroes_if_aligned;
rcu_read_unlock();
blk_set_stacking_limits(&q->limits);
@@ -1292,13 +1281,12 @@ static void drbd_setup_queue_param(struct drbd_device *device, struct drbd_backi
/* This is the workaround for "bio would need to, but cannot, be split" */
blk_queue_max_segments(q, max_segments ? max_segments : BLK_MAX_SEGMENTS);
blk_queue_segment_boundary(q, PAGE_SIZE-1);
- decide_on_discard_support(device, q, b, discard_zeroes_if_aligned);
+ decide_on_discard_support(device, bdev);
if (b) {
blk_stack_limits(&q->limits, &b->limits, 0);
disk_update_readahead(device->vdisk);
}
- fixup_discard_if_not_supported(q);
fixup_write_zeroes(device, q);
}
@@ -1437,14 +1425,14 @@ static bool write_ordering_changed(struct disk_conf *a, struct disk_conf *b)
static void sanitize_disk_conf(struct drbd_device *device, struct disk_conf *disk_conf,
struct drbd_backing_dev *nbc)
{
- struct request_queue * const q = nbc->backing_bdev->bd_disk->queue;
+ struct block_device *bdev = nbc->backing_bdev;
if (disk_conf->al_extents < DRBD_AL_EXTENTS_MIN)
disk_conf->al_extents = DRBD_AL_EXTENTS_MIN;
if (disk_conf->al_extents > drbd_al_extents_max(nbc))
disk_conf->al_extents = drbd_al_extents_max(nbc);
- if (!blk_queue_discard(q)) {
+ if (!bdev_max_discard_sectors(bdev)) {
if (disk_conf->rs_discard_granularity) {
disk_conf->rs_discard_granularity = 0; /* disable feature */
drbd_info(device, "rs_discard_granularity feature disabled\n");
@@ -1453,16 +1441,19 @@ static void sanitize_disk_conf(struct drbd_device *device, struct disk_conf *dis
if (disk_conf->rs_discard_granularity) {
int orig_value = disk_conf->rs_discard_granularity;
+ sector_t discard_size = bdev_max_discard_sectors(bdev) << 9;
+ unsigned int discard_granularity = bdev_discard_granularity(bdev);
int remainder;
- if (q->limits.discard_granularity > disk_conf->rs_discard_granularity)
- disk_conf->rs_discard_granularity = q->limits.discard_granularity;
+ if (discard_granularity > disk_conf->rs_discard_granularity)
+ disk_conf->rs_discard_granularity = discard_granularity;
- remainder = disk_conf->rs_discard_granularity % q->limits.discard_granularity;
+ remainder = disk_conf->rs_discard_granularity %
+ discard_granularity;
disk_conf->rs_discard_granularity += remainder;
- if (disk_conf->rs_discard_granularity > q->limits.max_discard_sectors << 9)
- disk_conf->rs_discard_granularity = q->limits.max_discard_sectors << 9;
+ if (disk_conf->rs_discard_granularity > discard_size)
+ disk_conf->rs_discard_granularity = discard_size;
if (disk_conf->rs_discard_granularity != orig_value)
drbd_info(device, "rs_discard_granularity changed to %d\n",
@@ -1611,8 +1602,7 @@ int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info)
drbd_send_sync_param(peer_device);
}
- synchronize_rcu();
- kfree(old_disk_conf);
+ kvfree_rcu(old_disk_conf);
kfree(old_plan);
mod_timer(&device->request_timer, jiffies + HZ);
goto success;
@@ -2443,8 +2433,7 @@ int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info)
mutex_unlock(&connection->resource->conf_update);
mutex_unlock(&connection->data.mutex);
- synchronize_rcu();
- kfree(old_net_conf);
+ kvfree_rcu(old_net_conf);
if (connection->cstate >= C_WF_REPORT_PARAMS) {
struct drbd_peer_device *peer_device;
@@ -2502,6 +2491,7 @@ int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info)
struct drbd_resource *resource;
struct drbd_connection *connection;
enum drbd_ret_code retcode;
+ enum drbd_state_rv rv;
int i;
int err;
@@ -2621,12 +2611,11 @@ int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info)
}
rcu_read_unlock();
- retcode = (enum drbd_ret_code)conn_request_state(connection,
- NS(conn, C_UNCONNECTED), CS_VERBOSE);
+ rv = conn_request_state(connection, NS(conn, C_UNCONNECTED), CS_VERBOSE);
conn_reconfig_done(connection);
mutex_unlock(&adm_ctx.resource->adm_mutex);
- drbd_adm_finish(&adm_ctx, info, retcode);
+ drbd_adm_finish(&adm_ctx, info, rv);
return 0;
fail:
@@ -2734,11 +2723,12 @@ int drbd_adm_disconnect(struct sk_buff *skb, struct genl_info *info)
mutex_lock(&adm_ctx.resource->adm_mutex);
rv = conn_try_disconnect(connection, parms.force_disconnect);
- if (rv < SS_SUCCESS)
- retcode = (enum drbd_ret_code)rv;
- else
- retcode = NO_ERROR;
mutex_unlock(&adm_ctx.resource->adm_mutex);
+ if (rv < SS_SUCCESS) {
+ drbd_adm_finish(&adm_ctx, info, rv);
+ return 0;
+ }
+ retcode = NO_ERROR;
fail:
drbd_adm_finish(&adm_ctx, info, retcode);
return 0;
@@ -2857,8 +2847,7 @@ int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info)
new_disk_conf->disk_size = (sector_t)rs.resize_size;
rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf);
mutex_unlock(&device->resource->conf_update);
- synchronize_rcu();
- kfree(old_disk_conf);
+ kvfree_rcu(old_disk_conf);
new_disk_conf = NULL;
}
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index 08da922f81d1..6762be53f409 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -364,7 +364,7 @@ drbd_alloc_peer_req(struct drbd_peer_device *peer_device, u64 id, sector_t secto
struct drbd_device *device = peer_device->device;
struct drbd_peer_request *peer_req;
struct page *page = NULL;
- unsigned nr_pages = (payload_size + PAGE_SIZE -1) >> PAGE_SHIFT;
+ unsigned int nr_pages = PFN_UP(payload_size);
if (drbd_insert_fault(device, DRBD_FAULT_AL_EE))
return NULL;
@@ -1511,7 +1511,6 @@ void drbd_bump_write_ordering(struct drbd_resource *resource, struct drbd_backin
int drbd_issue_discard_or_zero_out(struct drbd_device *device, sector_t start, unsigned int nr_sectors, int flags)
{
struct block_device *bdev = device->ldev->backing_bdev;
- struct request_queue *q = bdev_get_queue(bdev);
sector_t tmp, nr;
unsigned int max_discard_sectors, granularity;
int alignment;
@@ -1521,10 +1520,10 @@ int drbd_issue_discard_or_zero_out(struct drbd_device *device, sector_t start, u
goto zero_out;
/* Zero-sector (unknown) and one-sector granularities are the same. */
- granularity = max(q->limits.discard_granularity >> 9, 1U);
+ granularity = max(bdev_discard_granularity(bdev) >> 9, 1U);
alignment = (bdev_discard_alignment(bdev) >> 9) % granularity;
- max_discard_sectors = min(q->limits.max_discard_sectors, (1U << 22));
+ max_discard_sectors = min(bdev_max_discard_sectors(bdev), (1U << 22));
max_discard_sectors -= max_discard_sectors % granularity;
if (unlikely(!max_discard_sectors))
goto zero_out;
@@ -1548,7 +1547,8 @@ int drbd_issue_discard_or_zero_out(struct drbd_device *device, sector_t start, u
start = tmp;
}
while (nr_sectors >= max_discard_sectors) {
- err |= blkdev_issue_discard(bdev, start, max_discard_sectors, GFP_NOIO, 0);
+ err |= blkdev_issue_discard(bdev, start, max_discard_sectors,
+ GFP_NOIO);
nr_sectors -= max_discard_sectors;
start += max_discard_sectors;
}
@@ -1560,7 +1560,7 @@ int drbd_issue_discard_or_zero_out(struct drbd_device *device, sector_t start, u
nr = nr_sectors;
nr -= (unsigned int)nr % granularity;
if (nr) {
- err |= blkdev_issue_discard(bdev, start, nr, GFP_NOIO, 0);
+ err |= blkdev_issue_discard(bdev, start, nr, GFP_NOIO);
nr_sectors -= nr;
start += nr;
}
@@ -1575,11 +1575,10 @@ int drbd_issue_discard_or_zero_out(struct drbd_device *device, sector_t start, u
static bool can_do_reliable_discards(struct drbd_device *device)
{
- struct request_queue *q = bdev_get_queue(device->ldev->backing_bdev);
struct disk_conf *dc;
bool can_do;
- if (!blk_queue_discard(q))
+ if (!bdev_max_discard_sectors(device->ldev->backing_bdev))
return false;
rcu_read_lock();
@@ -1629,9 +1628,9 @@ int drbd_submit_peer_request(struct drbd_device *device,
struct bio *bio;
struct page *page = peer_req->pages;
sector_t sector = peer_req->i.sector;
- unsigned data_size = peer_req->i.size;
- unsigned n_bios = 0;
- unsigned nr_pages = (data_size + PAGE_SIZE -1) >> PAGE_SHIFT;
+ unsigned int data_size = peer_req->i.size;
+ unsigned int n_bios = 0;
+ unsigned int nr_pages = PFN_UP(data_size);
/* TRIM/DISCARD: for now, always use the helper function
* blkdev_issue_zeroout(..., discard=true).
@@ -3751,8 +3750,7 @@ static int receive_protocol(struct drbd_connection *connection, struct packet_in
drbd_info(connection, "peer data-integrity-alg: %s\n",
integrity_alg[0] ? integrity_alg : "(none)");
- synchronize_rcu();
- kfree(old_net_conf);
+ kvfree_rcu(old_net_conf);
return 0;
disconnect_rcu_unlock:
@@ -3903,7 +3901,6 @@ static int receive_SyncParam(struct drbd_connection *connection, struct packet_i
drbd_err(device, "verify-alg of wrong size, "
"peer wants %u, accepting only up to %u byte\n",
data_size, SHARED_SECRET_MAX);
- err = -EIO;
goto reconnect;
}
@@ -4121,8 +4118,7 @@ static int receive_sizes(struct drbd_connection *connection, struct packet_info
rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf);
mutex_unlock(&connection->resource->conf_update);
- synchronize_rcu();
- kfree(old_disk_conf);
+ kvfree_rcu(old_disk_conf);
drbd_info(device, "Peer sets u_size to %lu sectors (old: %lu)\n",
(unsigned long)p_usize, (unsigned long)my_usize);
diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c
index 75be0e16770a..e64bcfba30ef 100644
--- a/drivers/block/drbd/drbd_req.c
+++ b/drivers/block/drbd/drbd_req.c
@@ -922,7 +922,7 @@ static bool remote_due_to_read_balancing(struct drbd_device *device, sector_t se
switch (rbm) {
case RB_CONGESTED_REMOTE:
- return 0;
+ return false;
case RB_LEAST_PENDING:
return atomic_read(&device->local_cnt) >
atomic_read(&device->ap_pending_cnt) + atomic_read(&device->rs_pending_cnt);
diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c
index 4ee11aef6672..3f7bf9f2d874 100644
--- a/drivers/block/drbd/drbd_state.c
+++ b/drivers/block/drbd/drbd_state.c
@@ -2071,8 +2071,7 @@ static int w_after_conn_state_ch(struct drbd_work *w, int unused)
conn_free_crypto(connection);
mutex_unlock(&connection->resource->conf_update);
- synchronize_rcu();
- kfree(old_conf);
+ kvfree_rcu(old_conf);
}
if (ns_max.susp_fen) {
diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c
index 0f9956f4e9c4..af3051dd8912 100644
--- a/drivers/block/drbd/drbd_worker.c
+++ b/drivers/block/drbd/drbd_worker.c
@@ -1030,7 +1030,7 @@ static void move_to_net_ee_or_free(struct drbd_device *device, struct drbd_peer_
{
if (drbd_peer_req_has_active_page(peer_req)) {
/* This might happen if sendpage() has not finished */
- int i = (peer_req->i.size + PAGE_SIZE -1) >> PAGE_SHIFT;
+ int i = PFN_UP(peer_req->i.size);
atomic_add(i, &device->pp_in_use_by_net);
atomic_sub(i, &device->pp_in_use);
spin_lock_irq(&device->resource->req_lock);
diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c
index 8c647532e3ce..015841f50f4e 100644
--- a/drivers/block/floppy.c
+++ b/drivers/block/floppy.c
@@ -509,8 +509,8 @@ static unsigned long fdc_busy;
static DECLARE_WAIT_QUEUE_HEAD(fdc_wait);
static DECLARE_WAIT_QUEUE_HEAD(command_done);
-/* Errors during formatting are counted here. */
-static int format_errors;
+/* errors encountered on the current (or last) request */
+static int floppy_errors;
/* Format request descriptor. */
static struct format_descr format_req;
@@ -530,7 +530,6 @@ static struct format_descr format_req;
static char *floppy_track_buffer;
static int max_buffer_sectors;
-static int *errors;
typedef void (*done_f)(int);
static const struct cont_t {
void (*interrupt)(void);
@@ -1455,7 +1454,7 @@ static int interpret_errors(void)
if (drive_params[current_drive].flags & FTD_MSG)
DPRINT("Over/Underrun - retrying\n");
bad = 0;
- } else if (*errors >= drive_params[current_drive].max_errors.reporting) {
+ } else if (floppy_errors >= drive_params[current_drive].max_errors.reporting) {
print_errors();
}
if (reply_buffer[ST2] & ST2_WC || reply_buffer[ST2] & ST2_BC)
@@ -2095,7 +2094,7 @@ static void bad_flp_intr(void)
if (!next_valid_format(current_drive))
return;
}
- err_count = ++(*errors);
+ err_count = ++floppy_errors;
INFBOUND(write_errors[current_drive].badness, err_count);
if (err_count > drive_params[current_drive].max_errors.abort)
cont->done(0);
@@ -2241,9 +2240,8 @@ static int do_format(int drive, struct format_descr *tmp_format_req)
return -EINVAL;
}
format_req = *tmp_format_req;
- format_errors = 0;
cont = &format_cont;
- errors = &format_errors;
+ floppy_errors = 0;
ret = wait_til_done(redo_format, true);
if (ret == -EINTR)
return -EINTR;
@@ -2759,10 +2757,11 @@ static int set_next_request(void)
current_req = list_first_entry_or_null(&floppy_reqs, struct request,
queuelist);
if (current_req) {
- current_req->error_count = 0;
+ floppy_errors = 0;
list_del_init(&current_req->queuelist);
+ return 1;
}
- return current_req != NULL;
+ return 0;
}
/* Starts or continues processing request. Will automatically unlock the
@@ -2821,7 +2820,6 @@ do_request:
_floppy = floppy_type + drive_params[current_drive].autodetect[drive_state[current_drive].probed_format];
} else
probing = 0;
- errors = &(current_req->error_count);
tmp = make_raw_rw_request();
if (tmp < 2) {
request_done(tmp);
@@ -2982,6 +2980,8 @@ static const char *drive_name(int type, int drive)
return "(null)";
}
+#ifdef CONFIG_BLK_DEV_FD_RAWCMD
+
/* raw commands */
static void raw_cmd_done(int flag)
{
@@ -3181,6 +3181,35 @@ static int raw_cmd_ioctl(int cmd, void __user *param)
return ret;
}
+static int floppy_raw_cmd_ioctl(int type, int drive, int cmd,
+ void __user *param)
+{
+ int ret;
+
+ pr_warn_once("Note: FDRAWCMD is deprecated and will be removed from the kernel in the near future.\n");
+
+ if (type)
+ return -EINVAL;
+ if (lock_fdc(drive))
+ return -EINTR;
+ set_floppy(drive);
+ ret = raw_cmd_ioctl(cmd, param);
+ if (ret == -EINTR)
+ return -EINTR;
+ process_fd_request();
+ return ret;
+}
+
+#else /* CONFIG_BLK_DEV_FD_RAWCMD */
+
+static int floppy_raw_cmd_ioctl(int type, int drive, int cmd,
+ void __user *param)
+{
+ return -EOPNOTSUPP;
+}
+
+#endif
+
static int invalidate_drive(struct block_device *bdev)
{
/* invalidate the buffer track to force a reread */
@@ -3369,7 +3398,6 @@ static int fd_locked_ioctl(struct block_device *bdev, fmode_t mode, unsigned int
{
int drive = (long)bdev->bd_disk->private_data;
int type = ITYPE(drive_state[drive].fd_device);
- int i;
int ret;
int size;
union inparam {
@@ -3520,16 +3548,7 @@ static int fd_locked_ioctl(struct block_device *bdev, fmode_t mode, unsigned int
outparam = &write_errors[drive];
break;
case FDRAWCMD:
- if (type)
- return -EINVAL;
- if (lock_fdc(drive))
- return -EINTR;
- set_floppy(drive);
- i = raw_cmd_ioctl(cmd, (void __user *)param);
- if (i == -EINTR)
- return -EINTR;
- process_fd_request();
- return i;
+ return floppy_raw_cmd_ioctl(type, drive, cmd, (void __user *)param);
case FDTWADDLE:
if (lock_fdc(drive))
return -EINTR;
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index a58595f5ee2c..e2cb51810e89 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -1,54 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
- * linux/drivers/block/loop.c
- *
- * Written by Theodore Ts'o, 3/29/93
- *
- * Copyright 1993 by Theodore Ts'o. Redistribution of this file is
- * permitted under the GNU General Public License.
- *
- * DES encryption plus some minor changes by Werner Almesberger, 30-MAY-1993
- * more DES encryption plus IDEA encryption by Nicholas J. Leon, June 20, 1996
- *
- * Modularized and updated for 1.1.16 kernel - Mitch Dsouza 28th May 1994
- * Adapted for 1.3.59 kernel - Andries Brouwer, 1 Feb 1996
- *
- * Fixed do_loop_request() re-entrancy - Vincent.Renardias@waw.com Mar 20, 1997
- *
- * Added devfs support - Richard Gooch <rgooch@atnf.csiro.au> 16-Jan-1998
- *
- * Handle sparse backing files correctly - Kenn Humborg, Jun 28, 1998
- *
- * Loadable modules and other fixes by AK, 1998
- *
- * Make real block number available to downstream transfer functions, enables
- * CBC (and relatives) mode encryption requiring unique IVs per data block.
- * Reed H. Petty, rhp@draper.net
- *
- * Maximum number of loop devices now dynamic via max_loop module parameter.
- * Russell Kroll <rkroll@exploits.org> 19990701
- *
- * Maximum number of loop devices when compiled-in now selectable by passing
- * max_loop=<1-255> to the kernel on boot.
- * Erik I. Bolsø, <eriki@himolde.no>, Oct 31, 1999
- *
- * Completely rewrite request handling to be make_request_fn style and
- * non blocking, pushing work to a helper thread. Lots of fixes from
- * Al Viro too.
- * Jens Axboe <axboe@suse.de>, Nov 2000
- *
- * Support up to 256 loop devices
- * Heinz Mauelshagen <mge@sistina.com>, Feb 2002
- *
- * Support for falling back on the write file operation when the address space
- * operations write_begin is not available on the backing filesystem.
- * Anton Altaparmakov, 16 Feb 2005
- *
- * Still To Fix:
- * - Advisory locking is ignored here.
- * - Should use an own CAP_* category instead of CAP_SYS_ADMIN
- *
+ * Copyright 1993 by Theodore Ts'o.
*/
-
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/sched.h>
@@ -59,7 +12,6 @@
#include <linux/errno.h>
#include <linux/major.h>
#include <linux/wait.h>
-#include <linux/blkdev.h>
#include <linux/blkpg.h>
#include <linux/init.h>
#include <linux/swap.h>
@@ -80,10 +32,62 @@
#include <linux/blk-cgroup.h>
#include <linux/sched/mm.h>
#include <linux/statfs.h>
+#include <linux/uaccess.h>
+#include <linux/blk-mq.h>
+#include <linux/spinlock.h>
+#include <uapi/linux/loop.h>
+
+/* Possible states of device */
+enum {
+ Lo_unbound,
+ Lo_bound,
+ Lo_rundown,
+ Lo_deleting,
+};
-#include "loop.h"
+struct loop_func_table;
+
+struct loop_device {
+ int lo_number;
+ loff_t lo_offset;
+ loff_t lo_sizelimit;
+ int lo_flags;
+ char lo_file_name[LO_NAME_SIZE];
+
+ struct file * lo_backing_file;
+ struct block_device *lo_device;
+
+ gfp_t old_gfp_mask;
+
+ spinlock_t lo_lock;
+ int lo_state;
+ spinlock_t lo_work_lock;
+ struct workqueue_struct *workqueue;
+ struct work_struct rootcg_work;
+ struct list_head rootcg_cmd_list;
+ struct list_head idle_worker_list;
+ struct rb_root worker_tree;
+ struct timer_list timer;
+ bool use_dio;
+ bool sysfs_inited;
+
+ struct request_queue *lo_queue;
+ struct blk_mq_tag_set tag_set;
+ struct gendisk *lo_disk;
+ struct mutex lo_mutex;
+ bool idr_visible;
+};
-#include <linux/uaccess.h>
+struct loop_cmd {
+ struct list_head list_entry;
+ bool use_aio; /* use AIO interface to handle I/O */
+ atomic_t ref; /* only for aio */
+ long ret;
+ struct kiocb iocb;
+ struct bio_vec *bvec;
+ struct cgroup_subsys_state *blkcg_css;
+ struct cgroup_subsys_state *memcg_css;
+};
#define LOOP_IDLE_WORKER_TIMEOUT (60 * HZ)
#define LOOP_DEFAULT_HW_Q_DEPTH (128)
@@ -314,15 +318,12 @@ static int lo_fallocate(struct loop_device *lo, struct request *rq, loff_t pos,
mode |= FALLOC_FL_KEEP_SIZE;
- if (!blk_queue_discard(lo->lo_queue)) {
- ret = -EOPNOTSUPP;
- goto out;
- }
+ if (!bdev_max_discard_sectors(lo->lo_device))
+ return -EOPNOTSUPP;
ret = file->f_op->fallocate(file, mode, pos, blk_rq_bytes(rq));
if (unlikely(ret && ret != -EINVAL && ret != -EOPNOTSUPP))
- ret = -EIO;
- out:
+ return -EIO;
return ret;
}
@@ -572,6 +573,10 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
if (!file)
return -EBADF;
+
+ /* suppress uevents while reconfiguring the device */
+ dev_set_uevent_suppress(disk_to_dev(lo->lo_disk), 1);
+
is_loop = is_loop_device(file);
error = loop_global_lock_killable(lo, is_loop);
if (error)
@@ -626,13 +631,18 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
fput(old_file);
if (partscan)
loop_reread_partitions(lo);
- return 0;
+
+ error = 0;
+done:
+ /* enable and uncork uevent now that we are done */
+ dev_set_uevent_suppress(disk_to_dev(lo->lo_disk), 0);
+ return error;
out_err:
loop_global_unlock(lo, is_loop);
out_putf:
fput(file);
- return error;
+ goto done;
}
/* loop sysfs attributes */
@@ -762,7 +772,7 @@ static void loop_config_discard(struct loop_device *lo)
struct request_queue *backingq = bdev_get_queue(I_BDEV(inode));
max_discard_sectors = backingq->limits.max_write_zeroes_sectors;
- granularity = backingq->limits.discard_granularity ?:
+ granularity = bdev_discard_granularity(I_BDEV(inode)) ?:
queue_physical_block_size(backingq);
/*
@@ -787,14 +797,11 @@ static void loop_config_discard(struct loop_device *lo)
q->limits.discard_granularity = granularity;
blk_queue_max_discard_sectors(q, max_discard_sectors);
blk_queue_max_write_zeroes_sectors(q, max_discard_sectors);
- blk_queue_flag_set(QUEUE_FLAG_DISCARD, q);
} else {
q->limits.discard_granularity = 0;
blk_queue_max_discard_sectors(q, 0);
blk_queue_max_write_zeroes_sectors(q, 0);
- blk_queue_flag_clear(QUEUE_FLAG_DISCARD, q);
}
- q->limits.discard_alignment = 0;
}
struct loop_worker {
@@ -808,8 +815,6 @@ struct loop_worker {
};
static void loop_workfn(struct work_struct *work);
-static void loop_rootcg_workfn(struct work_struct *work);
-static void loop_free_idle_workers(struct timer_list *timer);
#ifdef CONFIG_BLK_CGROUP
static inline int queue_on_root_worker(struct cgroup_subsys_state *css)
@@ -893,6 +898,39 @@ queue_work:
spin_unlock_irq(&lo->lo_work_lock);
}
+static void loop_set_timer(struct loop_device *lo)
+{
+ timer_reduce(&lo->timer, jiffies + LOOP_IDLE_WORKER_TIMEOUT);
+}
+
+static void loop_free_idle_workers(struct loop_device *lo, bool delete_all)
+{
+ struct loop_worker *pos, *worker;
+
+ spin_lock_irq(&lo->lo_work_lock);
+ list_for_each_entry_safe(worker, pos, &lo->idle_worker_list,
+ idle_list) {
+ if (!delete_all &&
+ time_is_after_jiffies(worker->last_ran_at +
+ LOOP_IDLE_WORKER_TIMEOUT))
+ break;
+ list_del(&worker->idle_list);
+ rb_erase(&worker->rb_node, &lo->worker_tree);
+ css_put(worker->blkcg_css);
+ kfree(worker);
+ }
+ if (!list_empty(&lo->idle_worker_list))
+ loop_set_timer(lo);
+ spin_unlock_irq(&lo->lo_work_lock);
+}
+
+static void loop_free_idle_workers_timer(struct timer_list *timer)
+{
+ struct loop_device *lo = container_of(timer, struct loop_device, timer);
+
+ return loop_free_idle_workers(lo, false);
+}
+
static void loop_update_rotational(struct loop_device *lo)
{
struct file *file = lo->lo_backing_file;
@@ -903,7 +941,7 @@ static void loop_update_rotational(struct loop_device *lo)
/* not all filesystems (e.g. tmpfs) have a sb->s_bdev */
if (file_bdev)
- nonrot = blk_queue_nonrot(bdev_get_queue(file_bdev));
+ nonrot = bdev_nonrot(file_bdev);
if (nonrot)
blk_queue_flag_set(QUEUE_FLAG_NONROT, q);
@@ -967,6 +1005,9 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
/* This is safe, since we have a reference from open(). */
__module_get(THIS_MODULE);
+ /* suppress uevents while reconfiguring the device */
+ dev_set_uevent_suppress(disk_to_dev(lo->lo_disk), 1);
+
/*
* If we don't hold exclusive handle for the device, upgrade to it
* here to avoid changing device under exclusive owner.
@@ -1011,24 +1052,19 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
!file->f_op->write_iter)
lo->lo_flags |= LO_FLAGS_READ_ONLY;
- lo->workqueue = alloc_workqueue("loop%d",
- WQ_UNBOUND | WQ_FREEZABLE,
- 0,
- lo->lo_number);
if (!lo->workqueue) {
- error = -ENOMEM;
- goto out_unlock;
+ lo->workqueue = alloc_workqueue("loop%d",
+ WQ_UNBOUND | WQ_FREEZABLE,
+ 0, lo->lo_number);
+ if (!lo->workqueue) {
+ error = -ENOMEM;
+ goto out_unlock;
+ }
}
disk_force_media_change(lo->lo_disk, DISK_EVENT_MEDIA_CHANGE);
set_disk_ro(lo->lo_disk, (lo->lo_flags & LO_FLAGS_READ_ONLY) != 0);
- INIT_WORK(&lo->rootcg_work, loop_rootcg_workfn);
- INIT_LIST_HEAD(&lo->rootcg_cmd_list);
- INIT_LIST_HEAD(&lo->idle_worker_list);
- lo->worker_tree = RB_ROOT;
- timer_setup(&lo->timer, loop_free_idle_workers,
- TIMER_DEFERRABLE);
lo->use_dio = lo->lo_flags & LO_FLAGS_DIRECT_IO;
lo->lo_device = bdev;
lo->lo_backing_file = file;
@@ -1073,7 +1109,12 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
loop_reread_partitions(lo);
if (!(mode & FMODE_EXCL))
bd_abort_claiming(bdev, loop_configure);
- return 0;
+
+ error = 0;
+done:
+ /* enable and uncork uevent now that we are done */
+ dev_set_uevent_suppress(disk_to_dev(lo->lo_disk), 0);
+ return error;
out_unlock:
loop_global_unlock(lo, is_loop);
@@ -1084,53 +1125,24 @@ out_putf:
fput(file);
/* This is safe: open() is still holding a reference. */
module_put(THIS_MODULE);
- return error;
+ goto done;
}
static void __loop_clr_fd(struct loop_device *lo, bool release)
{
struct file *filp;
gfp_t gfp = lo->old_gfp_mask;
- struct loop_worker *pos, *worker;
-
- /*
- * Flush loop_configure() and loop_change_fd(). It is acceptable for
- * loop_validate_file() to succeed, for actual clear operation has not
- * started yet.
- */
- mutex_lock(&loop_validate_mutex);
- mutex_unlock(&loop_validate_mutex);
- /*
- * loop_validate_file() now fails because l->lo_state != Lo_bound
- * became visible.
- */
-
- /*
- * Since this function is called upon "ioctl(LOOP_CLR_FD)" xor "close()
- * after ioctl(LOOP_CLR_FD)", it is a sign of something going wrong if
- * lo->lo_state has changed while waiting for lo->lo_mutex.
- */
- mutex_lock(&lo->lo_mutex);
- BUG_ON(lo->lo_state != Lo_rundown);
- mutex_unlock(&lo->lo_mutex);
if (test_bit(QUEUE_FLAG_WC, &lo->lo_queue->queue_flags))
blk_queue_write_cache(lo->lo_queue, false, false);
- /* freeze request queue during the transition */
- blk_mq_freeze_queue(lo->lo_queue);
-
- destroy_workqueue(lo->workqueue);
- spin_lock_irq(&lo->lo_work_lock);
- list_for_each_entry_safe(worker, pos, &lo->idle_worker_list,
- idle_list) {
- list_del(&worker->idle_list);
- rb_erase(&worker->rb_node, &lo->worker_tree);
- css_put(worker->blkcg_css);
- kfree(worker);
- }
- spin_unlock_irq(&lo->lo_work_lock);
- del_timer_sync(&lo->timer);
+ /*
+ * Freeze the request queue when unbinding on a live file descriptor and
+ * thus an open device. When called from ->release we are guaranteed
+ * that there is no I/O in progress already.
+ */
+ if (!release)
+ blk_mq_freeze_queue(lo->lo_queue);
spin_lock_irq(&lo->lo_lock);
filp = lo->lo_backing_file;
@@ -1151,7 +1163,8 @@ static void __loop_clr_fd(struct loop_device *lo, bool release)
mapping_set_gfp_mask(filp->f_mapping, gfp);
/* This is safe: open() is still holding a reference. */
module_put(THIS_MODULE);
- blk_mq_unfreeze_queue(lo->lo_queue);
+ if (!release)
+ blk_mq_unfreeze_queue(lo->lo_queue);
disk_force_media_change(lo->lo_disk, DISK_EVENT_MEDIA_CHANGE);
@@ -1202,11 +1215,20 @@ static int loop_clr_fd(struct loop_device *lo)
{
int err;
- err = mutex_lock_killable(&lo->lo_mutex);
+ /*
+ * Since lo_ioctl() is called without locks held, it is possible that
+ * loop_configure()/loop_change_fd() and loop_clr_fd() run in parallel.
+ *
+ * Therefore, use global lock when setting Lo_rundown state in order to
+ * make sure that loop_validate_file() will fail if the "struct file"
+ * which loop_configure()/loop_change_fd() found via fget() was this
+ * loop device.
+ */
+ err = loop_global_lock_killable(lo, true);
if (err)
return err;
if (lo->lo_state != Lo_bound) {
- mutex_unlock(&lo->lo_mutex);
+ loop_global_unlock(lo, true);
return -ENXIO;
}
/*
@@ -1219,13 +1241,13 @@ static int loop_clr_fd(struct loop_device *lo)
* <dev>/do something like mkfs/losetup -d <dev> causing the losetup -d
* command to fail with EBUSY.
*/
- if (atomic_read(&lo->lo_refcnt) > 1) {
+ if (disk_openers(lo->lo_disk) > 1) {
lo->lo_flags |= LO_FLAGS_AUTOCLEAR;
- mutex_unlock(&lo->lo_mutex);
+ loop_global_unlock(lo, true);
return 0;
}
lo->lo_state = Lo_rundown;
- mutex_unlock(&lo->lo_mutex);
+ loop_global_unlock(lo, true);
__loop_clr_fd(lo, false);
return 0;
@@ -1257,15 +1279,6 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
/* I/O need to be drained during transfer transition */
blk_mq_freeze_queue(lo->lo_queue);
- if (size_changed && lo->lo_device->bd_inode->i_mapping->nrpages) {
- /* If any pages were dirtied after invalidate_bdev(), try again */
- err = -EAGAIN;
- pr_warn("%s: loop%d (%s) still has dirty pages (nrpages=%lu)\n",
- __func__, lo->lo_number, lo->lo_file_name,
- lo->lo_device->bd_inode->i_mapping->nrpages);
- goto out_unfreeze;
- }
-
prev_lo_flags = lo->lo_flags;
err = loop_set_status_from_info(lo, info);
@@ -1476,21 +1489,10 @@ static int loop_set_block_size(struct loop_device *lo, unsigned long arg)
invalidate_bdev(lo->lo_device);
blk_mq_freeze_queue(lo->lo_queue);
-
- /* invalidate_bdev should have truncated all the pages */
- if (lo->lo_device->bd_inode->i_mapping->nrpages) {
- err = -EAGAIN;
- pr_warn("%s: loop%d (%s) still has dirty pages (nrpages=%lu)\n",
- __func__, lo->lo_number, lo->lo_file_name,
- lo->lo_device->bd_inode->i_mapping->nrpages);
- goto out_unfreeze;
- }
-
blk_queue_logical_block_size(lo->lo_queue, arg);
blk_queue_physical_block_size(lo->lo_queue, arg);
blk_queue_io_min(lo->lo_queue, arg);
loop_update_dio(lo);
-out_unfreeze:
blk_mq_unfreeze_queue(lo->lo_queue);
return err;
@@ -1720,33 +1722,15 @@ static int lo_compat_ioctl(struct block_device *bdev, fmode_t mode,
}
#endif
-static int lo_open(struct block_device *bdev, fmode_t mode)
-{
- struct loop_device *lo = bdev->bd_disk->private_data;
- int err;
-
- err = mutex_lock_killable(&lo->lo_mutex);
- if (err)
- return err;
- if (lo->lo_state == Lo_deleting)
- err = -ENXIO;
- else
- atomic_inc(&lo->lo_refcnt);
- mutex_unlock(&lo->lo_mutex);
- return err;
-}
-
static void lo_release(struct gendisk *disk, fmode_t mode)
{
struct loop_device *lo = disk->private_data;
- mutex_lock(&lo->lo_mutex);
- if (atomic_dec_return(&lo->lo_refcnt))
- goto out_unlock;
+ if (disk_openers(disk) > 0)
+ return;
- if (lo->lo_flags & LO_FLAGS_AUTOCLEAR) {
- if (lo->lo_state != Lo_bound)
- goto out_unlock;
+ mutex_lock(&lo->lo_mutex);
+ if (lo->lo_state == Lo_bound && (lo->lo_flags & LO_FLAGS_AUTOCLEAR)) {
lo->lo_state = Lo_rundown;
mutex_unlock(&lo->lo_mutex);
/*
@@ -1755,27 +1739,30 @@ static void lo_release(struct gendisk *disk, fmode_t mode)
*/
__loop_clr_fd(lo, true);
return;
- } else if (lo->lo_state == Lo_bound) {
- /*
- * Otherwise keep thread (if running) and config,
- * but flush possible ongoing bios in thread.
- */
- blk_mq_freeze_queue(lo->lo_queue);
- blk_mq_unfreeze_queue(lo->lo_queue);
}
-
-out_unlock:
mutex_unlock(&lo->lo_mutex);
}
+static void lo_free_disk(struct gendisk *disk)
+{
+ struct loop_device *lo = disk->private_data;
+
+ if (lo->workqueue)
+ destroy_workqueue(lo->workqueue);
+ loop_free_idle_workers(lo, true);
+ del_timer_sync(&lo->timer);
+ mutex_destroy(&lo->lo_mutex);
+ kfree(lo);
+}
+
static const struct block_device_operations lo_fops = {
.owner = THIS_MODULE,
- .open = lo_open,
.release = lo_release,
.ioctl = lo_ioctl,
#ifdef CONFIG_COMPAT
.compat_ioctl = lo_compat_ioctl,
#endif
+ .free_disk = lo_free_disk,
};
/*
@@ -1834,12 +1821,14 @@ static blk_status_t loop_queue_rq(struct blk_mq_hw_ctx *hctx,
cmd->blkcg_css = NULL;
cmd->memcg_css = NULL;
#ifdef CONFIG_BLK_CGROUP
- if (rq->bio && rq->bio->bi_blkg) {
- cmd->blkcg_css = &bio_blkcg(rq->bio)->css;
+ if (rq->bio) {
+ cmd->blkcg_css = bio_blkcg_css(rq->bio);
#ifdef CONFIG_MEMCG
- cmd->memcg_css =
- cgroup_get_e_css(cmd->blkcg_css->cgroup,
- &memory_cgrp_subsys);
+ if (cmd->blkcg_css) {
+ cmd->memcg_css =
+ cgroup_get_e_css(cmd->blkcg_css->cgroup,
+ &memory_cgrp_subsys);
+ }
#endif
}
#endif
@@ -1888,11 +1877,6 @@ static void loop_handle_cmd(struct loop_cmd *cmd)
}
}
-static void loop_set_timer(struct loop_device *lo)
-{
- timer_reduce(&lo->timer, jiffies + LOOP_IDLE_WORKER_TIMEOUT);
-}
-
static void loop_process_work(struct loop_worker *worker,
struct list_head *cmd_list, struct loop_device *lo)
{
@@ -1941,27 +1925,6 @@ static void loop_rootcg_workfn(struct work_struct *work)
loop_process_work(NULL, &lo->rootcg_cmd_list, lo);
}
-static void loop_free_idle_workers(struct timer_list *timer)
-{
- struct loop_device *lo = container_of(timer, struct loop_device, timer);
- struct loop_worker *pos, *worker;
-
- spin_lock_irq(&lo->lo_work_lock);
- list_for_each_entry_safe(worker, pos, &lo->idle_worker_list,
- idle_list) {
- if (time_is_after_jiffies(worker->last_ran_at +
- LOOP_IDLE_WORKER_TIMEOUT))
- break;
- list_del(&worker->idle_list);
- rb_erase(&worker->rb_node, &lo->worker_tree);
- css_put(worker->blkcg_css);
- kfree(worker);
- }
- if (!list_empty(&lo->idle_worker_list))
- loop_set_timer(lo);
- spin_unlock_irq(&lo->lo_work_lock);
-}
-
static const struct blk_mq_ops loop_mq_ops = {
.queue_rq = loop_queue_rq,
.complete = lo_complete_rq,
@@ -1977,6 +1940,9 @@ static int loop_add(int i)
lo = kzalloc(sizeof(*lo), GFP_KERNEL);
if (!lo)
goto out;
+ lo->worker_tree = RB_ROOT;
+ INIT_LIST_HEAD(&lo->idle_worker_list);
+ timer_setup(&lo->timer, loop_free_idle_workers_timer, TIMER_DEFERRABLE);
lo->lo_state = Lo_unbound;
err = mutex_lock_killable(&loop_ctl_mutex);
@@ -2046,11 +2012,12 @@ static int loop_add(int i)
*/
if (!part_shift)
disk->flags |= GENHD_FL_NO_PART;
- atomic_set(&lo->lo_refcnt, 0);
mutex_init(&lo->lo_mutex);
lo->lo_number = i;
spin_lock_init(&lo->lo_lock);
spin_lock_init(&lo->lo_work_lock);
+ INIT_WORK(&lo->rootcg_work, loop_rootcg_workfn);
+ INIT_LIST_HEAD(&lo->rootcg_cmd_list);
disk->major = LOOP_MAJOR;
disk->first_minor = i << part_shift;
disk->minors = 1 << part_shift;
@@ -2090,15 +2057,14 @@ static void loop_remove(struct loop_device *lo)
{
/* Make this loop device unreachable from pathname. */
del_gendisk(lo->lo_disk);
- blk_cleanup_disk(lo->lo_disk);
+ blk_cleanup_queue(lo->lo_disk->queue);
blk_mq_free_tag_set(&lo->tag_set);
mutex_lock(&loop_ctl_mutex);
idr_remove(&loop_index_idr, lo->lo_number);
mutex_unlock(&loop_ctl_mutex);
- /* There is no route which can find this loop device. */
- mutex_destroy(&lo->lo_mutex);
- kfree(lo);
+
+ put_disk(lo->lo_disk);
}
static void loop_probe(dev_t dev)
@@ -2137,13 +2103,12 @@ static int loop_control_remove(int idx)
ret = mutex_lock_killable(&lo->lo_mutex);
if (ret)
goto mark_visible;
- if (lo->lo_state != Lo_unbound ||
- atomic_read(&lo->lo_refcnt) > 0) {
+ if (lo->lo_state != Lo_unbound || disk_openers(lo->lo_disk) > 0) {
mutex_unlock(&lo->lo_mutex);
ret = -EBUSY;
goto mark_visible;
}
- /* Mark this loop device no longer open()-able. */
+ /* Mark this loop device as no more bound, but not quite unbound yet */
lo->lo_state = Lo_deleting;
mutex_unlock(&lo->lo_mutex);
diff --git a/drivers/block/loop.h b/drivers/block/loop.h
deleted file mode 100644
index 082d4b6bfc6a..000000000000
--- a/drivers/block/loop.h
+++ /dev/null
@@ -1,72 +0,0 @@
-/*
- * loop.h
- *
- * Written by Theodore Ts'o, 3/29/93.
- *
- * Copyright 1993 by Theodore Ts'o. Redistribution of this file is
- * permitted under the GNU General Public License.
- */
-#ifndef _LINUX_LOOP_H
-#define _LINUX_LOOP_H
-
-#include <linux/bio.h>
-#include <linux/blkdev.h>
-#include <linux/blk-mq.h>
-#include <linux/spinlock.h>
-#include <linux/mutex.h>
-#include <uapi/linux/loop.h>
-
-/* Possible states of device */
-enum {
- Lo_unbound,
- Lo_bound,
- Lo_rundown,
- Lo_deleting,
-};
-
-struct loop_func_table;
-
-struct loop_device {
- int lo_number;
- atomic_t lo_refcnt;
- loff_t lo_offset;
- loff_t lo_sizelimit;
- int lo_flags;
- char lo_file_name[LO_NAME_SIZE];
-
- struct file * lo_backing_file;
- struct block_device *lo_device;
-
- gfp_t old_gfp_mask;
-
- spinlock_t lo_lock;
- int lo_state;
- spinlock_t lo_work_lock;
- struct workqueue_struct *workqueue;
- struct work_struct rootcg_work;
- struct list_head rootcg_cmd_list;
- struct list_head idle_worker_list;
- struct rb_root worker_tree;
- struct timer_list timer;
- bool use_dio;
- bool sysfs_inited;
-
- struct request_queue *lo_queue;
- struct blk_mq_tag_set tag_set;
- struct gendisk *lo_disk;
- struct mutex lo_mutex;
- bool idr_visible;
-};
-
-struct loop_cmd {
- struct list_head list_entry;
- bool use_aio; /* use AIO interface to handle I/O */
- atomic_t ref; /* only for aio */
- long ret;
- struct kiocb iocb;
- struct bio_vec *bvec;
- struct cgroup_subsys_state *blkcg_css;
- struct cgroup_subsys_state *memcg_css;
-};
-
-#endif
diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c
index 4fbaf0b4958b..27386a572ba4 100644
--- a/drivers/block/mtip32xx/mtip32xx.c
+++ b/drivers/block/mtip32xx/mtip32xx.c
@@ -2729,7 +2729,7 @@ static int mtip_dma_alloc(struct driver_data *dd)
{
struct mtip_port *port = dd->port;
- /* Allocate dma memory for RX Fis, Identify, and Sector Bufffer */
+ /* Allocate dma memory for RX Fis, Identify, and Sector Buffer */
port->block1 =
dma_alloc_coherent(&dd->pdev->dev, BLOCK_DMA_ALLOC_SZ,
&port->block1_dma, GFP_KERNEL);
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 5a1f98494ddd..ac8b045c777c 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -333,7 +333,6 @@ static int nbd_set_size(struct nbd_device *nbd, loff_t bytesize,
if (nbd->config->flags & NBD_FLAG_SEND_TRIM) {
nbd->disk->queue->limits.discard_granularity = blksize;
- nbd->disk->queue->limits.discard_alignment = blksize;
blk_queue_max_discard_sectors(nbd->disk->queue, UINT_MAX);
}
blk_queue_logical_block_size(nbd->disk->queue, blksize);
@@ -947,11 +946,15 @@ static int wait_for_reconnect(struct nbd_device *nbd)
struct nbd_config *config = nbd->config;
if (!config->dead_conn_timeout)
return 0;
- if (test_bit(NBD_RT_DISCONNECTED, &config->runtime_flags))
+
+ if (!wait_event_timeout(config->conn_wait,
+ test_bit(NBD_RT_DISCONNECTED,
+ &config->runtime_flags) ||
+ atomic_read(&config->live_connections) > 0,
+ config->dead_conn_timeout))
return 0;
- return wait_event_timeout(config->conn_wait,
- atomic_read(&config->live_connections) > 0,
- config->dead_conn_timeout) > 0;
+
+ return !test_bit(NBD_RT_DISCONNECTED, &config->runtime_flags);
}
static int nbd_handle_cmd(struct nbd_cmd *cmd, int index)
@@ -1217,11 +1220,11 @@ static int nbd_reconnect_socket(struct nbd_device *nbd, unsigned long arg)
return -ENOSPC;
}
-static void nbd_bdev_reset(struct block_device *bdev)
+static void nbd_bdev_reset(struct nbd_device *nbd)
{
- if (bdev->bd_openers > 1)
+ if (disk_openers(nbd->disk) > 1)
return;
- set_capacity(bdev->bd_disk, 0);
+ set_capacity(nbd->disk, 0);
}
static void nbd_parse_flags(struct nbd_device *nbd)
@@ -1231,8 +1234,6 @@ static void nbd_parse_flags(struct nbd_device *nbd)
set_disk_ro(nbd->disk, true);
else
set_disk_ro(nbd->disk, false);
- if (config->flags & NBD_FLAG_SEND_TRIM)
- blk_queue_flag_set(QUEUE_FLAG_DISCARD, nbd->disk->queue);
if (config->flags & NBD_FLAG_SEND_FLUSH) {
if (config->flags & NBD_FLAG_SEND_FUA)
blk_queue_write_cache(nbd->disk->queue, true, true);
@@ -1318,9 +1319,7 @@ static void nbd_config_put(struct nbd_device *nbd)
nbd->tag_set.timeout = 0;
nbd->disk->queue->limits.discard_granularity = 0;
- nbd->disk->queue->limits.discard_alignment = 0;
- blk_queue_max_discard_sectors(nbd->disk->queue, UINT_MAX);
- blk_queue_flag_clear(QUEUE_FLAG_DISCARD, nbd->disk->queue);
+ blk_queue_max_discard_sectors(nbd->disk->queue, 0);
mutex_unlock(&nbd->config_lock);
nbd_put(nbd);
@@ -1389,7 +1388,7 @@ static int nbd_start_device(struct nbd_device *nbd)
return nbd_set_size(nbd, config->bytesize, nbd_blksize(config));
}
-static int nbd_start_device_ioctl(struct nbd_device *nbd, struct block_device *bdev)
+static int nbd_start_device_ioctl(struct nbd_device *nbd)
{
struct nbd_config *config = nbd->config;
int ret;
@@ -1408,7 +1407,7 @@ static int nbd_start_device_ioctl(struct nbd_device *nbd, struct block_device *b
flush_workqueue(nbd->recv_workq);
mutex_lock(&nbd->config_lock);
- nbd_bdev_reset(bdev);
+ nbd_bdev_reset(nbd);
/* user requested, ignore socket errors */
if (test_bit(NBD_RT_DISCONNECT_REQUESTED, &config->runtime_flags))
ret = 0;
@@ -1422,7 +1421,7 @@ static void nbd_clear_sock_ioctl(struct nbd_device *nbd,
{
sock_shutdown(nbd);
__invalidate_device(bdev, true);
- nbd_bdev_reset(bdev);
+ nbd_bdev_reset(nbd);
if (test_and_clear_bit(NBD_RT_HAS_CONFIG_REF,
&nbd->config->runtime_flags))
nbd_config_put(nbd);
@@ -1468,7 +1467,7 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
config->flags = arg;
return 0;
case NBD_DO_IT:
- return nbd_start_device_ioctl(nbd, bdev);
+ return nbd_start_device_ioctl(nbd);
case NBD_CLEAR_QUE:
/*
* This is for compatibility only. The queue is always cleared
@@ -1579,7 +1578,7 @@ static void nbd_release(struct gendisk *disk, fmode_t mode)
struct nbd_device *nbd = disk->private_data;
if (test_bit(NBD_RT_DISCONNECT_ON_CLOSE, &nbd->config->runtime_flags) &&
- disk->part0->bd_openers == 0)
+ disk_openers(disk) == 0)
nbd_disconnect_and_put(nbd);
nbd_config_put(nbd);
@@ -1784,7 +1783,6 @@ static struct nbd_device *nbd_dev_add(int index, unsigned int refs)
blk_queue_flag_set(QUEUE_FLAG_NONROT, disk->queue);
blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, disk->queue);
disk->queue->limits.discard_granularity = 0;
- disk->queue->limits.discard_alignment = 0;
blk_queue_max_discard_sectors(disk->queue, 0);
blk_queue_max_segment_size(disk->queue, UINT_MAX);
blk_queue_max_segments(disk->queue, USHRT_MAX);
@@ -2082,6 +2080,7 @@ static void nbd_disconnect_and_put(struct nbd_device *nbd)
mutex_lock(&nbd->config_lock);
nbd_disconnect(nbd);
sock_shutdown(nbd);
+ wake_up(&nbd->config->conn_wait);
/*
* Make sure recv thread has finished, we can safely call nbd_clear_que()
* to cancel the inflight I/Os.
diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c
index c441a4972064..539cfeac263d 100644
--- a/drivers/block/null_blk/main.c
+++ b/drivers/block/null_blk/main.c
@@ -11,6 +11,9 @@
#include <linux/init.h>
#include "null_blk.h"
+#undef pr_fmt
+#define pr_fmt(fmt) "null_blk: " fmt
+
#define FREE_BATCH 16
#define TICKS_PER_SEC 50ULL
@@ -232,6 +235,7 @@ static struct nullb_device *null_alloc_dev(void);
static void null_free_dev(struct nullb_device *dev);
static void null_del_dev(struct nullb *nullb);
static int null_add_dev(struct nullb_device *dev);
+static struct nullb *null_find_dev_by_name(const char *name);
static void null_free_device_storage(struct nullb_device *dev, bool is_cache);
static inline struct nullb_device *to_nullb_device(struct config_item *item)
@@ -560,6 +564,9 @@ config_item *nullb_group_make_item(struct config_group *group, const char *name)
{
struct nullb_device *dev;
+ if (null_find_dev_by_name(name))
+ return ERR_PTR(-EEXIST);
+
dev = null_alloc_dev();
if (!dev)
return ERR_PTR(-ENOMEM);
@@ -1765,9 +1772,7 @@ static void null_config_discard(struct nullb *nullb)
}
nullb->q->limits.discard_granularity = nullb->dev->blocksize;
- nullb->q->limits.discard_alignment = nullb->dev->blocksize;
blk_queue_max_discard_sectors(nullb->q, UINT_MAX >> 9);
- blk_queue_flag_set(QUEUE_FLAG_DISCARD, nullb->q);
}
static const struct block_device_operations null_bio_ops = {
@@ -2061,7 +2066,13 @@ static int null_add_dev(struct nullb_device *dev)
null_config_discard(nullb);
- sprintf(nullb->disk_name, "nullb%d", nullb->index);
+ if (config_item_name(&dev->item)) {
+ /* Use configfs dir name as the device name */
+ snprintf(nullb->disk_name, sizeof(nullb->disk_name),
+ "%s", config_item_name(&dev->item));
+ } else {
+ sprintf(nullb->disk_name, "nullb%d", nullb->index);
+ }
rv = null_gendisk_register(nullb);
if (rv)
@@ -2071,6 +2082,8 @@ static int null_add_dev(struct nullb_device *dev)
list_add_tail(&nullb->list, &nullb_list);
mutex_unlock(&lock);
+ pr_info("disk %s created\n", nullb->disk_name);
+
return 0;
out_cleanup_zone:
null_free_zoned_dev(dev);
@@ -2088,12 +2101,53 @@ out:
return rv;
}
+static struct nullb *null_find_dev_by_name(const char *name)
+{
+ struct nullb *nullb = NULL, *nb;
+
+ mutex_lock(&lock);
+ list_for_each_entry(nb, &nullb_list, list) {
+ if (strcmp(nb->disk_name, name) == 0) {
+ nullb = nb;
+ break;
+ }
+ }
+ mutex_unlock(&lock);
+
+ return nullb;
+}
+
+static int null_create_dev(void)
+{
+ struct nullb_device *dev;
+ int ret;
+
+ dev = null_alloc_dev();
+ if (!dev)
+ return -ENOMEM;
+
+ ret = null_add_dev(dev);
+ if (ret) {
+ null_free_dev(dev);
+ return ret;
+ }
+
+ return 0;
+}
+
+static void null_destroy_dev(struct nullb *nullb)
+{
+ struct nullb_device *dev = nullb->dev;
+
+ null_del_dev(nullb);
+ null_free_dev(dev);
+}
+
static int __init null_init(void)
{
int ret = 0;
unsigned int i;
struct nullb *nullb;
- struct nullb_device *dev;
if (g_bs > PAGE_SIZE) {
pr_warn("invalid block size\n");
@@ -2113,19 +2167,21 @@ static int __init null_init(void)
}
if (g_queue_mode == NULL_Q_RQ) {
- pr_err("legacy IO path no longer available\n");
+ pr_err("legacy IO path is no longer available\n");
return -EINVAL;
}
+
if (g_queue_mode == NULL_Q_MQ && g_use_per_node_hctx) {
if (g_submit_queues != nr_online_nodes) {
pr_warn("submit_queues param is set to %u.\n",
- nr_online_nodes);
+ nr_online_nodes);
g_submit_queues = nr_online_nodes;
}
- } else if (g_submit_queues > nr_cpu_ids)
+ } else if (g_submit_queues > nr_cpu_ids) {
g_submit_queues = nr_cpu_ids;
- else if (g_submit_queues <= 0)
+ } else if (g_submit_queues <= 0) {
g_submit_queues = 1;
+ }
if (g_queue_mode == NULL_Q_MQ && shared_tags) {
ret = null_init_tag_set(NULL, &tag_set);
@@ -2149,16 +2205,9 @@ static int __init null_init(void)
}
for (i = 0; i < nr_devices; i++) {
- dev = null_alloc_dev();
- if (!dev) {
- ret = -ENOMEM;
- goto err_dev;
- }
- ret = null_add_dev(dev);
- if (ret) {
- null_free_dev(dev);
+ ret = null_create_dev();
+ if (ret)
goto err_dev;
- }
}
pr_info("module loaded\n");
@@ -2167,9 +2216,7 @@ static int __init null_init(void)
err_dev:
while (!list_empty(&nullb_list)) {
nullb = list_entry(nullb_list.next, struct nullb, list);
- dev = nullb->dev;
- null_del_dev(nullb);
- null_free_dev(dev);
+ null_destroy_dev(nullb);
}
unregister_blkdev(null_major, "nullb");
err_conf:
@@ -2190,12 +2237,8 @@ static void __exit null_exit(void)
mutex_lock(&lock);
while (!list_empty(&nullb_list)) {
- struct nullb_device *dev;
-
nullb = list_entry(nullb_list.next, struct nullb, list);
- dev = nullb->dev;
- null_del_dev(nullb);
- null_free_dev(dev);
+ null_destroy_dev(nullb);
}
mutex_unlock(&lock);
diff --git a/drivers/block/null_blk/null_blk.h b/drivers/block/null_blk/null_blk.h
index 78eb56b0ca55..4525a65e1b23 100644
--- a/drivers/block/null_blk/null_blk.h
+++ b/drivers/block/null_blk/null_blk.h
@@ -16,13 +16,15 @@
#include <linux/mutex.h>
struct nullb_cmd {
- struct request *rq;
- struct bio *bio;
+ union {
+ struct request *rq;
+ struct bio *bio;
+ };
unsigned int tag;
blk_status_t error;
+ bool fake_timeout;
struct nullb_queue *nq;
struct hrtimer timer;
- bool fake_timeout;
};
struct nullb_queue {
diff --git a/drivers/block/null_blk/zoned.c b/drivers/block/null_blk/zoned.c
index dae54dd1aeac..ed158ea4fdd1 100644
--- a/drivers/block/null_blk/zoned.c
+++ b/drivers/block/null_blk/zoned.c
@@ -6,6 +6,9 @@
#define CREATE_TRACE_POINTS
#include "trace.h"
+#undef pr_fmt
+#define pr_fmt(fmt) "null_blk: " fmt
+
static inline sector_t mb_to_sects(unsigned long mb)
{
return ((sector_t)mb * SZ_1M) >> SECTOR_SHIFT;
@@ -75,8 +78,8 @@ int null_init_zoned_dev(struct nullb_device *dev, struct request_queue *q)
dev->zone_capacity = dev->zone_size;
if (dev->zone_capacity > dev->zone_size) {
- pr_err("null_blk: zone capacity (%lu MB) larger than zone size (%lu MB)\n",
- dev->zone_capacity, dev->zone_size);
+ pr_err("zone capacity (%lu MB) larger than zone size (%lu MB)\n",
+ dev->zone_capacity, dev->zone_size);
return -EINVAL;
}
diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
index 86c8794ede41..789093375344 100644
--- a/drivers/block/pktcdvd.c
+++ b/drivers/block/pktcdvd.c
@@ -12,7 +12,7 @@
* Theory of operation:
*
* At the lowest level, there is the standard driver for the CD/DVD device,
- * typically ide-cd.c or sr.c. This driver can handle read and write requests,
+ * such as drivers/scsi/sr.c. This driver can handle read and write requests,
* but it doesn't know anything about the special restrictions that apply to
* packet writing. One restriction is that write requests must be aligned to
* packet boundaries on the physical media, and the size of a write request
@@ -522,7 +522,7 @@ static struct packet_data *pkt_alloc_packet_data(int frames)
goto no_pkt;
pkt->frames = frames;
- pkt->w_bio = bio_kmalloc(GFP_KERNEL, frames);
+ pkt->w_bio = bio_kmalloc(frames, GFP_KERNEL);
if (!pkt->w_bio)
goto no_bio;
@@ -536,27 +536,21 @@ static struct packet_data *pkt_alloc_packet_data(int frames)
bio_list_init(&pkt->orig_bios);
for (i = 0; i < frames; i++) {
- struct bio *bio = bio_kmalloc(GFP_KERNEL, 1);
- if (!bio)
+ pkt->r_bios[i] = bio_kmalloc(1, GFP_KERNEL);
+ if (!pkt->r_bios[i])
goto no_rd_bio;
-
- pkt->r_bios[i] = bio;
}
return pkt;
no_rd_bio:
- for (i = 0; i < frames; i++) {
- struct bio *bio = pkt->r_bios[i];
- if (bio)
- bio_put(bio);
- }
-
+ for (i = 0; i < frames; i++)
+ kfree(pkt->r_bios[i]);
no_page:
for (i = 0; i < frames / FRAMES_PER_PAGE; i++)
if (pkt->pages[i])
__free_page(pkt->pages[i]);
- bio_put(pkt->w_bio);
+ kfree(pkt->w_bio);
no_bio:
kfree(pkt);
no_pkt:
@@ -570,14 +564,11 @@ static void pkt_free_packet_data(struct packet_data *pkt)
{
int i;
- for (i = 0; i < pkt->frames; i++) {
- struct bio *bio = pkt->r_bios[i];
- if (bio)
- bio_put(bio);
- }
+ for (i = 0; i < pkt->frames; i++)
+ kfree(pkt->r_bios[i]);
for (i = 0; i < pkt->frames / FRAMES_PER_PAGE; i++)
__free_page(pkt->pages[i]);
- bio_put(pkt->w_bio);
+ kfree(pkt->w_bio);
kfree(pkt);
}
@@ -951,6 +942,7 @@ static void pkt_end_io_read(struct bio *bio)
if (bio->bi_status)
atomic_inc(&pkt->io_errors);
+ bio_uninit(bio);
if (atomic_dec_and_test(&pkt->io_wait)) {
atomic_inc(&pkt->run_sm);
wake_up(&pd->wqueue);
@@ -968,6 +960,7 @@ static void pkt_end_io_packet_write(struct bio *bio)
pd->stats.pkt_ended++;
+ bio_uninit(bio);
pkt_bio_finished(pd);
atomic_dec(&pkt->io_wait);
atomic_inc(&pkt->run_sm);
@@ -1022,7 +1015,7 @@ static void pkt_gather_data(struct pktcdvd_device *pd, struct packet_data *pkt)
continue;
bio = pkt->r_bios[f];
- bio_reset(bio, pd->bdev, REQ_OP_READ);
+ bio_init(bio, pd->bdev, bio->bi_inline_vecs, 1, REQ_OP_READ);
bio->bi_iter.bi_sector = pkt->sector + f * (CD_FRAMESIZE >> 9);
bio->bi_end_io = pkt_end_io_read;
bio->bi_private = pkt;
@@ -1235,7 +1228,8 @@ static void pkt_start_write(struct pktcdvd_device *pd, struct packet_data *pkt)
{
int f;
- bio_reset(pkt->w_bio, pd->bdev, REQ_OP_WRITE);
+ bio_init(pkt->w_bio, pd->bdev, pkt->w_bio->bi_inline_vecs, pkt->frames,
+ REQ_OP_WRITE);
pkt->w_bio->bi_iter.bi_sector = pkt->sector;
pkt->w_bio->bi_end_io = pkt_end_io_packet_write;
pkt->w_bio->bi_private = pkt;
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index b844432bad20..2b21f717cce1 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -4942,7 +4942,6 @@ static int rbd_init_disk(struct rbd_device *rbd_dev)
blk_queue_io_opt(q, rbd_dev->opts->alloc_size);
if (rbd_dev->opts->trim) {
- blk_queue_flag_set(QUEUE_FLAG_DISCARD, q);
q->limits.discard_granularity = rbd_dev->opts->alloc_size;
blk_queue_max_discard_sectors(q, objset_bytes >> SECTOR_SHIFT);
blk_queue_max_write_zeroes_sectors(q, objset_bytes >> SECTOR_SHIFT);
diff --git a/drivers/block/rnbd/rnbd-clt.c b/drivers/block/rnbd/rnbd-clt.c
index b66e8840b94b..409c76b81aed 100644
--- a/drivers/block/rnbd/rnbd-clt.c
+++ b/drivers/block/rnbd/rnbd-clt.c
@@ -25,6 +25,7 @@ static int rnbd_client_major;
static DEFINE_IDA(index_ida);
static DEFINE_MUTEX(sess_lock);
static LIST_HEAD(sess_list);
+static struct workqueue_struct *rnbd_clt_wq;
/*
* Maximum number of partitions an instance can have.
@@ -1364,11 +1365,9 @@ static void setup_request_queue(struct rnbd_clt_dev *dev)
blk_queue_max_discard_sectors(dev->queue, dev->max_discard_sectors);
dev->queue->limits.discard_granularity = dev->discard_granularity;
dev->queue->limits.discard_alignment = dev->discard_alignment;
- if (dev->max_discard_sectors)
- blk_queue_flag_set(QUEUE_FLAG_DISCARD, dev->queue);
if (dev->secure_discard)
- blk_queue_flag_set(QUEUE_FLAG_SECERASE, dev->queue);
-
+ blk_queue_max_secure_erase_sectors(dev->queue,
+ dev->max_discard_sectors);
blk_queue_flag_set(QUEUE_FLAG_SAME_COMP, dev->queue);
blk_queue_flag_set(QUEUE_FLAG_SAME_FORCE, dev->queue);
blk_queue_max_segments(dev->queue, dev->max_segments);
@@ -1761,12 +1760,12 @@ static void rnbd_destroy_sessions(void)
* procedure takes minutes.
*/
INIT_WORK(&dev->unmap_on_rmmod_work, unmap_device_work);
- queue_work(system_long_wq, &dev->unmap_on_rmmod_work);
+ queue_work(rnbd_clt_wq, &dev->unmap_on_rmmod_work);
}
rnbd_clt_put_sess(sess);
}
/* Wait for all scheduled unmap works */
- flush_workqueue(system_long_wq);
+ flush_workqueue(rnbd_clt_wq);
WARN_ON(!list_empty(&sess_list));
}
@@ -1791,6 +1790,14 @@ static int __init rnbd_client_init(void)
pr_err("Failed to load module, creating sysfs device files failed, err: %d\n",
err);
unregister_blkdev(rnbd_client_major, "rnbd");
+ return err;
+ }
+ rnbd_clt_wq = alloc_workqueue("rnbd_clt_wq", 0, 0);
+ if (!rnbd_clt_wq) {
+ pr_err("Failed to load module, alloc_workqueue failed.\n");
+ rnbd_clt_destroy_sysfs_files();
+ unregister_blkdev(rnbd_client_major, "rnbd");
+ err = -ENOMEM;
}
return err;
@@ -1801,6 +1808,7 @@ static void __exit rnbd_client_exit(void)
rnbd_destroy_sessions();
unregister_blkdev(rnbd_client_major, "rnbd");
ida_destroy(&index_ida);
+ destroy_workqueue(rnbd_clt_wq);
}
module_init(rnbd_client_init);
diff --git a/drivers/block/rnbd/rnbd-srv-dev.h b/drivers/block/rnbd/rnbd-srv-dev.h
index 2c3df02b5e8e..4309e5252469 100644
--- a/drivers/block/rnbd/rnbd-srv-dev.h
+++ b/drivers/block/rnbd/rnbd-srv-dev.h
@@ -44,16 +44,12 @@ static inline int rnbd_dev_get_max_hw_sects(const struct rnbd_dev *dev)
static inline int rnbd_dev_get_secure_discard(const struct rnbd_dev *dev)
{
- return blk_queue_secure_erase(bdev_get_queue(dev->bdev));
+ return bdev_max_secure_erase_sectors(dev->bdev);
}
static inline int rnbd_dev_get_max_discard_sects(const struct rnbd_dev *dev)
{
- if (!blk_queue_discard(bdev_get_queue(dev->bdev)))
- return 0;
-
- return blk_queue_get_max_sectors(bdev_get_queue(dev->bdev),
- REQ_OP_DISCARD);
+ return bdev_max_discard_sectors(dev->bdev);
}
static inline int rnbd_dev_get_discard_granularity(const struct rnbd_dev *dev)
@@ -63,7 +59,7 @@ static inline int rnbd_dev_get_discard_granularity(const struct rnbd_dev *dev)
static inline int rnbd_dev_get_discard_alignment(const struct rnbd_dev *dev)
{
- return bdev_get_queue(dev->bdev)->limits.discard_alignment;
+ return bdev_discard_alignment(dev->bdev);
}
#endif /* RNBD_SRV_DEV_H */
diff --git a/drivers/block/rnbd/rnbd-srv.c b/drivers/block/rnbd/rnbd-srv.c
index f04df6294650..beaef43a67b9 100644
--- a/drivers/block/rnbd/rnbd-srv.c
+++ b/drivers/block/rnbd/rnbd-srv.c
@@ -533,7 +533,6 @@ static void rnbd_srv_fill_msg_open_rsp(struct rnbd_msg_open_rsp *rsp,
struct rnbd_srv_sess_dev *sess_dev)
{
struct rnbd_dev *rnbd_dev = sess_dev->rnbd_dev;
- struct request_queue *q = bdev_get_queue(rnbd_dev->bdev);
rsp->hdr.type = cpu_to_le16(RNBD_MSG_OPEN_RSP);
rsp->device_id =
@@ -558,9 +557,9 @@ static void rnbd_srv_fill_msg_open_rsp(struct rnbd_msg_open_rsp *rsp,
rsp->secure_discard =
cpu_to_le16(rnbd_dev_get_secure_discard(rnbd_dev));
rsp->cache_policy = 0;
- if (test_bit(QUEUE_FLAG_WC, &q->queue_flags))
+ if (bdev_write_cache(rnbd_dev->bdev))
rsp->cache_policy |= RNBD_WRITEBACK;
- if (blk_queue_fua(q))
+ if (bdev_fua(rnbd_dev->bdev))
rsp->cache_policy |= RNBD_FUA;
}
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index a8bcf3f664af..d624cc8eddc3 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -867,11 +867,12 @@ static int virtblk_probe(struct virtio_device *vdev)
blk_queue_io_opt(q, blk_size * opt_io_size);
if (virtio_has_feature(vdev, VIRTIO_BLK_F_DISCARD)) {
- q->limits.discard_granularity = blk_size;
-
virtio_cread(vdev, struct virtio_blk_config,
discard_sector_alignment, &v);
- q->limits.discard_alignment = v ? v << SECTOR_SHIFT : 0;
+ if (v)
+ q->limits.discard_granularity = v << SECTOR_SHIFT;
+ else
+ q->limits.discard_granularity = blk_size;
virtio_cread(vdev, struct virtio_blk_config,
max_discard_sectors, &v);
@@ -888,8 +889,6 @@ static int virtblk_probe(struct virtio_device *vdev)
v = sg_elems;
blk_queue_max_discard_segments(q,
min(v, MAX_DISCARD_SEGMENTS));
-
- blk_queue_flag_set(QUEUE_FLAG_DISCARD, q);
}
if (virtio_has_feature(vdev, VIRTIO_BLK_F_WRITE_ZEROES)) {
diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c
index de42458195bc..a97f2bf5b01b 100644
--- a/drivers/block/xen-blkback/blkback.c
+++ b/drivers/block/xen-blkback/blkback.c
@@ -970,7 +970,6 @@ static int dispatch_discard_io(struct xen_blkif_ring *ring,
int status = BLKIF_RSP_OKAY;
struct xen_blkif *blkif = ring->blkif;
struct block_device *bdev = blkif->vbd.bdev;
- unsigned long secure;
struct phys_req preq;
xen_blkif_get(blkif);
@@ -987,13 +986,15 @@ static int dispatch_discard_io(struct xen_blkif_ring *ring,
}
ring->st_ds_req++;
- secure = (blkif->vbd.discard_secure &&
- (req->u.discard.flag & BLKIF_DISCARD_SECURE)) ?
- BLKDEV_DISCARD_SECURE : 0;
+ if (blkif->vbd.discard_secure &&
+ (req->u.discard.flag & BLKIF_DISCARD_SECURE))
+ err = blkdev_issue_secure_erase(bdev,
+ req->u.discard.sector_number,
+ req->u.discard.nr_sectors, GFP_KERNEL);
+ else
+ err = blkdev_issue_discard(bdev, req->u.discard.sector_number,
+ req->u.discard.nr_sectors, GFP_KERNEL);
- err = blkdev_issue_discard(bdev, req->u.discard.sector_number,
- req->u.discard.nr_sectors,
- GFP_KERNEL, secure);
fail_response:
if (err == -EOPNOTSUPP) {
pr_debug("discard op failed, not supported\n");
diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c
index f09040435e2e..97de13b14175 100644
--- a/drivers/block/xen-blkback/xenbus.c
+++ b/drivers/block/xen-blkback/xenbus.c
@@ -484,7 +484,6 @@ static int xen_vbd_create(struct xen_blkif *blkif, blkif_vdev_t handle,
{
struct xen_vbd *vbd;
struct block_device *bdev;
- struct request_queue *q;
vbd = &blkif->vbd;
vbd->handle = handle;
@@ -516,11 +515,9 @@ static int xen_vbd_create(struct xen_blkif *blkif, blkif_vdev_t handle,
if (vbd->bdev->bd_disk->flags & GENHD_FL_REMOVABLE)
vbd->type |= VDISK_REMOVABLE;
- q = bdev_get_queue(bdev);
- if (q && test_bit(QUEUE_FLAG_WC, &q->queue_flags))
+ if (bdev_write_cache(bdev))
vbd->flush_support = true;
-
- if (q && blk_queue_secure_erase(q))
+ if (bdev_max_secure_erase_sectors(bdev))
vbd->discard_secure = true;
vbd->feature_gnt_persistent = feature_persistent;
@@ -578,22 +575,21 @@ static void xen_blkbk_discard(struct xenbus_transaction xbt, struct backend_info
int err;
int state = 0;
struct block_device *bdev = be->blkif->vbd.bdev;
- struct request_queue *q = bdev_get_queue(bdev);
if (!xenbus_read_unsigned(dev->nodename, "discard-enable", 1))
return;
- if (blk_queue_discard(q)) {
+ if (bdev_max_discard_sectors(bdev)) {
err = xenbus_printf(xbt, dev->nodename,
"discard-granularity", "%u",
- q->limits.discard_granularity);
+ bdev_discard_granularity(bdev));
if (err) {
dev_warn(&dev->dev, "writing discard-granularity (%d)", err);
return;
}
err = xenbus_printf(xbt, dev->nodename,
"discard-alignment", "%u",
- q->limits.discard_alignment);
+ bdev_discard_alignment(bdev));
if (err) {
dev_warn(&dev->dev, "writing discard-alignment (%d)", err);
return;
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 003056d4f7f5..0f3f5238f7bc 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -944,13 +944,13 @@ static void blkif_set_queue_limits(struct blkfront_info *info)
blk_queue_flag_set(QUEUE_FLAG_VIRT, rq);
if (info->feature_discard) {
- blk_queue_flag_set(QUEUE_FLAG_DISCARD, rq);
blk_queue_max_discard_sectors(rq, get_capacity(gd));
rq->limits.discard_granularity = info->discard_granularity ?:
info->physical_sector_size;
rq->limits.discard_alignment = info->discard_alignment;
if (info->feature_secdiscard)
- blk_queue_flag_set(QUEUE_FLAG_SECERASE, rq);
+ blk_queue_max_secure_erase_sectors(rq,
+ get_capacity(gd));
}
/* Hard sector size and max sectors impersonate the equiv. hardware. */
@@ -1606,8 +1606,8 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
blkif_req(req)->error = BLK_STS_NOTSUPP;
info->feature_discard = 0;
info->feature_secdiscard = 0;
- blk_queue_flag_clear(QUEUE_FLAG_DISCARD, rq);
- blk_queue_flag_clear(QUEUE_FLAG_SECERASE, rq);
+ blk_queue_max_discard_sectors(rq, 0);
+ blk_queue_max_secure_erase_sectors(rq, 0);
}
break;
case BLKIF_OP_FLUSH_DISKCACHE:
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index e9474b02012d..6853dd3c7d3a 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -1675,9 +1675,10 @@ static int zram_rw_page(struct block_device *bdev, sector_t sector,
bv.bv_len = PAGE_SIZE;
bv.bv_offset = 0;
- start_time = disk_start_io_acct(bdev->bd_disk, SECTORS_PER_PAGE, op);
+ start_time = bdev_start_io_acct(bdev->bd_disk->part0,
+ SECTORS_PER_PAGE, op, jiffies);
ret = zram_bvec_rw(zram, &bv, index, offset, op, NULL);
- disk_end_io_acct(bdev->bd_disk, op, start_time);
+ bdev_end_io_acct(bdev->bd_disk->part0, op, start_time);
out:
/*
* If I/O fails, just return error(ie, non-zero) without
@@ -1786,7 +1787,7 @@ static ssize_t reset_store(struct device *dev,
int ret;
unsigned short do_reset;
struct zram *zram;
- struct block_device *bdev;
+ struct gendisk *disk;
ret = kstrtou16(buf, 10, &do_reset);
if (ret)
@@ -1796,26 +1797,26 @@ static ssize_t reset_store(struct device *dev,
return -EINVAL;
zram = dev_to_zram(dev);
- bdev = zram->disk->part0;
+ disk = zram->disk;
- mutex_lock(&bdev->bd_disk->open_mutex);
+ mutex_lock(&disk->open_mutex);
/* Do not reset an active device or claimed device */
- if (bdev->bd_openers || zram->claim) {
- mutex_unlock(&bdev->bd_disk->open_mutex);
+ if (disk_openers(disk) || zram->claim) {
+ mutex_unlock(&disk->open_mutex);
return -EBUSY;
}
/* From now on, anyone can't open /dev/zram[0-9] */
zram->claim = true;
- mutex_unlock(&bdev->bd_disk->open_mutex);
+ mutex_unlock(&disk->open_mutex);
/* Make sure all the pending I/O are finished */
- sync_blockdev(bdev);
+ sync_blockdev(disk->part0);
zram_reset_device(zram);
- mutex_lock(&bdev->bd_disk->open_mutex);
+ mutex_lock(&disk->open_mutex);
zram->claim = false;
- mutex_unlock(&bdev->bd_disk->open_mutex);
+ mutex_unlock(&disk->open_mutex);
return len;
}
@@ -1952,7 +1953,6 @@ static int zram_add(void)
blk_queue_io_opt(zram->disk->queue, PAGE_SIZE);
zram->disk->queue->limits.discard_granularity = PAGE_SIZE;
blk_queue_max_discard_sectors(zram->disk->queue, UINT_MAX);
- blk_queue_flag_set(QUEUE_FLAG_DISCARD, zram->disk->queue);
/*
* zram_bio_discard() will clear all logical blocks if logical block
@@ -1987,19 +1987,18 @@ out_free_dev:
static int zram_remove(struct zram *zram)
{
- struct block_device *bdev = zram->disk->part0;
bool claimed;
- mutex_lock(&bdev->bd_disk->open_mutex);
- if (bdev->bd_openers) {
- mutex_unlock(&bdev->bd_disk->open_mutex);
+ mutex_lock(&zram->disk->open_mutex);
+ if (disk_openers(zram->disk)) {
+ mutex_unlock(&zram->disk->open_mutex);
return -EBUSY;
}
claimed = zram->claim;
if (!claimed)
zram->claim = true;
- mutex_unlock(&bdev->bd_disk->open_mutex);
+ mutex_unlock(&zram->disk->open_mutex);
zram_debugfs_unregister(zram);
@@ -2011,7 +2010,7 @@ static int zram_remove(struct zram *zram)
;
} else {
/* Make sure all the pending I/O are finished */
- sync_blockdev(bdev);
+ sync_blockdev(zram->disk->part0);
zram_reset_device(zram);
}
diff --git a/drivers/bus/fsl-mc/fsl-mc-msi.c b/drivers/bus/fsl-mc/fsl-mc-msi.c
index 5e0e4393ce4d..0cfe859a4ac4 100644
--- a/drivers/bus/fsl-mc/fsl-mc-msi.c
+++ b/drivers/bus/fsl-mc/fsl-mc-msi.c
@@ -224,8 +224,12 @@ int fsl_mc_msi_domain_alloc_irqs(struct device *dev, unsigned int irq_count)
if (error)
return error;
+ msi_lock_descs(dev);
if (msi_first_desc(dev, MSI_DESC_ALL))
- return -EINVAL;
+ error = -EINVAL;
+ msi_unlock_descs(dev);
+ if (error)
+ return error;
/*
* NOTE: Calling this function will trigger the invocation of the
diff --git a/drivers/bus/imx-weim.c b/drivers/bus/imx-weim.c
index 60fbd42041dd..828c66bbaa67 100644
--- a/drivers/bus/imx-weim.c
+++ b/drivers/bus/imx-weim.c
@@ -352,8 +352,7 @@ static int of_weim_notify(struct notifier_block *nb, unsigned long action,
pdev = of_find_device_by_node(rd->dn);
if (!pdev) {
- dev_err(&pdev->dev,
- "Could not find platform device for '%pOF'\n",
+ pr_err("Could not find platform device for '%pOF'\n",
rd->dn);
ret = notifier_from_errno(-EINVAL);
@@ -370,7 +369,7 @@ static int of_weim_notify(struct notifier_block *nb, unsigned long action,
return ret;
}
-struct notifier_block weim_of_notifier = {
+static struct notifier_block weim_of_notifier = {
.notifier_call = of_weim_notify,
};
#endif /* IS_ENABLED(CONFIG_OF_DYNAMIC) */
diff --git a/drivers/bus/mhi/host/pci_generic.c b/drivers/bus/mhi/host/pci_generic.c
index 9527b7d63840..541ced27d941 100644
--- a/drivers/bus/mhi/host/pci_generic.c
+++ b/drivers/bus/mhi/host/pci_generic.c
@@ -1060,6 +1060,7 @@ static int __maybe_unused mhi_pci_freeze(struct device *dev)
* the intermediate restore kernel reinitializes MHI device with new
* context.
*/
+ flush_work(&mhi_pdev->recovery_work);
if (test_and_clear_bit(MHI_PCI_DEV_STARTED, &mhi_pdev->status)) {
mhi_power_down(mhi_cntrl, true);
mhi_unprepare_after_power_down(mhi_cntrl);
@@ -1085,6 +1086,7 @@ static const struct dev_pm_ops mhi_pci_pm_ops = {
.resume = mhi_pci_resume,
.freeze = mhi_pci_freeze,
.thaw = mhi_pci_restore,
+ .poweroff = mhi_pci_freeze,
.restore = mhi_pci_restore,
#endif
};
diff --git a/drivers/bus/sunxi-rsb.c b/drivers/bus/sunxi-rsb.c
index 4566e730ef2b..60b082fe2ed0 100644
--- a/drivers/bus/sunxi-rsb.c
+++ b/drivers/bus/sunxi-rsb.c
@@ -227,6 +227,8 @@ static struct sunxi_rsb_device *sunxi_rsb_device_create(struct sunxi_rsb *rsb,
dev_dbg(&rdev->dev, "device %s registered\n", dev_name(&rdev->dev));
+ return rdev;
+
err_device_add:
put_device(&rdev->dev);
diff --git a/drivers/bus/ti-sysc.c b/drivers/bus/ti-sysc.c
index 54c0ee6dda30..7a1b1f9e4933 100644
--- a/drivers/bus/ti-sysc.c
+++ b/drivers/bus/ti-sysc.c
@@ -3232,13 +3232,27 @@ static int sysc_check_disabled_devices(struct sysc *ddata)
*/
static int sysc_check_active_timer(struct sysc *ddata)
{
+ int error;
+
if (ddata->cap->type != TI_SYSC_OMAP2_TIMER &&
ddata->cap->type != TI_SYSC_OMAP4_TIMER)
return 0;
+ /*
+ * Quirk for omap3 beagleboard revision A to B4 to use gpt12.
+ * Revision C and later are fixed with commit 23885389dbbb ("ARM:
+ * dts: Fix timer regression for beagleboard revision c"). This all
+ * can be dropped if we stop supporting old beagleboard revisions
+ * A to B4 at some point.
+ */
+ if (sysc_soc->soc == SOC_3430)
+ error = -ENXIO;
+ else
+ error = -EBUSY;
+
if ((ddata->cfg.quirks & SYSC_QUIRK_NO_RESET_ON_INIT) &&
(ddata->cfg.quirks & SYSC_QUIRK_NO_IDLE))
- return -ENXIO;
+ return error;
return 0;
}
diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c
index 2dc9da683a13..416f723a2dbb 100644
--- a/drivers/cdrom/cdrom.c
+++ b/drivers/cdrom/cdrom.c
@@ -14,15 +14,6 @@
actually talk to the hardware. Suggestions are welcome.
Patches that work are more welcome though. ;-)
- To Do List:
- ----------------------------------
-
- -- Modify sysctl/proc interface. I plan on having one directory per
- drive, with entries for outputing general drive information, and sysctl
- based tunable parameters such as whether the tray should auto-close for
- that drive. Suggestions (or patches) for this welcome!
-
-
Revision History
----------------------------------
1.00 Date Unknown -- David van Leeuwen <david@tm.tno.nl>
@@ -648,6 +639,7 @@ int register_cdrom(struct gendisk *disk, struct cdrom_device_info *cdi)
mutex_unlock(&cdrom_mutex);
return 0;
}
+EXPORT_SYMBOL(register_cdrom);
#undef ENSURE
void unregister_cdrom(struct cdrom_device_info *cdi)
@@ -663,6 +655,7 @@ void unregister_cdrom(struct cdrom_device_info *cdi)
cd_dbg(CD_REG_UNREG, "drive \"/dev/%s\" unregistered\n", cdi->name);
}
+EXPORT_SYMBOL(unregister_cdrom);
int cdrom_get_media_event(struct cdrom_device_info *cdi,
struct media_event_desc *med)
@@ -690,6 +683,7 @@ int cdrom_get_media_event(struct cdrom_device_info *cdi,
memcpy(med, &buffer[sizeof(*eh)], sizeof(*med));
return 0;
}
+EXPORT_SYMBOL(cdrom_get_media_event);
static int cdrom_get_random_writable(struct cdrom_device_info *cdi,
struct rwrt_feature_desc *rfd)
@@ -1206,6 +1200,7 @@ err:
cdi->use_count--;
return ret;
}
+EXPORT_SYMBOL(cdrom_open);
/* This code is similar to that in open_for_data. The routine is called
whenever an audio play operation is requested.
@@ -1301,6 +1296,7 @@ void cdrom_release(struct cdrom_device_info *cdi, fmode_t mode)
cdo->tray_move(cdi, 1);
}
}
+EXPORT_SYMBOL(cdrom_release);
static int cdrom_read_mech_status(struct cdrom_device_info *cdi,
struct cdrom_changer_info *buf)
@@ -1382,6 +1378,7 @@ int cdrom_number_of_slots(struct cdrom_device_info *cdi)
kfree(info);
return nslots;
}
+EXPORT_SYMBOL(cdrom_number_of_slots);
/* If SLOT < 0, unload the current slot. Otherwise, try to load SLOT. */
@@ -1581,6 +1578,7 @@ void init_cdrom_command(struct packet_command *cgc, void *buf, int len,
cgc->data_direction = type;
cgc->timeout = CDROM_DEF_TIMEOUT;
}
+EXPORT_SYMBOL(init_cdrom_command);
/* DVD handling */
@@ -1999,6 +1997,7 @@ int cdrom_mode_sense(struct cdrom_device_info *cdi,
cgc->data_direction = CGC_DATA_READ;
return cdo->generic_packet(cdi, cgc);
}
+EXPORT_SYMBOL(cdrom_mode_sense);
int cdrom_mode_select(struct cdrom_device_info *cdi,
struct packet_command *cgc)
@@ -2014,6 +2013,7 @@ int cdrom_mode_select(struct cdrom_device_info *cdi,
cgc->data_direction = CGC_DATA_WRITE;
return cdo->generic_packet(cdi, cgc);
}
+EXPORT_SYMBOL(cdrom_mode_select);
static int cdrom_read_subchannel(struct cdrom_device_info *cdi,
struct cdrom_subchnl *subchnl, int mcn)
@@ -2443,14 +2443,6 @@ static int cdrom_ioctl_select_disc(struct cdrom_device_info *cdi,
return -EINVAL;
}
- /*
- * ->select_disc is a hook to allow a driver-specific way of
- * seleting disc. However, since there is no equivalent hook for
- * cdrom_slot_status this may not actually be useful...
- */
- if (cdi->ops->select_disc)
- return cdi->ops->select_disc(cdi, arg);
-
cd_dbg(CD_CHANGER, "Using generic cdrom_select_disc()\n");
return cdrom_select_disc(cdi, arg);
}
@@ -2892,6 +2884,7 @@ use_toc:
*last_written = toc.cdte_addr.lba;
return 0;
}
+EXPORT_SYMBOL(cdrom_get_last_written);
/* return the next writable block. also for udf file system. */
static int cdrom_get_next_writable(struct cdrom_device_info *cdi,
@@ -3429,18 +3422,7 @@ int cdrom_ioctl(struct cdrom_device_info *cdi, struct block_device *bdev,
return -ENOSYS;
}
-
-EXPORT_SYMBOL(cdrom_get_last_written);
-EXPORT_SYMBOL(register_cdrom);
-EXPORT_SYMBOL(unregister_cdrom);
-EXPORT_SYMBOL(cdrom_open);
-EXPORT_SYMBOL(cdrom_release);
EXPORT_SYMBOL(cdrom_ioctl);
-EXPORT_SYMBOL(cdrom_number_of_slots);
-EXPORT_SYMBOL(cdrom_mode_select);
-EXPORT_SYMBOL(cdrom_mode_sense);
-EXPORT_SYMBOL(init_cdrom_command);
-EXPORT_SYMBOL(cdrom_get_media_event);
#ifdef CONFIG_SYSCTL
diff --git a/drivers/char/ipmi/ipmi_msghandler.c b/drivers/char/ipmi/ipmi_msghandler.c
index c59265146e9c..f1827257ef0e 100644
--- a/drivers/char/ipmi/ipmi_msghandler.c
+++ b/drivers/char/ipmi/ipmi_msghandler.c
@@ -3677,8 +3677,11 @@ static void cleanup_smi_msgs(struct ipmi_smi *intf)
void ipmi_unregister_smi(struct ipmi_smi *intf)
{
struct ipmi_smi_watcher *w;
- int intf_num = intf->intf_num, index;
+ int intf_num, index;
+ if (!intf)
+ return;
+ intf_num = intf->intf_num;
mutex_lock(&ipmi_interfaces_mutex);
intf->intf_num = -1;
intf->in_shutdown = true;
@@ -4518,6 +4521,8 @@ return_unspecified:
} else
/* The message was sent, start the timer. */
intf_start_seq_timer(intf, msg->msgid);
+ requeue = 0;
+ goto out;
} else if (((msg->rsp[0] >> 2) != ((msg->data[0] >> 2) | 1))
|| (msg->rsp[1] != msg->data[1])) {
/*
diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c
index 64dedb3ef8ec..5604a810fb3d 100644
--- a/drivers/char/ipmi/ipmi_si_intf.c
+++ b/drivers/char/ipmi/ipmi_si_intf.c
@@ -2220,10 +2220,7 @@ static void cleanup_one_si(struct smi_info *smi_info)
return;
list_del(&smi_info->link);
-
- if (smi_info->intf)
- ipmi_unregister_smi(smi_info->intf);
-
+ ipmi_unregister_smi(smi_info->intf);
kfree(smi_info);
}
diff --git a/drivers/char/random.c b/drivers/char/random.c
index 3a293f919af9..4c9adb4f3d5d 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -318,6 +318,13 @@ static void crng_reseed(bool force)
* the resultant ChaCha state to the user, along with the second
* half of the block containing 32 bytes of random data that may
* be used; random_data_len may not be greater than 32.
+ *
+ * The returned ChaCha state contains within it a copy of the old
+ * key value, at index 4, so the state should always be zeroed out
+ * immediately after using in order to maintain forward secrecy.
+ * If the state cannot be erased in a timely manner, then it is
+ * safer to set the random_data parameter to &chacha_state[4] so
+ * that this function overwrites it before returning.
*/
static void crng_fast_key_erasure(u8 key[CHACHA_KEY_SIZE],
u32 chacha_state[CHACHA_STATE_WORDS],
@@ -333,7 +340,7 @@ static void crng_fast_key_erasure(u8 key[CHACHA_KEY_SIZE],
chacha20_block(chacha_state, first_block);
memcpy(key, first_block, CHACHA_KEY_SIZE);
- memmove(random_data, first_block + CHACHA_KEY_SIZE, random_data_len);
+ memcpy(random_data, first_block + CHACHA_KEY_SIZE, random_data_len);
memzero_explicit(first_block, sizeof(first_block));
}
diff --git a/drivers/clk/at91/clk-generated.c b/drivers/clk/at91/clk-generated.c
index 23cc8297ec4c..d429ba52a719 100644
--- a/drivers/clk/at91/clk-generated.c
+++ b/drivers/clk/at91/clk-generated.c
@@ -117,6 +117,10 @@ static void clk_generated_best_diff(struct clk_rate_request *req,
tmp_rate = parent_rate;
else
tmp_rate = parent_rate / div;
+
+ if (tmp_rate < req->min_rate || tmp_rate > req->max_rate)
+ return;
+
tmp_diff = abs(req->rate - tmp_rate);
if (*best_diff < 0 || *best_diff >= tmp_diff) {
diff --git a/drivers/clk/bcm/clk-bcm2835.c b/drivers/clk/bcm/clk-bcm2835.c
index 3ad20e75fd23..48a1eb9f2d55 100644
--- a/drivers/clk/bcm/clk-bcm2835.c
+++ b/drivers/clk/bcm/clk-bcm2835.c
@@ -941,6 +941,7 @@ static u32 bcm2835_clock_choose_div(struct clk_hw *hw,
u64 temp = (u64)parent_rate << CM_DIV_FRAC_BITS;
u32 div, mindiv, maxdiv;
+ do_div(temp, rate);
div = temp;
div &= ~unused_frac_mask;
diff --git a/drivers/clk/microchip/clk-mpfs.c b/drivers/clk/microchip/clk-mpfs.c
index aa1561b773d6..070c3b896559 100644
--- a/drivers/clk/microchip/clk-mpfs.c
+++ b/drivers/clk/microchip/clk-mpfs.c
@@ -11,20 +11,48 @@
#include <dt-bindings/clock/microchip,mpfs-clock.h>
/* address offset of control registers */
+#define REG_MSSPLL_REF_CR 0x08u
+#define REG_MSSPLL_POSTDIV_CR 0x10u
+#define REG_MSSPLL_SSCG_2_CR 0x2Cu
#define REG_CLOCK_CONFIG_CR 0x08u
+#define REG_RTC_CLOCK_CR 0x0Cu
#define REG_SUBBLK_CLOCK_CR 0x84u
#define REG_SUBBLK_RESET_CR 0x88u
+#define MSSPLL_FBDIV_SHIFT 0x00u
+#define MSSPLL_FBDIV_WIDTH 0x0Cu
+#define MSSPLL_REFDIV_SHIFT 0x08u
+#define MSSPLL_REFDIV_WIDTH 0x06u
+#define MSSPLL_POSTDIV_SHIFT 0x08u
+#define MSSPLL_POSTDIV_WIDTH 0x07u
+#define MSSPLL_FIXED_DIV 4u
+
struct mpfs_clock_data {
void __iomem *base;
+ void __iomem *msspll_base;
struct clk_hw_onecell_data hw_data;
};
+struct mpfs_msspll_hw_clock {
+ void __iomem *base;
+ unsigned int id;
+ u32 reg_offset;
+ u32 shift;
+ u32 width;
+ u32 flags;
+ struct clk_hw hw;
+ struct clk_init_data init;
+};
+
+#define to_mpfs_msspll_clk(_hw) container_of(_hw, struct mpfs_msspll_hw_clock, hw)
+
struct mpfs_cfg_clock {
const struct clk_div_table *table;
unsigned int id;
+ u32 reg_offset;
u8 shift;
u8 width;
+ u8 flags;
};
struct mpfs_cfg_hw_clock {
@@ -55,7 +83,7 @@ struct mpfs_periph_hw_clock {
*/
static DEFINE_SPINLOCK(mpfs_clk_lock);
-static const struct clk_parent_data mpfs_cfg_parent[] = {
+static const struct clk_parent_data mpfs_ext_ref[] = {
{ .index = 0 },
};
@@ -69,6 +97,86 @@ static const struct clk_div_table mpfs_div_ahb_table[] = {
{ 0, 0 }
};
+/*
+ * The only two supported reference clock frequencies for the PolarFire SoC are
+ * 100 and 125 MHz, as the rtc reference is required to be 1 MHz.
+ * It therefore only needs to have divider table entries corresponding to
+ * divide by 100 and 125.
+ */
+static const struct clk_div_table mpfs_div_rtcref_table[] = {
+ { 100, 100 }, { 125, 125 },
+ { 0, 0 }
+};
+
+static unsigned long mpfs_clk_msspll_recalc_rate(struct clk_hw *hw, unsigned long prate)
+{
+ struct mpfs_msspll_hw_clock *msspll_hw = to_mpfs_msspll_clk(hw);
+ void __iomem *mult_addr = msspll_hw->base + msspll_hw->reg_offset;
+ void __iomem *ref_div_addr = msspll_hw->base + REG_MSSPLL_REF_CR;
+ void __iomem *postdiv_addr = msspll_hw->base + REG_MSSPLL_POSTDIV_CR;
+ u32 mult, ref_div, postdiv;
+
+ mult = readl_relaxed(mult_addr) >> MSSPLL_FBDIV_SHIFT;
+ mult &= clk_div_mask(MSSPLL_FBDIV_WIDTH);
+ ref_div = readl_relaxed(ref_div_addr) >> MSSPLL_REFDIV_SHIFT;
+ ref_div &= clk_div_mask(MSSPLL_REFDIV_WIDTH);
+ postdiv = readl_relaxed(postdiv_addr) >> MSSPLL_POSTDIV_SHIFT;
+ postdiv &= clk_div_mask(MSSPLL_POSTDIV_WIDTH);
+
+ return prate * mult / (ref_div * MSSPLL_FIXED_DIV * postdiv);
+}
+
+static const struct clk_ops mpfs_clk_msspll_ops = {
+ .recalc_rate = mpfs_clk_msspll_recalc_rate,
+};
+
+#define CLK_PLL(_id, _name, _parent, _shift, _width, _flags, _offset) { \
+ .id = _id, \
+ .shift = _shift, \
+ .width = _width, \
+ .reg_offset = _offset, \
+ .flags = _flags, \
+ .hw.init = CLK_HW_INIT_PARENTS_DATA(_name, _parent, &mpfs_clk_msspll_ops, 0), \
+}
+
+static struct mpfs_msspll_hw_clock mpfs_msspll_clks[] = {
+ CLK_PLL(CLK_MSSPLL, "clk_msspll", mpfs_ext_ref, MSSPLL_FBDIV_SHIFT,
+ MSSPLL_FBDIV_WIDTH, 0, REG_MSSPLL_SSCG_2_CR),
+};
+
+static int mpfs_clk_register_msspll(struct device *dev, struct mpfs_msspll_hw_clock *msspll_hw,
+ void __iomem *base)
+{
+ msspll_hw->base = base;
+
+ return devm_clk_hw_register(dev, &msspll_hw->hw);
+}
+
+static int mpfs_clk_register_mssplls(struct device *dev, struct mpfs_msspll_hw_clock *msspll_hws,
+ unsigned int num_clks, struct mpfs_clock_data *data)
+{
+ void __iomem *base = data->msspll_base;
+ unsigned int i;
+ int ret;
+
+ for (i = 0; i < num_clks; i++) {
+ struct mpfs_msspll_hw_clock *msspll_hw = &msspll_hws[i];
+
+ ret = mpfs_clk_register_msspll(dev, msspll_hw, base);
+ if (ret)
+ return dev_err_probe(dev, ret, "failed to register msspll id: %d\n",
+ CLK_MSSPLL);
+
+ data->hw_data.hws[msspll_hw->id] = &msspll_hw->hw;
+ }
+
+ return 0;
+}
+
+/*
+ * "CFG" clocks
+ */
+
static unsigned long mpfs_cfg_clk_recalc_rate(struct clk_hw *hw, unsigned long prate)
{
struct mpfs_cfg_hw_clock *cfg_hw = to_mpfs_cfg_clk(hw);
@@ -76,10 +184,10 @@ static unsigned long mpfs_cfg_clk_recalc_rate(struct clk_hw *hw, unsigned long p
void __iomem *base_addr = cfg_hw->sys_base;
u32 val;
- val = readl_relaxed(base_addr + REG_CLOCK_CONFIG_CR) >> cfg->shift;
+ val = readl_relaxed(base_addr + cfg->reg_offset) >> cfg->shift;
val &= clk_div_mask(cfg->width);
- return prate / (1u << val);
+ return divider_recalc_rate(hw, prate, val, cfg->table, cfg->flags, cfg->width);
}
static long mpfs_cfg_clk_round_rate(struct clk_hw *hw, unsigned long rate, unsigned long *prate)
@@ -105,11 +213,10 @@ static int mpfs_cfg_clk_set_rate(struct clk_hw *hw, unsigned long rate, unsigned
return divider_setting;
spin_lock_irqsave(&mpfs_clk_lock, flags);
-
- val = readl_relaxed(base_addr + REG_CLOCK_CONFIG_CR);
+ val = readl_relaxed(base_addr + cfg->reg_offset);
val &= ~(clk_div_mask(cfg->width) << cfg_hw->cfg.shift);
val |= divider_setting << cfg->shift;
- writel_relaxed(val, base_addr + REG_CLOCK_CONFIG_CR);
+ writel_relaxed(val, base_addr + cfg->reg_offset);
spin_unlock_irqrestore(&mpfs_clk_lock, flags);
@@ -122,19 +229,33 @@ static const struct clk_ops mpfs_clk_cfg_ops = {
.set_rate = mpfs_cfg_clk_set_rate,
};
-#define CLK_CFG(_id, _name, _parent, _shift, _width, _table, _flags) { \
- .cfg.id = _id, \
- .cfg.shift = _shift, \
- .cfg.width = _width, \
- .cfg.table = _table, \
- .hw.init = CLK_HW_INIT_PARENTS_DATA(_name, _parent, &mpfs_clk_cfg_ops, \
- _flags), \
+#define CLK_CFG(_id, _name, _parent, _shift, _width, _table, _flags, _offset) { \
+ .cfg.id = _id, \
+ .cfg.shift = _shift, \
+ .cfg.width = _width, \
+ .cfg.table = _table, \
+ .cfg.reg_offset = _offset, \
+ .cfg.flags = _flags, \
+ .hw.init = CLK_HW_INIT(_name, _parent, &mpfs_clk_cfg_ops, 0), \
}
static struct mpfs_cfg_hw_clock mpfs_cfg_clks[] = {
- CLK_CFG(CLK_CPU, "clk_cpu", mpfs_cfg_parent, 0, 2, mpfs_div_cpu_axi_table, 0),
- CLK_CFG(CLK_AXI, "clk_axi", mpfs_cfg_parent, 2, 2, mpfs_div_cpu_axi_table, 0),
- CLK_CFG(CLK_AHB, "clk_ahb", mpfs_cfg_parent, 4, 2, mpfs_div_ahb_table, 0),
+ CLK_CFG(CLK_CPU, "clk_cpu", "clk_msspll", 0, 2, mpfs_div_cpu_axi_table, 0,
+ REG_CLOCK_CONFIG_CR),
+ CLK_CFG(CLK_AXI, "clk_axi", "clk_msspll", 2, 2, mpfs_div_cpu_axi_table, 0,
+ REG_CLOCK_CONFIG_CR),
+ CLK_CFG(CLK_AHB, "clk_ahb", "clk_msspll", 4, 2, mpfs_div_ahb_table, 0,
+ REG_CLOCK_CONFIG_CR),
+ {
+ .cfg.id = CLK_RTCREF,
+ .cfg.shift = 0,
+ .cfg.width = 12,
+ .cfg.table = mpfs_div_rtcref_table,
+ .cfg.reg_offset = REG_RTC_CLOCK_CR,
+ .cfg.flags = CLK_DIVIDER_ONE_BASED,
+ .hw.init =
+ CLK_HW_INIT_PARENTS_DATA("clk_rtcref", mpfs_ext_ref, &mpfs_clk_cfg_ops, 0),
+ }
};
static int mpfs_clk_register_cfg(struct device *dev, struct mpfs_cfg_hw_clock *cfg_hw,
@@ -160,13 +281,17 @@ static int mpfs_clk_register_cfgs(struct device *dev, struct mpfs_cfg_hw_clock *
return dev_err_probe(dev, ret, "failed to register clock id: %d\n",
cfg_hw->cfg.id);
- id = cfg_hws[i].cfg.id;
+ id = cfg_hw->cfg.id;
data->hw_data.hws[id] = &cfg_hw->hw;
}
return 0;
}
+/*
+ * peripheral clocks - devices connected to axi or ahb buses.
+ */
+
static int mpfs_periph_clk_enable(struct clk_hw *hw)
{
struct mpfs_periph_hw_clock *periph_hw = to_mpfs_periph_clk(hw);
@@ -200,10 +325,6 @@ static void mpfs_periph_clk_disable(struct clk_hw *hw)
spin_lock_irqsave(&mpfs_clk_lock, flags);
- reg = readl_relaxed(base_addr + REG_SUBBLK_RESET_CR);
- val = reg | (1u << periph->shift);
- writel_relaxed(val, base_addr + REG_SUBBLK_RESET_CR);
-
reg = readl_relaxed(base_addr + REG_SUBBLK_CLOCK_CR);
val = reg & ~(1u << periph->shift);
writel_relaxed(val, base_addr + REG_SUBBLK_CLOCK_CR);
@@ -249,8 +370,10 @@ static const struct clk_ops mpfs_periph_clk_ops = {
* trap handler
* - CLK_MMUART0: reserved by the hss
* - CLK_DDRC: provides clock to the ddr subsystem
- * - CLK_FICx: these provide clocks for sections of the fpga fabric, disabling them would
- * cause the fabric to go into reset
+ * - CLK_FICx: these provide the processor side clocks to the "FIC" (Fabric InterConnect)
+ * clock domain crossers which provide the interface to the FPGA fabric. Disabling them
+ * causes the FPGA fabric to go into reset.
+ * - CLK_ATHENA: The athena clock is FIC4, which is reserved for the Athena TeraFire.
*/
static struct mpfs_periph_hw_clock mpfs_periph_clks[] = {
@@ -258,7 +381,7 @@ static struct mpfs_periph_hw_clock mpfs_periph_clks[] = {
CLK_PERIPH(CLK_MAC0, "clk_periph_mac0", PARENT_CLK(AHB), 1, 0),
CLK_PERIPH(CLK_MAC1, "clk_periph_mac1", PARENT_CLK(AHB), 2, 0),
CLK_PERIPH(CLK_MMC, "clk_periph_mmc", PARENT_CLK(AHB), 3, 0),
- CLK_PERIPH(CLK_TIMER, "clk_periph_timer", PARENT_CLK(AHB), 4, 0),
+ CLK_PERIPH(CLK_TIMER, "clk_periph_timer", PARENT_CLK(RTCREF), 4, 0),
CLK_PERIPH(CLK_MMUART0, "clk_periph_mmuart0", PARENT_CLK(AHB), 5, CLK_IS_CRITICAL),
CLK_PERIPH(CLK_MMUART1, "clk_periph_mmuart1", PARENT_CLK(AHB), 6, 0),
CLK_PERIPH(CLK_MMUART2, "clk_periph_mmuart2", PARENT_CLK(AHB), 7, 0),
@@ -277,11 +400,11 @@ static struct mpfs_periph_hw_clock mpfs_periph_clks[] = {
CLK_PERIPH(CLK_GPIO1, "clk_periph_gpio1", PARENT_CLK(AHB), 21, 0),
CLK_PERIPH(CLK_GPIO2, "clk_periph_gpio2", PARENT_CLK(AHB), 22, 0),
CLK_PERIPH(CLK_DDRC, "clk_periph_ddrc", PARENT_CLK(AHB), 23, CLK_IS_CRITICAL),
- CLK_PERIPH(CLK_FIC0, "clk_periph_fic0", PARENT_CLK(AHB), 24, CLK_IS_CRITICAL),
- CLK_PERIPH(CLK_FIC1, "clk_periph_fic1", PARENT_CLK(AHB), 25, CLK_IS_CRITICAL),
- CLK_PERIPH(CLK_FIC2, "clk_periph_fic2", PARENT_CLK(AHB), 26, CLK_IS_CRITICAL),
- CLK_PERIPH(CLK_FIC3, "clk_periph_fic3", PARENT_CLK(AHB), 27, CLK_IS_CRITICAL),
- CLK_PERIPH(CLK_ATHENA, "clk_periph_athena", PARENT_CLK(AHB), 28, 0),
+ CLK_PERIPH(CLK_FIC0, "clk_periph_fic0", PARENT_CLK(AXI), 24, CLK_IS_CRITICAL),
+ CLK_PERIPH(CLK_FIC1, "clk_periph_fic1", PARENT_CLK(AXI), 25, CLK_IS_CRITICAL),
+ CLK_PERIPH(CLK_FIC2, "clk_periph_fic2", PARENT_CLK(AXI), 26, CLK_IS_CRITICAL),
+ CLK_PERIPH(CLK_FIC3, "clk_periph_fic3", PARENT_CLK(AXI), 27, CLK_IS_CRITICAL),
+ CLK_PERIPH(CLK_ATHENA, "clk_periph_athena", PARENT_CLK(AXI), 28, CLK_IS_CRITICAL),
CLK_PERIPH(CLK_CFM, "clk_periph_cfm", PARENT_CLK(AHB), 29, 0),
};
@@ -322,8 +445,9 @@ static int mpfs_clk_probe(struct platform_device *pdev)
unsigned int num_clks;
int ret;
- /* CLK_RESERVED is not part of cfg_clks nor periph_clks, so add 1 */
- num_clks = ARRAY_SIZE(mpfs_cfg_clks) + ARRAY_SIZE(mpfs_periph_clks) + 1;
+ /* CLK_RESERVED is not part of clock arrays, so add 1 */
+ num_clks = ARRAY_SIZE(mpfs_msspll_clks) + ARRAY_SIZE(mpfs_cfg_clks)
+ + ARRAY_SIZE(mpfs_periph_clks) + 1;
clk_data = devm_kzalloc(dev, struct_size(clk_data, hw_data.hws, num_clks), GFP_KERNEL);
if (!clk_data)
@@ -333,8 +457,17 @@ static int mpfs_clk_probe(struct platform_device *pdev)
if (IS_ERR(clk_data->base))
return PTR_ERR(clk_data->base);
+ clk_data->msspll_base = devm_platform_ioremap_resource(pdev, 1);
+ if (IS_ERR(clk_data->msspll_base))
+ return PTR_ERR(clk_data->msspll_base);
+
clk_data->hw_data.num = num_clks;
+ ret = mpfs_clk_register_mssplls(dev, mpfs_msspll_clks, ARRAY_SIZE(mpfs_msspll_clks),
+ clk_data);
+ if (ret)
+ return ret;
+
ret = mpfs_clk_register_cfgs(dev, mpfs_cfg_clks, ARRAY_SIZE(mpfs_cfg_clks), clk_data);
if (ret)
return ret;
diff --git a/drivers/clk/qcom/clk-rcg2.c b/drivers/clk/qcom/clk-rcg2.c
index f675fd969c4d..e9c357309fd9 100644
--- a/drivers/clk/qcom/clk-rcg2.c
+++ b/drivers/clk/qcom/clk-rcg2.c
@@ -818,7 +818,7 @@ EXPORT_SYMBOL_GPL(clk_pixel_ops);
static int clk_gfx3d_determine_rate(struct clk_hw *hw,
struct clk_rate_request *req)
{
- struct clk_rate_request parent_req = { };
+ struct clk_rate_request parent_req = { .min_rate = 0, .max_rate = ULONG_MAX };
struct clk_rcg2_gfx3d *cgfx = to_clk_rcg2_gfx3d(hw);
struct clk_hw *xo, *p0, *p1, *p2;
unsigned long p0_rate;
diff --git a/drivers/clk/sunxi-ng/ccu-sun6i-rtc.c b/drivers/clk/sunxi-ng/ccu-sun6i-rtc.c
index 8a10bade7e0d..d65398497d5f 100644
--- a/drivers/clk/sunxi-ng/ccu-sun6i-rtc.c
+++ b/drivers/clk/sunxi-ng/ccu-sun6i-rtc.c
@@ -241,6 +241,7 @@ static struct clk_init_data rtc_32k_init_data = {
.ops = &ccu_mux_ops,
.parent_hws = rtc_32k_parents,
.num_parents = ARRAY_SIZE(rtc_32k_parents), /* updated during probe */
+ .flags = CLK_IS_CRITICAL,
};
static struct ccu_mux rtc_32k_clk = {
@@ -297,10 +298,6 @@ static const struct sunxi_ccu_desc sun6i_rtc_ccu_desc = {
.hw_clks = &sun6i_rtc_ccu_hw_clks,
};
-static const struct clk_parent_data sun50i_h6_osc32k_fanout_parents[] = {
- { .hw = &osc32k_clk.common.hw },
-};
-
static const struct clk_parent_data sun50i_h616_osc32k_fanout_parents[] = {
{ .hw = &osc32k_clk.common.hw },
{ .fw_name = "pll-32k" },
@@ -313,13 +310,6 @@ static const struct clk_parent_data sun50i_r329_osc32k_fanout_parents[] = {
{ .hw = &osc24M_32k_clk.common.hw }
};
-static const struct sun6i_rtc_match_data sun50i_h6_rtc_ccu_data = {
- .have_ext_osc32k = true,
- .have_iosc_calibration = true,
- .osc32k_fanout_parents = sun50i_h6_osc32k_fanout_parents,
- .osc32k_fanout_nparents = ARRAY_SIZE(sun50i_h6_osc32k_fanout_parents),
-};
-
static const struct sun6i_rtc_match_data sun50i_h616_rtc_ccu_data = {
.have_iosc_calibration = true,
.rtc_32k_single_parent = true,
@@ -335,10 +325,6 @@ static const struct sun6i_rtc_match_data sun50i_r329_rtc_ccu_data = {
static const struct of_device_id sun6i_rtc_ccu_match[] = {
{
- .compatible = "allwinner,sun50i-h6-rtc",
- .data = &sun50i_h6_rtc_ccu_data,
- },
- {
.compatible = "allwinner,sun50i-h616-rtc",
.data = &sun50i_h616_rtc_ccu_data,
},
@@ -346,6 +332,7 @@ static const struct of_device_id sun6i_rtc_ccu_match[] = {
.compatible = "allwinner,sun50i-r329-rtc",
.data = &sun50i_r329_rtc_ccu_data,
},
+ {},
};
int sun6i_rtc_ccu_probe(struct device *dev, void __iomem *reg)
diff --git a/drivers/clk/sunxi/clk-sun9i-mmc.c b/drivers/clk/sunxi/clk-sun9i-mmc.c
index 542b31d6e96d..636bcf2439ef 100644
--- a/drivers/clk/sunxi/clk-sun9i-mmc.c
+++ b/drivers/clk/sunxi/clk-sun9i-mmc.c
@@ -109,6 +109,8 @@ static int sun9i_a80_mmc_config_clk_probe(struct platform_device *pdev)
spin_lock_init(&data->lock);
r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ if (!r)
+ return -EINVAL;
/* one clock/reset pair per word */
count = DIV_ROUND_UP((resource_size(r)), SUN9I_MMC_WIDTH);
data->membase = devm_ioremap_resource(&pdev->dev, r);
diff --git a/drivers/cpufreq/qcom-cpufreq-hw.c b/drivers/cpufreq/qcom-cpufreq-hw.c
index f9d593ff4718..0253731d6d25 100644
--- a/drivers/cpufreq/qcom-cpufreq-hw.c
+++ b/drivers/cpufreq/qcom-cpufreq-hw.c
@@ -24,13 +24,17 @@
#define CLK_HW_DIV 2
#define LUT_TURBO_IND 1
+#define GT_IRQ_STATUS BIT(2)
+
#define HZ_PER_KHZ 1000
struct qcom_cpufreq_soc_data {
u32 reg_enable;
+ u32 reg_domain_state;
u32 reg_dcvs_ctrl;
u32 reg_freq_lut;
u32 reg_volt_lut;
+ u32 reg_intr_clr;
u32 reg_current_vote;
u32 reg_perf_state;
u8 lut_row_size;
@@ -280,37 +284,46 @@ static void qcom_get_related_cpus(int index, struct cpumask *m)
}
}
-static unsigned int qcom_lmh_get_throttle_freq(struct qcom_cpufreq_data *data)
+static unsigned long qcom_lmh_get_throttle_freq(struct qcom_cpufreq_data *data)
{
- unsigned int val = readl_relaxed(data->base + data->soc_data->reg_current_vote);
+ unsigned int lval;
+
+ if (data->soc_data->reg_current_vote)
+ lval = readl_relaxed(data->base + data->soc_data->reg_current_vote) & 0x3ff;
+ else
+ lval = readl_relaxed(data->base + data->soc_data->reg_domain_state) & 0xff;
- return (val & 0x3FF) * 19200;
+ return lval * xo_rate;
}
static void qcom_lmh_dcvs_notify(struct qcom_cpufreq_data *data)
{
struct cpufreq_policy *policy = data->policy;
- int cpu = cpumask_first(policy->cpus);
+ int cpu = cpumask_first(policy->related_cpus);
struct device *dev = get_cpu_device(cpu);
unsigned long freq_hz, throttled_freq;
struct dev_pm_opp *opp;
- unsigned int freq;
/*
* Get the h/w throttled frequency, normalize it using the
* registered opp table and use it to calculate thermal pressure.
*/
- freq = qcom_lmh_get_throttle_freq(data);
- freq_hz = freq * HZ_PER_KHZ;
+ freq_hz = qcom_lmh_get_throttle_freq(data);
opp = dev_pm_opp_find_freq_floor(dev, &freq_hz);
if (IS_ERR(opp) && PTR_ERR(opp) == -ERANGE)
- dev_pm_opp_find_freq_ceil(dev, &freq_hz);
+ opp = dev_pm_opp_find_freq_ceil(dev, &freq_hz);
+
+ if (IS_ERR(opp)) {
+ dev_warn(dev, "Can't find the OPP for throttling: %pe!\n", opp);
+ } else {
+ throttled_freq = freq_hz / HZ_PER_KHZ;
- throttled_freq = freq_hz / HZ_PER_KHZ;
+ /* Update thermal pressure (the boost frequencies are accepted) */
+ arch_update_thermal_pressure(policy->related_cpus, throttled_freq);
- /* Update thermal pressure (the boost frequencies are accepted) */
- arch_update_thermal_pressure(policy->related_cpus, throttled_freq);
+ dev_pm_opp_put(opp);
+ }
/*
* In the unlikely case policy is unregistered do not enable
@@ -350,6 +363,10 @@ static irqreturn_t qcom_lmh_dcvs_handle_irq(int irq, void *data)
disable_irq_nosync(c_data->throttle_irq);
schedule_delayed_work(&c_data->throttle_work, 0);
+ if (c_data->soc_data->reg_intr_clr)
+ writel_relaxed(GT_IRQ_STATUS,
+ c_data->base + c_data->soc_data->reg_intr_clr);
+
return IRQ_HANDLED;
}
@@ -365,9 +382,11 @@ static const struct qcom_cpufreq_soc_data qcom_soc_data = {
static const struct qcom_cpufreq_soc_data epss_soc_data = {
.reg_enable = 0x0,
+ .reg_domain_state = 0x20,
.reg_dcvs_ctrl = 0xb0,
.reg_freq_lut = 0x100,
.reg_volt_lut = 0x200,
+ .reg_intr_clr = 0x308,
.reg_perf_state = 0x320,
.lut_row_size = 4,
};
@@ -417,16 +436,39 @@ static int qcom_cpufreq_hw_lmh_init(struct cpufreq_policy *policy, int index)
return 0;
}
-static void qcom_cpufreq_hw_lmh_exit(struct qcom_cpufreq_data *data)
+static int qcom_cpufreq_hw_cpu_online(struct cpufreq_policy *policy)
+{
+ struct qcom_cpufreq_data *data = policy->driver_data;
+ struct platform_device *pdev = cpufreq_get_driver_data();
+ int ret;
+
+ ret = irq_set_affinity_hint(data->throttle_irq, policy->cpus);
+ if (ret)
+ dev_err(&pdev->dev, "Failed to set CPU affinity of %s[%d]\n",
+ data->irq_name, data->throttle_irq);
+
+ return ret;
+}
+
+static int qcom_cpufreq_hw_cpu_offline(struct cpufreq_policy *policy)
{
+ struct qcom_cpufreq_data *data = policy->driver_data;
+
if (data->throttle_irq <= 0)
- return;
+ return 0;
mutex_lock(&data->throttle_lock);
data->cancel_throttle = true;
mutex_unlock(&data->throttle_lock);
cancel_delayed_work_sync(&data->throttle_work);
+ irq_set_affinity_hint(data->throttle_irq, NULL);
+
+ return 0;
+}
+
+static void qcom_cpufreq_hw_lmh_exit(struct qcom_cpufreq_data *data)
+{
free_irq(data->throttle_irq, data);
}
@@ -583,6 +625,8 @@ static struct cpufreq_driver cpufreq_qcom_hw_driver = {
.get = qcom_cpufreq_hw_get,
.init = qcom_cpufreq_hw_cpu_init,
.exit = qcom_cpufreq_hw_cpu_exit,
+ .online = qcom_cpufreq_hw_cpu_online,
+ .offline = qcom_cpufreq_hw_cpu_offline,
.register_em = cpufreq_register_em_with_opp,
.fast_switch = qcom_cpufreq_hw_fast_switch,
.name = "qcom-cpufreq-hw",
diff --git a/drivers/cpufreq/sun50i-cpufreq-nvmem.c b/drivers/cpufreq/sun50i-cpufreq-nvmem.c
index 2deed8d8773f..75e1bf3a08f7 100644
--- a/drivers/cpufreq/sun50i-cpufreq-nvmem.c
+++ b/drivers/cpufreq/sun50i-cpufreq-nvmem.c
@@ -98,8 +98,10 @@ static int sun50i_cpufreq_nvmem_probe(struct platform_device *pdev)
return -ENOMEM;
ret = sun50i_cpufreq_get_efuse(&speed);
- if (ret)
+ if (ret) {
+ kfree(opp_tables);
return ret;
+ }
snprintf(name, MAX_NAME_LEN, "speed%d", speed);
diff --git a/drivers/cpuidle/cpuidle-riscv-sbi.c b/drivers/cpuidle/cpuidle-riscv-sbi.c
index b459eda2cd37..5c852e671992 100644
--- a/drivers/cpuidle/cpuidle-riscv-sbi.c
+++ b/drivers/cpuidle/cpuidle-riscv-sbi.c
@@ -22,6 +22,7 @@
#include <linux/pm_runtime.h>
#include <asm/cpuidle.h>
#include <asm/sbi.h>
+#include <asm/smp.h>
#include <asm/suspend.h>
#include "dt_idle_states.h"
diff --git a/drivers/crypto/qcom-rng.c b/drivers/crypto/qcom-rng.c
index 11f30fd48c14..031b5f701a0a 100644
--- a/drivers/crypto/qcom-rng.c
+++ b/drivers/crypto/qcom-rng.c
@@ -65,6 +65,7 @@ static int qcom_rng_read(struct qcom_rng *rng, u8 *data, unsigned int max)
} else {
/* copy only remaining bytes */
memcpy(data, &val, max - currsize);
+ break;
}
} while (currsize < max);
diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c
index df23239b04fc..53297a0d9c57 100644
--- a/drivers/dma-buf/dma-buf.c
+++ b/drivers/dma-buf/dma-buf.c
@@ -407,6 +407,7 @@ static inline int is_dma_buf_file(struct file *file)
static struct file *dma_buf_getfile(struct dma_buf *dmabuf, int flags)
{
+ static atomic64_t dmabuf_inode = ATOMIC64_INIT(0);
struct file *file;
struct inode *inode = alloc_anon_inode(dma_buf_mnt->mnt_sb);
@@ -416,6 +417,13 @@ static struct file *dma_buf_getfile(struct dma_buf *dmabuf, int flags)
inode->i_size = dmabuf->size;
inode_set_bytes(inode, dmabuf->size);
+ /*
+ * The ->i_ino acquired from get_next_ino() is not unique thus
+ * not suitable for using it as dentry name by dmabuf stats.
+ * Override ->i_ino with the unique and dmabuffs specific
+ * value.
+ */
+ inode->i_ino = atomic64_add_return(1, &dmabuf_inode);
file = alloc_file_pseudo(inode, dma_buf_mnt, "dmabuf",
flags, &dma_buf_fops);
if (IS_ERR(file))
@@ -543,10 +551,6 @@ struct dma_buf *dma_buf_export(const struct dma_buf_export_info *exp_info)
file->f_mode |= FMODE_LSEEK;
dmabuf->file = file;
- ret = dma_buf_stats_setup(dmabuf);
- if (ret)
- goto err_sysfs;
-
mutex_init(&dmabuf->lock);
INIT_LIST_HEAD(&dmabuf->attachments);
@@ -554,6 +558,10 @@ struct dma_buf *dma_buf_export(const struct dma_buf_export_info *exp_info)
list_add(&dmabuf->list_node, &db_list.head);
mutex_unlock(&db_list.lock);
+ ret = dma_buf_stats_setup(dmabuf);
+ if (ret)
+ goto err_sysfs;
+
return dmabuf;
err_sysfs:
diff --git a/drivers/dma/at_xdmac.c b/drivers/dma/at_xdmac.c
index 1476156af74b..def564d1e8fa 100644
--- a/drivers/dma/at_xdmac.c
+++ b/drivers/dma/at_xdmac.c
@@ -1453,7 +1453,7 @@ at_xdmac_tx_status(struct dma_chan *chan, dma_cookie_t cookie,
{
struct at_xdmac_chan *atchan = to_at_xdmac_chan(chan);
struct at_xdmac *atxdmac = to_at_xdmac(atchan->chan.device);
- struct at_xdmac_desc *desc, *_desc;
+ struct at_xdmac_desc *desc, *_desc, *iter;
struct list_head *descs_list;
enum dma_status ret;
int residue, retry;
@@ -1568,11 +1568,13 @@ at_xdmac_tx_status(struct dma_chan *chan, dma_cookie_t cookie,
* microblock.
*/
descs_list = &desc->descs_list;
- list_for_each_entry_safe(desc, _desc, descs_list, desc_node) {
- dwidth = at_xdmac_get_dwidth(desc->lld.mbr_cfg);
- residue -= (desc->lld.mbr_ubc & 0xffffff) << dwidth;
- if ((desc->lld.mbr_nda & 0xfffffffc) == cur_nda)
+ list_for_each_entry_safe(iter, _desc, descs_list, desc_node) {
+ dwidth = at_xdmac_get_dwidth(iter->lld.mbr_cfg);
+ residue -= (iter->lld.mbr_ubc & 0xffffff) << dwidth;
+ if ((iter->lld.mbr_nda & 0xfffffffc) == cur_nda) {
+ desc = iter;
break;
+ }
}
residue += cur_ubc << dwidth;
diff --git a/drivers/dma/dw-edma/dw-edma-v0-core.c b/drivers/dma/dw-edma/dw-edma-v0-core.c
index 329fc2e57b70..33bc1e6c4cf2 100644
--- a/drivers/dma/dw-edma/dw-edma-v0-core.c
+++ b/drivers/dma/dw-edma/dw-edma-v0-core.c
@@ -414,14 +414,18 @@ void dw_edma_v0_core_start(struct dw_edma_chunk *chunk, bool first)
SET_CH_32(dw, chan->dir, chan->id, ch_control1,
(DW_EDMA_V0_CCS | DW_EDMA_V0_LLE));
/* Linked list */
+
#ifdef CONFIG_64BIT
- SET_CH_64(dw, chan->dir, chan->id, llp.reg,
- chunk->ll_region.paddr);
+ /* llp is not aligned on 64bit -> keep 32bit accesses */
+ SET_CH_32(dw, chan->dir, chan->id, llp.lsb,
+ lower_32_bits(chunk->ll_region.paddr));
+ SET_CH_32(dw, chan->dir, chan->id, llp.msb,
+ upper_32_bits(chunk->ll_region.paddr));
#else /* CONFIG_64BIT */
- SET_CH_32(dw, chan->dir, chan->id, llp.lsb,
- lower_32_bits(chunk->ll_region.paddr));
- SET_CH_32(dw, chan->dir, chan->id, llp.msb,
- upper_32_bits(chunk->ll_region.paddr));
+ SET_CH_32(dw, chan->dir, chan->id, llp.lsb,
+ lower_32_bits(chunk->ll_region.paddr));
+ SET_CH_32(dw, chan->dir, chan->id, llp.msb,
+ upper_32_bits(chunk->ll_region.paddr));
#endif /* CONFIG_64BIT */
}
/* Doorbell */
diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c
index 3061fe857d69..f652da6ab47d 100644
--- a/drivers/dma/idxd/device.c
+++ b/drivers/dma/idxd/device.c
@@ -373,7 +373,6 @@ static void idxd_wq_device_reset_cleanup(struct idxd_wq *wq)
{
lockdep_assert_held(&wq->wq_lock);
- idxd_wq_disable_cleanup(wq);
wq->size = 0;
wq->group = NULL;
}
@@ -701,14 +700,17 @@ static void idxd_device_wqs_clear_state(struct idxd_device *idxd)
if (wq->state == IDXD_WQ_ENABLED) {
idxd_wq_disable_cleanup(wq);
- idxd_wq_device_reset_cleanup(wq);
wq->state = IDXD_WQ_DISABLED;
}
+ idxd_wq_device_reset_cleanup(wq);
}
}
void idxd_device_clear_state(struct idxd_device *idxd)
{
+ if (!test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags))
+ return;
+
idxd_groups_clear_state(idxd);
idxd_engines_clear_state(idxd);
idxd_device_wqs_clear_state(idxd);
diff --git a/drivers/dma/idxd/submit.c b/drivers/dma/idxd/submit.c
index e289fd48711a..c01db23e3333 100644
--- a/drivers/dma/idxd/submit.c
+++ b/drivers/dma/idxd/submit.c
@@ -150,14 +150,15 @@ static void llist_abort_desc(struct idxd_wq *wq, struct idxd_irq_entry *ie,
*/
int idxd_enqcmds(struct idxd_wq *wq, void __iomem *portal, const void *desc)
{
- int rc, retries = 0;
+ unsigned int retries = wq->enqcmds_retries;
+ int rc;
do {
rc = enqcmds(portal, desc);
if (rc == 0)
break;
cpu_relax();
- } while (retries++ < wq->enqcmds_retries);
+ } while (retries--);
return rc;
}
diff --git a/drivers/dma/idxd/sysfs.c b/drivers/dma/idxd/sysfs.c
index 7e19ab92b61a..dfd549685c46 100644
--- a/drivers/dma/idxd/sysfs.c
+++ b/drivers/dma/idxd/sysfs.c
@@ -905,6 +905,9 @@ static ssize_t wq_max_transfer_size_store(struct device *dev, struct device_attr
u64 xfer_size;
int rc;
+ if (!test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags))
+ return -EPERM;
+
if (wq->state != IDXD_WQ_DISABLED)
return -EPERM;
@@ -939,6 +942,9 @@ static ssize_t wq_max_batch_size_store(struct device *dev, struct device_attribu
u64 batch_size;
int rc;
+ if (!test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags))
+ return -EPERM;
+
if (wq->state != IDXD_WQ_DISABLED)
return -EPERM;
diff --git a/drivers/dma/imx-sdma.c b/drivers/dma/imx-sdma.c
index 70c0aa931ddf..6196a7b3956b 100644
--- a/drivers/dma/imx-sdma.c
+++ b/drivers/dma/imx-sdma.c
@@ -198,12 +198,12 @@ struct sdma_script_start_addrs {
s32 per_2_firi_addr;
s32 mcu_2_firi_addr;
s32 uart_2_per_addr;
- s32 uart_2_mcu_ram_addr;
+ s32 uart_2_mcu_addr;
s32 per_2_app_addr;
s32 mcu_2_app_addr;
s32 per_2_per_addr;
s32 uartsh_2_per_addr;
- s32 uartsh_2_mcu_ram_addr;
+ s32 uartsh_2_mcu_addr;
s32 per_2_shp_addr;
s32 mcu_2_shp_addr;
s32 ata_2_mcu_addr;
@@ -232,8 +232,8 @@ struct sdma_script_start_addrs {
s32 mcu_2_ecspi_addr;
s32 mcu_2_sai_addr;
s32 sai_2_mcu_addr;
- s32 uart_2_mcu_addr;
- s32 uartsh_2_mcu_addr;
+ s32 uart_2_mcu_rom_addr;
+ s32 uartsh_2_mcu_rom_addr;
/* End of v3 array */
s32 mcu_2_zqspi_addr;
/* End of v4 array */
@@ -1796,17 +1796,17 @@ static void sdma_add_scripts(struct sdma_engine *sdma,
saddr_arr[i] = addr_arr[i];
/*
- * get uart_2_mcu_addr/uartsh_2_mcu_addr rom script specially because
- * they are now replaced by uart_2_mcu_ram_addr/uartsh_2_mcu_ram_addr
- * to be compatible with legacy freescale/nxp sdma firmware, and they
- * are located in the bottom part of sdma_script_start_addrs which are
- * beyond the SDMA_SCRIPT_ADDRS_ARRAY_SIZE_V1.
+ * For compatibility with NXP internal legacy kernel before 4.19 which
+ * is based on uart ram script and mainline kernel based on uart rom
+ * script, both uart ram/rom scripts are present in newer sdma
+ * firmware. Use the rom versions if they are present (V3 or newer).
*/
- if (addr->uart_2_mcu_addr)
- sdma->script_addrs->uart_2_mcu_addr = addr->uart_2_mcu_addr;
- if (addr->uartsh_2_mcu_addr)
- sdma->script_addrs->uartsh_2_mcu_addr = addr->uartsh_2_mcu_addr;
-
+ if (sdma->script_number >= SDMA_SCRIPT_ADDRS_ARRAY_SIZE_V3) {
+ if (addr->uart_2_mcu_rom_addr)
+ sdma->script_addrs->uart_2_mcu_addr = addr->uart_2_mcu_rom_addr;
+ if (addr->uartsh_2_mcu_rom_addr)
+ sdma->script_addrs->uartsh_2_mcu_addr = addr->uartsh_2_mcu_rom_addr;
+ }
}
static void sdma_load_firmware(const struct firmware *fw, void *context)
@@ -1885,7 +1885,7 @@ static int sdma_event_remap(struct sdma_engine *sdma)
u32 reg, val, shift, num_map, i;
int ret = 0;
- if (IS_ERR(np) || IS_ERR(gpr_np))
+ if (IS_ERR(np) || !gpr_np)
goto out;
event_remap = of_find_property(np, propname, NULL);
@@ -1933,7 +1933,7 @@ static int sdma_event_remap(struct sdma_engine *sdma)
}
out:
- if (!IS_ERR(gpr_np))
+ if (gpr_np)
of_node_put(gpr_np);
return ret;
diff --git a/drivers/dma/mediatek/mtk-uart-apdma.c b/drivers/dma/mediatek/mtk-uart-apdma.c
index 375e7e647df6..a1517ef1f4a0 100644
--- a/drivers/dma/mediatek/mtk-uart-apdma.c
+++ b/drivers/dma/mediatek/mtk-uart-apdma.c
@@ -274,7 +274,7 @@ static int mtk_uart_apdma_alloc_chan_resources(struct dma_chan *chan)
unsigned int status;
int ret;
- ret = pm_runtime_get_sync(mtkd->ddev.dev);
+ ret = pm_runtime_resume_and_get(mtkd->ddev.dev);
if (ret < 0) {
pm_runtime_put_noidle(chan->device->dev);
return ret;
@@ -288,18 +288,21 @@ static int mtk_uart_apdma_alloc_chan_resources(struct dma_chan *chan)
ret = readx_poll_timeout(readl, c->base + VFF_EN,
status, !status, 10, 100);
if (ret)
- return ret;
+ goto err_pm;
ret = request_irq(c->irq, mtk_uart_apdma_irq_handler,
IRQF_TRIGGER_NONE, KBUILD_MODNAME, chan);
if (ret < 0) {
dev_err(chan->device->dev, "Can't request dma IRQ\n");
- return -EINVAL;
+ ret = -EINVAL;
+ goto err_pm;
}
if (mtkd->support_33bits)
mtk_uart_apdma_write(c, VFF_4G_SUPPORT, VFF_4G_SUPPORT_CLR_B);
+err_pm:
+ pm_runtime_put_noidle(mtkd->ddev.dev);
return ret;
}
diff --git a/drivers/edac/synopsys_edac.c b/drivers/edac/synopsys_edac.c
index f05ff02c0656..40b1abeca856 100644
--- a/drivers/edac/synopsys_edac.c
+++ b/drivers/edac/synopsys_edac.c
@@ -164,6 +164,11 @@
#define ECC_STAT_CECNT_SHIFT 8
#define ECC_STAT_BITNUM_MASK 0x7F
+/* ECC error count register definitions */
+#define ECC_ERRCNT_UECNT_MASK 0xFFFF0000
+#define ECC_ERRCNT_UECNT_SHIFT 16
+#define ECC_ERRCNT_CECNT_MASK 0xFFFF
+
/* DDR QOS Interrupt register definitions */
#define DDR_QOS_IRQ_STAT_OFST 0x20200
#define DDR_QOSUE_MASK 0x4
@@ -423,15 +428,16 @@ static int zynqmp_get_error_info(struct synps_edac_priv *priv)
base = priv->baseaddr;
p = &priv->stat;
+ regval = readl(base + ECC_ERRCNT_OFST);
+ p->ce_cnt = regval & ECC_ERRCNT_CECNT_MASK;
+ p->ue_cnt = (regval & ECC_ERRCNT_UECNT_MASK) >> ECC_ERRCNT_UECNT_SHIFT;
+ if (!p->ce_cnt)
+ goto ue_err;
+
regval = readl(base + ECC_STAT_OFST);
if (!regval)
return 1;
- p->ce_cnt = (regval & ECC_STAT_CECNT_MASK) >> ECC_STAT_CECNT_SHIFT;
- p->ue_cnt = (regval & ECC_STAT_UECNT_MASK) >> ECC_STAT_UECNT_SHIFT;
- if (!p->ce_cnt)
- goto ue_err;
-
p->ceinfo.bitpos = (regval & ECC_STAT_BITNUM_MASK);
regval = readl(base + ECC_CEADDR0_OFST);
diff --git a/drivers/firewire/core-card.c b/drivers/firewire/core-card.c
index 54be88167c60..f3b3953cac83 100644
--- a/drivers/firewire/core-card.c
+++ b/drivers/firewire/core-card.c
@@ -668,6 +668,7 @@ EXPORT_SYMBOL_GPL(fw_card_release);
void fw_core_remove_card(struct fw_card *card)
{
struct fw_card_driver dummy_driver = dummy_driver_template;
+ unsigned long flags;
card->driver->update_phy_reg(card, 4,
PHY_LINK_ACTIVE | PHY_CONTENDER, 0);
@@ -682,7 +683,9 @@ void fw_core_remove_card(struct fw_card *card)
dummy_driver.stop_iso = card->driver->stop_iso;
card->driver = &dummy_driver;
+ spin_lock_irqsave(&card->lock, flags);
fw_destroy_nodes(card);
+ spin_unlock_irqrestore(&card->lock, flags);
/* Wait for all users, especially device workqueue jobs, to finish. */
fw_card_put(card);
diff --git a/drivers/firewire/core-cdev.c b/drivers/firewire/core-cdev.c
index 9f89c17730b1..708e417200f4 100644
--- a/drivers/firewire/core-cdev.c
+++ b/drivers/firewire/core-cdev.c
@@ -1500,6 +1500,7 @@ static void outbound_phy_packet_callback(struct fw_packet *packet,
{
struct outbound_phy_packet_event *e =
container_of(packet, struct outbound_phy_packet_event, p);
+ struct client *e_client;
switch (status) {
/* expected: */
@@ -1516,9 +1517,10 @@ static void outbound_phy_packet_callback(struct fw_packet *packet,
}
e->phy_packet.data[0] = packet->timestamp;
+ e_client = e->client;
queue_event(e->client, &e->event, &e->phy_packet,
sizeof(e->phy_packet) + e->phy_packet.length, NULL, 0);
- client_put(e->client);
+ client_put(e_client);
}
static int ioctl_send_phy_packet(struct client *client, union ioctl_arg *arg)
diff --git a/drivers/firewire/core-topology.c b/drivers/firewire/core-topology.c
index b63d55f5ebd3..f40c81534381 100644
--- a/drivers/firewire/core-topology.c
+++ b/drivers/firewire/core-topology.c
@@ -375,16 +375,13 @@ static void report_found_node(struct fw_card *card,
card->bm_retries = 0;
}
+/* Must be called with card->lock held */
void fw_destroy_nodes(struct fw_card *card)
{
- unsigned long flags;
-
- spin_lock_irqsave(&card->lock, flags);
card->color++;
if (card->local_node != NULL)
for_each_fw_node(card, card->local_node, report_lost_node);
card->local_node = NULL;
- spin_unlock_irqrestore(&card->lock, flags);
}
static void move_tree(struct fw_node *node0, struct fw_node *node1, int port)
@@ -510,6 +507,8 @@ void fw_core_handle_bus_reset(struct fw_card *card, int node_id, int generation,
struct fw_node *local_node;
unsigned long flags;
+ spin_lock_irqsave(&card->lock, flags);
+
/*
* If the selfID buffer is not the immediate successor of the
* previously processed one, we cannot reliably compare the
@@ -521,8 +520,6 @@ void fw_core_handle_bus_reset(struct fw_card *card, int node_id, int generation,
card->bm_retries = 0;
}
- spin_lock_irqsave(&card->lock, flags);
-
card->broadcast_channel_allocated = card->broadcast_channel_auto_allocated;
card->node_id = node_id;
/*
diff --git a/drivers/firewire/core-transaction.c b/drivers/firewire/core-transaction.c
index ac487c96bb71..6c20815cc8d1 100644
--- a/drivers/firewire/core-transaction.c
+++ b/drivers/firewire/core-transaction.c
@@ -73,24 +73,25 @@ static int try_cancel_split_timeout(struct fw_transaction *t)
static int close_transaction(struct fw_transaction *transaction,
struct fw_card *card, int rcode)
{
- struct fw_transaction *t;
+ struct fw_transaction *t = NULL, *iter;
unsigned long flags;
spin_lock_irqsave(&card->lock, flags);
- list_for_each_entry(t, &card->transaction_list, link) {
- if (t == transaction) {
- if (!try_cancel_split_timeout(t)) {
+ list_for_each_entry(iter, &card->transaction_list, link) {
+ if (iter == transaction) {
+ if (!try_cancel_split_timeout(iter)) {
spin_unlock_irqrestore(&card->lock, flags);
goto timed_out;
}
- list_del_init(&t->link);
- card->tlabel_mask &= ~(1ULL << t->tlabel);
+ list_del_init(&iter->link);
+ card->tlabel_mask &= ~(1ULL << iter->tlabel);
+ t = iter;
break;
}
}
spin_unlock_irqrestore(&card->lock, flags);
- if (&t->link != &card->transaction_list) {
+ if (t) {
t->callback(card, rcode, NULL, 0, t->callback_data);
return 0;
}
@@ -935,7 +936,7 @@ EXPORT_SYMBOL(fw_core_handle_request);
void fw_core_handle_response(struct fw_card *card, struct fw_packet *p)
{
- struct fw_transaction *t;
+ struct fw_transaction *t = NULL, *iter;
unsigned long flags;
u32 *data;
size_t data_length;
@@ -947,20 +948,21 @@ void fw_core_handle_response(struct fw_card *card, struct fw_packet *p)
rcode = HEADER_GET_RCODE(p->header[1]);
spin_lock_irqsave(&card->lock, flags);
- list_for_each_entry(t, &card->transaction_list, link) {
- if (t->node_id == source && t->tlabel == tlabel) {
- if (!try_cancel_split_timeout(t)) {
+ list_for_each_entry(iter, &card->transaction_list, link) {
+ if (iter->node_id == source && iter->tlabel == tlabel) {
+ if (!try_cancel_split_timeout(iter)) {
spin_unlock_irqrestore(&card->lock, flags);
goto timed_out;
}
- list_del_init(&t->link);
- card->tlabel_mask &= ~(1ULL << t->tlabel);
+ list_del_init(&iter->link);
+ card->tlabel_mask &= ~(1ULL << iter->tlabel);
+ t = iter;
break;
}
}
spin_unlock_irqrestore(&card->lock, flags);
- if (&t->link == &card->transaction_list) {
+ if (!t) {
timed_out:
fw_notice(card, "unsolicited response (source %x, tlabel %x)\n",
source, tlabel);
diff --git a/drivers/firewire/sbp2.c b/drivers/firewire/sbp2.c
index 85cd379fd383..60051c0cabea 100644
--- a/drivers/firewire/sbp2.c
+++ b/drivers/firewire/sbp2.c
@@ -408,7 +408,7 @@ static void sbp2_status_write(struct fw_card *card, struct fw_request *request,
void *payload, size_t length, void *callback_data)
{
struct sbp2_logical_unit *lu = callback_data;
- struct sbp2_orb *orb;
+ struct sbp2_orb *orb = NULL, *iter;
struct sbp2_status status;
unsigned long flags;
@@ -433,17 +433,18 @@ static void sbp2_status_write(struct fw_card *card, struct fw_request *request,
/* Lookup the orb corresponding to this status write. */
spin_lock_irqsave(&lu->tgt->lock, flags);
- list_for_each_entry(orb, &lu->orb_list, link) {
+ list_for_each_entry(iter, &lu->orb_list, link) {
if (STATUS_GET_ORB_HIGH(status) == 0 &&
- STATUS_GET_ORB_LOW(status) == orb->request_bus) {
- orb->rcode = RCODE_COMPLETE;
- list_del(&orb->link);
+ STATUS_GET_ORB_LOW(status) == iter->request_bus) {
+ iter->rcode = RCODE_COMPLETE;
+ list_del(&iter->link);
+ orb = iter;
break;
}
}
spin_unlock_irqrestore(&lu->tgt->lock, flags);
- if (&orb->link != &lu->orb_list) {
+ if (orb) {
orb->callback(orb, &status);
kref_put(&orb->kref, free_orb); /* orb callback reference */
} else {
diff --git a/drivers/firmware/cirrus/cs_dsp.c b/drivers/firmware/cirrus/cs_dsp.c
index e48108e694f8..7dad6f57d970 100644
--- a/drivers/firmware/cirrus/cs_dsp.c
+++ b/drivers/firmware/cirrus/cs_dsp.c
@@ -955,8 +955,7 @@ static int cs_dsp_create_control(struct cs_dsp *dsp,
ctl->alg_region = *alg_region;
if (subname && dsp->fw_ver >= 2) {
ctl->subname_len = subname_len;
- ctl->subname = kmemdup(subname,
- strlen(subname) + 1, GFP_KERNEL);
+ ctl->subname = kasprintf(GFP_KERNEL, "%.*s", subname_len, subname);
if (!ctl->subname) {
ret = -ENOMEM;
goto err_ctl;
diff --git a/drivers/firmware/efi/Kconfig b/drivers/firmware/efi/Kconfig
index 2c3dac5ecb36..4720ba98cec3 100644
--- a/drivers/firmware/efi/Kconfig
+++ b/drivers/firmware/efi/Kconfig
@@ -91,6 +91,18 @@ config EFI_SOFT_RESERVE
If unsure, say Y.
+config EFI_DXE_MEM_ATTRIBUTES
+ bool "Adjust memory attributes in EFISTUB"
+ depends on EFI && EFI_STUB && X86
+ default y
+ help
+ UEFI specification does not guarantee all memory to be
+ accessible for both write and execute as the kernel expects
+ it to be.
+ Use DXE services to check and alter memory protection
+ attributes during boot via EFISTUB to ensure that memory
+ ranges used by the kernel are writable and executable.
+
config EFI_PARAMS_FROM_FDT
bool
help
@@ -284,3 +296,34 @@ config EFI_CUSTOM_SSDT_OVERLAYS
See Documentation/admin-guide/acpi/ssdt-overlays.rst for more
information.
+
+config EFI_DISABLE_RUNTIME
+ bool "Disable EFI runtime services support by default"
+ default y if PREEMPT_RT
+ help
+ Allow to disable the EFI runtime services support by default. This can
+ already be achieved by using the efi=noruntime option, but it could be
+ useful to have this default without any kernel command line parameter.
+
+ The EFI runtime services are disabled by default when PREEMPT_RT is
+ enabled, because measurements have shown that some EFI functions calls
+ might take too much time to complete, causing large latencies which is
+ an issue for Real-Time kernels.
+
+ This default can be overridden by using the efi=runtime option.
+
+config EFI_COCO_SECRET
+ bool "EFI Confidential Computing Secret Area Support"
+ depends on EFI
+ help
+ Confidential Computing platforms (such as AMD SEV) allow the
+ Guest Owner to securely inject secrets during guest VM launch.
+ The secrets are placed in a designated EFI reserved memory area.
+
+ In order to use the secrets in the kernel, the location of the secret
+ area (as published in the EFI config table) must be kept.
+
+ If you say Y here, the address of the EFI secret area will be kept
+ for usage inside the kernel. This will allow the
+ virt/coco/efi_secret module to access the secrets, which in turn
+ allows userspace programs to access the injected secrets.
diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c
index 5502e176d51b..860534bcfdac 100644
--- a/drivers/firmware/efi/efi.c
+++ b/drivers/firmware/efi/efi.c
@@ -46,6 +46,9 @@ struct efi __read_mostly efi = {
#ifdef CONFIG_LOAD_UEFI_KEYS
.mokvar_table = EFI_INVALID_TABLE_ADDR,
#endif
+#ifdef CONFIG_EFI_COCO_SECRET
+ .coco_secret = EFI_INVALID_TABLE_ADDR,
+#endif
};
EXPORT_SYMBOL(efi);
@@ -66,7 +69,7 @@ struct mm_struct efi_mm = {
struct workqueue_struct *efi_rts_wq;
-static bool disable_runtime = IS_ENABLED(CONFIG_PREEMPT_RT);
+static bool disable_runtime = IS_ENABLED(CONFIG_EFI_DISABLE_RUNTIME);
static int __init setup_noefi(char *arg)
{
disable_runtime = true;
@@ -422,6 +425,11 @@ static int __init efisubsys_init(void)
if (efi_enabled(EFI_DBG) && efi_enabled(EFI_PRESERVE_BS_REGIONS))
efi_debugfs_init();
+#ifdef CONFIG_EFI_COCO_SECRET
+ if (efi.coco_secret != EFI_INVALID_TABLE_ADDR)
+ platform_device_register_simple("efi_secret", 0, NULL, 0);
+#endif
+
return 0;
err_remove_group:
@@ -529,6 +537,9 @@ static const efi_config_table_type_t common_tables[] __initconst = {
#ifdef CONFIG_LOAD_UEFI_KEYS
{LINUX_EFI_MOK_VARIABLE_TABLE_GUID, &efi.mokvar_table, "MOKvar" },
#endif
+#ifdef CONFIG_EFI_COCO_SECRET
+ {LINUX_EFI_COCO_SECRET_AREA_GUID, &efi.coco_secret, "CocoSecret" },
+#endif
{},
};
diff --git a/drivers/firmware/efi/libstub/arm32-stub.c b/drivers/firmware/efi/libstub/arm32-stub.c
index 4b5b2403b3a0..0131e3aaa605 100644
--- a/drivers/firmware/efi/libstub/arm32-stub.c
+++ b/drivers/firmware/efi/libstub/arm32-stub.c
@@ -117,7 +117,8 @@ efi_status_t handle_kernel_image(unsigned long *image_addr,
unsigned long *image_size,
unsigned long *reserve_addr,
unsigned long *reserve_size,
- efi_loaded_image_t *image)
+ efi_loaded_image_t *image,
+ efi_handle_t image_handle)
{
const int slack = TEXT_OFFSET - 5 * PAGE_SIZE;
int alloc_size = MAX_UNCOMP_KERNEL_SIZE + EFI_PHYS_ALIGN;
diff --git a/drivers/firmware/efi/libstub/arm64-stub.c b/drivers/firmware/efi/libstub/arm64-stub.c
index 9cc556013d08..577173ee1f83 100644
--- a/drivers/firmware/efi/libstub/arm64-stub.c
+++ b/drivers/firmware/efi/libstub/arm64-stub.c
@@ -83,7 +83,8 @@ efi_status_t handle_kernel_image(unsigned long *image_addr,
unsigned long *image_size,
unsigned long *reserve_addr,
unsigned long *reserve_size,
- efi_loaded_image_t *image)
+ efi_loaded_image_t *image,
+ efi_handle_t image_handle)
{
efi_status_t status;
unsigned long kernel_size, kernel_memsize = 0;
@@ -100,7 +101,15 @@ efi_status_t handle_kernel_image(unsigned long *image_addr,
u64 min_kimg_align = efi_nokaslr ? MIN_KIMG_ALIGN : EFI_KIMG_ALIGN;
if (IS_ENABLED(CONFIG_RANDOMIZE_BASE)) {
- if (!efi_nokaslr) {
+ efi_guid_t li_fixed_proto = LINUX_EFI_LOADED_IMAGE_FIXED_GUID;
+ void *p;
+
+ if (efi_nokaslr) {
+ efi_info("KASLR disabled on kernel command line\n");
+ } else if (efi_bs_call(handle_protocol, image_handle,
+ &li_fixed_proto, &p) == EFI_SUCCESS) {
+ efi_info("Image placement fixed by loader\n");
+ } else {
status = efi_get_random_bytes(sizeof(phys_seed),
(u8 *)&phys_seed);
if (status == EFI_NOT_FOUND) {
@@ -111,8 +120,6 @@ efi_status_t handle_kernel_image(unsigned long *image_addr,
status);
efi_nokaslr = true;
}
- } else {
- efi_info("KASLR disabled on kernel command line\n");
}
}
diff --git a/drivers/firmware/efi/libstub/efi-stub.c b/drivers/firmware/efi/libstub/efi-stub.c
index da93864d7abc..f515394cce6e 100644
--- a/drivers/firmware/efi/libstub/efi-stub.c
+++ b/drivers/firmware/efi/libstub/efi-stub.c
@@ -198,7 +198,7 @@ efi_status_t __efiapi efi_pe_entry(efi_handle_t handle,
status = handle_kernel_image(&image_addr, &image_size,
&reserve_addr,
&reserve_size,
- image);
+ image, handle);
if (status != EFI_SUCCESS) {
efi_err("Failed to relocate kernel\n");
goto fail_free_screeninfo;
diff --git a/drivers/firmware/efi/libstub/efistub.h b/drivers/firmware/efi/libstub/efistub.h
index edb77b0621ea..b0ae0a454404 100644
--- a/drivers/firmware/efi/libstub/efistub.h
+++ b/drivers/firmware/efi/libstub/efistub.h
@@ -36,6 +36,9 @@ extern bool efi_novamap;
extern const efi_system_table_t *efi_system_table;
+typedef union efi_dxe_services_table efi_dxe_services_table_t;
+extern const efi_dxe_services_table_t *efi_dxe_table;
+
efi_status_t __efiapi efi_pe_entry(efi_handle_t handle,
efi_system_table_t *sys_table_arg);
@@ -44,6 +47,7 @@ efi_status_t __efiapi efi_pe_entry(efi_handle_t handle,
#define efi_is_native() (true)
#define efi_bs_call(func, ...) efi_system_table->boottime->func(__VA_ARGS__)
#define efi_rt_call(func, ...) efi_system_table->runtime->func(__VA_ARGS__)
+#define efi_dxe_call(func, ...) efi_dxe_table->func(__VA_ARGS__)
#define efi_table_attr(inst, attr) (inst->attr)
#define efi_call_proto(inst, func, ...) inst->func(inst, ##__VA_ARGS__)
@@ -329,6 +333,76 @@ union efi_boot_services {
} mixed_mode;
};
+typedef enum {
+ EfiGcdMemoryTypeNonExistent,
+ EfiGcdMemoryTypeReserved,
+ EfiGcdMemoryTypeSystemMemory,
+ EfiGcdMemoryTypeMemoryMappedIo,
+ EfiGcdMemoryTypePersistent,
+ EfiGcdMemoryTypeMoreReliable,
+ EfiGcdMemoryTypeMaximum
+} efi_gcd_memory_type_t;
+
+typedef struct {
+ efi_physical_addr_t base_address;
+ u64 length;
+ u64 capabilities;
+ u64 attributes;
+ efi_gcd_memory_type_t gcd_memory_type;
+ void *image_handle;
+ void *device_handle;
+} efi_gcd_memory_space_desc_t;
+
+/*
+ * EFI DXE Services table
+ */
+union efi_dxe_services_table {
+ struct {
+ efi_table_hdr_t hdr;
+ void *add_memory_space;
+ void *allocate_memory_space;
+ void *free_memory_space;
+ void *remove_memory_space;
+ efi_status_t (__efiapi *get_memory_space_descriptor)(efi_physical_addr_t,
+ efi_gcd_memory_space_desc_t *);
+ efi_status_t (__efiapi *set_memory_space_attributes)(efi_physical_addr_t,
+ u64, u64);
+ void *get_memory_space_map;
+ void *add_io_space;
+ void *allocate_io_space;
+ void *free_io_space;
+ void *remove_io_space;
+ void *get_io_space_descriptor;
+ void *get_io_space_map;
+ void *dispatch;
+ void *schedule;
+ void *trust;
+ void *process_firmware_volume;
+ void *set_memory_space_capabilities;
+ };
+ struct {
+ efi_table_hdr_t hdr;
+ u32 add_memory_space;
+ u32 allocate_memory_space;
+ u32 free_memory_space;
+ u32 remove_memory_space;
+ u32 get_memory_space_descriptor;
+ u32 set_memory_space_attributes;
+ u32 get_memory_space_map;
+ u32 add_io_space;
+ u32 allocate_io_space;
+ u32 free_io_space;
+ u32 remove_io_space;
+ u32 get_io_space_descriptor;
+ u32 get_io_space_map;
+ u32 dispatch;
+ u32 schedule;
+ u32 trust;
+ u32 process_firmware_volume;
+ u32 set_memory_space_capabilities;
+ } mixed_mode;
+};
+
typedef union efi_uga_draw_protocol efi_uga_draw_protocol_t;
union efi_uga_draw_protocol {
@@ -720,6 +794,13 @@ union efi_tcg2_protocol {
} mixed_mode;
};
+struct riscv_efi_boot_protocol {
+ u64 revision;
+
+ efi_status_t (__efiapi *get_boot_hartid)(struct riscv_efi_boot_protocol *,
+ unsigned long *boot_hartid);
+};
+
typedef union efi_load_file_protocol efi_load_file_protocol_t;
typedef union efi_load_file_protocol efi_load_file2_protocol_t;
@@ -865,7 +946,8 @@ efi_status_t handle_kernel_image(unsigned long *image_addr,
unsigned long *image_size,
unsigned long *reserve_addr,
unsigned long *reserve_size,
- efi_loaded_image_t *image);
+ efi_loaded_image_t *image,
+ efi_handle_t image_handle);
asmlinkage void __noreturn efi_enter_kernel(unsigned long entrypoint,
unsigned long fdt_addr,
diff --git a/drivers/firmware/efi/libstub/randomalloc.c b/drivers/firmware/efi/libstub/randomalloc.c
index 724155b9e10d..715f37479154 100644
--- a/drivers/firmware/efi/libstub/randomalloc.c
+++ b/drivers/firmware/efi/libstub/randomalloc.c
@@ -56,6 +56,7 @@ efi_status_t efi_random_alloc(unsigned long size,
unsigned long random_seed)
{
unsigned long map_size, desc_size, total_slots = 0, target_slot;
+ unsigned long total_mirrored_slots = 0;
unsigned long buff_size;
efi_status_t status;
efi_memory_desc_t *memory_map;
@@ -86,8 +87,14 @@ efi_status_t efi_random_alloc(unsigned long size,
slots = get_entry_num_slots(md, size, ilog2(align));
MD_NUM_SLOTS(md) = slots;
total_slots += slots;
+ if (md->attribute & EFI_MEMORY_MORE_RELIABLE)
+ total_mirrored_slots += slots;
}
+ /* consider only mirrored slots for randomization if any exist */
+ if (total_mirrored_slots > 0)
+ total_slots = total_mirrored_slots;
+
/* find a random number between 0 and total_slots */
target_slot = (total_slots * (u64)(random_seed & U32_MAX)) >> 32;
@@ -107,6 +114,10 @@ efi_status_t efi_random_alloc(unsigned long size,
efi_physical_addr_t target;
unsigned long pages;
+ if (total_mirrored_slots > 0 &&
+ !(md->attribute & EFI_MEMORY_MORE_RELIABLE))
+ continue;
+
if (target_slot >= MD_NUM_SLOTS(md)) {
target_slot -= MD_NUM_SLOTS(md);
continue;
diff --git a/drivers/firmware/efi/libstub/riscv-stub.c b/drivers/firmware/efi/libstub/riscv-stub.c
index 9c460843442f..9e85e58d1f27 100644
--- a/drivers/firmware/efi/libstub/riscv-stub.c
+++ b/drivers/firmware/efi/libstub/riscv-stub.c
@@ -21,9 +21,9 @@
#define MIN_KIMG_ALIGN SZ_4M
#endif
-typedef void __noreturn (*jump_kernel_func)(unsigned int, unsigned long);
+typedef void __noreturn (*jump_kernel_func)(unsigned long, unsigned long);
-static u32 hartid;
+static unsigned long hartid;
static int get_boot_hartid_from_fdt(void)
{
@@ -47,14 +47,31 @@ static int get_boot_hartid_from_fdt(void)
return 0;
}
+static efi_status_t get_boot_hartid_from_efi(void)
+{
+ efi_guid_t boot_protocol_guid = RISCV_EFI_BOOT_PROTOCOL_GUID;
+ struct riscv_efi_boot_protocol *boot_protocol;
+ efi_status_t status;
+
+ status = efi_bs_call(locate_protocol, &boot_protocol_guid, NULL,
+ (void **)&boot_protocol);
+ if (status != EFI_SUCCESS)
+ return status;
+ return efi_call_proto(boot_protocol, get_boot_hartid, &hartid);
+}
+
efi_status_t check_platform_features(void)
{
+ efi_status_t status;
int ret;
- ret = get_boot_hartid_from_fdt();
- if (ret) {
- efi_err("/chosen/boot-hartid missing or invalid!\n");
- return EFI_UNSUPPORTED;
+ status = get_boot_hartid_from_efi();
+ if (status != EFI_SUCCESS) {
+ ret = get_boot_hartid_from_fdt();
+ if (ret) {
+ efi_err("Failed to get boot hartid!\n");
+ return EFI_UNSUPPORTED;
+ }
}
return EFI_SUCCESS;
}
@@ -80,7 +97,8 @@ efi_status_t handle_kernel_image(unsigned long *image_addr,
unsigned long *image_size,
unsigned long *reserve_addr,
unsigned long *reserve_size,
- efi_loaded_image_t *image)
+ efi_loaded_image_t *image,
+ efi_handle_t image_handle)
{
unsigned long kernel_size = 0;
unsigned long preferred_addr;
diff --git a/drivers/firmware/efi/libstub/x86-stub.c b/drivers/firmware/efi/libstub/x86-stub.c
index 01ddd4502e28..b14e88ccefca 100644
--- a/drivers/firmware/efi/libstub/x86-stub.c
+++ b/drivers/firmware/efi/libstub/x86-stub.c
@@ -22,6 +22,7 @@
#define MAXMEM_X86_64_4LEVEL (1ull << 46)
const efi_system_table_t *efi_system_table;
+const efi_dxe_services_table_t *efi_dxe_table;
extern u32 image_offset;
static efi_loaded_image_t *image = NULL;
@@ -211,9 +212,110 @@ static void retrieve_apple_device_properties(struct boot_params *boot_params)
}
}
+static void
+adjust_memory_range_protection(unsigned long start, unsigned long size)
+{
+ efi_status_t status;
+ efi_gcd_memory_space_desc_t desc;
+ unsigned long end, next;
+ unsigned long rounded_start, rounded_end;
+ unsigned long unprotect_start, unprotect_size;
+ int has_system_memory = 0;
+
+ if (efi_dxe_table == NULL)
+ return;
+
+ rounded_start = rounddown(start, EFI_PAGE_SIZE);
+ rounded_end = roundup(start + size, EFI_PAGE_SIZE);
+
+ /*
+ * Don't modify memory region attributes, they are
+ * already suitable, to lower the possibility to
+ * encounter firmware bugs.
+ */
+
+ for (end = start + size; start < end; start = next) {
+
+ status = efi_dxe_call(get_memory_space_descriptor, start, &desc);
+
+ if (status != EFI_SUCCESS)
+ return;
+
+ next = desc.base_address + desc.length;
+
+ /*
+ * Only system memory is suitable for trampoline/kernel image placement,
+ * so only this type of memory needs its attributes to be modified.
+ */
+
+ if (desc.gcd_memory_type != EfiGcdMemoryTypeSystemMemory ||
+ (desc.attributes & (EFI_MEMORY_RO | EFI_MEMORY_XP)) == 0)
+ continue;
+
+ unprotect_start = max(rounded_start, (unsigned long)desc.base_address);
+ unprotect_size = min(rounded_end, next) - unprotect_start;
+
+ status = efi_dxe_call(set_memory_space_attributes,
+ unprotect_start, unprotect_size,
+ EFI_MEMORY_WB);
+
+ if (status != EFI_SUCCESS) {
+ efi_warn("Unable to unprotect memory range [%08lx,%08lx]: %d\n",
+ unprotect_start,
+ unprotect_start + unprotect_size,
+ (int)status);
+ }
+ }
+}
+
+/*
+ * Trampoline takes 2 pages and can be loaded in first megabyte of memory
+ * with its end placed between 128k and 640k where BIOS might start.
+ * (see arch/x86/boot/compressed/pgtable_64.c)
+ *
+ * We cannot find exact trampoline placement since memory map
+ * can be modified by UEFI, and it can alter the computed address.
+ */
+
+#define TRAMPOLINE_PLACEMENT_BASE ((128 - 8)*1024)
+#define TRAMPOLINE_PLACEMENT_SIZE (640*1024 - (128 - 8)*1024)
+
+void startup_32(struct boot_params *boot_params);
+
+static void
+setup_memory_protection(unsigned long image_base, unsigned long image_size)
+{
+ /*
+ * Allow execution of possible trampoline used
+ * for switching between 4- and 5-level page tables
+ * and relocated kernel image.
+ */
+
+ adjust_memory_range_protection(TRAMPOLINE_PLACEMENT_BASE,
+ TRAMPOLINE_PLACEMENT_SIZE);
+
+#ifdef CONFIG_64BIT
+ if (image_base != (unsigned long)startup_32)
+ adjust_memory_range_protection(image_base, image_size);
+#else
+ /*
+ * Clear protection flags on a whole range of possible
+ * addresses used for KASLR. We don't need to do that
+ * on x86_64, since KASLR/extraction is performed after
+ * dedicated identity page tables are built and we only
+ * need to remove possible protection on relocated image
+ * itself disregarding further relocations.
+ */
+ adjust_memory_range_protection(LOAD_PHYSICAL_ADDR,
+ KERNEL_IMAGE_SIZE - LOAD_PHYSICAL_ADDR);
+#endif
+}
+
static const efi_char16_t apple[] = L"Apple";
-static void setup_quirks(struct boot_params *boot_params)
+static void setup_quirks(struct boot_params *boot_params,
+ unsigned long image_base,
+ unsigned long image_size)
{
efi_char16_t *fw_vendor = (efi_char16_t *)(unsigned long)
efi_table_attr(efi_system_table, fw_vendor);
@@ -222,6 +324,9 @@ static void setup_quirks(struct boot_params *boot_params)
if (IS_ENABLED(CONFIG_APPLE_PROPERTIES))
retrieve_apple_device_properties(boot_params);
}
+
+ if (IS_ENABLED(CONFIG_EFI_DXE_MEM_ATTRIBUTES))
+ setup_memory_protection(image_base, image_size);
}
/*
@@ -341,8 +446,6 @@ static void __noreturn efi_exit(efi_handle_t handle, efi_status_t status)
asm("hlt");
}
-void startup_32(struct boot_params *boot_params);
-
void __noreturn efi_stub_entry(efi_handle_t handle,
efi_system_table_t *sys_table_arg,
struct boot_params *boot_params);
@@ -677,11 +780,17 @@ unsigned long efi_main(efi_handle_t handle,
efi_status_t status;
efi_system_table = sys_table_arg;
-
/* Check if we were booted by the EFI firmware */
if (efi_system_table->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE)
efi_exit(handle, EFI_INVALID_PARAMETER);
+ efi_dxe_table = get_efi_config_table(EFI_DXE_SERVICES_TABLE_GUID);
+ if (efi_dxe_table &&
+ efi_dxe_table->hdr.signature != EFI_DXE_SERVICES_TABLE_SIGNATURE) {
+ efi_warn("Ignoring DXE services table: invalid signature\n");
+ efi_dxe_table = NULL;
+ }
+
/*
* If the kernel isn't already loaded at a suitable address,
* relocate it.
@@ -791,7 +900,7 @@ unsigned long efi_main(efi_handle_t handle,
setup_efi_pci(boot_params);
- setup_quirks(boot_params);
+ setup_quirks(boot_params, bzimage_addr, buffer_end - buffer_start);
status = exit_boot(boot_params, handle);
if (status != EFI_SUCCESS) {
diff --git a/drivers/gpio/gpio-mvebu.c b/drivers/gpio/gpio-mvebu.c
index 4c1f9e1091b7..2db19cd640a4 100644
--- a/drivers/gpio/gpio-mvebu.c
+++ b/drivers/gpio/gpio-mvebu.c
@@ -707,6 +707,9 @@ static int mvebu_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
unsigned long flags;
unsigned int on, off;
+ if (state->polarity != PWM_POLARITY_NORMAL)
+ return -EINVAL;
+
val = (unsigned long long) mvpwm->clk_rate * state->duty_cycle;
do_div(val, NSEC_PER_SEC);
if (val > UINT_MAX + 1ULL)
@@ -871,13 +874,6 @@ static int mvebu_pwm_probe(struct platform_device *pdev,
mvpwm->chip.dev = dev;
mvpwm->chip.ops = &mvebu_pwm_ops;
mvpwm->chip.npwm = mvchip->chip.ngpio;
- /*
- * There may already be some PWM allocated, so we can't force
- * mvpwm->chip.base to a fixed point like mvchip->chip.base.
- * So, we let pwmchip_add() do the numbering and take the next free
- * region.
- */
- mvpwm->chip.base = -1;
spin_lock_init(&mvpwm->lock);
diff --git a/drivers/gpio/gpio-pca953x.c b/drivers/gpio/gpio-pca953x.c
index d2fe76f3f34f..8726921a1129 100644
--- a/drivers/gpio/gpio-pca953x.c
+++ b/drivers/gpio/gpio-pca953x.c
@@ -762,11 +762,11 @@ static bool pca953x_irq_pending(struct pca953x_chip *chip, unsigned long *pendin
bitmap_xor(cur_stat, new_stat, old_stat, gc->ngpio);
bitmap_and(trigger, cur_stat, chip->irq_mask, gc->ngpio);
+ bitmap_copy(chip->irq_stat, new_stat, gc->ngpio);
+
if (bitmap_empty(trigger, gc->ngpio))
return false;
- bitmap_copy(chip->irq_stat, new_stat, gc->ngpio);
-
bitmap_and(cur_stat, chip->irq_trig_fall, old_stat, gc->ngpio);
bitmap_and(old_stat, chip->irq_trig_raise, new_stat, gc->ngpio);
bitmap_or(new_stat, old_stat, cur_stat, gc->ngpio);
diff --git a/drivers/gpio/gpio-vf610.c b/drivers/gpio/gpio-vf610.c
index 20780c35da1b..23cddb265a0d 100644
--- a/drivers/gpio/gpio-vf610.c
+++ b/drivers/gpio/gpio-vf610.c
@@ -125,9 +125,13 @@ static int vf610_gpio_direction_output(struct gpio_chip *chip, unsigned gpio,
{
struct vf610_gpio_port *port = gpiochip_get_data(chip);
unsigned long mask = BIT(gpio);
+ u32 val;
- if (port->sdata && port->sdata->have_paddr)
- vf610_gpio_writel(mask, port->gpio_base + GPIO_PDDR);
+ if (port->sdata && port->sdata->have_paddr) {
+ val = vf610_gpio_readl(port->gpio_base + GPIO_PDDR);
+ val |= mask;
+ vf610_gpio_writel(val, port->gpio_base + GPIO_PDDR);
+ }
vf610_gpio_set(chip, gpio, value);
diff --git a/drivers/gpio/gpio-visconti.c b/drivers/gpio/gpio-visconti.c
index 47455810bdb9..e6534ea1eaa7 100644
--- a/drivers/gpio/gpio-visconti.c
+++ b/drivers/gpio/gpio-visconti.c
@@ -130,7 +130,6 @@ static int visconti_gpio_probe(struct platform_device *pdev)
struct gpio_irq_chip *girq;
struct irq_domain *parent;
struct device_node *irq_parent;
- struct fwnode_handle *fwnode;
int ret;
priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
@@ -150,14 +149,12 @@ static int visconti_gpio_probe(struct platform_device *pdev)
}
parent = irq_find_host(irq_parent);
+ of_node_put(irq_parent);
if (!parent) {
dev_err(dev, "No IRQ parent domain\n");
return -ENODEV;
}
- fwnode = of_node_to_fwnode(irq_parent);
- of_node_put(irq_parent);
-
ret = bgpio_init(&priv->gpio_chip, dev, 4,
priv->base + GPIO_IDATA,
priv->base + GPIO_OSET,
@@ -180,7 +177,7 @@ static int visconti_gpio_probe(struct platform_device *pdev)
girq = &priv->gpio_chip.irq;
girq->chip = irq_chip;
- girq->fwnode = fwnode;
+ girq->fwnode = of_node_to_fwnode(dev->of_node);
girq->parent_domain = parent;
girq->child_to_parent_hwirq = visconti_gpio_child_to_parent_hwirq;
girq->populate_parent_alloc_arg = visconti_gpio_populate_parent_fwspec;
diff --git a/drivers/gpio/gpiolib-of.c b/drivers/gpio/gpiolib-of.c
index ae1ce319cd78..7e5e51d49d09 100644
--- a/drivers/gpio/gpiolib-of.c
+++ b/drivers/gpio/gpiolib-of.c
@@ -910,7 +910,7 @@ static void of_gpiochip_init_valid_mask(struct gpio_chip *chip)
i, &start);
of_property_read_u32_index(np, "gpio-reserved-ranges",
i + 1, &count);
- if (start >= chip->ngpio || start + count >= chip->ngpio)
+ if (start >= chip->ngpio || start + count > chip->ngpio)
continue;
bitmap_clear(chip->valid_mask, start, count);
diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index c813a6adbb6e..690035124faa 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -1608,8 +1608,6 @@ static int gpiochip_add_irqchip(struct gpio_chip *gc,
gpiochip_set_irq_hooks(gc);
- acpi_gpiochip_request_interrupts(gc);
-
/*
* Using barrier() here to prevent compiler from reordering
* gc->irq.initialized before initialization of above
@@ -1619,6 +1617,8 @@ static int gpiochip_add_irqchip(struct gpio_chip *gc,
gc->irq.initialized = true;
+ acpi_gpiochip_request_interrupts(gc);
+
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index cdf0818088b3..7606e3b6361e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1342,9 +1342,11 @@ static inline int amdgpu_acpi_smart_shift_update(struct drm_device *dev,
#if defined(CONFIG_ACPI) && defined(CONFIG_SUSPEND)
bool amdgpu_acpi_is_s3_active(struct amdgpu_device *adev);
+bool amdgpu_acpi_should_gpu_reset(struct amdgpu_device *adev);
bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device *adev);
#else
static inline bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device *adev) { return false; }
+static inline bool amdgpu_acpi_should_gpu_reset(struct amdgpu_device *adev) { return false; }
static inline bool amdgpu_acpi_is_s3_active(struct amdgpu_device *adev) { return false; }
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
index 0e12315fa0cb..98ac53ee6bb5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
@@ -1046,6 +1046,20 @@ bool amdgpu_acpi_is_s3_active(struct amdgpu_device *adev)
}
/**
+ * amdgpu_acpi_should_gpu_reset
+ *
+ * @adev: amdgpu_device_pointer
+ *
+ * returns true if should reset GPU, false if not
+ */
+bool amdgpu_acpi_should_gpu_reset(struct amdgpu_device *adev)
+{
+ if (adev->flags & AMD_IS_APU)
+ return false;
+ return pm_suspend_target_state != PM_SUSPEND_TO_IDLE;
+}
+
+/**
* amdgpu_acpi_is_s0ix_active
*
* @adev: amdgpu_device_pointer
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 970b065e9a6b..d0d0ea565e3d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -128,6 +128,8 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, union drm_amdgpu_cs
goto free_chunk;
}
+ mutex_lock(&p->ctx->lock);
+
/* skip guilty context job */
if (atomic_read(&p->ctx->guilty) == 1) {
ret = -ECANCELED;
@@ -709,6 +711,7 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error,
dma_fence_put(parser->fence);
if (parser->ctx) {
+ mutex_unlock(&parser->ctx->lock);
amdgpu_ctx_put(parser->ctx);
}
if (parser->bo_list)
@@ -1157,6 +1160,9 @@ static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
{
int i, r;
+ /* TODO: Investigate why we still need the context lock */
+ mutex_unlock(&p->ctx->lock);
+
for (i = 0; i < p->nchunks; ++i) {
struct amdgpu_cs_chunk *chunk;
@@ -1167,32 +1173,34 @@ static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
case AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES:
r = amdgpu_cs_process_fence_dep(p, chunk);
if (r)
- return r;
+ goto out;
break;
case AMDGPU_CHUNK_ID_SYNCOBJ_IN:
r = amdgpu_cs_process_syncobj_in_dep(p, chunk);
if (r)
- return r;
+ goto out;
break;
case AMDGPU_CHUNK_ID_SYNCOBJ_OUT:
r = amdgpu_cs_process_syncobj_out_dep(p, chunk);
if (r)
- return r;
+ goto out;
break;
case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT:
r = amdgpu_cs_process_syncobj_timeline_in_dep(p, chunk);
if (r)
- return r;
+ goto out;
break;
case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL:
r = amdgpu_cs_process_syncobj_timeline_out_dep(p, chunk);
if (r)
- return r;
+ goto out;
break;
}
}
- return 0;
+out:
+ mutex_lock(&p->ctx->lock);
+ return r;
}
static void amdgpu_cs_post_dependencies(struct amdgpu_cs_parser *p)
@@ -1368,6 +1376,7 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
goto out;
r = amdgpu_cs_submit(&parser, cs);
+
out:
amdgpu_cs_parser_fini(&parser, r, reserved_buffers);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index 5981c7d9bd48..c317078d1afd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -237,6 +237,7 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
kref_init(&ctx->refcount);
spin_lock_init(&ctx->ring_lock);
+ mutex_init(&ctx->lock);
ctx->reset_counter = atomic_read(&adev->gpu_reset_counter);
ctx->reset_counter_query = ctx->reset_counter;
@@ -295,6 +296,7 @@ static int amdgpu_ctx_set_stable_pstate(struct amdgpu_ctx *ctx,
{
struct amdgpu_device *adev = ctx->adev;
enum amd_dpm_forced_level level;
+ u32 current_stable_pstate;
int r;
mutex_lock(&adev->pm.stable_pstate_ctx_lock);
@@ -303,6 +305,10 @@ static int amdgpu_ctx_set_stable_pstate(struct amdgpu_ctx *ctx,
goto done;
}
+ r = amdgpu_ctx_get_stable_pstate(ctx, &current_stable_pstate);
+ if (r || (stable_pstate == current_stable_pstate))
+ goto done;
+
switch (stable_pstate) {
case AMDGPU_CTX_STABLE_PSTATE_NONE:
level = AMD_DPM_FORCED_LEVEL_AUTO;
@@ -357,6 +363,7 @@ static void amdgpu_ctx_fini(struct kref *ref)
drm_dev_exit(idx);
}
+ mutex_destroy(&ctx->lock);
kfree(ctx);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
index d0cbfcea90f7..142f2f87d44c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
@@ -49,6 +49,7 @@ struct amdgpu_ctx {
bool preamble_presented;
int32_t init_priority;
int32_t override_priority;
+ struct mutex lock;
atomic_t guilty;
unsigned long ras_counter_ce;
unsigned long ras_counter_ue;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 29e9419a914b..46ef57b07c15 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -2336,7 +2336,7 @@ static int amdgpu_pmops_suspend_noirq(struct device *dev)
struct drm_device *drm_dev = dev_get_drvdata(dev);
struct amdgpu_device *adev = drm_to_adev(drm_dev);
- if (!adev->in_s0ix)
+ if (amdgpu_acpi_should_gpu_reset(adev))
return amdgpu_asic_reset(adev);
return 0;
@@ -2395,6 +2395,71 @@ static int amdgpu_pmops_restore(struct device *dev)
return amdgpu_device_resume(drm_dev, true);
}
+static int amdgpu_runtime_idle_check_display(struct device *dev)
+{
+ struct pci_dev *pdev = to_pci_dev(dev);
+ struct drm_device *drm_dev = pci_get_drvdata(pdev);
+ struct amdgpu_device *adev = drm_to_adev(drm_dev);
+
+ if (adev->mode_info.num_crtc) {
+ struct drm_connector *list_connector;
+ struct drm_connector_list_iter iter;
+ int ret = 0;
+
+ /* XXX: Return busy if any displays are connected to avoid
+ * possible display wakeups after runtime resume due to
+ * hotplug events in case any displays were connected while
+ * the GPU was in suspend. Remove this once that is fixed.
+ */
+ mutex_lock(&drm_dev->mode_config.mutex);
+ drm_connector_list_iter_begin(drm_dev, &iter);
+ drm_for_each_connector_iter(list_connector, &iter) {
+ if (list_connector->status == connector_status_connected) {
+ ret = -EBUSY;
+ break;
+ }
+ }
+ drm_connector_list_iter_end(&iter);
+ mutex_unlock(&drm_dev->mode_config.mutex);
+
+ if (ret)
+ return ret;
+
+ if (amdgpu_device_has_dc_support(adev)) {
+ struct drm_crtc *crtc;
+
+ drm_for_each_crtc(crtc, drm_dev) {
+ drm_modeset_lock(&crtc->mutex, NULL);
+ if (crtc->state->active)
+ ret = -EBUSY;
+ drm_modeset_unlock(&crtc->mutex);
+ if (ret < 0)
+ break;
+ }
+ } else {
+ mutex_lock(&drm_dev->mode_config.mutex);
+ drm_modeset_lock(&drm_dev->mode_config.connection_mutex, NULL);
+
+ drm_connector_list_iter_begin(drm_dev, &iter);
+ drm_for_each_connector_iter(list_connector, &iter) {
+ if (list_connector->dpms == DRM_MODE_DPMS_ON) {
+ ret = -EBUSY;
+ break;
+ }
+ }
+
+ drm_connector_list_iter_end(&iter);
+
+ drm_modeset_unlock(&drm_dev->mode_config.connection_mutex);
+ mutex_unlock(&drm_dev->mode_config.mutex);
+ }
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
static int amdgpu_pmops_runtime_suspend(struct device *dev)
{
struct pci_dev *pdev = to_pci_dev(dev);
@@ -2407,6 +2472,10 @@ static int amdgpu_pmops_runtime_suspend(struct device *dev)
return -EBUSY;
}
+ ret = amdgpu_runtime_idle_check_display(dev);
+ if (ret)
+ return ret;
+
/* wait for all rings to drain before suspending */
for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
struct amdgpu_ring *ring = adev->rings[i];
@@ -2516,41 +2585,7 @@ static int amdgpu_pmops_runtime_idle(struct device *dev)
return -EBUSY;
}
- if (amdgpu_device_has_dc_support(adev)) {
- struct drm_crtc *crtc;
-
- drm_for_each_crtc(crtc, drm_dev) {
- drm_modeset_lock(&crtc->mutex, NULL);
- if (crtc->state->active)
- ret = -EBUSY;
- drm_modeset_unlock(&crtc->mutex);
- if (ret < 0)
- break;
- }
-
- } else {
- struct drm_connector *list_connector;
- struct drm_connector_list_iter iter;
-
- mutex_lock(&drm_dev->mode_config.mutex);
- drm_modeset_lock(&drm_dev->mode_config.connection_mutex, NULL);
-
- drm_connector_list_iter_begin(drm_dev, &iter);
- drm_for_each_connector_iter(list_connector, &iter) {
- if (list_connector->dpms == DRM_MODE_DPMS_ON) {
- ret = -EBUSY;
- break;
- }
- }
-
- drm_connector_list_iter_end(&iter);
-
- drm_modeset_unlock(&drm_dev->mode_config.connection_mutex);
- mutex_unlock(&drm_dev->mode_config.mutex);
- }
-
- if (ret == -EBUSY)
- DRM_DEBUG_DRIVER("failing to power off - crtc active\n");
+ ret = amdgpu_runtime_idle_check_display(dev);
pm_runtime_mark_last_busy(dev);
pm_runtime_autosuspend(dev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index a025f080aa6a..5e3756643da3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -24,6 +24,7 @@
#include <linux/module.h>
#include <drm/drm_drv.h>
+#include <xen/xen.h>
#include "amdgpu.h"
#include "amdgpu_ras.h"
@@ -710,7 +711,8 @@ void amdgpu_detect_virtualization(struct amdgpu_device *adev)
adev->virt.caps |= AMDGPU_SRIOV_CAPS_ENABLE_IOV;
if (!reg) {
- if (is_virtual_machine()) /* passthrough mode exclus sriov mod */
+ /* passthrough mode exclus sriov mod */
+ if (is_virtual_machine() && !xen_initial_domain())
adev->virt.caps |= AMDGPU_PASSTHROUGH_MODE;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
index 5228421b0f72..7c956cf21bc7 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -1151,6 +1151,16 @@ static int gmc_v10_0_set_clockgating_state(void *handle,
int r;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ /*
+ * The issue mmhub can't disconnect from DF with MMHUB clock gating being disabled
+ * is a new problem observed at DF 3.0.3, however with the same suspend sequence not
+ * seen any issue on the DF 3.0.2 series platform.
+ */
+ if (adev->in_s0ix && adev->ip_versions[DF_HWIP][0] > IP_VERSION(3, 0, 2)) {
+ dev_dbg(adev->dev, "keep mmhub clock gating being enabled for s0ix\n");
+ return 0;
+ }
+
r = adev->mmhub.funcs->set_clockgating(adev, state);
if (r)
return r;
diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c
index 039b90cdc3bc..45f0188c4273 100644
--- a/drivers/gpu/drm/amd/amdgpu/vi.c
+++ b/drivers/gpu/drm/amd/amdgpu/vi.c
@@ -81,6 +81,10 @@
#include "mxgpu_vi.h"
#include "amdgpu_dm.h"
+#if IS_ENABLED(CONFIG_X86)
+#include <asm/intel-family.h>
+#endif
+
#define ixPCIE_LC_L1_PM_SUBSTATE 0x100100C6
#define PCIE_LC_L1_PM_SUBSTATE__LC_L1_SUBSTATES_OVERRIDE_EN_MASK 0x00000001L
#define PCIE_LC_L1_PM_SUBSTATE__LC_PCI_PM_L1_2_OVERRIDE_MASK 0x00000002L
@@ -1134,13 +1138,24 @@ static void vi_enable_aspm(struct amdgpu_device *adev)
WREG32_PCIE(ixPCIE_LC_CNTL, data);
}
+static bool aspm_support_quirk_check(void)
+{
+#if IS_ENABLED(CONFIG_X86)
+ struct cpuinfo_x86 *c = &cpu_data(0);
+
+ return !(c->x86 == 6 && c->x86_model == INTEL_FAM6_ALDERLAKE);
+#else
+ return true;
+#endif
+}
+
static void vi_program_aspm(struct amdgpu_device *adev)
{
u32 data, data1, orig;
bool bL1SS = false;
bool bClkReqSupport = true;
- if (!amdgpu_device_should_use_aspm(adev))
+ if (!amdgpu_device_should_use_aspm(adev) || !aspm_support_quirk_check())
return;
if (adev->flags & AMD_IS_APU ||
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index acf4f7975850..198672264492 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -130,19 +130,33 @@ void program_sh_mem_settings(struct device_queue_manager *dqm,
}
static void increment_queue_count(struct device_queue_manager *dqm,
- enum kfd_queue_type type)
+ struct qcm_process_device *qpd,
+ struct queue *q)
{
dqm->active_queue_count++;
- if (type == KFD_QUEUE_TYPE_COMPUTE || type == KFD_QUEUE_TYPE_DIQ)
+ if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
+ q->properties.type == KFD_QUEUE_TYPE_DIQ)
dqm->active_cp_queue_count++;
+
+ if (q->properties.is_gws) {
+ dqm->gws_queue_count++;
+ qpd->mapped_gws_queue = true;
+ }
}
static void decrement_queue_count(struct device_queue_manager *dqm,
- enum kfd_queue_type type)
+ struct qcm_process_device *qpd,
+ struct queue *q)
{
dqm->active_queue_count--;
- if (type == KFD_QUEUE_TYPE_COMPUTE || type == KFD_QUEUE_TYPE_DIQ)
+ if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
+ q->properties.type == KFD_QUEUE_TYPE_DIQ)
dqm->active_cp_queue_count--;
+
+ if (q->properties.is_gws) {
+ dqm->gws_queue_count--;
+ qpd->mapped_gws_queue = false;
+ }
}
/*
@@ -412,7 +426,7 @@ add_queue_to_list:
list_add(&q->list, &qpd->queues_list);
qpd->queue_count++;
if (q->properties.is_active)
- increment_queue_count(dqm, q->properties.type);
+ increment_queue_count(dqm, qpd, q);
/*
* Unconditionally increment this counter, regardless of the queue's
@@ -601,13 +615,8 @@ static int destroy_queue_nocpsch_locked(struct device_queue_manager *dqm,
deallocate_vmid(dqm, qpd, q);
}
qpd->queue_count--;
- if (q->properties.is_active) {
- decrement_queue_count(dqm, q->properties.type);
- if (q->properties.is_gws) {
- dqm->gws_queue_count--;
- qpd->mapped_gws_queue = false;
- }
- }
+ if (q->properties.is_active)
+ decrement_queue_count(dqm, qpd, q);
return retval;
}
@@ -700,12 +709,11 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q,
* dqm->active_queue_count to determine whether a new runlist must be
* uploaded.
*/
- if (q->properties.is_active && !prev_active)
- increment_queue_count(dqm, q->properties.type);
- else if (!q->properties.is_active && prev_active)
- decrement_queue_count(dqm, q->properties.type);
-
- if (q->gws && !q->properties.is_gws) {
+ if (q->properties.is_active && !prev_active) {
+ increment_queue_count(dqm, &pdd->qpd, q);
+ } else if (!q->properties.is_active && prev_active) {
+ decrement_queue_count(dqm, &pdd->qpd, q);
+ } else if (q->gws && !q->properties.is_gws) {
if (q->properties.is_active) {
dqm->gws_queue_count++;
pdd->qpd.mapped_gws_queue = true;
@@ -767,11 +775,7 @@ static int evict_process_queues_nocpsch(struct device_queue_manager *dqm,
mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
q->properties.type)];
q->properties.is_active = false;
- decrement_queue_count(dqm, q->properties.type);
- if (q->properties.is_gws) {
- dqm->gws_queue_count--;
- qpd->mapped_gws_queue = false;
- }
+ decrement_queue_count(dqm, qpd, q);
if (WARN_ONCE(!dqm->sched_running, "Evict when stopped\n"))
continue;
@@ -817,7 +821,7 @@ static int evict_process_queues_cpsch(struct device_queue_manager *dqm,
continue;
q->properties.is_active = false;
- decrement_queue_count(dqm, q->properties.type);
+ decrement_queue_count(dqm, qpd, q);
}
pdd->last_evict_timestamp = get_jiffies_64();
retval = execute_queues_cpsch(dqm,
@@ -888,11 +892,7 @@ static int restore_process_queues_nocpsch(struct device_queue_manager *dqm,
mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
q->properties.type)];
q->properties.is_active = true;
- increment_queue_count(dqm, q->properties.type);
- if (q->properties.is_gws) {
- dqm->gws_queue_count++;
- qpd->mapped_gws_queue = true;
- }
+ increment_queue_count(dqm, qpd, q);
if (WARN_ONCE(!dqm->sched_running, "Restore when stopped\n"))
continue;
@@ -950,7 +950,7 @@ static int restore_process_queues_cpsch(struct device_queue_manager *dqm,
continue;
q->properties.is_active = true;
- increment_queue_count(dqm, q->properties.type);
+ increment_queue_count(dqm, &pdd->qpd, q);
}
retval = execute_queues_cpsch(dqm,
KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
@@ -1378,7 +1378,7 @@ static int create_kernel_queue_cpsch(struct device_queue_manager *dqm,
dqm->total_queue_count);
list_add(&kq->list, &qpd->priv_queue_list);
- increment_queue_count(dqm, kq->queue->properties.type);
+ increment_queue_count(dqm, qpd, kq->queue);
qpd->is_debug = true;
execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
dqm_unlock(dqm);
@@ -1392,7 +1392,7 @@ static void destroy_kernel_queue_cpsch(struct device_queue_manager *dqm,
{
dqm_lock(dqm);
list_del(&kq->list);
- decrement_queue_count(dqm, kq->queue->properties.type);
+ decrement_queue_count(dqm, qpd, kq->queue);
qpd->is_debug = false;
execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0);
/*
@@ -1467,7 +1467,7 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
qpd->queue_count++;
if (q->properties.is_active) {
- increment_queue_count(dqm, q->properties.type);
+ increment_queue_count(dqm, qpd, q);
execute_queues_cpsch(dqm,
KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
@@ -1683,15 +1683,11 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
list_del(&q->list);
qpd->queue_count--;
if (q->properties.is_active) {
- decrement_queue_count(dqm, q->properties.type);
+ decrement_queue_count(dqm, qpd, q);
retval = execute_queues_cpsch(dqm,
KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
if (retval == -ETIME)
qpd->reset_wavefronts = true;
- if (q->properties.is_gws) {
- dqm->gws_queue_count--;
- qpd->mapped_gws_queue = false;
- }
}
/*
@@ -1932,7 +1928,7 @@ static int process_termination_cpsch(struct device_queue_manager *dqm,
/* Clean all kernel queues */
list_for_each_entry_safe(kq, kq_next, &qpd->priv_queue_list, list) {
list_del(&kq->list);
- decrement_queue_count(dqm, kq->queue->properties.type);
+ decrement_queue_count(dqm, qpd, kq->queue);
qpd->is_debug = false;
dqm->total_queue_count--;
filter = KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES;
@@ -1945,13 +1941,8 @@ static int process_termination_cpsch(struct device_queue_manager *dqm,
else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
deallocate_sdma_queue(dqm, q);
- if (q->properties.is_active) {
- decrement_queue_count(dqm, q->properties.type);
- if (q->properties.is_gws) {
- dqm->gws_queue_count--;
- qpd->mapped_gws_queue = false;
- }
- }
+ if (q->properties.is_active)
+ decrement_queue_count(dqm, qpd, q);
dqm->total_queue_count--;
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 9967a73d5b0f..8f58fc491b28 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -1103,7 +1103,7 @@ struct kfd_criu_queue_priv_data {
uint32_t priority;
uint32_t q_percent;
uint32_t doorbell_id;
- uint32_t is_gws;
+ uint32_t gws;
uint32_t sdma_id;
uint32_t eop_ring_buffer_size;
uint32_t ctx_save_restore_area_size;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
index 6eca9509f2e3..4f58e671d39b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
@@ -636,6 +636,8 @@ static int criu_checkpoint_queue(struct kfd_process_device *pdd,
q_data->ctx_save_restore_area_size =
q->properties.ctx_save_restore_area_size;
+ q_data->gws = !!q->gws;
+
ret = pqm_checkpoint_mqd(&pdd->process->pqm, q->properties.queue_id, mqd, ctl_stack);
if (ret) {
pr_err("Failed checkpoint queue_mqd (%d)\n", ret);
@@ -743,7 +745,6 @@ static void set_queue_properties_from_criu(struct queue_properties *qp,
struct kfd_criu_queue_priv_data *q_data)
{
qp->is_interop = false;
- qp->is_gws = q_data->is_gws;
qp->queue_percent = q_data->q_percent;
qp->priority = q_data->priority;
qp->queue_address = q_data->q_address;
@@ -826,12 +827,15 @@ int kfd_criu_restore_queue(struct kfd_process *p,
NULL);
if (ret) {
pr_err("Failed to create new queue err:%d\n", ret);
- ret = -EINVAL;
+ goto exit;
}
+ if (q_data->gws)
+ ret = pqm_set_gws(&p->pqm, q_data->q_id, pdd->dev->gws);
+
exit:
if (ret)
- pr_err("Failed to create queue (%d)\n", ret);
+ pr_err("Failed to restore queue (%d)\n", ret);
else
pr_debug("Queue id %d was restored successfully\n", queue_id);
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c
index 22dabe596dfc..95b5b5bfa1ff 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c
@@ -4440,7 +4440,7 @@ static void dp_test_get_audio_test_data(struct dc_link *link, bool disable_video
&dpcd_pattern_type.value,
sizeof(dpcd_pattern_type));
- channel_count = dpcd_test_mode.bits.channel_count + 1;
+ channel_count = min(dpcd_test_mode.bits.channel_count + 1, AUDIO_CHANNELS_COUNT);
// read pattern periods for requested channels when sawTooth pattern is requested
if (dpcd_pattern_type.value == AUDIO_TEST_PATTERN_SAWTOOTH ||
diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c
index 3fe4bfbb98a0..faab59508d82 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c
@@ -997,6 +997,7 @@ static struct clock_source *dcn21_clock_source_create(
return &clk_src->base;
}
+ kfree(clk_src);
BREAK_TO_DEBUGGER();
return NULL;
}
diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c
index d7559e5a99ce..e708f07fe75a 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c
@@ -153,9 +153,4 @@ void dcn31_hw_sequencer_construct(struct dc *dc)
dc->hwss.init_hw = dcn20_fpga_init_hw;
dc->hwseq->funcs.init_pipes = NULL;
}
- if (dc->debug.disable_z10) {
- /*hw not support z10 or sw disable it*/
- dc->hwss.z10_restore = NULL;
- dc->hwss.z10_save_init = NULL;
- }
}
diff --git a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
index 5504d81c77b7..72e7b5d40af6 100644
--- a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
+++ b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
@@ -427,6 +427,7 @@ int amdgpu_dpm_read_sensor(struct amdgpu_device *adev, enum amd_pp_sensors senso
void amdgpu_dpm_compute_clocks(struct amdgpu_device *adev)
{
const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs;
+ int i;
if (!adev->pm.dpm_enabled)
return;
@@ -434,6 +435,15 @@ void amdgpu_dpm_compute_clocks(struct amdgpu_device *adev)
if (!pp_funcs->pm_compute_clocks)
return;
+ if (adev->mode_info.num_crtc)
+ amdgpu_display_bandwidth_update(adev);
+
+ for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
+ struct amdgpu_ring *ring = adev->rings[i];
+ if (ring && ring->sched.ready)
+ amdgpu_fence_wait_empty(ring);
+ }
+
mutex_lock(&adev->pm.mutex);
pp_funcs->pm_compute_clocks(adev->powerplay.pp_handle);
mutex_unlock(&adev->pm.mutex);
@@ -443,6 +453,20 @@ void amdgpu_dpm_enable_uvd(struct amdgpu_device *adev, bool enable)
{
int ret = 0;
+ if (adev->family == AMDGPU_FAMILY_SI) {
+ mutex_lock(&adev->pm.mutex);
+ if (enable) {
+ adev->pm.dpm.uvd_active = true;
+ adev->pm.dpm.state = POWER_STATE_TYPE_INTERNAL_UVD;
+ } else {
+ adev->pm.dpm.uvd_active = false;
+ }
+ mutex_unlock(&adev->pm.mutex);
+
+ amdgpu_dpm_compute_clocks(adev);
+ return;
+ }
+
ret = amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_UVD, !enable);
if (ret)
DRM_ERROR("Dpm %s uvd failed, ret = %d. \n",
@@ -453,6 +477,21 @@ void amdgpu_dpm_enable_vce(struct amdgpu_device *adev, bool enable)
{
int ret = 0;
+ if (adev->family == AMDGPU_FAMILY_SI) {
+ mutex_lock(&adev->pm.mutex);
+ if (enable) {
+ adev->pm.dpm.vce_active = true;
+ /* XXX select vce level based on ring/task */
+ adev->pm.dpm.vce_level = AMD_VCE_LEVEL_AC_ALL;
+ } else {
+ adev->pm.dpm.vce_active = false;
+ }
+ mutex_unlock(&adev->pm.mutex);
+
+ amdgpu_dpm_compute_clocks(adev);
+ return;
+ }
+
ret = amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_VCE, !enable);
if (ret)
DRM_ERROR("Dpm %s vce failed, ret = %d. \n",
diff --git a/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c b/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c
index 9613c6181c17..d3fe149d8476 100644
--- a/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c
+++ b/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c
@@ -1028,16 +1028,6 @@ static int amdgpu_dpm_change_power_state_locked(struct amdgpu_device *adev)
void amdgpu_legacy_dpm_compute_clocks(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- int i = 0;
-
- if (adev->mode_info.num_crtc)
- amdgpu_display_bandwidth_update(adev);
-
- for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
- struct amdgpu_ring *ring = adev->rings[i];
- if (ring && ring->sched.ready)
- amdgpu_fence_wait_empty(ring);
- }
amdgpu_dpm_get_active_displays(adev);
diff --git a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c
index caae54487f9c..633dab14f51c 100644
--- a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c
+++ b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c
@@ -3892,40 +3892,6 @@ static int si_set_boot_state(struct amdgpu_device *adev)
}
#endif
-static int si_set_powergating_by_smu(void *handle,
- uint32_t block_type,
- bool gate)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- switch (block_type) {
- case AMD_IP_BLOCK_TYPE_UVD:
- if (!gate) {
- adev->pm.dpm.uvd_active = true;
- adev->pm.dpm.state = POWER_STATE_TYPE_INTERNAL_UVD;
- } else {
- adev->pm.dpm.uvd_active = false;
- }
-
- amdgpu_legacy_dpm_compute_clocks(handle);
- break;
- case AMD_IP_BLOCK_TYPE_VCE:
- if (!gate) {
- adev->pm.dpm.vce_active = true;
- /* XXX select vce level based on ring/task */
- adev->pm.dpm.vce_level = AMD_VCE_LEVEL_AC_ALL;
- } else {
- adev->pm.dpm.vce_active = false;
- }
-
- amdgpu_legacy_dpm_compute_clocks(handle);
- break;
- default:
- break;
- }
- return 0;
-}
-
static int si_set_sw_state(struct amdgpu_device *adev)
{
return (amdgpu_si_send_msg_to_smc(adev, PPSMC_MSG_SwitchToSwState) == PPSMC_Result_OK) ?
@@ -8125,7 +8091,6 @@ static const struct amd_pm_funcs si_dpm_funcs = {
.print_power_state = &si_dpm_print_power_state,
.debugfs_print_current_performance_level = &si_dpm_debugfs_print_current_performance_level,
.force_performance_level = &si_dpm_force_performance_level,
- .set_powergating_by_smu = &si_set_powergating_by_smu,
.vblank_too_short = &si_dpm_vblank_too_short,
.set_fan_control_mode = &si_dpm_set_fan_control_mode,
.get_fan_control_mode = &si_dpm_get_fan_control_mode,
diff --git a/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c b/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c
index a2da46bf3985..71e9c6ce6b1a 100644
--- a/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c
+++ b/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c
@@ -1487,16 +1487,6 @@ static void pp_pm_compute_clocks(void *handle)
{
struct pp_hwmgr *hwmgr = handle;
struct amdgpu_device *adev = hwmgr->adev;
- int i = 0;
-
- if (adev->mode_info.num_crtc)
- amdgpu_display_bandwidth_update(adev);
-
- for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
- struct amdgpu_ring *ring = adev->rings[i];
- if (ring && ring->sched.ready)
- amdgpu_fence_wait_empty(ring);
- }
if (!amdgpu_device_has_dc_support(adev)) {
amdgpu_dpm_get_active_displays(adev);
diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
index f1544755d8b4..f10a0256413e 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
@@ -1351,14 +1351,8 @@ static int smu_disable_dpms(struct smu_context *smu)
{
struct amdgpu_device *adev = smu->adev;
int ret = 0;
- /*
- * TODO: (adev->in_suspend && !adev->in_s0ix) is added to pair
- * the workaround which always reset the asic in suspend.
- * It's likely that workaround will be dropped in the future.
- * Then the change here should be dropped together.
- */
bool use_baco = !smu->is_apu &&
- (((amdgpu_in_reset(adev) || (adev->in_suspend && !adev->in_s0ix)) &&
+ ((amdgpu_in_reset(adev) &&
(amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO)) ||
((adev->in_runpm || adev->in_s4) && amdgpu_asic_supports_baco(adev)));
diff --git a/drivers/gpu/drm/bridge/Kconfig b/drivers/gpu/drm/bridge/Kconfig
index 007e5a282f67..2145b08f9534 100644
--- a/drivers/gpu/drm/bridge/Kconfig
+++ b/drivers/gpu/drm/bridge/Kconfig
@@ -78,6 +78,7 @@ config DRM_ITE_IT6505
tristate "ITE IT6505 DisplayPort bridge"
depends on OF
select DRM_KMS_HELPER
+ select DRM_DP_HELPER
select EXTCON
help
ITE IT6505 DisplayPort bridge chip driver.
diff --git a/drivers/gpu/drm/dp/drm_dp_mst_topology.c b/drivers/gpu/drm/dp/drm_dp_mst_topology.c
index 11300b53d24f..7a7cc44686f9 100644
--- a/drivers/gpu/drm/dp/drm_dp_mst_topology.c
+++ b/drivers/gpu/drm/dp/drm_dp_mst_topology.c
@@ -4852,6 +4852,7 @@ static void fetch_monitor_name(struct drm_dp_mst_topology_mgr *mgr,
mst_edid = drm_dp_mst_get_edid(port->connector, mgr, port);
drm_edid_get_monitor_name(mst_edid, name, namelen);
+ kfree(mst_edid);
}
/**
diff --git a/drivers/gpu/drm/drm_of.c b/drivers/gpu/drm/drm_of.c
index f4df344509a8..9a2cfab3a177 100644
--- a/drivers/gpu/drm/drm_of.c
+++ b/drivers/gpu/drm/drm_of.c
@@ -214,29 +214,6 @@ int drm_of_encoder_active_endpoint(struct device_node *node,
}
EXPORT_SYMBOL_GPL(drm_of_encoder_active_endpoint);
-static int find_panel_or_bridge(struct device_node *node,
- struct drm_panel **panel,
- struct drm_bridge **bridge)
-{
- if (panel) {
- *panel = of_drm_find_panel(node);
- if (!IS_ERR(*panel))
- return 0;
-
- /* Clear the panel pointer in case of error. */
- *panel = NULL;
- }
-
- /* No panel found yet, check for a bridge next. */
- if (bridge) {
- *bridge = of_drm_find_bridge(node);
- if (*bridge)
- return 0;
- }
-
- return -EPROBE_DEFER;
-}
-
/**
* drm_of_find_panel_or_bridge - return connected panel or bridge device
* @np: device tree node containing encoder output ports
@@ -259,44 +236,49 @@ int drm_of_find_panel_or_bridge(const struct device_node *np,
struct drm_panel **panel,
struct drm_bridge **bridge)
{
- struct device_node *node;
- int ret;
+ int ret = -EPROBE_DEFER;
+ struct device_node *remote;
if (!panel && !bridge)
return -EINVAL;
-
if (panel)
*panel = NULL;
- if (bridge)
- *bridge = NULL;
-
- /* Check for a graph on the device node first. */
- if (of_graph_is_present(np)) {
- node = of_graph_get_remote_node(np, port, endpoint);
- if (node) {
- ret = find_panel_or_bridge(node, panel, bridge);
- of_node_put(node);
-
- if (!ret)
- return 0;
- }
- }
- /* Otherwise check for any child node other than port/ports. */
- for_each_available_child_of_node(np, node) {
- if (of_node_name_eq(node, "port") ||
- of_node_name_eq(node, "ports"))
- continue;
+ /*
+ * of_graph_get_remote_node() produces a noisy error message if port
+ * node isn't found and the absence of the port is a legit case here,
+ * so at first we silently check whether graph presents in the
+ * device-tree node.
+ */
+ if (!of_graph_is_present(np))
+ return -ENODEV;
- ret = find_panel_or_bridge(node, panel, bridge);
- of_node_put(node);
+ remote = of_graph_get_remote_node(np, port, endpoint);
+ if (!remote)
+ return -ENODEV;
+
+ if (panel) {
+ *panel = of_drm_find_panel(remote);
+ if (!IS_ERR(*panel))
+ ret = 0;
+ else
+ *panel = NULL;
+ }
+
+ /* No panel found yet, check for a bridge next. */
+ if (bridge) {
+ if (ret) {
+ *bridge = of_drm_find_bridge(remote);
+ if (*bridge)
+ ret = 0;
+ } else {
+ *bridge = NULL;
+ }
- /* Stop at the first found occurrence. */
- if (!ret)
- return 0;
}
- return -EPROBE_DEFER;
+ of_node_put(remote);
+ return ret;
}
EXPORT_SYMBOL_GPL(drm_of_find_panel_or_bridge);
diff --git a/drivers/gpu/drm/i915/display/intel_dmc.c b/drivers/gpu/drm/i915/display/intel_dmc.c
index 7616a3906b9e..1b774dcfb281 100644
--- a/drivers/gpu/drm/i915/display/intel_dmc.c
+++ b/drivers/gpu/drm/i915/display/intel_dmc.c
@@ -367,6 +367,44 @@ static void dmc_set_fw_offset(struct intel_dmc *dmc,
}
}
+static bool dmc_mmio_addr_sanity_check(struct intel_dmc *dmc,
+ const u32 *mmioaddr, u32 mmio_count,
+ int header_ver, u8 dmc_id)
+{
+ struct drm_i915_private *i915 = container_of(dmc, typeof(*i915), dmc);
+ u32 start_range, end_range;
+ int i;
+
+ if (dmc_id >= DMC_FW_MAX) {
+ drm_warn(&i915->drm, "Unsupported firmware id %u\n", dmc_id);
+ return false;
+ }
+
+ if (header_ver == 1) {
+ start_range = DMC_MMIO_START_RANGE;
+ end_range = DMC_MMIO_END_RANGE;
+ } else if (dmc_id == DMC_FW_MAIN) {
+ start_range = TGL_MAIN_MMIO_START;
+ end_range = TGL_MAIN_MMIO_END;
+ } else if (DISPLAY_VER(i915) >= 13) {
+ start_range = ADLP_PIPE_MMIO_START;
+ end_range = ADLP_PIPE_MMIO_END;
+ } else if (DISPLAY_VER(i915) >= 12) {
+ start_range = TGL_PIPE_MMIO_START(dmc_id);
+ end_range = TGL_PIPE_MMIO_END(dmc_id);
+ } else {
+ drm_warn(&i915->drm, "Unknown mmio range for sanity check");
+ return false;
+ }
+
+ for (i = 0; i < mmio_count; i++) {
+ if (mmioaddr[i] < start_range || mmioaddr[i] > end_range)
+ return false;
+ }
+
+ return true;
+}
+
static u32 parse_dmc_fw_header(struct intel_dmc *dmc,
const struct intel_dmc_header_base *dmc_header,
size_t rem_size, u8 dmc_id)
@@ -436,6 +474,12 @@ static u32 parse_dmc_fw_header(struct intel_dmc *dmc,
return 0;
}
+ if (!dmc_mmio_addr_sanity_check(dmc, mmioaddr, mmio_count,
+ dmc_header->header_ver, dmc_id)) {
+ drm_err(&i915->drm, "DMC firmware has Wrong MMIO Addresses\n");
+ return 0;
+ }
+
for (i = 0; i < mmio_count; i++) {
dmc_info->mmioaddr[i] = _MMIO(mmioaddr[i]);
dmc_info->mmiodata[i] = mmiodata[i];
diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c
index d667657e3606..f868db8be02a 100644
--- a/drivers/gpu/drm/i915/display/intel_dp.c
+++ b/drivers/gpu/drm/i915/display/intel_dp.c
@@ -4383,13 +4383,20 @@ intel_dp_update_420(struct intel_dp *intel_dp)
static void
intel_dp_set_edid(struct intel_dp *intel_dp)
{
+ struct drm_i915_private *i915 = dp_to_i915(intel_dp);
struct intel_connector *connector = intel_dp->attached_connector;
struct edid *edid;
+ bool vrr_capable;
intel_dp_unset_edid(intel_dp);
edid = intel_dp_get_edid(intel_dp);
connector->detect_edid = edid;
+ vrr_capable = intel_vrr_is_capable(&connector->base);
+ drm_dbg_kms(&i915->drm, "[CONNECTOR:%d:%s] VRR capable: %s\n",
+ connector->base.base.id, connector->base.name, str_yes_no(vrr_capable));
+ drm_connector_set_vrr_capable_property(&connector->base, vrr_capable);
+
intel_dp_update_dfp(intel_dp, edid);
intel_dp_update_420(intel_dp);
@@ -4422,6 +4429,9 @@ intel_dp_unset_edid(struct intel_dp *intel_dp)
intel_dp->dfp.ycbcr_444_to_420 = false;
connector->base.ycbcr_420_allowed = false;
+
+ drm_connector_set_vrr_capable_property(&connector->base,
+ false);
}
static int
@@ -4572,14 +4582,9 @@ static int intel_dp_get_modes(struct drm_connector *connector)
int num_modes = 0;
edid = intel_connector->detect_edid;
- if (edid) {
+ if (edid)
num_modes = intel_connector_update_modes(connector, edid);
- if (intel_vrr_is_capable(connector))
- drm_connector_set_vrr_capable_property(connector,
- true);
- }
-
/* Also add fixed mode, which may or may not be present in EDID */
if (intel_dp_is_edp(intel_attached_dp(intel_connector)) &&
intel_connector->panel.fixed_mode) {
diff --git a/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c b/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c
index 97cf3cac0105..fb6cf30ee628 100644
--- a/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c
+++ b/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c
@@ -97,6 +97,14 @@
#define INTEL_EDP_BRIGHTNESS_OPTIMIZATION_1 0x359
+enum intel_dp_aux_backlight_modparam {
+ INTEL_DP_AUX_BACKLIGHT_AUTO = -1,
+ INTEL_DP_AUX_BACKLIGHT_OFF = 0,
+ INTEL_DP_AUX_BACKLIGHT_ON = 1,
+ INTEL_DP_AUX_BACKLIGHT_FORCE_VESA = 2,
+ INTEL_DP_AUX_BACKLIGHT_FORCE_INTEL = 3,
+};
+
/* Intel EDP backlight callbacks */
static bool
intel_dp_aux_supports_hdr_backlight(struct intel_connector *connector)
@@ -126,6 +134,24 @@ intel_dp_aux_supports_hdr_backlight(struct intel_connector *connector)
return false;
}
+ /*
+ * If we don't have HDR static metadata there is no way to
+ * runtime detect used range for nits based control. For now
+ * do not use Intel proprietary eDP backlight control if we
+ * don't have this data in panel EDID. In case we find panel
+ * which supports only nits based control, but doesn't provide
+ * HDR static metadata we need to start maintaining table of
+ * ranges for such panels.
+ */
+ if (i915->params.enable_dpcd_backlight != INTEL_DP_AUX_BACKLIGHT_FORCE_INTEL &&
+ !(connector->base.hdr_sink_metadata.hdmi_type1.metadata_type &
+ BIT(HDMI_STATIC_METADATA_TYPE1))) {
+ drm_info(&i915->drm,
+ "Panel is missing HDR static metadata. Possible support for Intel HDR backlight interface is not used. If your backlight controls don't work try booting with i915.enable_dpcd_backlight=%d. needs this, please file a _new_ bug report on drm/i915, see " FDO_BUG_URL " for details.\n",
+ INTEL_DP_AUX_BACKLIGHT_FORCE_INTEL);
+ return false;
+ }
+
panel->backlight.edp.intel.sdr_uses_aux =
tcon_cap[2] & INTEL_EDP_SDR_TCON_BRIGHTNESS_AUX_CAP;
@@ -413,14 +439,6 @@ static const struct intel_panel_bl_funcs intel_dp_vesa_bl_funcs = {
.get = intel_dp_aux_vesa_get_backlight,
};
-enum intel_dp_aux_backlight_modparam {
- INTEL_DP_AUX_BACKLIGHT_AUTO = -1,
- INTEL_DP_AUX_BACKLIGHT_OFF = 0,
- INTEL_DP_AUX_BACKLIGHT_ON = 1,
- INTEL_DP_AUX_BACKLIGHT_FORCE_VESA = 2,
- INTEL_DP_AUX_BACKLIGHT_FORCE_INTEL = 3,
-};
-
int intel_dp_aux_init_backlight_funcs(struct intel_connector *connector)
{
struct drm_device *dev = connector->base.dev;
diff --git a/drivers/gpu/drm/i915/display/intel_fbc.c b/drivers/gpu/drm/i915/display/intel_fbc.c
index 87f4af3fd523..3e61a8936245 100644
--- a/drivers/gpu/drm/i915/display/intel_fbc.c
+++ b/drivers/gpu/drm/i915/display/intel_fbc.c
@@ -1037,7 +1037,7 @@ static int intel_fbc_check_plane(struct intel_atomic_state *state,
struct intel_plane_state *plane_state =
intel_atomic_get_new_plane_state(state, plane);
const struct drm_framebuffer *fb = plane_state->hw.fb;
- struct intel_crtc *crtc = to_intel_crtc(plane_state->uapi.crtc);
+ struct intel_crtc *crtc = to_intel_crtc(plane_state->hw.crtc);
const struct intel_crtc_state *crtc_state;
struct intel_fbc *fbc = plane->fbc;
diff --git a/drivers/gpu/drm/i915/display/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c
index bff8c2d73cdf..6c9e6e7f0afd 100644
--- a/drivers/gpu/drm/i915/display/intel_psr.c
+++ b/drivers/gpu/drm/i915/display/intel_psr.c
@@ -887,6 +887,20 @@ static bool intel_psr2_config_valid(struct intel_dp *intel_dp,
return false;
}
+ /* Wa_16011303918:adl-p */
+ if (crtc_state->vrr.enable &&
+ IS_ADLP_DISPLAY_STEP(dev_priv, STEP_A0, STEP_B0)) {
+ drm_dbg_kms(&dev_priv->drm,
+ "PSR2 not enabled, not compatible with HW stepping + VRR\n");
+ return false;
+ }
+
+ if (!_compute_psr2_sdp_prior_scanline_indication(intel_dp, crtc_state)) {
+ drm_dbg_kms(&dev_priv->drm,
+ "PSR2 not enabled, PSR2 SDP indication do not fit in hblank\n");
+ return false;
+ }
+
if (HAS_PSR2_SEL_FETCH(dev_priv)) {
if (!intel_psr2_sel_fetch_config_valid(intel_dp, crtc_state) &&
!HAS_PSR_HW_TRACKING(dev_priv)) {
@@ -900,12 +914,12 @@ static bool intel_psr2_config_valid(struct intel_dp *intel_dp,
if (!crtc_state->enable_psr2_sel_fetch &&
IS_TGL_DISPLAY_STEP(dev_priv, STEP_A0, STEP_C0)) {
drm_dbg_kms(&dev_priv->drm, "PSR2 HW tracking is not supported this Display stepping\n");
- return false;
+ goto unsupported;
}
if (!psr2_granularity_check(intel_dp, crtc_state)) {
drm_dbg_kms(&dev_priv->drm, "PSR2 not enabled, SU granularity not compatible\n");
- return false;
+ goto unsupported;
}
if (!crtc_state->enable_psr2_sel_fetch &&
@@ -914,25 +928,15 @@ static bool intel_psr2_config_valid(struct intel_dp *intel_dp,
"PSR2 not enabled, resolution %dx%d > max supported %dx%d\n",
crtc_hdisplay, crtc_vdisplay,
psr_max_h, psr_max_v);
- return false;
- }
-
- if (!_compute_psr2_sdp_prior_scanline_indication(intel_dp, crtc_state)) {
- drm_dbg_kms(&dev_priv->drm,
- "PSR2 not enabled, PSR2 SDP indication do not fit in hblank\n");
- return false;
- }
-
- /* Wa_16011303918:adl-p */
- if (crtc_state->vrr.enable &&
- IS_ADLP_DISPLAY_STEP(dev_priv, STEP_A0, STEP_B0)) {
- drm_dbg_kms(&dev_priv->drm,
- "PSR2 not enabled, not compatible with HW stepping + VRR\n");
- return false;
+ goto unsupported;
}
tgl_dc3co_exitline_compute_config(intel_dp, crtc_state);
return true;
+
+unsupported:
+ crtc_state->enable_psr2_sel_fetch = false;
+ return false;
}
void intel_psr_compute_config(struct intel_dp *intel_dp,
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index d42f437149c9..6ca8929cf6e1 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -1252,14 +1252,12 @@ static void *reloc_iomap(struct i915_vma *batch,
* Only attempt to pin the batch buffer to ggtt if the current batch
* is not inside ggtt, or the batch buffer is not misplaced.
*/
- if (!i915_is_ggtt(batch->vm)) {
+ if (!i915_is_ggtt(batch->vm) ||
+ !i915_vma_misplaced(batch, 0, 0, PIN_MAPPABLE)) {
vma = i915_gem_object_ggtt_pin_ww(obj, &eb->ww, NULL, 0, 0,
PIN_MAPPABLE |
PIN_NONBLOCK /* NOWARN */ |
PIN_NOEVICT);
- } else if (i915_vma_is_map_and_fenceable(batch)) {
- __i915_vma_pin(batch);
- vma = batch;
}
if (vma == ERR_PTR(-EDEADLK))
diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c
index 82713264b96c..b7c6d4462ec5 100644
--- a/drivers/gpu/drm/i915/gt/intel_reset.c
+++ b/drivers/gpu/drm/i915/gt/intel_reset.c
@@ -806,7 +806,7 @@ static int gt_reset(struct intel_gt *gt, intel_engine_mask_t stalled_mask)
__intel_engine_reset(engine, stalled_mask & engine->mask);
local_bh_enable();
- intel_uc_reset(&gt->uc, true);
+ intel_uc_reset(&gt->uc, ALL_ENGINES);
intel_ggtt_restore_fences(gt->ggtt);
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
index bf7079480d47..2488d1197f3e 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
@@ -438,7 +438,7 @@ int intel_guc_global_policies_update(struct intel_guc *guc);
void intel_guc_context_ban(struct intel_context *ce, struct i915_request *rq);
void intel_guc_submission_reset_prepare(struct intel_guc *guc);
-void intel_guc_submission_reset(struct intel_guc *guc, bool stalled);
+void intel_guc_submission_reset(struct intel_guc *guc, intel_engine_mask_t stalled);
void intel_guc_submission_reset_finish(struct intel_guc *guc);
void intel_guc_submission_cancel_requests(struct intel_guc *guc);
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index 1ce7e04aa837..28f9aac0201d 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -1590,9 +1590,9 @@ __unwind_incomplete_requests(struct intel_context *ce)
spin_unlock_irqrestore(&sched_engine->lock, flags);
}
-static void __guc_reset_context(struct intel_context *ce, bool stalled)
+static void __guc_reset_context(struct intel_context *ce, intel_engine_mask_t stalled)
{
- bool local_stalled;
+ bool guilty;
struct i915_request *rq;
unsigned long flags;
u32 head;
@@ -1620,7 +1620,7 @@ static void __guc_reset_context(struct intel_context *ce, bool stalled)
if (!intel_context_is_pinned(ce))
goto next_context;
- local_stalled = false;
+ guilty = false;
rq = intel_context_find_active_request(ce);
if (!rq) {
head = ce->ring->tail;
@@ -1628,14 +1628,14 @@ static void __guc_reset_context(struct intel_context *ce, bool stalled)
}
if (i915_request_started(rq))
- local_stalled = true;
+ guilty = stalled & ce->engine->mask;
GEM_BUG_ON(i915_active_is_idle(&ce->active));
head = intel_ring_wrap(ce->ring, rq->head);
- __i915_request_reset(rq, local_stalled && stalled);
+ __i915_request_reset(rq, guilty);
out_replay:
- guc_reset_state(ce, head, local_stalled && stalled);
+ guc_reset_state(ce, head, guilty);
next_context:
if (i != number_children)
ce = list_next_entry(ce, parallel.child_link);
@@ -1645,7 +1645,7 @@ next_context:
intel_context_put(parent);
}
-void intel_guc_submission_reset(struct intel_guc *guc, bool stalled)
+void intel_guc_submission_reset(struct intel_guc *guc, intel_engine_mask_t stalled)
{
struct intel_context *ce;
unsigned long index;
@@ -4013,7 +4013,7 @@ static void guc_context_replay(struct intel_context *ce)
{
struct i915_sched_engine *sched_engine = ce->engine->sched_engine;
- __guc_reset_context(ce, true);
+ __guc_reset_context(ce, ce->engine->mask);
tasklet_hi_schedule(&sched_engine->tasklet);
}
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
index da199aa6989f..8eb34de2f20c 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
@@ -593,7 +593,7 @@ sanitize:
__uc_sanitize(uc);
}
-void intel_uc_reset(struct intel_uc *uc, bool stalled)
+void intel_uc_reset(struct intel_uc *uc, intel_engine_mask_t stalled)
{
struct intel_guc *guc = &uc->guc;
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.h b/drivers/gpu/drm/i915/gt/uc/intel_uc.h
index 866b462821c0..a8f38c2c60e2 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.h
@@ -42,7 +42,7 @@ void intel_uc_driver_late_release(struct intel_uc *uc);
void intel_uc_driver_remove(struct intel_uc *uc);
void intel_uc_init_mmio(struct intel_uc *uc);
void intel_uc_reset_prepare(struct intel_uc *uc);
-void intel_uc_reset(struct intel_uc *uc, bool stalled);
+void intel_uc_reset(struct intel_uc *uc, intel_engine_mask_t stalled);
void intel_uc_reset_finish(struct intel_uc *uc);
void intel_uc_cancel_requests(struct intel_uc *uc);
void intel_uc_suspend(struct intel_uc *uc);
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 3c87d77d2cf6..fe960c204362 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -4345,12 +4345,12 @@
#define _DSPAADDR 0x70184
#define _DSPASTRIDE 0x70188
#define _DSPAPOS 0x7018C /* reserved */
-#define DISP_POS_Y_MASK REG_GENMASK(31, 0)
+#define DISP_POS_Y_MASK REG_GENMASK(31, 16)
#define DISP_POS_Y(y) REG_FIELD_PREP(DISP_POS_Y_MASK, (y))
#define DISP_POS_X_MASK REG_GENMASK(15, 0)
#define DISP_POS_X(x) REG_FIELD_PREP(DISP_POS_X_MASK, (x))
#define _DSPASIZE 0x70190
-#define DISP_HEIGHT_MASK REG_GENMASK(31, 0)
+#define DISP_HEIGHT_MASK REG_GENMASK(31, 16)
#define DISP_HEIGHT(h) REG_FIELD_PREP(DISP_HEIGHT_MASK, (h))
#define DISP_WIDTH_MASK REG_GENMASK(15, 0)
#define DISP_WIDTH(w) REG_FIELD_PREP(DISP_WIDTH_MASK, (w))
@@ -5152,7 +5152,7 @@
#define _SEL_FETCH_PLANE_BASE_6_A 0x70940
#define _SEL_FETCH_PLANE_BASE_7_A 0x70960
#define _SEL_FETCH_PLANE_BASE_CUR_A 0x70880
-#define _SEL_FETCH_PLANE_BASE_1_B 0x70990
+#define _SEL_FETCH_PLANE_BASE_1_B 0x71890
#define _SEL_FETCH_PLANE_BASE_A(plane) _PICK(plane, \
_SEL_FETCH_PLANE_BASE_1_A, \
@@ -5501,6 +5501,22 @@
/* MMIO address range for DMC program (0x80000 - 0x82FFF) */
#define DMC_MMIO_START_RANGE 0x80000
#define DMC_MMIO_END_RANGE 0x8FFFF
+#define DMC_V1_MMIO_START_RANGE 0x80000
+#define TGL_MAIN_MMIO_START 0x8F000
+#define TGL_MAIN_MMIO_END 0x8FFFF
+#define _TGL_PIPEA_MMIO_START 0x92000
+#define _TGL_PIPEA_MMIO_END 0x93FFF
+#define _TGL_PIPEB_MMIO_START 0x96000
+#define _TGL_PIPEB_MMIO_END 0x97FFF
+#define ADLP_PIPE_MMIO_START 0x5F000
+#define ADLP_PIPE_MMIO_END 0x5FFFF
+
+#define TGL_PIPE_MMIO_START(dmc_id) _PICK_EVEN(((dmc_id) - 1), _TGL_PIPEA_MMIO_START,\
+ _TGL_PIPEB_MMIO_START)
+
+#define TGL_PIPE_MMIO_END(dmc_id) _PICK_EVEN(((dmc_id) - 1), _TGL_PIPEA_MMIO_END,\
+ _TGL_PIPEB_MMIO_END)
+
#define SKL_DMC_DC3_DC5_COUNT _MMIO(0x80030)
#define SKL_DMC_DC5_DC6_COUNT _MMIO(0x8002C)
#define BXT_DMC_DC3_DC5_COUNT _MMIO(0x80038)
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index 94fcdb7bd21d..eeaa8d0d0407 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -1605,17 +1605,17 @@ void i915_vma_close(struct i915_vma *vma)
static void __i915_vma_remove_closed(struct i915_vma *vma)
{
- struct intel_gt *gt = vma->vm->gt;
-
- spin_lock_irq(&gt->closed_lock);
list_del_init(&vma->closed_link);
- spin_unlock_irq(&gt->closed_lock);
}
void i915_vma_reopen(struct i915_vma *vma)
{
+ struct intel_gt *gt = vma->vm->gt;
+
+ spin_lock_irq(&gt->closed_lock);
if (i915_vma_is_closed(vma))
__i915_vma_remove_closed(vma);
+ spin_unlock_irq(&gt->closed_lock);
}
void i915_vma_release(struct kref *ref)
@@ -1641,6 +1641,7 @@ static void force_unbind(struct i915_vma *vma)
static void release_references(struct i915_vma *vma)
{
struct drm_i915_gem_object *obj = vma->obj;
+ struct intel_gt *gt = vma->vm->gt;
GEM_BUG_ON(i915_vma_is_active(vma));
@@ -1650,7 +1651,9 @@ static void release_references(struct i915_vma *vma)
rb_erase(&vma->obj_node, &obj->vma.tree);
spin_unlock(&obj->vma.lock);
+ spin_lock_irq(&gt->closed_lock);
__i915_vma_remove_closed(vma);
+ spin_unlock_irq(&gt->closed_lock);
__i915_vma_put(vma);
}
diff --git a/drivers/gpu/drm/msm/dp/dp_display.c b/drivers/gpu/drm/msm/dp/dp_display.c
index a42732b67349..178b774a5fbd 100644
--- a/drivers/gpu/drm/msm/dp/dp_display.c
+++ b/drivers/gpu/drm/msm/dp/dp_display.c
@@ -580,12 +580,6 @@ static int dp_hpd_plug_handle(struct dp_display_private *dp, u32 data)
dp->dp_display.connector_type, state);
mutex_unlock(&dp->event_mutex);
- /*
- * add fail safe mode outside event_mutex scope
- * to avoid potiential circular lock with drm thread
- */
- dp_panel_add_fail_safe_mode(dp->dp_display.connector);
-
/* uevent will complete connection part */
return 0;
};
diff --git a/drivers/gpu/drm/msm/dp/dp_panel.c b/drivers/gpu/drm/msm/dp/dp_panel.c
index 26c3653c99ec..26f4b6959c31 100644
--- a/drivers/gpu/drm/msm/dp/dp_panel.c
+++ b/drivers/gpu/drm/msm/dp/dp_panel.c
@@ -151,15 +151,6 @@ static int dp_panel_update_modes(struct drm_connector *connector,
return rc;
}
-void dp_panel_add_fail_safe_mode(struct drm_connector *connector)
-{
- /* fail safe edid */
- mutex_lock(&connector->dev->mode_config.mutex);
- if (drm_add_modes_noedid(connector, 640, 480))
- drm_set_preferred_mode(connector, 640, 480);
- mutex_unlock(&connector->dev->mode_config.mutex);
-}
-
int dp_panel_read_sink_caps(struct dp_panel *dp_panel,
struct drm_connector *connector)
{
@@ -215,8 +206,6 @@ int dp_panel_read_sink_caps(struct dp_panel *dp_panel,
rc = -ETIMEDOUT;
goto end;
}
-
- dp_panel_add_fail_safe_mode(connector);
}
if (panel->aux_cfg_update_done) {
diff --git a/drivers/gpu/drm/msm/dp/dp_panel.h b/drivers/gpu/drm/msm/dp/dp_panel.h
index 99739ea679a7..9023e5bb4b8b 100644
--- a/drivers/gpu/drm/msm/dp/dp_panel.h
+++ b/drivers/gpu/drm/msm/dp/dp_panel.h
@@ -59,7 +59,6 @@ int dp_panel_init_panel_info(struct dp_panel *dp_panel);
int dp_panel_deinit(struct dp_panel *dp_panel);
int dp_panel_timing_cfg(struct dp_panel *dp_panel);
void dp_panel_dump_regs(struct dp_panel *dp_panel);
-void dp_panel_add_fail_safe_mode(struct drm_connector *connector);
int dp_panel_read_sink_caps(struct dp_panel *dp_panel,
struct drm_connector *connector);
u32 dp_panel_get_mode_bpp(struct dp_panel *dp_panel, u32 mode_max_bpp,
diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c
index 9c36b505daab..affa95eb05fc 100644
--- a/drivers/gpu/drm/msm/msm_drv.c
+++ b/drivers/gpu/drm/msm/msm_drv.c
@@ -274,7 +274,7 @@ bool msm_use_mmu(struct drm_device *dev)
struct msm_drm_private *priv = dev->dev_private;
/* a2xx comes with its own MMU */
- return priv->is_a2xx || device_iommu_mapped(dev->dev);
+ return priv->is_a2xx || iommu_present(&platform_bus_type);
}
static int msm_init_vram(struct drm_device *dev)
diff --git a/drivers/gpu/drm/nouveau/nouveau_backlight.c b/drivers/gpu/drm/nouveau/nouveau_backlight.c
index daf9f87477ba..a2141d3d9b1d 100644
--- a/drivers/gpu/drm/nouveau/nouveau_backlight.c
+++ b/drivers/gpu/drm/nouveau/nouveau_backlight.c
@@ -46,8 +46,9 @@ static bool
nouveau_get_backlight_name(char backlight_name[BL_NAME_SIZE],
struct nouveau_backlight *bl)
{
- const int nb = ida_simple_get(&bl_ida, 0, 0, GFP_KERNEL);
- if (nb < 0 || nb >= 100)
+ const int nb = ida_alloc_max(&bl_ida, 99, GFP_KERNEL);
+
+ if (nb < 0)
return false;
if (nb > 0)
snprintf(backlight_name, BL_NAME_SIZE, "nv_backlight%d", nb);
@@ -414,7 +415,7 @@ nouveau_backlight_init(struct drm_connector *connector)
nv_encoder, ops, &props);
if (IS_ERR(bl->dev)) {
if (bl->id >= 0)
- ida_simple_remove(&bl_ida, bl->id);
+ ida_free(&bl_ida, bl->id);
ret = PTR_ERR(bl->dev);
goto fail_alloc;
}
@@ -442,7 +443,7 @@ nouveau_backlight_fini(struct drm_connector *connector)
return;
if (bl->id >= 0)
- ida_simple_remove(&bl_ida, bl->id);
+ ida_free(&bl_ida, bl->id);
backlight_device_unregister(bl->dev);
nv_conn->backlight = NULL;
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c
index 992cc285f2fe..2ed528c065fa 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c
@@ -123,7 +123,7 @@ nvkm_device_tegra_probe_iommu(struct nvkm_device_tegra *tdev)
mutex_init(&tdev->iommu.mutex);
- if (iommu_present(&platform_bus_type)) {
+ if (device_iommu_mapped(dev)) {
tdev->iommu.domain = iommu_domain_alloc(&platform_bus_type);
if (!tdev->iommu.domain)
goto error;
diff --git a/drivers/gpu/drm/panel/panel-raspberrypi-touchscreen.c b/drivers/gpu/drm/panel/panel-raspberrypi-touchscreen.c
index 46029c5610c8..145047e19394 100644
--- a/drivers/gpu/drm/panel/panel-raspberrypi-touchscreen.c
+++ b/drivers/gpu/drm/panel/panel-raspberrypi-touchscreen.c
@@ -229,7 +229,7 @@ static void rpi_touchscreen_i2c_write(struct rpi_touchscreen *ts,
ret = i2c_smbus_write_byte_data(ts->i2c, reg, val);
if (ret)
- dev_err(&ts->dsi->dev, "I2C write failed: %d\n", ret);
+ dev_err(&ts->i2c->dev, "I2C write failed: %d\n", ret);
}
static int rpi_touchscreen_write(struct rpi_touchscreen *ts, u16 reg, u32 val)
@@ -265,7 +265,7 @@ static int rpi_touchscreen_noop(struct drm_panel *panel)
return 0;
}
-static int rpi_touchscreen_enable(struct drm_panel *panel)
+static int rpi_touchscreen_prepare(struct drm_panel *panel)
{
struct rpi_touchscreen *ts = panel_to_ts(panel);
int i;
@@ -295,6 +295,13 @@ static int rpi_touchscreen_enable(struct drm_panel *panel)
rpi_touchscreen_write(ts, DSI_STARTDSI, 0x01);
msleep(100);
+ return 0;
+}
+
+static int rpi_touchscreen_enable(struct drm_panel *panel)
+{
+ struct rpi_touchscreen *ts = panel_to_ts(panel);
+
/* Turn on the backlight. */
rpi_touchscreen_i2c_write(ts, REG_PWM, 255);
@@ -349,7 +356,7 @@ static int rpi_touchscreen_get_modes(struct drm_panel *panel,
static const struct drm_panel_funcs rpi_touchscreen_funcs = {
.disable = rpi_touchscreen_disable,
.unprepare = rpi_touchscreen_noop,
- .prepare = rpi_touchscreen_noop,
+ .prepare = rpi_touchscreen_prepare,
.enable = rpi_touchscreen_enable,
.get_modes = rpi_touchscreen_get_modes,
};
diff --git a/drivers/gpu/drm/radeon/radeon_sync.c b/drivers/gpu/drm/radeon/radeon_sync.c
index b991ba1bcd51..f63efd8d5e52 100644
--- a/drivers/gpu/drm/radeon/radeon_sync.c
+++ b/drivers/gpu/drm/radeon/radeon_sync.c
@@ -96,7 +96,7 @@ int radeon_sync_resv(struct radeon_device *rdev,
struct dma_fence *f;
int r = 0;
- dma_resv_for_each_fence(&cursor, resv, shared, f) {
+ dma_resv_for_each_fence(&cursor, resv, !shared, f) {
fence = to_radeon_fence(f);
if (fence && fence->rdev == rdev)
radeon_sync_fence(sync, fence);
diff --git a/drivers/gpu/drm/sun4i/sun4i_frontend.c b/drivers/gpu/drm/sun4i/sun4i_frontend.c
index 56ae38389db0..462fae73eae9 100644
--- a/drivers/gpu/drm/sun4i/sun4i_frontend.c
+++ b/drivers/gpu/drm/sun4i/sun4i_frontend.c
@@ -222,13 +222,11 @@ void sun4i_frontend_update_buffer(struct sun4i_frontend *frontend,
/* Set the physical address of the buffer in memory */
paddr = drm_fb_cma_get_gem_addr(fb, state, 0);
- paddr -= PHYS_OFFSET;
DRM_DEBUG_DRIVER("Setting buffer #0 address to %pad\n", &paddr);
regmap_write(frontend->regs, SUN4I_FRONTEND_BUF_ADDR0_REG, paddr);
if (fb->format->num_planes > 1) {
paddr = drm_fb_cma_get_gem_addr(fb, state, swap ? 2 : 1);
- paddr -= PHYS_OFFSET;
DRM_DEBUG_DRIVER("Setting buffer #1 address to %pad\n", &paddr);
regmap_write(frontend->regs, SUN4I_FRONTEND_BUF_ADDR1_REG,
paddr);
@@ -236,7 +234,6 @@ void sun4i_frontend_update_buffer(struct sun4i_frontend *frontend,
if (fb->format->num_planes > 2) {
paddr = drm_fb_cma_get_gem_addr(fb, state, swap ? 1 : 2);
- paddr -= PHYS_OFFSET;
DRM_DEBUG_DRIVER("Setting buffer #2 address to %pad\n", &paddr);
regmap_write(frontend->regs, SUN4I_FRONTEND_BUF_ADDR2_REG,
paddr);
diff --git a/drivers/gpu/drm/vc4/Kconfig b/drivers/gpu/drm/vc4/Kconfig
index de3424fed2fc..6cf2621786e6 100644
--- a/drivers/gpu/drm/vc4/Kconfig
+++ b/drivers/gpu/drm/vc4/Kconfig
@@ -2,6 +2,9 @@
config DRM_VC4
tristate "Broadcom VC4 Graphics"
depends on ARCH_BCM || ARCH_BCM2835 || COMPILE_TEST
+ # Make sure not 'y' when RASPBERRYPI_FIRMWARE is 'm'. This can only
+ # happen when COMPILE_TEST=y, hence the added !RASPBERRYPI_FIRMWARE.
+ depends on RASPBERRYPI_FIRMWARE || (COMPILE_TEST && !RASPBERRYPI_FIRMWARE)
depends on DRM
depends on SND && SND_SOC
depends on COMMON_CLK
diff --git a/drivers/gpu/drm/vc4/vc4_dsi.c b/drivers/gpu/drm/vc4/vc4_dsi.c
index 752f921735c6..98308a17e4ed 100644
--- a/drivers/gpu/drm/vc4/vc4_dsi.c
+++ b/drivers/gpu/drm/vc4/vc4_dsi.c
@@ -846,7 +846,7 @@ static void vc4_dsi_encoder_enable(struct drm_encoder *encoder)
unsigned long phy_clock;
int ret;
- ret = pm_runtime_get_sync(dev);
+ ret = pm_runtime_resume_and_get(dev);
if (ret) {
DRM_ERROR("Failed to runtime PM enable on DSI%d\n", dsi->variant->port);
return;
diff --git a/drivers/gpu/drm/vc4/vc4_hdmi.c b/drivers/gpu/drm/vc4/vc4_hdmi.c
index 6c58b0fd13fb..98b78ec6b37d 100644
--- a/drivers/gpu/drm/vc4/vc4_hdmi.c
+++ b/drivers/gpu/drm/vc4/vc4_hdmi.c
@@ -38,6 +38,7 @@
#include <drm/drm_scdc_helper.h>
#include <linux/clk.h>
#include <linux/component.h>
+#include <linux/gpio/consumer.h>
#include <linux/i2c.h>
#include <linux/of_address.h>
#include <linux/of_gpio.h>
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c
index 31aecc46624b..04c8a378aeed 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c
@@ -46,6 +46,21 @@ vmw_buffer_object(struct ttm_buffer_object *bo)
return container_of(bo, struct vmw_buffer_object, base);
}
+/**
+ * bo_is_vmw - check if the buffer object is a &vmw_buffer_object
+ * @bo: ttm buffer object to be checked
+ *
+ * Uses destroy function associated with the object to determine if this is
+ * a &vmw_buffer_object.
+ *
+ * Returns:
+ * true if the object is of &vmw_buffer_object type, false if not.
+ */
+static bool bo_is_vmw(struct ttm_buffer_object *bo)
+{
+ return bo->destroy == &vmw_bo_bo_free ||
+ bo->destroy == &vmw_gem_destroy;
+}
/**
* vmw_bo_pin_in_placement - Validate a buffer to placement.
@@ -615,8 +630,9 @@ int vmw_user_bo_synccpu_ioctl(struct drm_device *dev, void *data,
ret = vmw_user_bo_synccpu_grab(vbo, arg->flags);
vmw_bo_unreference(&vbo);
- if (unlikely(ret != 0 && ret != -ERESTARTSYS &&
- ret != -EBUSY)) {
+ if (unlikely(ret != 0)) {
+ if (ret == -ERESTARTSYS || ret == -EBUSY)
+ return -EBUSY;
DRM_ERROR("Failed synccpu grab on handle 0x%08x.\n",
(unsigned int) arg->handle);
return ret;
@@ -798,7 +814,7 @@ int vmw_dumb_create(struct drm_file *file_priv,
void vmw_bo_swap_notify(struct ttm_buffer_object *bo)
{
/* Is @bo embedded in a struct vmw_buffer_object? */
- if (vmw_bo_is_vmw_bo(bo))
+ if (!bo_is_vmw(bo))
return;
/* Kill any cached kernel maps before swapout */
@@ -822,7 +838,7 @@ void vmw_bo_move_notify(struct ttm_buffer_object *bo,
struct vmw_buffer_object *vbo;
/* Make sure @bo is embedded in a struct vmw_buffer_object? */
- if (vmw_bo_is_vmw_bo(bo))
+ if (!bo_is_vmw(bo))
return;
vbo = container_of(bo, struct vmw_buffer_object, base);
@@ -843,22 +859,3 @@ void vmw_bo_move_notify(struct ttm_buffer_object *bo,
if (mem->mem_type != VMW_PL_MOB && bo->resource->mem_type == VMW_PL_MOB)
vmw_resource_unbind_list(vbo);
}
-
-/**
- * vmw_bo_is_vmw_bo - check if the buffer object is a &vmw_buffer_object
- * @bo: buffer object to be checked
- *
- * Uses destroy function associated with the object to determine if this is
- * a &vmw_buffer_object.
- *
- * Returns:
- * true if the object is of &vmw_buffer_object type, false if not.
- */
-bool vmw_bo_is_vmw_bo(struct ttm_buffer_object *bo)
-{
- if (bo->destroy == &vmw_bo_bo_free ||
- bo->destroy == &vmw_gem_destroy)
- return true;
-
- return false;
-}
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_cmd.c b/drivers/gpu/drm/vmwgfx/vmwgfx_cmd.c
index a3bfbb6c3e14..162dfeb1cc5a 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_cmd.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_cmd.c
@@ -528,7 +528,7 @@ int vmw_cmd_send_fence(struct vmw_private *dev_priv, uint32_t *seqno)
*seqno = atomic_add_return(1, &dev_priv->marker_seq);
} while (*seqno == 0);
- if (!(vmw_fifo_caps(dev_priv) & SVGA_FIFO_CAP_FENCE)) {
+ if (!vmw_has_fences(dev_priv)) {
/*
* Don't request hardware to send a fence. The
@@ -675,11 +675,14 @@ int vmw_cmd_emit_dummy_query(struct vmw_private *dev_priv,
*/
bool vmw_cmd_supported(struct vmw_private *vmw)
{
- if ((vmw->capabilities & (SVGA_CAP_COMMAND_BUFFERS |
- SVGA_CAP_CMD_BUFFERS_2)) != 0)
- return true;
+ bool has_cmdbufs =
+ (vmw->capabilities & (SVGA_CAP_COMMAND_BUFFERS |
+ SVGA_CAP_CMD_BUFFERS_2)) != 0;
+ if (vmw_is_svga_v3(vmw))
+ return (has_cmdbufs &&
+ (vmw->capabilities & SVGA_CAP_GBOBJECTS) != 0);
/*
* We have FIFO cmd's
*/
- return vmw->fifo_mem != NULL;
+ return has_cmdbufs || vmw->fifo_mem != NULL;
}
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
index 26eb5478394a..163c00793eb1 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
@@ -998,13 +998,10 @@ static int vmw_driver_load(struct vmw_private *dev_priv, u32 pci_id)
goto out_no_fman;
}
- drm_vma_offset_manager_init(&dev_priv->vma_manager,
- DRM_FILE_PAGE_OFFSET_START,
- DRM_FILE_PAGE_OFFSET_SIZE);
ret = ttm_device_init(&dev_priv->bdev, &vmw_bo_driver,
dev_priv->drm.dev,
dev_priv->drm.anon_inode->i_mapping,
- &dev_priv->vma_manager,
+ dev_priv->drm.vma_offset_manager,
dev_priv->map_mode == vmw_dma_alloc_coherent,
false);
if (unlikely(ret != 0)) {
@@ -1174,7 +1171,6 @@ static void vmw_driver_unload(struct drm_device *dev)
vmw_devcaps_destroy(dev_priv);
vmw_vram_manager_fini(dev_priv);
ttm_device_fini(&dev_priv->bdev);
- drm_vma_offset_manager_destroy(&dev_priv->vma_manager);
vmw_release_device_late(dev_priv);
vmw_fence_manager_takedown(dev_priv->fman);
if (dev_priv->capabilities & SVGA_CAP_IRQMASK)
@@ -1398,7 +1394,7 @@ vmw_get_unmapped_area(struct file *file, unsigned long uaddr,
struct vmw_private *dev_priv = vmw_priv(file_priv->minor->dev);
return drm_get_unmapped_area(file, uaddr, len, pgoff, flags,
- &dev_priv->vma_manager);
+ dev_priv->drm.vma_offset_manager);
}
static int vmwgfx_pm_notifier(struct notifier_block *nb, unsigned long val,
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
index ea3ecdda561d..6de0b9ef5c77 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
@@ -1679,4 +1679,12 @@ static inline void vmw_irq_status_write(struct vmw_private *vmw,
outl(status, vmw->io_start + SVGA_IRQSTATUS_PORT);
}
+static inline bool vmw_has_fences(struct vmw_private *vmw)
+{
+ if ((vmw->capabilities & (SVGA_CAP_COMMAND_BUFFERS |
+ SVGA_CAP_CMD_BUFFERS_2)) != 0)
+ return true;
+ return (vmw_fifo_caps(vmw) & SVGA_FIFO_CAP_FENCE) != 0;
+}
+
#endif
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c
index 8ee34576c7d0..adf17c740656 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c
@@ -483,7 +483,7 @@ static int vmw_fb_kms_detach(struct vmw_fb_par *par,
static int vmw_fb_kms_framebuffer(struct fb_info *info)
{
- struct drm_mode_fb_cmd2 mode_cmd;
+ struct drm_mode_fb_cmd2 mode_cmd = {0};
struct vmw_fb_par *par = info->par;
struct fb_var_screeninfo *var = &info->var;
struct drm_framebuffer *cur_fb;
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
index 59d6a2dd4c2e..66cc35dc223e 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
@@ -82,6 +82,22 @@ fman_from_fence(struct vmw_fence_obj *fence)
return container_of(fence->base.lock, struct vmw_fence_manager, lock);
}
+static u32 vmw_fence_goal_read(struct vmw_private *vmw)
+{
+ if ((vmw->capabilities2 & SVGA_CAP2_EXTRA_REGS) != 0)
+ return vmw_read(vmw, SVGA_REG_FENCE_GOAL);
+ else
+ return vmw_fifo_mem_read(vmw, SVGA_FIFO_FENCE_GOAL);
+}
+
+static void vmw_fence_goal_write(struct vmw_private *vmw, u32 value)
+{
+ if ((vmw->capabilities2 & SVGA_CAP2_EXTRA_REGS) != 0)
+ vmw_write(vmw, SVGA_REG_FENCE_GOAL, value);
+ else
+ vmw_fifo_mem_write(vmw, SVGA_FIFO_FENCE_GOAL, value);
+}
+
/*
* Note on fencing subsystem usage of irqs:
* Typically the vmw_fences_update function is called
@@ -392,7 +408,7 @@ static bool vmw_fence_goal_new_locked(struct vmw_fence_manager *fman,
if (likely(!fman->seqno_valid))
return false;
- goal_seqno = vmw_fifo_mem_read(fman->dev_priv, SVGA_FIFO_FENCE_GOAL);
+ goal_seqno = vmw_fence_goal_read(fman->dev_priv);
if (likely(passed_seqno - goal_seqno >= VMW_FENCE_WRAP))
return false;
@@ -400,9 +416,8 @@ static bool vmw_fence_goal_new_locked(struct vmw_fence_manager *fman,
list_for_each_entry(fence, &fman->fence_list, head) {
if (!list_empty(&fence->seq_passed_actions)) {
fman->seqno_valid = true;
- vmw_fifo_mem_write(fman->dev_priv,
- SVGA_FIFO_FENCE_GOAL,
- fence->base.seqno);
+ vmw_fence_goal_write(fman->dev_priv,
+ fence->base.seqno);
break;
}
}
@@ -434,13 +449,12 @@ static bool vmw_fence_goal_check_locked(struct vmw_fence_obj *fence)
if (dma_fence_is_signaled_locked(&fence->base))
return false;
- goal_seqno = vmw_fifo_mem_read(fman->dev_priv, SVGA_FIFO_FENCE_GOAL);
+ goal_seqno = vmw_fence_goal_read(fman->dev_priv);
if (likely(fman->seqno_valid &&
goal_seqno - fence->base.seqno < VMW_FENCE_WRAP))
return false;
- vmw_fifo_mem_write(fman->dev_priv, SVGA_FIFO_FENCE_GOAL,
- fence->base.seqno);
+ vmw_fence_goal_write(fman->dev_priv, fence->base.seqno);
fman->seqno_valid = true;
return true;
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_irq.c b/drivers/gpu/drm/vmwgfx/vmwgfx_irq.c
index c5191de365ca..fe4732bf2c9d 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_irq.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_irq.c
@@ -32,6 +32,14 @@
#define VMW_FENCE_WRAP (1 << 24)
+static u32 vmw_irqflag_fence_goal(struct vmw_private *vmw)
+{
+ if ((vmw->capabilities2 & SVGA_CAP2_EXTRA_REGS) != 0)
+ return SVGA_IRQFLAG_REG_FENCE_GOAL;
+ else
+ return SVGA_IRQFLAG_FENCE_GOAL;
+}
+
/**
* vmw_thread_fn - Deferred (process context) irq handler
*
@@ -96,7 +104,7 @@ static irqreturn_t vmw_irq_handler(int irq, void *arg)
wake_up_all(&dev_priv->fifo_queue);
if ((masked_status & (SVGA_IRQFLAG_ANY_FENCE |
- SVGA_IRQFLAG_FENCE_GOAL)) &&
+ vmw_irqflag_fence_goal(dev_priv))) &&
!test_and_set_bit(VMW_IRQTHREAD_FENCE, dev_priv->irqthread_pending))
ret = IRQ_WAKE_THREAD;
@@ -137,8 +145,7 @@ bool vmw_seqno_passed(struct vmw_private *dev_priv,
if (likely(dev_priv->last_read_seqno - seqno < VMW_FENCE_WRAP))
return true;
- if (!(vmw_fifo_caps(dev_priv) & SVGA_FIFO_CAP_FENCE) &&
- vmw_fifo_idle(dev_priv, seqno))
+ if (!vmw_has_fences(dev_priv) && vmw_fifo_idle(dev_priv, seqno))
return true;
/**
@@ -160,6 +167,7 @@ int vmw_fallback_wait(struct vmw_private *dev_priv,
unsigned long timeout)
{
struct vmw_fifo_state *fifo_state = dev_priv->fifo;
+ bool fifo_down = false;
uint32_t count = 0;
uint32_t signal_seq;
@@ -176,12 +184,14 @@ int vmw_fallback_wait(struct vmw_private *dev_priv,
*/
if (fifo_idle) {
- down_read(&fifo_state->rwsem);
if (dev_priv->cman) {
ret = vmw_cmdbuf_idle(dev_priv->cman, interruptible,
10*HZ);
if (ret)
goto out_err;
+ } else if (fifo_state) {
+ down_read(&fifo_state->rwsem);
+ fifo_down = true;
}
}
@@ -218,12 +228,12 @@ int vmw_fallback_wait(struct vmw_private *dev_priv,
}
}
finish_wait(&dev_priv->fence_queue, &__wait);
- if (ret == 0 && fifo_idle)
+ if (ret == 0 && fifo_idle && fifo_state)
vmw_fence_write(dev_priv, signal_seq);
wake_up_all(&dev_priv->fence_queue);
out_err:
- if (fifo_idle)
+ if (fifo_down)
up_read(&fifo_state->rwsem);
return ret;
@@ -266,13 +276,13 @@ void vmw_seqno_waiter_remove(struct vmw_private *dev_priv)
void vmw_goal_waiter_add(struct vmw_private *dev_priv)
{
- vmw_generic_waiter_add(dev_priv, SVGA_IRQFLAG_FENCE_GOAL,
+ vmw_generic_waiter_add(dev_priv, vmw_irqflag_fence_goal(dev_priv),
&dev_priv->goal_queue_waiters);
}
void vmw_goal_waiter_remove(struct vmw_private *dev_priv)
{
- vmw_generic_waiter_remove(dev_priv, SVGA_IRQFLAG_FENCE_GOAL,
+ vmw_generic_waiter_remove(dev_priv, vmw_irqflag_fence_goal(dev_priv),
&dev_priv->goal_queue_waiters);
}
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
index bbd2f4ec08ec..93431e8f6606 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
@@ -1344,7 +1344,6 @@ vmw_kms_new_framebuffer(struct vmw_private *dev_priv,
ret = vmw_kms_new_framebuffer_surface(dev_priv, surface, &vfb,
mode_cmd,
is_bo_proxy);
-
/*
* vmw_create_bo_proxy() adds a reference that is no longer
* needed
@@ -1385,13 +1384,16 @@ static struct drm_framebuffer *vmw_kms_fb_create(struct drm_device *dev,
ret = vmw_user_lookup_handle(dev_priv, file_priv,
mode_cmd->handles[0],
&surface, &bo);
- if (ret)
+ if (ret) {
+ DRM_ERROR("Invalid buffer object handle %u (0x%x).\n",
+ mode_cmd->handles[0], mode_cmd->handles[0]);
goto err_out;
+ }
if (!bo &&
!vmw_kms_srf_ok(dev_priv, mode_cmd->width, mode_cmd->height)) {
- DRM_ERROR("Surface size cannot exceed %dx%d",
+ DRM_ERROR("Surface size cannot exceed %dx%d\n",
dev_priv->texture_max_width,
dev_priv->texture_max_height);
goto err_out;
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c
index 00e8e27e4884..ace7ca150b03 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c
@@ -683,6 +683,9 @@ static void vmw_user_surface_base_release(struct ttm_base_object **p_base)
container_of(base, struct vmw_user_surface, prime.base);
struct vmw_resource *res = &user_srf->srf.res;
+ if (base->shareable && res && res->backup)
+ drm_gem_object_put(&res->backup->base.base);
+
*p_base = NULL;
vmw_resource_unreference(&res);
}
@@ -857,6 +860,7 @@ int vmw_surface_define_ioctl(struct drm_device *dev, void *data,
goto out_unlock;
}
vmw_bo_reference(res->backup);
+ drm_gem_object_get(&res->backup->base.base);
}
tmp = vmw_resource_reference(&srf->res);
@@ -1513,7 +1517,6 @@ vmw_gb_surface_define_internal(struct drm_device *dev,
&res->backup);
if (ret == 0)
vmw_bo_reference(res->backup);
-
}
if (unlikely(ret != 0)) {
@@ -1561,6 +1564,8 @@ vmw_gb_surface_define_internal(struct drm_device *dev,
drm_vma_node_offset_addr(&res->backup->base.base.vma_node);
rep->buffer_size = res->backup->base.base.size;
rep->buffer_handle = backup_handle;
+ if (user_srf->prime.base.shareable)
+ drm_gem_object_get(&res->backup->base.base);
} else {
rep->buffer_map_handle = 0;
rep->buffer_size = 0;
diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig
index 68a8a27ab3b7..f2b038fa3b84 100644
--- a/drivers/hwmon/Kconfig
+++ b/drivers/hwmon/Kconfig
@@ -960,7 +960,7 @@ config SENSORS_LTC4261
config SENSORS_LTQ_CPUTEMP
bool "Lantiq cpu temperature sensor driver"
- depends on LANTIQ
+ depends on SOC_XWAY
help
If you say yes here you get support for the temperature
sensor inside your CPU.
diff --git a/drivers/hwmon/adt7470.c b/drivers/hwmon/adt7470.c
index fb6d14d213a1..c67cd037a93f 100644
--- a/drivers/hwmon/adt7470.c
+++ b/drivers/hwmon/adt7470.c
@@ -19,6 +19,7 @@
#include <linux/log2.h>
#include <linux/kthread.h>
#include <linux/regmap.h>
+#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/util_macros.h>
@@ -294,11 +295,10 @@ static int adt7470_update_thread(void *p)
adt7470_read_temperatures(data);
mutex_unlock(&data->lock);
- set_current_state(TASK_INTERRUPTIBLE);
if (kthread_should_stop())
break;
- schedule_timeout(msecs_to_jiffies(data->auto_update_interval));
+ schedule_timeout_interruptible(msecs_to_jiffies(data->auto_update_interval));
}
return 0;
diff --git a/drivers/hwmon/asus_wmi_sensors.c b/drivers/hwmon/asus_wmi_sensors.c
index 8fdcb62ae52d..9e935e34c998 100644
--- a/drivers/hwmon/asus_wmi_sensors.c
+++ b/drivers/hwmon/asus_wmi_sensors.c
@@ -71,7 +71,7 @@ static const struct dmi_system_id asus_wmi_dmi_table[] = {
DMI_EXACT_MATCH_ASUS_BOARD_NAME("PRIME X399-A"),
DMI_EXACT_MATCH_ASUS_BOARD_NAME("PRIME X470-PRO"),
DMI_EXACT_MATCH_ASUS_BOARD_NAME("ROG CROSSHAIR VI EXTREME"),
- DMI_EXACT_MATCH_ASUS_BOARD_NAME("ROG CROSSHAIR VI HERO"),
+ DMI_EXACT_MATCH_ASUS_BOARD_NAME("CROSSHAIR VI HERO"),
DMI_EXACT_MATCH_ASUS_BOARD_NAME("ROG CROSSHAIR VI HERO (WI-FI AC)"),
DMI_EXACT_MATCH_ASUS_BOARD_NAME("ROG CROSSHAIR VII HERO"),
DMI_EXACT_MATCH_ASUS_BOARD_NAME("ROG CROSSHAIR VII HERO (WI-FI)"),
diff --git a/drivers/hwmon/f71882fg.c b/drivers/hwmon/f71882fg.c
index 938a8b9ec70d..6830e029995d 100644
--- a/drivers/hwmon/f71882fg.c
+++ b/drivers/hwmon/f71882fg.c
@@ -1578,8 +1578,9 @@ static ssize_t show_temp(struct device *dev, struct device_attribute *devattr,
temp *= 125;
if (sign)
temp -= 128000;
- } else
- temp = data->temp[nr] * 1000;
+ } else {
+ temp = ((s8)data->temp[nr]) * 1000;
+ }
return sprintf(buf, "%d\n", temp);
}
diff --git a/drivers/hwmon/pmbus/delta-ahe50dc-fan.c b/drivers/hwmon/pmbus/delta-ahe50dc-fan.c
index 40dffd9c4cbf..f546f0c12497 100644
--- a/drivers/hwmon/pmbus/delta-ahe50dc-fan.c
+++ b/drivers/hwmon/pmbus/delta-ahe50dc-fan.c
@@ -14,6 +14,21 @@
#define AHE50DC_PMBUS_READ_TEMP4 0xd0
+static int ahe50dc_fan_write_byte(struct i2c_client *client, int page, u8 value)
+{
+ /*
+ * The CLEAR_FAULTS operation seems to sometimes (unpredictably, perhaps
+ * 5% of the time or so) trigger a problematic phenomenon in which the
+ * fan speeds surge momentarily and at least some (perhaps all?) of the
+ * system's power outputs experience a glitch.
+ *
+ * However, according to Delta it should be OK to simply not send any
+ * CLEAR_FAULTS commands (the device doesn't seem to be capable of
+ * reporting any faults anyway), so just blackhole them unconditionally.
+ */
+ return value == PMBUS_CLEAR_FAULTS ? -EOPNOTSUPP : -ENODATA;
+}
+
static int ahe50dc_fan_read_word_data(struct i2c_client *client, int page, int phase, int reg)
{
/* temp1 in (virtual) page 1 is remapped to mfr-specific temp4 */
@@ -68,6 +83,7 @@ static struct pmbus_driver_info ahe50dc_fan_info = {
PMBUS_HAVE_VIN | PMBUS_HAVE_FAN12 | PMBUS_HAVE_FAN34 |
PMBUS_HAVE_STATUS_FAN12 | PMBUS_HAVE_STATUS_FAN34 | PMBUS_PAGE_VIRTUAL,
.func[1] = PMBUS_HAVE_TEMP | PMBUS_PAGE_VIRTUAL,
+ .write_byte = ahe50dc_fan_write_byte,
.read_word_data = ahe50dc_fan_read_word_data,
};
diff --git a/drivers/hwmon/pmbus/pmbus_core.c b/drivers/hwmon/pmbus/pmbus_core.c
index b2618b1d529e..d93574d6a1fb 100644
--- a/drivers/hwmon/pmbus/pmbus_core.c
+++ b/drivers/hwmon/pmbus/pmbus_core.c
@@ -2326,6 +2326,9 @@ static int pmbus_init_common(struct i2c_client *client, struct pmbus_data *data,
data->has_status_word = true;
}
+ /* Make sure PEC is disabled, will be enabled later if needed */
+ client->flags &= ~I2C_CLIENT_PEC;
+
/* Enable PEC if the controller and bus supports it */
if (!(data->flags & PMBUS_NO_CAPABILITY)) {
ret = i2c_smbus_read_byte_data(client, PMBUS_CAPABILITY);
diff --git a/drivers/hwmon/pmbus/xdpe12284.c b/drivers/hwmon/pmbus/xdpe12284.c
index 18fffc5d749b..32bc7736d609 100644
--- a/drivers/hwmon/pmbus/xdpe12284.c
+++ b/drivers/hwmon/pmbus/xdpe12284.c
@@ -124,7 +124,7 @@ static int xdpe122_identify(struct i2c_client *client,
return 0;
}
-static const struct regulator_desc xdpe122_reg_desc[] = {
+static const struct regulator_desc __maybe_unused xdpe122_reg_desc[] = {
PMBUS_REGULATOR("vout", 0),
PMBUS_REGULATOR("vout", 1),
};
diff --git a/drivers/hwmon/tmp401.c b/drivers/hwmon/tmp401.c
index b86d9df7105d..52c9e7d3f2ae 100644
--- a/drivers/hwmon/tmp401.c
+++ b/drivers/hwmon/tmp401.c
@@ -708,10 +708,21 @@ static int tmp401_probe(struct i2c_client *client)
return 0;
}
+static const struct of_device_id __maybe_unused tmp4xx_of_match[] = {
+ { .compatible = "ti,tmp401", },
+ { .compatible = "ti,tmp411", },
+ { .compatible = "ti,tmp431", },
+ { .compatible = "ti,tmp432", },
+ { .compatible = "ti,tmp435", },
+ { },
+};
+MODULE_DEVICE_TABLE(of, tmp4xx_of_match);
+
static struct i2c_driver tmp401_driver = {
.class = I2C_CLASS_HWMON,
.driver = {
.name = "tmp401",
+ .of_match_table = of_match_ptr(tmp4xx_of_match),
},
.probe_new = tmp401_probe,
.id_table = tmp401_id,
diff --git a/drivers/i2c/busses/i2c-ismt.c b/drivers/i2c/busses/i2c-ismt.c
index c0364314877e..c16157ee8c52 100644
--- a/drivers/i2c/busses/i2c-ismt.c
+++ b/drivers/i2c/busses/i2c-ismt.c
@@ -82,6 +82,7 @@
#define ISMT_DESC_ENTRIES 2 /* number of descriptor entries */
#define ISMT_MAX_RETRIES 3 /* number of SMBus retries to attempt */
+#define ISMT_LOG_ENTRIES 3 /* number of interrupt cause log entries */
/* Hardware Descriptor Constants - Control Field */
#define ISMT_DESC_CWRL 0x01 /* Command/Write Length */
@@ -175,6 +176,8 @@ struct ismt_priv {
u8 head; /* ring buffer head pointer */
struct completion cmp; /* interrupt completion */
u8 buffer[I2C_SMBUS_BLOCK_MAX + 16]; /* temp R/W data buffer */
+ dma_addr_t log_dma;
+ u32 *log;
};
static const struct pci_device_id ismt_ids[] = {
@@ -411,6 +414,9 @@ static int ismt_access(struct i2c_adapter *adap, u16 addr,
memset(desc, 0, sizeof(struct ismt_desc));
desc->tgtaddr_rw = ISMT_DESC_ADDR_RW(addr, read_write);
+ /* Always clear the log entries */
+ memset(priv->log, 0, ISMT_LOG_ENTRIES * sizeof(u32));
+
/* Initialize common control bits */
if (likely(pci_dev_msi_enabled(priv->pci_dev)))
desc->control = ISMT_DESC_INT | ISMT_DESC_FAIR;
@@ -708,6 +714,8 @@ static void ismt_hw_init(struct ismt_priv *priv)
/* initialize the Master Descriptor Base Address (MDBA) */
writeq(priv->io_rng_dma, priv->smba + ISMT_MSTR_MDBA);
+ writeq(priv->log_dma, priv->smba + ISMT_GR_SMTICL);
+
/* initialize the Master Control Register (MCTRL) */
writel(ISMT_MCTRL_MEIE, priv->smba + ISMT_MSTR_MCTRL);
@@ -795,6 +803,12 @@ static int ismt_dev_init(struct ismt_priv *priv)
priv->head = 0;
init_completion(&priv->cmp);
+ priv->log = dmam_alloc_coherent(&priv->pci_dev->dev,
+ ISMT_LOG_ENTRIES * sizeof(u32),
+ &priv->log_dma, GFP_KERNEL);
+ if (!priv->log)
+ return -ENOMEM;
+
return 0;
}
diff --git a/drivers/i2c/busses/i2c-mt7621.c b/drivers/i2c/busses/i2c-mt7621.c
index 45fe4a7fe0c0..901f0fb04fee 100644
--- a/drivers/i2c/busses/i2c-mt7621.c
+++ b/drivers/i2c/busses/i2c-mt7621.c
@@ -304,7 +304,8 @@ static int mtk_i2c_probe(struct platform_device *pdev)
if (i2c->bus_freq == 0) {
dev_warn(i2c->dev, "clock-frequency 0 not supported\n");
- return -EINVAL;
+ ret = -EINVAL;
+ goto err_disable_clk;
}
adap = &i2c->adap;
@@ -322,10 +323,15 @@ static int mtk_i2c_probe(struct platform_device *pdev)
ret = i2c_add_adapter(adap);
if (ret < 0)
- return ret;
+ goto err_disable_clk;
dev_info(&pdev->dev, "clock %u kHz\n", i2c->bus_freq / 1000);
+ return 0;
+
+err_disable_clk:
+ clk_disable_unprepare(i2c->clk);
+
return ret;
}
diff --git a/drivers/i2c/busses/i2c-thunderx-pcidrv.c b/drivers/i2c/busses/i2c-thunderx-pcidrv.c
index 12c90aa0900e..a77cd86fe75e 100644
--- a/drivers/i2c/busses/i2c-thunderx-pcidrv.c
+++ b/drivers/i2c/busses/i2c-thunderx-pcidrv.c
@@ -213,6 +213,7 @@ static int thunder_i2c_probe_pci(struct pci_dev *pdev,
i2c->adap.bus_recovery_info = &octeon_i2c_recovery_info;
i2c->adap.dev.parent = dev;
i2c->adap.dev.of_node = pdev->dev.of_node;
+ i2c->adap.dev.fwnode = dev->fwnode;
snprintf(i2c->adap.name, sizeof(i2c->adap.name),
"Cavium ThunderX i2c adapter at %s", dev_name(dev));
i2c_set_adapdata(&i2c->adap, i2c);
diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c
index b7640cfe0020..47551ab73ca8 100644
--- a/drivers/idle/intel_idle.c
+++ b/drivers/idle/intel_idle.c
@@ -69,7 +69,12 @@ static unsigned int preferred_states_mask;
static struct cpuidle_device __percpu *intel_idle_cpuidle_devices;
static unsigned long auto_demotion_disable_flags;
-static bool disable_promotion_to_c1e;
+
+static enum {
+ C1E_PROMOTION_PRESERVE,
+ C1E_PROMOTION_ENABLE,
+ C1E_PROMOTION_DISABLE
+} c1e_promotion = C1E_PROMOTION_PRESERVE;
struct idle_cpu {
struct cpuidle_state *state_table;
@@ -1398,8 +1403,6 @@ static inline void intel_idle_init_cstates_acpi(struct cpuidle_driver *drv) { }
static inline bool intel_idle_off_by_default(u32 mwait_hint) { return false; }
#endif /* !CONFIG_ACPI_PROCESSOR_CSTATE */
-static void c1e_promotion_enable(void);
-
/**
* ivt_idle_state_table_update - Tune the idle states table for Ivy Town.
*
@@ -1578,17 +1581,14 @@ static void __init spr_idle_state_table_update(void)
unsigned long long msr;
/* Check if user prefers C1E over C1. */
- if (preferred_states_mask & BIT(2)) {
- if (preferred_states_mask & BIT(1))
- /* Both can't be enabled, stick to the defaults. */
- return;
-
+ if ((preferred_states_mask & BIT(2)) &&
+ !(preferred_states_mask & BIT(1))) {
+ /* Disable C1 and enable C1E. */
spr_cstates[0].flags |= CPUIDLE_FLAG_UNUSABLE;
spr_cstates[1].flags &= ~CPUIDLE_FLAG_UNUSABLE;
/* Enable C1E using the "C1E promotion" bit. */
- c1e_promotion_enable();
- disable_promotion_to_c1e = false;
+ c1e_promotion = C1E_PROMOTION_ENABLE;
}
/*
@@ -1754,7 +1754,9 @@ static int intel_idle_cpu_init(unsigned int cpu)
if (auto_demotion_disable_flags)
auto_demotion_disable();
- if (disable_promotion_to_c1e)
+ if (c1e_promotion == C1E_PROMOTION_ENABLE)
+ c1e_promotion_enable();
+ else if (c1e_promotion == C1E_PROMOTION_DISABLE)
c1e_promotion_disable();
return 0;
@@ -1833,7 +1835,8 @@ static int __init intel_idle_init(void)
if (icpu) {
cpuidle_state_table = icpu->state_table;
auto_demotion_disable_flags = icpu->auto_demotion_disable_flags;
- disable_promotion_to_c1e = icpu->disable_promotion_to_c1e;
+ if (icpu->disable_promotion_to_c1e)
+ c1e_promotion = C1E_PROMOTION_DISABLE;
if (icpu->use_acpi || force_use_acpi)
intel_idle_acpi_cst_extract();
} else if (!intel_idle_acpi_cst_extract()) {
diff --git a/drivers/iio/adc/ad7280a.c b/drivers/iio/adc/ad7280a.c
index ef9d27759961..ec9acbf12b9a 100644
--- a/drivers/iio/adc/ad7280a.c
+++ b/drivers/iio/adc/ad7280a.c
@@ -745,7 +745,7 @@ static int ad7280a_write_thresh(struct iio_dev *indio_dev,
case IIO_EV_DIR_RISING:
addr = AD7280A_CELL_OVERVOLTAGE_REG;
ret = ad7280_write(st, AD7280A_DEVADDR_MASTER, addr,
- 1, val);
+ 1, value);
if (ret)
break;
st->cell_threshhigh = value;
@@ -753,7 +753,7 @@ static int ad7280a_write_thresh(struct iio_dev *indio_dev,
case IIO_EV_DIR_FALLING:
addr = AD7280A_CELL_UNDERVOLTAGE_REG;
ret = ad7280_write(st, AD7280A_DEVADDR_MASTER, addr,
- 1, val);
+ 1, value);
if (ret)
break;
st->cell_threshlow = value;
@@ -770,18 +770,18 @@ static int ad7280a_write_thresh(struct iio_dev *indio_dev,
case IIO_EV_DIR_RISING:
addr = AD7280A_AUX_ADC_OVERVOLTAGE_REG;
ret = ad7280_write(st, AD7280A_DEVADDR_MASTER, addr,
- 1, val);
+ 1, value);
if (ret)
break;
- st->aux_threshhigh = val;
+ st->aux_threshhigh = value;
break;
case IIO_EV_DIR_FALLING:
addr = AD7280A_AUX_ADC_UNDERVOLTAGE_REG;
ret = ad7280_write(st, AD7280A_DEVADDR_MASTER, addr,
- 1, val);
+ 1, value);
if (ret)
break;
- st->aux_threshlow = val;
+ st->aux_threshlow = value;
break;
default:
ret = -EINVAL;
diff --git a/drivers/iio/chemical/scd4x.c b/drivers/iio/chemical/scd4x.c
index 20d4e7584e92..37143b5526ee 100644
--- a/drivers/iio/chemical/scd4x.c
+++ b/drivers/iio/chemical/scd4x.c
@@ -471,12 +471,15 @@ static ssize_t calibration_forced_value_store(struct device *dev,
ret = scd4x_write_and_fetch(state, CMD_FRC, arg, &val, sizeof(val));
mutex_unlock(&state->lock);
+ if (ret)
+ return ret;
+
if (val == 0xff) {
dev_err(dev, "forced calibration has failed");
return -EINVAL;
}
- return ret ?: len;
+ return len;
}
static IIO_DEVICE_ATTR_RW(calibration_auto_enable, 0);
diff --git a/drivers/iio/dac/ad3552r.c b/drivers/iio/dac/ad3552r.c
index 97f13c0b9631..d5ea1a1be122 100644
--- a/drivers/iio/dac/ad3552r.c
+++ b/drivers/iio/dac/ad3552r.c
@@ -656,7 +656,7 @@ static int ad3552r_reset(struct ad3552r_desc *dac)
{
struct reg_addr_pool addr;
int ret;
- u16 val;
+ int val;
dac->gpio_reset = devm_gpiod_get_optional(&dac->spi->dev, "reset",
GPIOD_OUT_LOW);
@@ -809,10 +809,10 @@ static int ad3552r_configure_custom_gain(struct ad3552r_desc *dac,
gain_child = fwnode_get_named_child_node(child,
"custom-output-range-config");
- if (IS_ERR(gain_child)) {
+ if (!gain_child) {
dev_err(dev,
"mandatory custom-output-range-config property missing\n");
- return PTR_ERR(gain_child);
+ return -EINVAL;
}
dac->ch_data[ch].range_override = 1;
diff --git a/drivers/iio/dac/ad5446.c b/drivers/iio/dac/ad5446.c
index 14cfabacbea5..fdf824041497 100644
--- a/drivers/iio/dac/ad5446.c
+++ b/drivers/iio/dac/ad5446.c
@@ -178,7 +178,7 @@ static int ad5446_read_raw(struct iio_dev *indio_dev,
switch (m) {
case IIO_CHAN_INFO_RAW:
- *val = st->cached_val;
+ *val = st->cached_val >> chan->scan_type.shift;
return IIO_VAL_INT;
case IIO_CHAN_INFO_SCALE:
*val = st->vref_mv;
diff --git a/drivers/iio/dac/ad5592r-base.c b/drivers/iio/dac/ad5592r-base.c
index a424b7220b61..4434c1b2a322 100644
--- a/drivers/iio/dac/ad5592r-base.c
+++ b/drivers/iio/dac/ad5592r-base.c
@@ -522,7 +522,7 @@ static int ad5592r_alloc_channels(struct iio_dev *iio_dev)
if (!ret)
st->channel_modes[reg] = tmp;
- fwnode_property_read_u32(child, "adi,off-state", &tmp);
+ ret = fwnode_property_read_u32(child, "adi,off-state", &tmp);
if (!ret)
st->channel_offstate[reg] = tmp;
}
diff --git a/drivers/iio/dac/ltc2688.c b/drivers/iio/dac/ltc2688.c
index e41861d29767..2f9c384885f4 100644
--- a/drivers/iio/dac/ltc2688.c
+++ b/drivers/iio/dac/ltc2688.c
@@ -298,7 +298,7 @@ static int ltc2688_read_raw(struct iio_dev *indio_dev,
if (ret)
return ret;
- *val = 16;
+ *val2 = 16;
return IIO_VAL_FRACTIONAL_LOG2;
case IIO_CHAN_INFO_CALIBBIAS:
ret = regmap_read(st->regmap,
diff --git a/drivers/iio/dac/ti-dac5571.c b/drivers/iio/dac/ti-dac5571.c
index 4a3b8d875518..0b775f943db3 100644
--- a/drivers/iio/dac/ti-dac5571.c
+++ b/drivers/iio/dac/ti-dac5571.c
@@ -19,6 +19,7 @@
#include <linux/i2c.h>
#include <linux/module.h>
#include <linux/mod_devicetable.h>
+#include <linux/property.h>
#include <linux/regulator/consumer.h>
enum chip_id {
@@ -311,6 +312,7 @@ static int dac5571_probe(struct i2c_client *client,
const struct dac5571_spec *spec;
struct dac5571_data *data;
struct iio_dev *indio_dev;
+ enum chip_id chip_id;
int ret, i;
indio_dev = devm_iio_device_alloc(dev, sizeof(*data));
@@ -326,7 +328,13 @@ static int dac5571_probe(struct i2c_client *client,
indio_dev->modes = INDIO_DIRECT_MODE;
indio_dev->channels = dac5571_channels;
- spec = &dac5571_spec[id->driver_data];
+ if (dev_fwnode(dev))
+ chip_id = (uintptr_t)device_get_match_data(dev);
+ else
+ chip_id = id->driver_data;
+
+ spec = &dac5571_spec[chip_id];
+
indio_dev->num_channels = spec->num_channels;
data->spec = spec;
@@ -385,15 +393,15 @@ static int dac5571_remove(struct i2c_client *i2c)
}
static const struct of_device_id dac5571_of_id[] = {
- {.compatible = "ti,dac5571"},
- {.compatible = "ti,dac6571"},
- {.compatible = "ti,dac7571"},
- {.compatible = "ti,dac5574"},
- {.compatible = "ti,dac6574"},
- {.compatible = "ti,dac7574"},
- {.compatible = "ti,dac5573"},
- {.compatible = "ti,dac6573"},
- {.compatible = "ti,dac7573"},
+ {.compatible = "ti,dac5571", .data = (void *)single_8bit},
+ {.compatible = "ti,dac6571", .data = (void *)single_10bit},
+ {.compatible = "ti,dac7571", .data = (void *)single_12bit},
+ {.compatible = "ti,dac5574", .data = (void *)quad_8bit},
+ {.compatible = "ti,dac6574", .data = (void *)quad_10bit},
+ {.compatible = "ti,dac7574", .data = (void *)quad_12bit},
+ {.compatible = "ti,dac5573", .data = (void *)quad_8bit},
+ {.compatible = "ti,dac6573", .data = (void *)quad_10bit},
+ {.compatible = "ti,dac7573", .data = (void *)quad_12bit},
{}
};
MODULE_DEVICE_TABLE(of, dac5571_of_id);
diff --git a/drivers/iio/filter/Kconfig b/drivers/iio/filter/Kconfig
index 3ae35817ad82..a85b345ea14e 100644
--- a/drivers/iio/filter/Kconfig
+++ b/drivers/iio/filter/Kconfig
@@ -8,6 +8,7 @@ menu "Filters"
config ADMV8818
tristate "Analog Devices ADMV8818 High-Pass and Low-Pass Filter"
depends on SPI && COMMON_CLK && 64BIT
+ select REGMAP_SPI
help
Say yes here to build support for Analog Devices ADMV8818
2 GHz to 18 GHz, Digitally Tunable, High-Pass and Low-Pass Filter.
diff --git a/drivers/iio/imu/bmi160/bmi160_core.c b/drivers/iio/imu/bmi160/bmi160_core.c
index 824b5124a5f5..01336105792e 100644
--- a/drivers/iio/imu/bmi160/bmi160_core.c
+++ b/drivers/iio/imu/bmi160/bmi160_core.c
@@ -730,7 +730,7 @@ static int bmi160_chip_init(struct bmi160_data *data, bool use_spi)
ret = regmap_write(data->regmap, BMI160_REG_CMD, BMI160_CMD_SOFTRESET);
if (ret)
- return ret;
+ goto disable_regulator;
usleep_range(BMI160_SOFTRESET_USLEEP, BMI160_SOFTRESET_USLEEP + 1);
@@ -741,29 +741,37 @@ static int bmi160_chip_init(struct bmi160_data *data, bool use_spi)
if (use_spi) {
ret = regmap_read(data->regmap, BMI160_REG_DUMMY, &val);
if (ret)
- return ret;
+ goto disable_regulator;
}
ret = regmap_read(data->regmap, BMI160_REG_CHIP_ID, &val);
if (ret) {
dev_err(dev, "Error reading chip id\n");
- return ret;
+ goto disable_regulator;
}
if (val != BMI160_CHIP_ID_VAL) {
dev_err(dev, "Wrong chip id, got %x expected %x\n",
val, BMI160_CHIP_ID_VAL);
- return -ENODEV;
+ ret = -ENODEV;
+ goto disable_regulator;
}
ret = bmi160_set_mode(data, BMI160_ACCEL, true);
if (ret)
- return ret;
+ goto disable_regulator;
ret = bmi160_set_mode(data, BMI160_GYRO, true);
if (ret)
- return ret;
+ goto disable_accel;
return 0;
+
+disable_accel:
+ bmi160_set_mode(data, BMI160_ACCEL, false);
+
+disable_regulator:
+ regulator_bulk_disable(ARRAY_SIZE(data->supplies), data->supplies);
+ return ret;
}
static int bmi160_data_rdy_trigger_set_state(struct iio_trigger *trig,
diff --git a/drivers/iio/imu/inv_icm42600/inv_icm42600_i2c.c b/drivers/iio/imu/inv_icm42600/inv_icm42600_i2c.c
index 33d9afb1ba91..d4a692b838d0 100644
--- a/drivers/iio/imu/inv_icm42600/inv_icm42600_i2c.c
+++ b/drivers/iio/imu/inv_icm42600/inv_icm42600_i2c.c
@@ -18,12 +18,15 @@ static int inv_icm42600_i2c_bus_setup(struct inv_icm42600_state *st)
unsigned int mask, val;
int ret;
- /* setup interface registers */
- ret = regmap_update_bits(st->map, INV_ICM42600_REG_INTF_CONFIG6,
- INV_ICM42600_INTF_CONFIG6_MASK,
- INV_ICM42600_INTF_CONFIG6_I3C_EN);
- if (ret)
- return ret;
+ /*
+ * setup interface registers
+ * This register write to REG_INTF_CONFIG6 enables a spike filter that
+ * is impacting the line and can prevent the I2C ACK to be seen by the
+ * controller. So we don't test the return value.
+ */
+ regmap_update_bits(st->map, INV_ICM42600_REG_INTF_CONFIG6,
+ INV_ICM42600_INTF_CONFIG6_MASK,
+ INV_ICM42600_INTF_CONFIG6_I3C_EN);
ret = regmap_update_bits(st->map, INV_ICM42600_REG_INTF_CONFIG4,
INV_ICM42600_INTF_CONFIG4_I3C_BUS_ONLY, 0);
diff --git a/drivers/iio/magnetometer/ak8975.c b/drivers/iio/magnetometer/ak8975.c
index 088f748b683e..2432e697150c 100644
--- a/drivers/iio/magnetometer/ak8975.c
+++ b/drivers/iio/magnetometer/ak8975.c
@@ -416,6 +416,7 @@ static int ak8975_power_on(const struct ak8975_data *data)
if (ret) {
dev_warn(&data->client->dev,
"Failed to enable specified Vid supply\n");
+ regulator_disable(data->vdd);
return ret;
}
diff --git a/drivers/iio/proximity/sx9324.c b/drivers/iio/proximity/sx9324.c
index 0d9bbbb50cb4..70c37f664f6d 100644
--- a/drivers/iio/proximity/sx9324.c
+++ b/drivers/iio/proximity/sx9324.c
@@ -70,13 +70,17 @@
#define SX9324_REG_AFE_PH2 0x2a
#define SX9324_REG_AFE_PH3 0x2b
#define SX9324_REG_AFE_CTRL8 0x2c
-#define SX9324_REG_AFE_CTRL8_RESFILTN_4KOHM 0x02
+#define SX9324_REG_AFE_CTRL8_RESERVED 0x10
+#define SX9324_REG_AFE_CTRL8_RESFILTIN_4KOHM 0x02
#define SX9324_REG_AFE_CTRL9 0x2d
#define SX9324_REG_AFE_CTRL9_AGAIN_1 0x08
#define SX9324_REG_PROX_CTRL0 0x30
#define SX9324_REG_PROX_CTRL0_GAIN_MASK GENMASK(5, 3)
-#define SX9324_REG_PROX_CTRL0_GAIN_1 0x80
+#define SX9324_REG_PROX_CTRL0_GAIN_SHIFT 3
+#define SX9324_REG_PROX_CTRL0_GAIN_RSVD 0x0
+#define SX9324_REG_PROX_CTRL0_GAIN_1 0x1
+#define SX9324_REG_PROX_CTRL0_GAIN_8 0x4
#define SX9324_REG_PROX_CTRL0_RAWFILT_MASK GENMASK(2, 0)
#define SX9324_REG_PROX_CTRL0_RAWFILT_1P50 0x01
#define SX9324_REG_PROX_CTRL1 0x31
@@ -379,7 +383,14 @@ static int sx9324_read_gain(struct sx_common_data *data,
if (ret)
return ret;
- *val = 1 << FIELD_GET(SX9324_REG_PROX_CTRL0_GAIN_MASK, regval);
+ regval = FIELD_GET(SX9324_REG_PROX_CTRL0_GAIN_MASK, regval);
+ if (regval)
+ regval--;
+ else if (regval == SX9324_REG_PROX_CTRL0_GAIN_RSVD ||
+ regval > SX9324_REG_PROX_CTRL0_GAIN_8)
+ return -EINVAL;
+
+ *val = 1 << regval;
return IIO_VAL_INT;
}
@@ -725,8 +736,12 @@ static int sx9324_write_gain(struct sx_common_data *data,
unsigned int gain, reg;
int ret;
- gain = ilog2(val);
reg = SX9324_REG_PROX_CTRL0 + chan->channel / 2;
+
+ gain = ilog2(val) + 1;
+ if (val <= 0 || gain > SX9324_REG_PROX_CTRL0_GAIN_8)
+ return -EINVAL;
+
gain = FIELD_PREP(SX9324_REG_PROX_CTRL0_GAIN_MASK, gain);
mutex_lock(&data->mutex);
@@ -781,12 +796,15 @@ static const struct sx_common_reg_default sx9324_default_regs[] = {
{ SX9324_REG_AFE_PH2, 0x1a },
{ SX9324_REG_AFE_PH3, 0x16 },
- { SX9324_REG_AFE_CTRL8, SX9324_REG_AFE_CTRL8_RESFILTN_4KOHM },
+ { SX9324_REG_AFE_CTRL8, SX9324_REG_AFE_CTRL8_RESERVED |
+ SX9324_REG_AFE_CTRL8_RESFILTIN_4KOHM },
{ SX9324_REG_AFE_CTRL9, SX9324_REG_AFE_CTRL9_AGAIN_1 },
- { SX9324_REG_PROX_CTRL0, SX9324_REG_PROX_CTRL0_GAIN_1 |
+ { SX9324_REG_PROX_CTRL0,
+ SX9324_REG_PROX_CTRL0_GAIN_1 << SX9324_REG_PROX_CTRL0_GAIN_SHIFT |
SX9324_REG_PROX_CTRL0_RAWFILT_1P50 },
- { SX9324_REG_PROX_CTRL1, SX9324_REG_PROX_CTRL0_GAIN_1 |
+ { SX9324_REG_PROX_CTRL1,
+ SX9324_REG_PROX_CTRL0_GAIN_1 << SX9324_REG_PROX_CTRL0_GAIN_SHIFT |
SX9324_REG_PROX_CTRL0_RAWFILT_1P50 },
{ SX9324_REG_PROX_CTRL2, SX9324_REG_PROX_CTRL2_AVGNEG_THRESH_16K },
{ SX9324_REG_PROX_CTRL3, SX9324_REG_PROX_CTRL3_AVGDEB_2SAMPLES |
diff --git a/drivers/iio/proximity/sx_common.c b/drivers/iio/proximity/sx_common.c
index a7c07316a0a9..8ad814d96b7e 100644
--- a/drivers/iio/proximity/sx_common.c
+++ b/drivers/iio/proximity/sx_common.c
@@ -521,6 +521,7 @@ int sx_common_probe(struct i2c_client *client,
return dev_err_probe(dev, ret, "error reading WHOAMI\n");
ACPI_COMPANION_SET(&indio_dev->dev, ACPI_COMPANION(dev));
+ indio_dev->dev.of_node = client->dev.of_node;
indio_dev->modes = INDIO_DIRECT_MODE;
indio_dev->channels = data->chip_info->iio_channels;
diff --git a/drivers/infiniband/hw/irdma/cm.c b/drivers/infiniband/hw/irdma/cm.c
index dedb3b7edd8d..638bf4a1ed94 100644
--- a/drivers/infiniband/hw/irdma/cm.c
+++ b/drivers/infiniband/hw/irdma/cm.c
@@ -2308,10 +2308,8 @@ err:
return NULL;
}
-static void irdma_cm_node_free_cb(struct rcu_head *rcu_head)
+static void irdma_destroy_connection(struct irdma_cm_node *cm_node)
{
- struct irdma_cm_node *cm_node =
- container_of(rcu_head, struct irdma_cm_node, rcu_head);
struct irdma_cm_core *cm_core = cm_node->cm_core;
struct irdma_qp *iwqp;
struct irdma_cm_info nfo;
@@ -2359,7 +2357,6 @@ static void irdma_cm_node_free_cb(struct rcu_head *rcu_head)
}
cm_core->cm_free_ah(cm_node);
- kfree(cm_node);
}
/**
@@ -2387,8 +2384,9 @@ void irdma_rem_ref_cm_node(struct irdma_cm_node *cm_node)
spin_unlock_irqrestore(&cm_core->ht_lock, flags);
- /* wait for all list walkers to exit their grace period */
- call_rcu(&cm_node->rcu_head, irdma_cm_node_free_cb);
+ irdma_destroy_connection(cm_node);
+
+ kfree_rcu(cm_node, rcu_head);
}
/**
@@ -3246,15 +3244,10 @@ int irdma_setup_cm_core(struct irdma_device *iwdev, u8 rdma_ver)
*/
void irdma_cleanup_cm_core(struct irdma_cm_core *cm_core)
{
- unsigned long flags;
-
if (!cm_core)
return;
- spin_lock_irqsave(&cm_core->ht_lock, flags);
- if (timer_pending(&cm_core->tcp_timer))
- del_timer_sync(&cm_core->tcp_timer);
- spin_unlock_irqrestore(&cm_core->ht_lock, flags);
+ del_timer_sync(&cm_core->tcp_timer);
destroy_workqueue(cm_core->event_wq);
cm_core->dev->ws_reset(&cm_core->iwdev->vsi);
@@ -3467,12 +3460,6 @@ static void irdma_cm_disconn_true(struct irdma_qp *iwqp)
}
cm_id = iwqp->cm_id;
- /* make sure we havent already closed this connection */
- if (!cm_id) {
- spin_unlock_irqrestore(&iwqp->lock, flags);
- return;
- }
-
original_hw_tcp_state = iwqp->hw_tcp_state;
original_ibqp_state = iwqp->ibqp_state;
last_ae = iwqp->last_aeq;
@@ -3494,11 +3481,11 @@ static void irdma_cm_disconn_true(struct irdma_qp *iwqp)
disconn_status = -ECONNRESET;
}
- if ((original_hw_tcp_state == IRDMA_TCP_STATE_CLOSED ||
- original_hw_tcp_state == IRDMA_TCP_STATE_TIME_WAIT ||
- last_ae == IRDMA_AE_RDMAP_ROE_BAD_LLP_CLOSE ||
- last_ae == IRDMA_AE_BAD_CLOSE ||
- last_ae == IRDMA_AE_LLP_CONNECTION_RESET || iwdev->rf->reset)) {
+ if (original_hw_tcp_state == IRDMA_TCP_STATE_CLOSED ||
+ original_hw_tcp_state == IRDMA_TCP_STATE_TIME_WAIT ||
+ last_ae == IRDMA_AE_RDMAP_ROE_BAD_LLP_CLOSE ||
+ last_ae == IRDMA_AE_BAD_CLOSE ||
+ last_ae == IRDMA_AE_LLP_CONNECTION_RESET || iwdev->rf->reset || !cm_id) {
issue_close = 1;
iwqp->cm_id = NULL;
qp->term_flags = 0;
diff --git a/drivers/infiniband/hw/irdma/utils.c b/drivers/infiniband/hw/irdma/utils.c
index 346c2c5dabdf..81760415d66c 100644
--- a/drivers/infiniband/hw/irdma/utils.c
+++ b/drivers/infiniband/hw/irdma/utils.c
@@ -258,18 +258,16 @@ int irdma_net_event(struct notifier_block *notifier, unsigned long event,
u32 local_ipaddr[4] = {};
bool ipv4 = true;
- real_dev = rdma_vlan_dev_real_dev(netdev);
- if (!real_dev)
- real_dev = netdev;
-
- ibdev = ib_device_get_by_netdev(real_dev, RDMA_DRIVER_IRDMA);
- if (!ibdev)
- return NOTIFY_DONE;
-
- iwdev = to_iwdev(ibdev);
-
switch (event) {
case NETEVENT_NEIGH_UPDATE:
+ real_dev = rdma_vlan_dev_real_dev(netdev);
+ if (!real_dev)
+ real_dev = netdev;
+ ibdev = ib_device_get_by_netdev(real_dev, RDMA_DRIVER_IRDMA);
+ if (!ibdev)
+ return NOTIFY_DONE;
+
+ iwdev = to_iwdev(ibdev);
p = (__be32 *)neigh->primary_key;
if (neigh->tbl->family == AF_INET6) {
ipv4 = false;
@@ -290,13 +288,12 @@ int irdma_net_event(struct notifier_block *notifier, unsigned long event,
irdma_manage_arp_cache(iwdev->rf, neigh->ha,
local_ipaddr, ipv4,
IRDMA_ARP_DELETE);
+ ib_device_put(ibdev);
break;
default:
break;
}
- ib_device_put(ibdev);
-
return NOTIFY_DONE;
}
diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c
index 46f475394af5..52f3e88f8569 100644
--- a/drivers/infiniband/hw/irdma/verbs.c
+++ b/drivers/infiniband/hw/irdma/verbs.c
@@ -1618,13 +1618,13 @@ int irdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask,
if (issue_modify_qp && iwqp->ibqp_state > IB_QPS_RTS) {
if (dont_wait) {
- if (iwqp->cm_id && iwqp->hw_tcp_state) {
+ if (iwqp->hw_tcp_state) {
spin_lock_irqsave(&iwqp->lock, flags);
iwqp->hw_tcp_state = IRDMA_TCP_STATE_CLOSED;
iwqp->last_aeq = IRDMA_AE_RESET_SENT;
spin_unlock_irqrestore(&iwqp->lock, flags);
- irdma_cm_disconn(iwqp);
}
+ irdma_cm_disconn(iwqp);
} else {
int close_timer_started;
diff --git a/drivers/infiniband/sw/rxe/rxe_mcast.c b/drivers/infiniband/sw/rxe/rxe_mcast.c
index ae8f11cb704a..873a9b10307c 100644
--- a/drivers/infiniband/sw/rxe/rxe_mcast.c
+++ b/drivers/infiniband/sw/rxe/rxe_mcast.c
@@ -38,13 +38,13 @@ static int rxe_mcast_add(struct rxe_dev *rxe, union ib_gid *mgid)
}
/**
- * rxe_mcast_delete - delete multicast address from rxe device
+ * rxe_mcast_del - delete multicast address from rxe device
* @rxe: rxe device object
* @mgid: multicast address as a gid
*
* Returns 0 on success else an error
*/
-static int rxe_mcast_delete(struct rxe_dev *rxe, union ib_gid *mgid)
+static int rxe_mcast_del(struct rxe_dev *rxe, union ib_gid *mgid)
{
unsigned char ll_addr[ETH_ALEN];
@@ -143,11 +143,10 @@ static struct rxe_mcg *__rxe_lookup_mcg(struct rxe_dev *rxe,
struct rxe_mcg *rxe_lookup_mcg(struct rxe_dev *rxe, union ib_gid *mgid)
{
struct rxe_mcg *mcg;
- unsigned long flags;
- spin_lock_irqsave(&rxe->mcg_lock, flags);
+ spin_lock_bh(&rxe->mcg_lock);
mcg = __rxe_lookup_mcg(rxe, mgid);
- spin_unlock_irqrestore(&rxe->mcg_lock, flags);
+ spin_unlock_bh(&rxe->mcg_lock);
return mcg;
}
@@ -159,17 +158,10 @@ struct rxe_mcg *rxe_lookup_mcg(struct rxe_dev *rxe, union ib_gid *mgid)
* @mcg: new mcg object
*
* Context: caller should hold rxe->mcg lock
- * Returns: 0 on success else an error
*/
-static int __rxe_init_mcg(struct rxe_dev *rxe, union ib_gid *mgid,
- struct rxe_mcg *mcg)
+static void __rxe_init_mcg(struct rxe_dev *rxe, union ib_gid *mgid,
+ struct rxe_mcg *mcg)
{
- int err;
-
- err = rxe_mcast_add(rxe, mgid);
- if (unlikely(err))
- return err;
-
kref_init(&mcg->ref_cnt);
memcpy(&mcg->mgid, mgid, sizeof(mcg->mgid));
INIT_LIST_HEAD(&mcg->qp_list);
@@ -184,8 +176,6 @@ static int __rxe_init_mcg(struct rxe_dev *rxe, union ib_gid *mgid,
*/
kref_get(&mcg->ref_cnt);
__rxe_insert_mcg(mcg);
-
- return 0;
}
/**
@@ -198,7 +188,6 @@ static int __rxe_init_mcg(struct rxe_dev *rxe, union ib_gid *mgid,
static struct rxe_mcg *rxe_get_mcg(struct rxe_dev *rxe, union ib_gid *mgid)
{
struct rxe_mcg *mcg, *tmp;
- unsigned long flags;
int err;
if (rxe->attr.max_mcast_grp == 0)
@@ -209,36 +198,38 @@ static struct rxe_mcg *rxe_get_mcg(struct rxe_dev *rxe, union ib_gid *mgid)
if (mcg)
return mcg;
+ /* check to see if we have reached limit */
+ if (atomic_inc_return(&rxe->mcg_num) > rxe->attr.max_mcast_grp) {
+ err = -ENOMEM;
+ goto err_dec;
+ }
+
/* speculative alloc of new mcg */
mcg = kzalloc(sizeof(*mcg), GFP_KERNEL);
if (!mcg)
return ERR_PTR(-ENOMEM);
- spin_lock_irqsave(&rxe->mcg_lock, flags);
+ spin_lock_bh(&rxe->mcg_lock);
/* re-check to see if someone else just added it */
tmp = __rxe_lookup_mcg(rxe, mgid);
if (tmp) {
+ spin_unlock_bh(&rxe->mcg_lock);
+ atomic_dec(&rxe->mcg_num);
kfree(mcg);
- mcg = tmp;
- goto out;
+ return tmp;
}
- if (atomic_inc_return(&rxe->mcg_num) > rxe->attr.max_mcast_grp) {
- err = -ENOMEM;
- goto err_dec;
- }
+ __rxe_init_mcg(rxe, mgid, mcg);
+ spin_unlock_bh(&rxe->mcg_lock);
- err = __rxe_init_mcg(rxe, mgid, mcg);
- if (err)
- goto err_dec;
-out:
- spin_unlock_irqrestore(&rxe->mcg_lock, flags);
- return mcg;
+ /* add mcast address outside of lock */
+ err = rxe_mcast_add(rxe, mgid);
+ if (!err)
+ return mcg;
+ kfree(mcg);
err_dec:
atomic_dec(&rxe->mcg_num);
- spin_unlock_irqrestore(&rxe->mcg_lock, flags);
- kfree(mcg);
return ERR_PTR(err);
}
@@ -268,7 +259,6 @@ static void __rxe_destroy_mcg(struct rxe_mcg *mcg)
__rxe_remove_mcg(mcg);
kref_put(&mcg->ref_cnt, rxe_cleanup_mcg);
- rxe_mcast_delete(mcg->rxe, &mcg->mgid);
atomic_dec(&rxe->mcg_num);
}
@@ -280,11 +270,12 @@ static void __rxe_destroy_mcg(struct rxe_mcg *mcg)
*/
static void rxe_destroy_mcg(struct rxe_mcg *mcg)
{
- unsigned long flags;
+ /* delete mcast address outside of lock */
+ rxe_mcast_del(mcg->rxe, &mcg->mgid);
- spin_lock_irqsave(&mcg->rxe->mcg_lock, flags);
+ spin_lock_bh(&mcg->rxe->mcg_lock);
__rxe_destroy_mcg(mcg);
- spin_unlock_irqrestore(&mcg->rxe->mcg_lock, flags);
+ spin_unlock_bh(&mcg->rxe->mcg_lock);
}
/**
@@ -339,25 +330,24 @@ static int rxe_attach_mcg(struct rxe_mcg *mcg, struct rxe_qp *qp)
{
struct rxe_dev *rxe = mcg->rxe;
struct rxe_mca *mca, *tmp;
- unsigned long flags;
int err;
/* check to see if the qp is already a member of the group */
- spin_lock_irqsave(&rxe->mcg_lock, flags);
+ spin_lock_bh(&rxe->mcg_lock);
list_for_each_entry(mca, &mcg->qp_list, qp_list) {
if (mca->qp == qp) {
- spin_unlock_irqrestore(&rxe->mcg_lock, flags);
+ spin_unlock_bh(&rxe->mcg_lock);
return 0;
}
}
- spin_unlock_irqrestore(&rxe->mcg_lock, flags);
+ spin_unlock_bh(&rxe->mcg_lock);
/* speculative alloc new mca without using GFP_ATOMIC */
mca = kzalloc(sizeof(*mca), GFP_KERNEL);
if (!mca)
return -ENOMEM;
- spin_lock_irqsave(&rxe->mcg_lock, flags);
+ spin_lock_bh(&rxe->mcg_lock);
/* re-check to see if someone else just attached qp */
list_for_each_entry(tmp, &mcg->qp_list, qp_list) {
if (tmp->qp == qp) {
@@ -371,7 +361,7 @@ static int rxe_attach_mcg(struct rxe_mcg *mcg, struct rxe_qp *qp)
if (err)
kfree(mca);
out:
- spin_unlock_irqrestore(&rxe->mcg_lock, flags);
+ spin_unlock_bh(&rxe->mcg_lock);
return err;
}
@@ -405,9 +395,8 @@ static int rxe_detach_mcg(struct rxe_mcg *mcg, struct rxe_qp *qp)
{
struct rxe_dev *rxe = mcg->rxe;
struct rxe_mca *mca, *tmp;
- unsigned long flags;
- spin_lock_irqsave(&rxe->mcg_lock, flags);
+ spin_lock_bh(&rxe->mcg_lock);
list_for_each_entry_safe(mca, tmp, &mcg->qp_list, qp_list) {
if (mca->qp == qp) {
__rxe_cleanup_mca(mca, mcg);
@@ -421,13 +410,13 @@ static int rxe_detach_mcg(struct rxe_mcg *mcg, struct rxe_qp *qp)
if (atomic_read(&mcg->qp_num) <= 0)
__rxe_destroy_mcg(mcg);
- spin_unlock_irqrestore(&rxe->mcg_lock, flags);
+ spin_unlock_bh(&rxe->mcg_lock);
return 0;
}
}
/* we didn't find the qp on the list */
- spin_unlock_irqrestore(&rxe->mcg_lock, flags);
+ spin_unlock_bh(&rxe->mcg_lock);
return -EINVAL;
}
diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c
index 16fc7ea1298d..9cd0eaff98de 100644
--- a/drivers/infiniband/sw/rxe/rxe_resp.c
+++ b/drivers/infiniband/sw/rxe/rxe_resp.c
@@ -680,6 +680,11 @@ static struct resp_res *rxe_prepare_read_res(struct rxe_qp *qp,
* It is assumed that the access permissions if originally good
* are OK and the mappings to be unchanged.
*
+ * TODO: If someone reregisters an MR to change its size or
+ * access permissions during the processing of an RDMA read
+ * we should kill the responder resource and complete the
+ * operation with an error.
+ *
* Return: mr on success else NULL
*/
static struct rxe_mr *rxe_recheck_mr(struct rxe_qp *qp, u32 rkey)
@@ -690,23 +695,27 @@ static struct rxe_mr *rxe_recheck_mr(struct rxe_qp *qp, u32 rkey)
if (rkey_is_mw(rkey)) {
mw = rxe_pool_get_index(&rxe->mw_pool, rkey >> 8);
- if (!mw || mw->rkey != rkey)
+ if (!mw)
return NULL;
- if (mw->state != RXE_MW_STATE_VALID) {
+ mr = mw->mr;
+ if (mw->rkey != rkey || mw->state != RXE_MW_STATE_VALID ||
+ !mr || mr->state != RXE_MR_STATE_VALID) {
rxe_put(mw);
return NULL;
}
- mr = mw->mr;
+ rxe_get(mr);
rxe_put(mw);
- } else {
- mr = rxe_pool_get_index(&rxe->mr_pool, rkey >> 8);
- if (!mr || mr->rkey != rkey)
- return NULL;
+
+ return mr;
}
- if (mr->state != RXE_MR_STATE_VALID) {
+ mr = rxe_pool_get_index(&rxe->mr_pool, rkey >> 8);
+ if (!mr)
+ return NULL;
+
+ if (mr->rkey != rkey || mr->state != RXE_MR_STATE_VALID) {
rxe_put(mr);
return NULL;
}
@@ -736,8 +745,14 @@ static enum resp_states read_reply(struct rxe_qp *qp,
}
if (res->state == rdatm_res_state_new) {
- mr = qp->resp.mr;
- qp->resp.mr = NULL;
+ if (!res->replay) {
+ mr = qp->resp.mr;
+ qp->resp.mr = NULL;
+ } else {
+ mr = rxe_recheck_mr(qp, res->read.rkey);
+ if (!mr)
+ return RESPST_ERR_RKEY_VIOLATION;
+ }
if (res->read.resid <= mtu)
opcode = IB_OPCODE_RC_RDMA_READ_RESPONSE_ONLY;
diff --git a/drivers/infiniband/sw/siw/siw_cm.c b/drivers/infiniband/sw/siw/siw_cm.c
index 7acdd3c3a599..17f34d584cd9 100644
--- a/drivers/infiniband/sw/siw/siw_cm.c
+++ b/drivers/infiniband/sw/siw/siw_cm.c
@@ -968,14 +968,15 @@ static void siw_accept_newconn(struct siw_cep *cep)
siw_cep_set_inuse(new_cep);
rv = siw_proc_mpareq(new_cep);
- siw_cep_set_free(new_cep);
-
if (rv != -EAGAIN) {
siw_cep_put(cep);
new_cep->listen_cep = NULL;
- if (rv)
+ if (rv) {
+ siw_cep_set_free(new_cep);
goto error;
+ }
}
+ siw_cep_set_free(new_cep);
}
return;
diff --git a/drivers/input/keyboard/cypress-sf.c b/drivers/input/keyboard/cypress-sf.c
index c28996028e80..9a23eed6a4f4 100644
--- a/drivers/input/keyboard/cypress-sf.c
+++ b/drivers/input/keyboard/cypress-sf.c
@@ -61,6 +61,14 @@ static irqreturn_t cypress_sf_irq_handler(int irq, void *devid)
return IRQ_HANDLED;
}
+static void cypress_sf_disable_regulators(void *arg)
+{
+ struct cypress_sf_data *touchkey = arg;
+
+ regulator_bulk_disable(ARRAY_SIZE(touchkey->regulators),
+ touchkey->regulators);
+}
+
static int cypress_sf_probe(struct i2c_client *client)
{
struct cypress_sf_data *touchkey;
@@ -121,6 +129,12 @@ static int cypress_sf_probe(struct i2c_client *client)
return error;
}
+ error = devm_add_action_or_reset(&client->dev,
+ cypress_sf_disable_regulators,
+ touchkey);
+ if (error)
+ return error;
+
touchkey->input_dev = devm_input_allocate_device(&client->dev);
if (!touchkey->input_dev) {
dev_err(&client->dev, "Failed to allocate input device\n");
diff --git a/drivers/input/keyboard/omap4-keypad.c b/drivers/input/keyboard/omap4-keypad.c
index 43375b38ee59..8a7ce41b8c56 100644
--- a/drivers/input/keyboard/omap4-keypad.c
+++ b/drivers/input/keyboard/omap4-keypad.c
@@ -393,7 +393,7 @@ static int omap4_keypad_probe(struct platform_device *pdev)
* revision register.
*/
error = pm_runtime_get_sync(dev);
- if (error) {
+ if (error < 0) {
dev_err(dev, "pm_runtime_get_sync() failed\n");
pm_runtime_put_noidle(dev);
return error;
diff --git a/drivers/input/touchscreen/ili210x.c b/drivers/input/touchscreen/ili210x.c
index 2bd407d86bae..e9bd36adbe47 100644
--- a/drivers/input/touchscreen/ili210x.c
+++ b/drivers/input/touchscreen/ili210x.c
@@ -756,15 +756,12 @@ static int ili251x_firmware_reset(struct i2c_client *client)
return ili251x_firmware_busy(client);
}
-static void ili251x_hardware_reset(struct device *dev)
+static void ili210x_hardware_reset(struct gpio_desc *reset_gpio)
{
- struct i2c_client *client = to_i2c_client(dev);
- struct ili210x *priv = i2c_get_clientdata(client);
-
/* Reset the controller */
- gpiod_set_value_cansleep(priv->reset_gpio, 1);
- usleep_range(10000, 15000);
- gpiod_set_value_cansleep(priv->reset_gpio, 0);
+ gpiod_set_value_cansleep(reset_gpio, 1);
+ usleep_range(12000, 15000);
+ gpiod_set_value_cansleep(reset_gpio, 0);
msleep(300);
}
@@ -773,6 +770,7 @@ static ssize_t ili210x_firmware_update_store(struct device *dev,
const char *buf, size_t count)
{
struct i2c_client *client = to_i2c_client(dev);
+ struct ili210x *priv = i2c_get_clientdata(client);
const char *fwname = ILI251X_FW_FILENAME;
const struct firmware *fw;
u16 ac_end, df_end;
@@ -803,7 +801,7 @@ static ssize_t ili210x_firmware_update_store(struct device *dev,
dev_dbg(dev, "Firmware update started, firmware=%s\n", fwname);
- ili251x_hardware_reset(dev);
+ ili210x_hardware_reset(priv->reset_gpio);
error = ili251x_firmware_reset(client);
if (error)
@@ -858,7 +856,7 @@ static ssize_t ili210x_firmware_update_store(struct device *dev,
error = count;
exit:
- ili251x_hardware_reset(dev);
+ ili210x_hardware_reset(priv->reset_gpio);
dev_dbg(dev, "Firmware update ended, error=%i\n", error);
enable_irq(client->irq);
kfree(fwbuf);
@@ -951,9 +949,7 @@ static int ili210x_i2c_probe(struct i2c_client *client,
if (error)
return error;
- usleep_range(50, 100);
- gpiod_set_value_cansleep(reset_gpio, 0);
- msleep(100);
+ ili210x_hardware_reset(reset_gpio);
}
priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
diff --git a/drivers/interconnect/core.c b/drivers/interconnect/core.c
index 9050ca1f4285..808f6e7a8048 100644
--- a/drivers/interconnect/core.c
+++ b/drivers/interconnect/core.c
@@ -1087,9 +1087,15 @@ static int of_count_icc_providers(struct device_node *np)
{
struct device_node *child;
int count = 0;
+ const struct of_device_id __maybe_unused ignore_list[] = {
+ { .compatible = "qcom,sc7180-ipa-virt" },
+ { .compatible = "qcom,sdx55-ipa-virt" },
+ {}
+ };
for_each_available_child_of_node(np, child) {
- if (of_property_read_bool(child, "#interconnect-cells"))
+ if (of_property_read_bool(child, "#interconnect-cells") &&
+ likely(!of_match_node(ignore_list, child)))
count++;
count += of_count_icc_providers(child);
}
diff --git a/drivers/interconnect/qcom/sc7180.c b/drivers/interconnect/qcom/sc7180.c
index 12d59c36df53..5f7c0f85fa8e 100644
--- a/drivers/interconnect/qcom/sc7180.c
+++ b/drivers/interconnect/qcom/sc7180.c
@@ -47,7 +47,6 @@ DEFINE_QNODE(qnm_mnoc_sf, SC7180_MASTER_MNOC_SF_MEM_NOC, 1, 32, SC7180_SLAVE_GEM
DEFINE_QNODE(qnm_snoc_gc, SC7180_MASTER_SNOC_GC_MEM_NOC, 1, 8, SC7180_SLAVE_LLCC);
DEFINE_QNODE(qnm_snoc_sf, SC7180_MASTER_SNOC_SF_MEM_NOC, 1, 16, SC7180_SLAVE_LLCC);
DEFINE_QNODE(qxm_gpu, SC7180_MASTER_GFX3D, 2, 32, SC7180_SLAVE_GEM_NOC_SNOC, SC7180_SLAVE_LLCC);
-DEFINE_QNODE(ipa_core_master, SC7180_MASTER_IPA_CORE, 1, 8, SC7180_SLAVE_IPA_CORE);
DEFINE_QNODE(llcc_mc, SC7180_MASTER_LLCC, 2, 4, SC7180_SLAVE_EBI1);
DEFINE_QNODE(qhm_mnoc_cfg, SC7180_MASTER_CNOC_MNOC_CFG, 1, 4, SC7180_SLAVE_SERVICE_MNOC);
DEFINE_QNODE(qxm_camnoc_hf0, SC7180_MASTER_CAMNOC_HF0, 2, 32, SC7180_SLAVE_MNOC_HF_MEM_NOC);
@@ -129,7 +128,6 @@ DEFINE_QNODE(qhs_mdsp_ms_mpu_cfg, SC7180_SLAVE_MSS_PROC_MS_MPU_CFG, 1, 4);
DEFINE_QNODE(qns_gem_noc_snoc, SC7180_SLAVE_GEM_NOC_SNOC, 1, 8, SC7180_MASTER_GEM_NOC_SNOC);
DEFINE_QNODE(qns_llcc, SC7180_SLAVE_LLCC, 1, 16, SC7180_MASTER_LLCC);
DEFINE_QNODE(srvc_gemnoc, SC7180_SLAVE_SERVICE_GEM_NOC, 1, 4);
-DEFINE_QNODE(ipa_core_slave, SC7180_SLAVE_IPA_CORE, 1, 8);
DEFINE_QNODE(ebi, SC7180_SLAVE_EBI1, 2, 4);
DEFINE_QNODE(qns_mem_noc_hf, SC7180_SLAVE_MNOC_HF_MEM_NOC, 1, 32, SC7180_MASTER_MNOC_HF_MEM_NOC);
DEFINE_QNODE(qns_mem_noc_sf, SC7180_SLAVE_MNOC_SF_MEM_NOC, 1, 32, SC7180_MASTER_MNOC_SF_MEM_NOC);
@@ -160,7 +158,6 @@ DEFINE_QBCM(bcm_mc0, "MC0", true, &ebi);
DEFINE_QBCM(bcm_sh0, "SH0", true, &qns_llcc);
DEFINE_QBCM(bcm_mm0, "MM0", false, &qns_mem_noc_hf);
DEFINE_QBCM(bcm_ce0, "CE0", false, &qxm_crypto);
-DEFINE_QBCM(bcm_ip0, "IP0", false, &ipa_core_slave);
DEFINE_QBCM(bcm_cn0, "CN0", true, &qnm_snoc, &xm_qdss_dap, &qhs_a1_noc_cfg, &qhs_a2_noc_cfg, &qhs_ahb2phy0, &qhs_aop, &qhs_aoss, &qhs_boot_rom, &qhs_camera_cfg, &qhs_camera_nrt_throttle_cfg, &qhs_camera_rt_throttle_cfg, &qhs_clk_ctl, &qhs_cpr_cx, &qhs_cpr_mx, &qhs_crypto0_cfg, &qhs_dcc_cfg, &qhs_ddrss_cfg, &qhs_display_cfg, &qhs_display_rt_throttle_cfg, &qhs_display_throttle_cfg, &qhs_glm, &qhs_gpuss_cfg, &qhs_imem_cfg, &qhs_ipa, &qhs_mnoc_cfg, &qhs_mss_cfg, &qhs_npu_cfg, &qhs_npu_dma_throttle_cfg, &qhs_npu_dsp_throttle_cfg, &qhs_pimem_cfg, &qhs_prng, &qhs_qdss_cfg, &qhs_qm_cfg, &qhs_qm_mpu_cfg, &qhs_qup0, &qhs_qup1, &qhs_security, &qhs_snoc_cfg, &qhs_tcsr, &qhs_tlmm_1, &qhs_tlmm_2, &qhs_tlmm_3, &qhs_ufs_mem_cfg, &qhs_usb3, &qhs_venus_cfg, &qhs_venus_throttle_cfg, &qhs_vsense_ctrl_cfg, &srvc_cnoc);
DEFINE_QBCM(bcm_mm1, "MM1", false, &qxm_camnoc_hf0_uncomp, &qxm_camnoc_hf1_uncomp, &qxm_camnoc_sf_uncomp, &qhm_mnoc_cfg, &qxm_mdp0, &qxm_rot, &qxm_venus0, &qxm_venus_arm9);
DEFINE_QBCM(bcm_sh2, "SH2", false, &acm_sys_tcu);
@@ -372,22 +369,6 @@ static struct qcom_icc_desc sc7180_gem_noc = {
.num_bcms = ARRAY_SIZE(gem_noc_bcms),
};
-static struct qcom_icc_bcm *ipa_virt_bcms[] = {
- &bcm_ip0,
-};
-
-static struct qcom_icc_node *ipa_virt_nodes[] = {
- [MASTER_IPA_CORE] = &ipa_core_master,
- [SLAVE_IPA_CORE] = &ipa_core_slave,
-};
-
-static struct qcom_icc_desc sc7180_ipa_virt = {
- .nodes = ipa_virt_nodes,
- .num_nodes = ARRAY_SIZE(ipa_virt_nodes),
- .bcms = ipa_virt_bcms,
- .num_bcms = ARRAY_SIZE(ipa_virt_bcms),
-};
-
static struct qcom_icc_bcm *mc_virt_bcms[] = {
&bcm_acv,
&bcm_mc0,
@@ -519,8 +500,6 @@ static const struct of_device_id qnoc_of_match[] = {
.data = &sc7180_dc_noc},
{ .compatible = "qcom,sc7180-gem-noc",
.data = &sc7180_gem_noc},
- { .compatible = "qcom,sc7180-ipa-virt",
- .data = &sc7180_ipa_virt},
{ .compatible = "qcom,sc7180-mc-virt",
.data = &sc7180_mc_virt},
{ .compatible = "qcom,sc7180-mmss-noc",
diff --git a/drivers/interconnect/qcom/sdx55.c b/drivers/interconnect/qcom/sdx55.c
index 03d604f84cc5..e3ac25a997b7 100644
--- a/drivers/interconnect/qcom/sdx55.c
+++ b/drivers/interconnect/qcom/sdx55.c
@@ -18,7 +18,6 @@
#include "icc-rpmh.h"
#include "sdx55.h"
-DEFINE_QNODE(ipa_core_master, SDX55_MASTER_IPA_CORE, 1, 8, SDX55_SLAVE_IPA_CORE);
DEFINE_QNODE(llcc_mc, SDX55_MASTER_LLCC, 4, 4, SDX55_SLAVE_EBI_CH0);
DEFINE_QNODE(acm_tcu, SDX55_MASTER_TCU_0, 1, 8, SDX55_SLAVE_LLCC, SDX55_SLAVE_MEM_NOC_SNOC, SDX55_SLAVE_MEM_NOC_PCIE_SNOC);
DEFINE_QNODE(qnm_snoc_gc, SDX55_MASTER_SNOC_GC_MEM_NOC, 1, 8, SDX55_SLAVE_LLCC);
@@ -40,7 +39,6 @@ DEFINE_QNODE(xm_pcie, SDX55_MASTER_PCIE, 1, 8, SDX55_SLAVE_ANOC_SNOC);
DEFINE_QNODE(xm_qdss_etr, SDX55_MASTER_QDSS_ETR, 1, 8, SDX55_SLAVE_SNOC_CFG, SDX55_SLAVE_EMAC_CFG, SDX55_SLAVE_USB3, SDX55_SLAVE_AOSS, SDX55_SLAVE_SPMI_FETCHER, SDX55_SLAVE_QDSS_CFG, SDX55_SLAVE_PDM, SDX55_SLAVE_SNOC_MEM_NOC_GC, SDX55_SLAVE_TCSR, SDX55_SLAVE_CNOC_DDRSS, SDX55_SLAVE_SPMI_VGI_COEX, SDX55_SLAVE_QPIC, SDX55_SLAVE_OCIMEM, SDX55_SLAVE_IPA_CFG, SDX55_SLAVE_USB3_PHY_CFG, SDX55_SLAVE_AOP, SDX55_SLAVE_BLSP_1, SDX55_SLAVE_SDCC_1, SDX55_SLAVE_CNOC_MSS, SDX55_SLAVE_PCIE_PARF, SDX55_SLAVE_ECC_CFG, SDX55_SLAVE_AUDIO, SDX55_SLAVE_AOSS, SDX55_SLAVE_PRNG, SDX55_SLAVE_CRYPTO_0_CFG, SDX55_SLAVE_TCU, SDX55_SLAVE_CLK_CTL, SDX55_SLAVE_IMEM_CFG);
DEFINE_QNODE(xm_sdc1, SDX55_MASTER_SDCC_1, 1, 8, SDX55_SLAVE_AOSS, SDX55_SLAVE_IPA_CFG, SDX55_SLAVE_ANOC_SNOC, SDX55_SLAVE_AOP, SDX55_SLAVE_AUDIO);
DEFINE_QNODE(xm_usb3, SDX55_MASTER_USB3, 1, 8, SDX55_SLAVE_ANOC_SNOC);
-DEFINE_QNODE(ipa_core_slave, SDX55_SLAVE_IPA_CORE, 1, 8);
DEFINE_QNODE(ebi, SDX55_SLAVE_EBI_CH0, 1, 4);
DEFINE_QNODE(qns_llcc, SDX55_SLAVE_LLCC, 1, 16, SDX55_SLAVE_EBI_CH0);
DEFINE_QNODE(qns_memnoc_snoc, SDX55_SLAVE_MEM_NOC_SNOC, 1, 8, SDX55_MASTER_MEM_NOC_SNOC);
@@ -82,7 +80,6 @@ DEFINE_QNODE(xs_sys_tcu_cfg, SDX55_SLAVE_TCU, 1, 8);
DEFINE_QBCM(bcm_mc0, "MC0", true, &ebi);
DEFINE_QBCM(bcm_sh0, "SH0", true, &qns_llcc);
DEFINE_QBCM(bcm_ce0, "CE0", false, &qxm_crypto);
-DEFINE_QBCM(bcm_ip0, "IP0", false, &ipa_core_slave);
DEFINE_QBCM(bcm_pn0, "PN0", false, &qhm_snoc_cfg);
DEFINE_QBCM(bcm_sh3, "SH3", false, &xm_apps_rdwr);
DEFINE_QBCM(bcm_sh4, "SH4", false, &qns_memnoc_snoc, &qns_sys_pcie);
@@ -219,22 +216,6 @@ static const struct qcom_icc_desc sdx55_system_noc = {
.num_bcms = ARRAY_SIZE(system_noc_bcms),
};
-static struct qcom_icc_bcm *ipa_virt_bcms[] = {
- &bcm_ip0,
-};
-
-static struct qcom_icc_node *ipa_virt_nodes[] = {
- [MASTER_IPA_CORE] = &ipa_core_master,
- [SLAVE_IPA_CORE] = &ipa_core_slave,
-};
-
-static const struct qcom_icc_desc sdx55_ipa_virt = {
- .nodes = ipa_virt_nodes,
- .num_nodes = ARRAY_SIZE(ipa_virt_nodes),
- .bcms = ipa_virt_bcms,
- .num_bcms = ARRAY_SIZE(ipa_virt_bcms),
-};
-
static const struct of_device_id qnoc_of_match[] = {
{ .compatible = "qcom,sdx55-mc-virt",
.data = &sdx55_mc_virt},
@@ -242,8 +223,6 @@ static const struct of_device_id qnoc_of_match[] = {
.data = &sdx55_mem_noc},
{ .compatible = "qcom,sdx55-system-noc",
.data = &sdx55_system_noc},
- { .compatible = "qcom,sdx55-ipa-virt",
- .data = &sdx55_ipa_virt},
{ }
};
MODULE_DEVICE_TABLE(of, qnoc_of_match);
diff --git a/drivers/iommu/apple-dart.c b/drivers/iommu/apple-dart.c
index decafb07ad08..8af0242a90d9 100644
--- a/drivers/iommu/apple-dart.c
+++ b/drivers/iommu/apple-dart.c
@@ -773,6 +773,7 @@ static const struct iommu_ops apple_dart_iommu_ops = {
.get_resv_regions = apple_dart_get_resv_regions,
.put_resv_regions = generic_iommu_put_resv_regions,
.pgsize_bitmap = -1UL, /* Restricted during dart probe */
+ .owner = THIS_MODULE,
.default_domain_ops = &(const struct iommu_domain_ops) {
.attach_dev = apple_dart_attach_dev,
.detach_dev = apple_dart_detach_dev,
@@ -859,16 +860,15 @@ static int apple_dart_probe(struct platform_device *pdev)
dart->dev = dev;
spin_lock_init(&dart->lock);
- res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ dart->regs = devm_platform_get_and_ioremap_resource(pdev, 0, &res);
+ if (IS_ERR(dart->regs))
+ return PTR_ERR(dart->regs);
+
if (resource_size(res) < 0x4000) {
dev_err(dev, "MMIO region too small (%pr)\n", res);
return -EINVAL;
}
- dart->regs = devm_ioremap_resource(dev, res);
- if (IS_ERR(dart->regs))
- return PTR_ERR(dart->regs);
-
dart->irq = platform_get_irq(pdev, 0);
if (dart->irq < 0)
return -ENODEV;
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
index 22ddd05bbdcd..c623dae1e115 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
@@ -183,7 +183,14 @@ static void arm_smmu_mm_invalidate_range(struct mmu_notifier *mn,
{
struct arm_smmu_mmu_notifier *smmu_mn = mn_to_smmu(mn);
struct arm_smmu_domain *smmu_domain = smmu_mn->domain;
- size_t size = end - start + 1;
+ size_t size;
+
+ /*
+ * The mm_types defines vm_end as the first byte after the end address,
+ * different from IOMMU subsystem using the last address of an address
+ * range. So do a simple translation here by calculating size correctly.
+ */
+ size = end - start;
if (!(smmu_domain->smmu->features & ARM_SMMU_FEAT_BTM))
arm_smmu_tlb_inv_range_asid(start, size, smmu_mn->cd->asid,
diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-nvidia.c b/drivers/iommu/arm/arm-smmu/arm-smmu-nvidia.c
index 01e9b50b10a1..87bf522b9d2e 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu-nvidia.c
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu-nvidia.c
@@ -258,6 +258,34 @@ static void nvidia_smmu_probe_finalize(struct arm_smmu_device *smmu, struct devi
dev_name(dev), err);
}
+static int nvidia_smmu_init_context(struct arm_smmu_domain *smmu_domain,
+ struct io_pgtable_cfg *pgtbl_cfg,
+ struct device *dev)
+{
+ struct arm_smmu_device *smmu = smmu_domain->smmu;
+ const struct device_node *np = smmu->dev->of_node;
+
+ /*
+ * Tegra194 and Tegra234 SoCs have the erratum that causes walk cache
+ * entries to not be invalidated correctly. The problem is that the walk
+ * cache index generated for IOVA is not same across translation and
+ * invalidation requests. This is leading to page faults when PMD entry
+ * is released during unmap and populated with new PTE table during
+ * subsequent map request. Disabling large page mappings avoids the
+ * release of PMD entry and avoid translations seeing stale PMD entry in
+ * walk cache.
+ * Fix this by limiting the page mappings to PAGE_SIZE on Tegra194 and
+ * Tegra234.
+ */
+ if (of_device_is_compatible(np, "nvidia,tegra234-smmu") ||
+ of_device_is_compatible(np, "nvidia,tegra194-smmu")) {
+ smmu->pgsize_bitmap = PAGE_SIZE;
+ pgtbl_cfg->pgsize_bitmap = smmu->pgsize_bitmap;
+ }
+
+ return 0;
+}
+
static const struct arm_smmu_impl nvidia_smmu_impl = {
.read_reg = nvidia_smmu_read_reg,
.write_reg = nvidia_smmu_write_reg,
@@ -268,10 +296,12 @@ static const struct arm_smmu_impl nvidia_smmu_impl = {
.global_fault = nvidia_smmu_global_fault,
.context_fault = nvidia_smmu_context_fault,
.probe_finalize = nvidia_smmu_probe_finalize,
+ .init_context = nvidia_smmu_init_context,
};
static const struct arm_smmu_impl nvidia_smmu_single_impl = {
.probe_finalize = nvidia_smmu_probe_finalize,
+ .init_context = nvidia_smmu_init_context,
};
struct arm_smmu_device *nvidia_smmu_impl_init(struct arm_smmu_device *smmu)
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index df5c62ecf942..0ea47e17b379 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -1588,7 +1588,8 @@ static void iommu_flush_iotlb_psi(struct intel_iommu *iommu,
unsigned long pfn, unsigned int pages,
int ih, int map)
{
- unsigned int mask = ilog2(__roundup_pow_of_two(pages));
+ unsigned int aligned_pages = __roundup_pow_of_two(pages);
+ unsigned int mask = ilog2(aligned_pages);
uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT;
u16 did = domain->iommu_did[iommu->seq_id];
@@ -1600,10 +1601,30 @@ static void iommu_flush_iotlb_psi(struct intel_iommu *iommu,
if (domain_use_first_level(domain)) {
qi_flush_piotlb(iommu, did, PASID_RID2PASID, addr, pages, ih);
} else {
+ unsigned long bitmask = aligned_pages - 1;
+
+ /*
+ * PSI masks the low order bits of the base address. If the
+ * address isn't aligned to the mask, then compute a mask value
+ * needed to ensure the target range is flushed.
+ */
+ if (unlikely(bitmask & pfn)) {
+ unsigned long end_pfn = pfn + pages - 1, shared_bits;
+
+ /*
+ * Since end_pfn <= pfn + bitmask, the only way bits
+ * higher than bitmask can differ in pfn and end_pfn is
+ * by carrying. This means after masking out bitmask,
+ * high bits starting with the first set bit in
+ * shared_bits are all equal in both pfn and end_pfn.
+ */
+ shared_bits = ~(pfn ^ end_pfn) & ~bitmask;
+ mask = shared_bits ? __ffs(shared_bits) : BITS_PER_LONG;
+ }
+
/*
* Fallback to domain selective flush if no PSI support or
- * the size is too big. PSI requires page size to be 2 ^ x,
- * and the base address is naturally aligned to the size.
+ * the size is too big.
*/
if (!cap_pgsel_inv(iommu->cap) ||
mask > cap_max_amask_val(iommu->cap))
diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c
index 23a38763c1d1..7ee37d996e15 100644
--- a/drivers/iommu/intel/svm.c
+++ b/drivers/iommu/intel/svm.c
@@ -757,6 +757,10 @@ bad_req:
goto bad_req;
}
+ /* Drop Stop Marker message. No need for a response. */
+ if (unlikely(req->lpig && !req->rd_req && !req->wr_req))
+ goto prq_advance;
+
if (!svm || svm->pasid != req->pasid) {
/*
* It can't go away, because the driver is not permitted
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index f2c45b85b9fc..857d4c2fd1a2 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -506,6 +506,13 @@ int iommu_get_group_resv_regions(struct iommu_group *group,
list_for_each_entry(device, &group->devices, list) {
struct list_head dev_resv_regions;
+ /*
+ * Non-API groups still expose reserved_regions in sysfs,
+ * so filter out calls that get here that way.
+ */
+ if (!device->dev->iommu)
+ break;
+
INIT_LIST_HEAD(&dev_resv_regions);
iommu_get_resv_regions(device->dev, &dev_resv_regions);
ret = iommu_insert_device_resv_regions(&dev_resv_regions, head);
@@ -3019,7 +3026,7 @@ static ssize_t iommu_group_store_type(struct iommu_group *group,
if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RAWIO))
return -EACCES;
- if (WARN_ON(!group))
+ if (WARN_ON(!group) || !group->default_domain)
return -EINVAL;
if (sysfs_streq(buf, "identity"))
diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c
index 097577ae3c47..ce13c272c387 100644
--- a/drivers/md/bcache/alloc.c
+++ b/drivers/md/bcache/alloc.c
@@ -336,7 +336,7 @@ static int bch_allocator_thread(void *arg)
mutex_unlock(&ca->set->bucket_lock);
blkdev_issue_discard(ca->bdev,
bucket_to_sector(ca->set, bucket),
- ca->sb.bucket_size, GFP_KERNEL, 0);
+ ca->sb.bucket_size, GFP_KERNEL);
mutex_lock(&ca->set->bucket_lock);
}
diff --git a/drivers/md/bcache/debug.c b/drivers/md/bcache/debug.c
index 6230dfdd9286..7510d1c983a5 100644
--- a/drivers/md/bcache/debug.c
+++ b/drivers/md/bcache/debug.c
@@ -107,15 +107,16 @@ void bch_btree_verify(struct btree *b)
void bch_data_verify(struct cached_dev *dc, struct bio *bio)
{
+ unsigned int nr_segs = bio_segments(bio);
struct bio *check;
struct bio_vec bv, cbv;
struct bvec_iter iter, citer = { 0 };
- check = bio_kmalloc(GFP_NOIO, bio_segments(bio));
+ check = bio_kmalloc(nr_segs, GFP_NOIO);
if (!check)
return;
- bio_set_dev(check, bio->bi_bdev);
- check->bi_opf = REQ_OP_READ;
+ bio_init(check, bio->bi_bdev, check->bi_inline_vecs, nr_segs,
+ REQ_OP_READ);
check->bi_iter.bi_sector = bio->bi_iter.bi_sector;
check->bi_iter.bi_size = bio->bi_iter.bi_size;
@@ -146,7 +147,8 @@ void bch_data_verify(struct cached_dev *dc, struct bio *bio)
bio_free_pages(check);
out_put:
- bio_put(check);
+ bio_uninit(check);
+ kfree(check);
}
#endif
diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c
index 7c2ca52ca3e4..df5347ea450b 100644
--- a/drivers/md/bcache/journal.c
+++ b/drivers/md/bcache/journal.c
@@ -771,12 +771,12 @@ static void journal_write_unlocked(struct closure *cl)
bio_reset(bio, ca->bdev, REQ_OP_WRITE |
REQ_SYNC | REQ_META | REQ_PREFLUSH | REQ_FUA);
- bch_bio_map(bio, w->data);
bio->bi_iter.bi_sector = PTR_OFFSET(k, i);
bio->bi_iter.bi_size = sectors << 9;
bio->bi_end_io = journal_write_endio;
bio->bi_private = w;
+ bch_bio_map(bio, w->data);
trace_bcache_journal_write(bio, w->data->keys);
bio_list_add(&list, bio);
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index fdd0194f84dd..9c5dde73da88 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -685,7 +685,7 @@ static void do_bio_hook(struct search *s,
{
struct bio *bio = &s->bio.bio;
- bio_init_clone(bio->bi_bdev, bio, orig_bio, GFP_NOIO);
+ bio_init_clone(orig_bio->bi_bdev, bio, orig_bio, GFP_NOIO);
/*
* bi_end_io can be set separately somewhere else, e.g. the
* variants in,
@@ -1005,7 +1005,7 @@ static void cached_dev_write(struct cached_dev *dc, struct search *s)
bio_get(s->iop.bio);
if (bio_op(bio) == REQ_OP_DISCARD &&
- !blk_queue_discard(bdev_get_queue(dc->bdev)))
+ !bdev_max_discard_sectors(dc->bdev))
goto insert_data;
/* I/O request sent to backing device */
@@ -1115,7 +1115,7 @@ static void detached_dev_do_request(struct bcache_device *d, struct bio *bio,
bio->bi_private = ddip;
if ((bio_op(bio) == REQ_OP_DISCARD) &&
- !blk_queue_discard(bdev_get_queue(dc->bdev)))
+ !bdev_max_discard_sectors(dc->bdev))
bio->bi_end_io(bio);
else
submit_bio_noacct(bio);
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index bf3de149d3c9..2f49e31142f6 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -973,7 +973,6 @@ static int bcache_device_init(struct bcache_device *d, unsigned int block_size,
blk_queue_flag_set(QUEUE_FLAG_NONROT, d->disk->queue);
blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, d->disk->queue);
- blk_queue_flag_set(QUEUE_FLAG_DISCARD, d->disk->queue);
blk_queue_write_cache(q, true, true);
@@ -2350,7 +2349,7 @@ static int register_cache(struct cache_sb *sb, struct cache_sb_disk *sb_disk,
ca->bdev->bd_holder = ca;
ca->sb_disk = sb_disk;
- if (blk_queue_discard(bdev_get_queue(bdev)))
+ if (bdev_max_discard_sectors((bdev)))
ca->discard = CACHE_DISCARD(&ca->sb);
ret = cache_alloc(ca);
diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
index d1029d71ff3b..c6f677059214 100644
--- a/drivers/md/bcache/sysfs.c
+++ b/drivers/md/bcache/sysfs.c
@@ -1151,7 +1151,7 @@ STORE(__bch_cache)
if (attr == &sysfs_discard) {
bool v = strtoul_or_return(buf);
- if (blk_queue_discard(bdev_get_queue(ca->bdev)))
+ if (bdev_max_discard_sectors(ca->bdev))
ca->discard = v;
if (v != CACHE_DISCARD(&ca->sb)) {
diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c
index e9cbc70d5a0e..5ffa1dcf84cf 100644
--- a/drivers/md/dm-bufio.c
+++ b/drivers/md/dm-bufio.c
@@ -611,7 +611,8 @@ static void bio_complete(struct bio *bio)
{
struct dm_buffer *b = bio->bi_private;
blk_status_t status = bio->bi_status;
- bio_put(bio);
+ bio_uninit(bio);
+ kfree(bio);
b->end_io(b, status);
}
@@ -626,16 +627,14 @@ static void use_bio(struct dm_buffer *b, int rw, sector_t sector,
if (unlikely(b->c->sectors_per_block_bits < PAGE_SHIFT - SECTOR_SHIFT))
vec_size += 2;
- bio = bio_kmalloc(GFP_NOWAIT | __GFP_NORETRY | __GFP_NOWARN, vec_size);
+ bio = bio_kmalloc(vec_size, GFP_NOWAIT | __GFP_NORETRY | __GFP_NOWARN);
if (!bio) {
dmio:
use_dmio(b, rw, sector, n_sectors, offset);
return;
}
-
+ bio_init(bio, b->c->bdev, bio->bi_inline_vecs, vec_size, rw);
bio->bi_iter.bi_sector = sector;
- bio_set_dev(bio, b->c->bdev);
- bio_set_op_attrs(bio, rw, 0);
bio->bi_end_io = bio_complete;
bio->bi_private = b;
diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c
index 780a61bc6cc0..28c5de8eca4a 100644
--- a/drivers/md/dm-cache-target.c
+++ b/drivers/md/dm-cache-target.c
@@ -3329,13 +3329,6 @@ static int cache_iterate_devices(struct dm_target *ti,
return r;
}
-static bool origin_dev_supports_discard(struct block_device *origin_bdev)
-{
- struct request_queue *q = bdev_get_queue(origin_bdev);
-
- return blk_queue_discard(q);
-}
-
/*
* If discard_passdown was enabled verify that the origin device
* supports discards. Disable discard_passdown if not.
@@ -3349,7 +3342,7 @@ static void disable_passdown_if_not_supported(struct cache *cache)
if (!cache->features.discard_passdown)
return;
- if (!origin_dev_supports_discard(origin_bdev))
+ if (!bdev_max_discard_sectors(origin_bdev))
reason = "discard unsupported";
else if (origin_limits->max_discard_sectors < cache->sectors_per_block)
diff --git a/drivers/md/dm-clone-target.c b/drivers/md/dm-clone-target.c
index 128316a73d01..811b0a5379d0 100644
--- a/drivers/md/dm-clone-target.c
+++ b/drivers/md/dm-clone-target.c
@@ -2016,13 +2016,6 @@ static void clone_resume(struct dm_target *ti)
do_waker(&clone->waker.work);
}
-static bool bdev_supports_discards(struct block_device *bdev)
-{
- struct request_queue *q = bdev_get_queue(bdev);
-
- return (q && blk_queue_discard(q));
-}
-
/*
* If discard_passdown was enabled verify that the destination device supports
* discards. Disable discard_passdown if not.
@@ -2036,7 +2029,7 @@ static void disable_passdown_if_not_supported(struct clone *clone)
if (!test_bit(DM_CLONE_DISCARD_PASSDOWN, &clone->flags))
return;
- if (!bdev_supports_discards(dest_dev))
+ if (!bdev_max_discard_sectors(dest_dev))
reason = "discard unsupported";
else if (dest_limits->max_discard_sectors < clone->region_size)
reason = "max discard sectors smaller than a region";
diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c
index 5762366333a2..e4b95eaeec8c 100644
--- a/drivers/md/dm-io.c
+++ b/drivers/md/dm-io.c
@@ -311,7 +311,7 @@ static void do_region(int op, int op_flags, unsigned region,
* Reject unsupported discard and write same requests.
*/
if (op == REQ_OP_DISCARD)
- special_cmd_max_sectors = q->limits.max_discard_sectors;
+ special_cmd_max_sectors = bdev_max_discard_sectors(where->bdev);
else if (op == REQ_OP_WRITE_ZEROES)
special_cmd_max_sectors = q->limits.max_write_zeroes_sectors;
if ((op == REQ_OP_DISCARD || op == REQ_OP_WRITE_ZEROES) &&
diff --git a/drivers/md/dm-log-writes.c b/drivers/md/dm-log-writes.c
index c9d036d6bb2e..e194226c89e5 100644
--- a/drivers/md/dm-log-writes.c
+++ b/drivers/md/dm-log-writes.c
@@ -866,9 +866,8 @@ static int log_writes_message(struct dm_target *ti, unsigned argc, char **argv,
static void log_writes_io_hints(struct dm_target *ti, struct queue_limits *limits)
{
struct log_writes_c *lc = ti->private;
- struct request_queue *q = bdev_get_queue(lc->dev->bdev);
- if (!q || !blk_queue_discard(q)) {
+ if (!bdev_max_discard_sectors(lc->dev->bdev)) {
lc->device_supports_discard = false;
limits->discard_granularity = lc->sectorsize;
limits->max_discard_sectors = (UINT_MAX >> SECTOR_SHIFT);
diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
index 2b26435a6946..9526ccbedafb 100644
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c
@@ -2963,13 +2963,8 @@ static void configure_discard_support(struct raid_set *rs)
raid456 = rs_is_raid456(rs);
for (i = 0; i < rs->raid_disks; i++) {
- struct request_queue *q;
-
- if (!rs->dev[i].rdev.bdev)
- continue;
-
- q = bdev_get_queue(rs->dev[i].rdev.bdev);
- if (!q || !blk_queue_discard(q))
+ if (!rs->dev[i].rdev.bdev ||
+ !bdev_max_discard_sectors(rs->dev[i].rdev.bdev))
return;
if (raid456) {
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 03541cfc2317..e7d42f6335a2 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -1820,9 +1820,7 @@ static int device_dax_write_cache_enabled(struct dm_target *ti,
static int device_is_rotational(struct dm_target *ti, struct dm_dev *dev,
sector_t start, sector_t len, void *data)
{
- struct request_queue *q = bdev_get_queue(dev->bdev);
-
- return !blk_queue_nonrot(q);
+ return !bdev_nonrot(dev->bdev);
}
static int device_is_not_random(struct dm_target *ti, struct dm_dev *dev,
@@ -1890,9 +1888,7 @@ static bool dm_table_supports_nowait(struct dm_table *t)
static int device_not_discard_capable(struct dm_target *ti, struct dm_dev *dev,
sector_t start, sector_t len, void *data)
{
- struct request_queue *q = bdev_get_queue(dev->bdev);
-
- return !blk_queue_discard(q);
+ return !bdev_max_discard_sectors(dev->bdev);
}
static bool dm_table_supports_discards(struct dm_table *t)
@@ -1924,9 +1920,7 @@ static int device_not_secure_erase_capable(struct dm_target *ti,
struct dm_dev *dev, sector_t start,
sector_t len, void *data)
{
- struct request_queue *q = bdev_get_queue(dev->bdev);
-
- return !blk_queue_secure_erase(q);
+ return !bdev_max_secure_erase_sectors(dev->bdev);
}
static bool dm_table_supports_secure_erase(struct dm_table *t)
@@ -1952,9 +1946,7 @@ static int device_requires_stable_pages(struct dm_target *ti,
struct dm_dev *dev, sector_t start,
sector_t len, void *data)
{
- struct request_queue *q = bdev_get_queue(dev->bdev);
-
- return blk_queue_stable_writes(q);
+ return bdev_stable_writes(dev->bdev);
}
int dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
@@ -1974,18 +1966,15 @@ int dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
blk_queue_flag_clear(QUEUE_FLAG_NOWAIT, q);
if (!dm_table_supports_discards(t)) {
- blk_queue_flag_clear(QUEUE_FLAG_DISCARD, q);
- /* Must also clear discard limits... */
q->limits.max_discard_sectors = 0;
q->limits.max_hw_discard_sectors = 0;
q->limits.discard_granularity = 0;
q->limits.discard_alignment = 0;
q->limits.discard_misaligned = 0;
- } else
- blk_queue_flag_set(QUEUE_FLAG_DISCARD, q);
+ }
- if (dm_table_supports_secure_erase(t))
- blk_queue_flag_set(QUEUE_FLAG_SECERASE, q);
+ if (!dm_table_supports_secure_erase(t))
+ q->limits.max_secure_erase_sectors = 0;
if (dm_table_supports_flush(t, (1UL << QUEUE_FLAG_WC))) {
wc = true;
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index 4d25d0e27031..84c083f76673 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -398,8 +398,8 @@ static int issue_discard(struct discard_op *op, dm_block_t data_b, dm_block_t da
sector_t s = block_to_sectors(tc->pool, data_b);
sector_t len = block_to_sectors(tc->pool, data_e - data_b);
- return __blkdev_issue_discard(tc->pool_dev->bdev, s, len,
- GFP_NOWAIT, 0, &op->bio);
+ return __blkdev_issue_discard(tc->pool_dev->bdev, s, len, GFP_NOWAIT,
+ &op->bio);
}
static void end_discard(struct discard_op *op, int r)
@@ -2802,13 +2802,6 @@ static void requeue_bios(struct pool *pool)
/*----------------------------------------------------------------
* Binding of control targets to a pool object
*--------------------------------------------------------------*/
-static bool data_dev_supports_discard(struct pool_c *pt)
-{
- struct request_queue *q = bdev_get_queue(pt->data_dev->bdev);
-
- return blk_queue_discard(q);
-}
-
static bool is_factor(sector_t block_size, uint32_t n)
{
return !sector_div(block_size, n);
@@ -2828,7 +2821,7 @@ static void disable_passdown_if_not_supported(struct pool_c *pt)
if (!pt->adjusted_pf.discard_passdown)
return;
- if (!data_dev_supports_discard(pt))
+ if (!bdev_max_discard_sectors(pt->data_dev->bdev))
reason = "discard unsupported";
else if (data_limits->max_discard_sectors < pool->sectors_per_block)
@@ -4057,8 +4050,6 @@ static void pool_io_hints(struct dm_target *ti, struct queue_limits *limits)
/*
* Must explicitly disallow stacking discard limits otherwise the
* block layer will stack them if pool's data device has support.
- * QUEUE_FLAG_DISCARD wouldn't be set but there is no way for the
- * user to see that, so make sure to set all discard limits to 0.
*/
limits->discard_granularity = 0;
return;
diff --git a/drivers/md/dm-zoned-target.c b/drivers/md/dm-zoned-target.c
index cac295cc8840..0ec5d8b9b1a4 100644
--- a/drivers/md/dm-zoned-target.c
+++ b/drivers/md/dm-zoned-target.c
@@ -1001,7 +1001,7 @@ static void dmz_io_hints(struct dm_target *ti, struct queue_limits *limits)
blk_limits_io_min(limits, DMZ_BLOCK_SIZE);
blk_limits_io_opt(limits, DMZ_BLOCK_SIZE);
- limits->discard_alignment = DMZ_BLOCK_SIZE;
+ limits->discard_alignment = 0;
limits->discard_granularity = DMZ_BLOCK_SIZE;
limits->max_discard_sectors = chunk_sectors;
limits->max_hw_discard_sectors = chunk_sectors;
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 82957bd460e8..39081338ca61 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -955,7 +955,6 @@ void disable_discard(struct mapped_device *md)
/* device doesn't really support DISCARD, disable it */
limits->max_discard_sectors = 0;
- blk_queue_flag_clear(QUEUE_FLAG_DISCARD, md->queue);
}
void disable_write_zeroes(struct mapped_device *md)
@@ -982,7 +981,7 @@ static void clone_endio(struct bio *bio)
if (unlikely(error == BLK_STS_TARGET)) {
if (bio_op(bio) == REQ_OP_DISCARD &&
- !q->limits.max_discard_sectors)
+ !bdev_max_discard_sectors(bio->bi_bdev))
disable_discard(md);
else if (bio_op(bio) == REQ_OP_WRITE_ZEROES &&
!q->limits.max_write_zeroes_sectors)
diff --git a/drivers/md/md-bitmap.c b/drivers/md/md-bitmap.c
index bfd6026d7809..d87f674ab762 100644
--- a/drivers/md/md-bitmap.c
+++ b/drivers/md/md-bitmap.c
@@ -639,14 +639,6 @@ re_read:
daemon_sleep = le32_to_cpu(sb->daemon_sleep) * HZ;
write_behind = le32_to_cpu(sb->write_behind);
sectors_reserved = le32_to_cpu(sb->sectors_reserved);
- /* Setup nodes/clustername only if bitmap version is
- * cluster-compatible
- */
- if (sb->version == cpu_to_le32(BITMAP_MAJOR_CLUSTERED)) {
- nodes = le32_to_cpu(sb->nodes);
- strlcpy(bitmap->mddev->bitmap_info.cluster_name,
- sb->cluster_name, 64);
- }
/* verify that the bitmap-specific fields are valid */
if (sb->magic != cpu_to_le32(BITMAP_MAGIC))
@@ -668,6 +660,16 @@ re_read:
goto out;
}
+ /*
+ * Setup nodes/clustername only if bitmap version is
+ * cluster-compatible
+ */
+ if (sb->version == cpu_to_le32(BITMAP_MAJOR_CLUSTERED)) {
+ nodes = le32_to_cpu(sb->nodes);
+ strscpy(bitmap->mddev->bitmap_info.cluster_name,
+ sb->cluster_name, 64);
+ }
+
/* keep the array size field of the bitmap superblock up to date */
sb->sync_size = cpu_to_le64(bitmap->mddev->resync_max_sectors);
@@ -695,14 +697,13 @@ re_read:
if (le32_to_cpu(sb->version) == BITMAP_MAJOR_HOSTENDIAN)
set_bit(BITMAP_HOSTENDIAN, &bitmap->flags);
bitmap->events_cleared = le64_to_cpu(sb->events_cleared);
- strlcpy(bitmap->mddev->bitmap_info.cluster_name, sb->cluster_name, 64);
err = 0;
out:
kunmap_atomic(sb);
- /* Assigning chunksize is required for "re_read" */
- bitmap->mddev->bitmap_info.chunksize = chunksize;
if (err == 0 && nodes && (bitmap->cluster_slot < 0)) {
+ /* Assigning chunksize is required for "re_read" */
+ bitmap->mddev->bitmap_info.chunksize = chunksize;
err = md_setup_cluster(bitmap->mddev, nodes);
if (err) {
pr_warn("%s: Could not setup cluster service (%d)\n",
@@ -713,18 +714,18 @@ out:
goto re_read;
}
-
out_no_sb:
- if (test_bit(BITMAP_STALE, &bitmap->flags))
- bitmap->events_cleared = bitmap->mddev->events;
- bitmap->mddev->bitmap_info.chunksize = chunksize;
- bitmap->mddev->bitmap_info.daemon_sleep = daemon_sleep;
- bitmap->mddev->bitmap_info.max_write_behind = write_behind;
- bitmap->mddev->bitmap_info.nodes = nodes;
- if (bitmap->mddev->bitmap_info.space == 0 ||
- bitmap->mddev->bitmap_info.space > sectors_reserved)
- bitmap->mddev->bitmap_info.space = sectors_reserved;
- if (err) {
+ if (err == 0) {
+ if (test_bit(BITMAP_STALE, &bitmap->flags))
+ bitmap->events_cleared = bitmap->mddev->events;
+ bitmap->mddev->bitmap_info.chunksize = chunksize;
+ bitmap->mddev->bitmap_info.daemon_sleep = daemon_sleep;
+ bitmap->mddev->bitmap_info.max_write_behind = write_behind;
+ bitmap->mddev->bitmap_info.nodes = nodes;
+ if (bitmap->mddev->bitmap_info.space == 0 ||
+ bitmap->mddev->bitmap_info.space > sectors_reserved)
+ bitmap->mddev->bitmap_info.space = sectors_reserved;
+ } else {
md_bitmap_print_sb(bitmap);
if (bitmap->cluster_slot < 0)
md_cluster_stop(bitmap->mddev);
diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c
index 1c8a06b77c85..37cbcce3cc66 100644
--- a/drivers/md/md-cluster.c
+++ b/drivers/md/md-cluster.c
@@ -201,7 +201,7 @@ static struct dlm_lock_resource *lockres_init(struct mddev *mddev,
pr_err("md-cluster: Unable to allocate resource name for resource %s\n", name);
goto out_err;
}
- strlcpy(res->name, name, namelen + 1);
+ strscpy(res->name, name, namelen + 1);
if (with_lvb) {
res->lksb.sb_lvbptr = kzalloc(LVB_SIZE, GFP_KERNEL);
if (!res->lksb.sb_lvbptr) {
diff --git a/drivers/md/md-linear.c b/drivers/md/md-linear.c
index 0f55b079371b..138a3b25c5c8 100644
--- a/drivers/md/md-linear.c
+++ b/drivers/md/md-linear.c
@@ -64,7 +64,6 @@ static struct linear_conf *linear_conf(struct mddev *mddev, int raid_disks)
struct linear_conf *conf;
struct md_rdev *rdev;
int i, cnt;
- bool discard_supported = false;
conf = kzalloc(struct_size(conf, disks, raid_disks), GFP_KERNEL);
if (!conf)
@@ -96,9 +95,6 @@ static struct linear_conf *linear_conf(struct mddev *mddev, int raid_disks)
conf->array_sectors += rdev->sectors;
cnt++;
-
- if (blk_queue_discard(bdev_get_queue(rdev->bdev)))
- discard_supported = true;
}
if (cnt != raid_disks) {
pr_warn("md/linear:%s: not enough drives present. Aborting!\n",
@@ -106,11 +102,6 @@ static struct linear_conf *linear_conf(struct mddev *mddev, int raid_disks)
goto out;
}
- if (!discard_supported)
- blk_queue_flag_clear(QUEUE_FLAG_DISCARD, mddev->queue);
- else
- blk_queue_flag_set(QUEUE_FLAG_DISCARD, mddev->queue);
-
/*
* Here we calculate the device offsets.
*/
@@ -252,7 +243,7 @@ static bool linear_make_request(struct mddev *mddev, struct bio *bio)
start_sector + data_offset;
if (unlikely((bio_op(bio) == REQ_OP_DISCARD) &&
- !blk_queue_discard(bio->bi_bdev->bd_disk->queue))) {
+ !bdev_max_discard_sectors(bio->bi_bdev))) {
/* Just ignore it */
bio_endio(bio);
} else {
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 309b3af906ad..707e802d0082 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -2627,14 +2627,16 @@ static void sync_sbs(struct mddev *mddev, int nospares)
static bool does_sb_need_changing(struct mddev *mddev)
{
- struct md_rdev *rdev;
+ struct md_rdev *rdev = NULL, *iter;
struct mdp_superblock_1 *sb;
int role;
/* Find a good rdev */
- rdev_for_each(rdev, mddev)
- if ((rdev->raid_disk >= 0) && !test_bit(Faulty, &rdev->flags))
+ rdev_for_each(iter, mddev)
+ if ((iter->raid_disk >= 0) && !test_bit(Faulty, &iter->flags)) {
+ rdev = iter;
break;
+ }
/* No good device found. */
if (!rdev)
@@ -2645,11 +2647,11 @@ static bool does_sb_need_changing(struct mddev *mddev)
rdev_for_each(rdev, mddev) {
role = le16_to_cpu(sb->dev_roles[rdev->desc_nr]);
/* Device activated? */
- if (role == 0xffff && rdev->raid_disk >=0 &&
+ if (role == MD_DISK_ROLE_SPARE && rdev->raid_disk >= 0 &&
!test_bit(Faulty, &rdev->flags))
return true;
/* Device turned faulty? */
- if (test_bit(Faulty, &rdev->flags) && (role < 0xfffd))
+ if (test_bit(Faulty, &rdev->flags) && (role < MD_DISK_ROLE_MAX))
return true;
}
@@ -2984,10 +2986,11 @@ state_store(struct md_rdev *rdev, const char *buf, size_t len)
if (cmd_match(buf, "faulty") && rdev->mddev->pers) {
md_error(rdev->mddev, rdev);
- if (test_bit(Faulty, &rdev->flags))
- err = 0;
- else
+
+ if (test_bit(MD_BROKEN, &rdev->mddev->flags))
err = -EBUSY;
+ else
+ err = 0;
} else if (cmd_match(buf, "remove")) {
if (rdev->mddev->pers) {
clear_bit(Blocked, &rdev->flags);
@@ -4028,7 +4031,7 @@ level_store(struct mddev *mddev, const char *buf, size_t len)
oldpriv = mddev->private;
mddev->pers = pers;
mddev->private = priv;
- strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
+ strscpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
mddev->level = mddev->new_level;
mddev->layout = mddev->new_layout;
mddev->chunk_sectors = mddev->new_chunk_sectors;
@@ -4353,10 +4356,9 @@ __ATTR_PREALLOC(resync_start, S_IRUGO|S_IWUSR,
* like active, but no writes have been seen for a while (100msec).
*
* broken
- * RAID0/LINEAR-only: same as clean, but array is missing a member.
- * It's useful because RAID0/LINEAR mounted-arrays aren't stopped
- * when a member is gone, so this state will at least alert the
- * user that something is wrong.
+* Array is failed. It's useful because mounted-arrays aren't stopped
+* when array is failed, so this state will at least alert the user that
+* something is wrong.
*/
enum array_state { clear, inactive, suspended, readonly, read_auto, clean, active,
write_pending, active_idle, broken, bad_word};
@@ -5763,7 +5765,7 @@ static int add_named_array(const char *val, const struct kernel_param *kp)
len--;
if (len >= DISK_NAME_LEN)
return -E2BIG;
- strlcpy(buf, val, len+1);
+ strscpy(buf, val, len+1);
if (strncmp(buf, "md_", 3) == 0)
return md_alloc(0, buf);
if (strncmp(buf, "md", 2) == 0 &&
@@ -5896,7 +5898,7 @@ int md_run(struct mddev *mddev)
mddev->level = pers->level;
mddev->new_level = pers->level;
}
- strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
+ strscpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
if (mddev->reshape_position != MaxSector &&
pers->start_reshape == NULL) {
@@ -5991,8 +5993,7 @@ int md_run(struct mddev *mddev)
bool nonrot = true;
rdev_for_each(rdev, mddev) {
- if (rdev->raid_disk >= 0 &&
- !blk_queue_nonrot(bdev_get_queue(rdev->bdev))) {
+ if (rdev->raid_disk >= 0 && !bdev_nonrot(rdev->bdev)) {
nonrot = false;
break;
}
@@ -7444,7 +7445,7 @@ static int set_disk_faulty(struct mddev *mddev, dev_t dev)
err = -ENODEV;
else {
md_error(mddev, rdev);
- if (!test_bit(Faulty, &rdev->flags))
+ if (test_bit(MD_BROKEN, &mddev->flags))
err = -EBUSY;
}
rcu_read_unlock();
@@ -7985,13 +7986,16 @@ void md_error(struct mddev *mddev, struct md_rdev *rdev)
if (!mddev->pers || !mddev->pers->error_handler)
return;
- mddev->pers->error_handler(mddev,rdev);
- if (mddev->degraded)
+ mddev->pers->error_handler(mddev, rdev);
+
+ if (mddev->degraded && !test_bit(MD_BROKEN, &mddev->flags))
set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
sysfs_notify_dirent_safe(rdev->sysfs_state);
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
- set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
- md_wakeup_thread(mddev->thread);
+ if (!test_bit(MD_BROKEN, &mddev->flags)) {
+ set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
+ md_wakeup_thread(mddev->thread);
+ }
if (mddev->event_work.func)
queue_work(md_misc_wq, &mddev->event_work);
md_new_event();
@@ -8585,7 +8589,7 @@ void md_submit_discard_bio(struct mddev *mddev, struct md_rdev *rdev,
{
struct bio *discard_bio = NULL;
- if (__blkdev_issue_discard(rdev->bdev, start, size, GFP_NOIO, 0,
+ if (__blkdev_issue_discard(rdev->bdev, start, size, GFP_NOIO,
&discard_bio) || !discard_bio)
return;
@@ -9671,7 +9675,7 @@ static void check_sb_changes(struct mddev *mddev, struct md_rdev *rdev)
role = le16_to_cpu(sb->dev_roles[rdev2->desc_nr]);
if (test_bit(Candidate, &rdev2->flags)) {
- if (role == 0xfffe) {
+ if (role == MD_DISK_ROLE_FAULTY) {
pr_info("md: Removing Candidate device %s because add failed\n", bdevname(rdev2->bdev,b));
md_kick_rdev_from_array(rdev2);
continue;
@@ -9684,7 +9688,7 @@ static void check_sb_changes(struct mddev *mddev, struct md_rdev *rdev)
/*
* got activated except reshape is happening.
*/
- if (rdev2->raid_disk == -1 && role != 0xffff &&
+ if (rdev2->raid_disk == -1 && role != MD_DISK_ROLE_SPARE &&
!(le32_to_cpu(sb->feature_map) &
MD_FEATURE_RESHAPE_ACTIVE)) {
rdev2->saved_raid_disk = role;
@@ -9701,7 +9705,8 @@ static void check_sb_changes(struct mddev *mddev, struct md_rdev *rdev)
* as faulty. The recovery is performed by the
* one who initiated the error.
*/
- if ((role == 0xfffe) || (role == 0xfffd)) {
+ if (role == MD_DISK_ROLE_FAULTY ||
+ role == MD_DISK_ROLE_JOURNAL) {
md_error(mddev, rdev2);
clear_bit(Blocked, &rdev2->flags);
}
@@ -9791,16 +9796,18 @@ static int read_rdev(struct mddev *mddev, struct md_rdev *rdev)
void md_reload_sb(struct mddev *mddev, int nr)
{
- struct md_rdev *rdev;
+ struct md_rdev *rdev = NULL, *iter;
int err;
/* Find the rdev */
- rdev_for_each_rcu(rdev, mddev) {
- if (rdev->desc_nr == nr)
+ rdev_for_each_rcu(iter, mddev) {
+ if (iter->desc_nr == nr) {
+ rdev = iter;
break;
+ }
}
- if (!rdev || rdev->desc_nr != nr) {
+ if (!rdev) {
pr_warn("%s: %d Could not find rdev with nr %d\n", __func__, __LINE__, nr);
return;
}
diff --git a/drivers/md/md.h b/drivers/md/md.h
index 6ac283864533..cf2cbb17acbd 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -234,34 +234,42 @@ extern int rdev_clear_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
int is_new);
struct md_cluster_info;
-/* change UNSUPPORTED_MDDEV_FLAGS for each array type if new flag is added */
+/**
+ * enum mddev_flags - md device flags.
+ * @MD_ARRAY_FIRST_USE: First use of array, needs initialization.
+ * @MD_CLOSING: If set, we are closing the array, do not open it then.
+ * @MD_JOURNAL_CLEAN: A raid with journal is already clean.
+ * @MD_HAS_JOURNAL: The raid array has journal feature set.
+ * @MD_CLUSTER_RESYNC_LOCKED: cluster raid only, which means node, already took
+ * resync lock, need to release the lock.
+ * @MD_FAILFAST_SUPPORTED: Using MD_FAILFAST on metadata writes is supported as
+ * calls to md_error() will never cause the array to
+ * become failed.
+ * @MD_HAS_PPL: The raid array has PPL feature set.
+ * @MD_HAS_MULTIPLE_PPLS: The raid array has multiple PPLs feature set.
+ * @MD_ALLOW_SB_UPDATE: md_check_recovery is allowed to update the metadata
+ * without taking reconfig_mutex.
+ * @MD_UPDATING_SB: md_check_recovery is updating the metadata without
+ * explicitly holding reconfig_mutex.
+ * @MD_NOT_READY: do_md_run() is active, so 'array_state', ust not report that
+ * array is ready yet.
+ * @MD_BROKEN: This is used to stop writes and mark array as failed.
+ *
+ * change UNSUPPORTED_MDDEV_FLAGS for each array type if new flag is added
+ */
enum mddev_flags {
- MD_ARRAY_FIRST_USE, /* First use of array, needs initialization */
- MD_CLOSING, /* If set, we are closing the array, do not open
- * it then */
- MD_JOURNAL_CLEAN, /* A raid with journal is already clean */
- MD_HAS_JOURNAL, /* The raid array has journal feature set */
- MD_CLUSTER_RESYNC_LOCKED, /* cluster raid only, which means node
- * already took resync lock, need to
- * release the lock */
- MD_FAILFAST_SUPPORTED, /* Using MD_FAILFAST on metadata writes is
- * supported as calls to md_error() will
- * never cause the array to become failed.
- */
- MD_HAS_PPL, /* The raid array has PPL feature set */
- MD_HAS_MULTIPLE_PPLS, /* The raid array has multiple PPLs feature set */
- MD_ALLOW_SB_UPDATE, /* md_check_recovery is allowed to update
- * the metadata without taking reconfig_mutex.
- */
- MD_UPDATING_SB, /* md_check_recovery is updating the metadata
- * without explicitly holding reconfig_mutex.
- */
- MD_NOT_READY, /* do_md_run() is active, so 'array_state'
- * must not report that array is ready yet
- */
- MD_BROKEN, /* This is used in RAID-0/LINEAR only, to stop
- * I/O in case an array member is gone/failed.
- */
+ MD_ARRAY_FIRST_USE,
+ MD_CLOSING,
+ MD_JOURNAL_CLEAN,
+ MD_HAS_JOURNAL,
+ MD_CLUSTER_RESYNC_LOCKED,
+ MD_FAILFAST_SUPPORTED,
+ MD_HAS_PPL,
+ MD_HAS_MULTIPLE_PPLS,
+ MD_ALLOW_SB_UPDATE,
+ MD_UPDATING_SB,
+ MD_NOT_READY,
+ MD_BROKEN,
};
enum mddev_sb_flags {
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index b21e101183f4..e11701e394ca 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -128,21 +128,6 @@ static int create_strip_zones(struct mddev *mddev, struct r0conf **private_conf)
pr_debug("md/raid0:%s: FINAL %d zones\n",
mdname(mddev), conf->nr_strip_zones);
- if (conf->nr_strip_zones == 1) {
- conf->layout = RAID0_ORIG_LAYOUT;
- } else if (mddev->layout == RAID0_ORIG_LAYOUT ||
- mddev->layout == RAID0_ALT_MULTIZONE_LAYOUT) {
- conf->layout = mddev->layout;
- } else if (default_layout == RAID0_ORIG_LAYOUT ||
- default_layout == RAID0_ALT_MULTIZONE_LAYOUT) {
- conf->layout = default_layout;
- } else {
- pr_err("md/raid0:%s: cannot assemble multi-zone RAID0 with default_layout setting\n",
- mdname(mddev));
- pr_err("md/raid0: please set raid0.default_layout to 1 or 2\n");
- err = -ENOTSUPP;
- goto abort;
- }
/*
* now since we have the hard sector sizes, we can make sure
* chunk size is a multiple of that sector size
@@ -273,6 +258,22 @@ static int create_strip_zones(struct mddev *mddev, struct r0conf **private_conf)
(unsigned long long)smallest->sectors);
}
+ if (conf->nr_strip_zones == 1 || conf->strip_zone[1].nb_dev == 1) {
+ conf->layout = RAID0_ORIG_LAYOUT;
+ } else if (mddev->layout == RAID0_ORIG_LAYOUT ||
+ mddev->layout == RAID0_ALT_MULTIZONE_LAYOUT) {
+ conf->layout = mddev->layout;
+ } else if (default_layout == RAID0_ORIG_LAYOUT ||
+ default_layout == RAID0_ALT_MULTIZONE_LAYOUT) {
+ conf->layout = default_layout;
+ } else {
+ pr_err("md/raid0:%s: cannot assemble multi-zone RAID0 with default_layout setting\n",
+ mdname(mddev));
+ pr_err("md/raid0: please set raid0.default_layout to 1 or 2\n");
+ err = -EOPNOTSUPP;
+ goto abort;
+ }
+
pr_debug("md/raid0:%s: done.\n", mdname(mddev));
*private_conf = conf;
@@ -399,7 +400,6 @@ static int raid0_run(struct mddev *mddev)
conf = mddev->private;
if (mddev->queue) {
struct md_rdev *rdev;
- bool discard_supported = false;
blk_queue_max_hw_sectors(mddev->queue, mddev->chunk_sectors);
blk_queue_max_write_zeroes_sectors(mddev->queue, mddev->chunk_sectors);
@@ -412,13 +412,7 @@ static int raid0_run(struct mddev *mddev)
rdev_for_each(rdev, mddev) {
disk_stack_limits(mddev->gendisk, rdev->bdev,
rdev->data_offset << 9);
- if (blk_queue_discard(bdev_get_queue(rdev->bdev)))
- discard_supported = true;
}
- if (!discard_supported)
- blk_queue_flag_clear(QUEUE_FLAG_DISCARD, mddev->queue);
- else
- blk_queue_flag_set(QUEUE_FLAG_DISCARD, mddev->queue);
}
/* calculate array device size */
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 99d5464a51f8..99d5af1362d7 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -165,9 +165,10 @@ static void * r1buf_pool_alloc(gfp_t gfp_flags, void *data)
* Allocate bios : 1 for reading, n-1 for writing
*/
for (j = pi->raid_disks ; j-- ; ) {
- bio = bio_kmalloc(gfp_flags, RESYNC_PAGES);
+ bio = bio_kmalloc(RESYNC_PAGES, gfp_flags);
if (!bio)
goto out_free_bio;
+ bio_init(bio, NULL, bio->bi_inline_vecs, RESYNC_PAGES, 0);
r1_bio->bios[j] = bio;
}
/*
@@ -206,8 +207,10 @@ out_free_pages:
resync_free_pages(&rps[j]);
out_free_bio:
- while (++j < pi->raid_disks)
- bio_put(r1_bio->bios[j]);
+ while (++j < pi->raid_disks) {
+ bio_uninit(r1_bio->bios[j]);
+ kfree(r1_bio->bios[j]);
+ }
kfree(rps);
out_free_r1bio:
@@ -225,7 +228,8 @@ static void r1buf_pool_free(void *__r1_bio, void *data)
for (i = pi->raid_disks; i--; ) {
rp = get_resync_pages(r1bio->bios[i]);
resync_free_pages(rp);
- bio_put(r1bio->bios[i]);
+ bio_uninit(r1bio->bios[i]);
+ kfree(r1bio->bios[i]);
}
/* resync pages array stored in the 1st bio's .bi_private */
@@ -704,7 +708,7 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect
/* At least two disks to choose from so failfast is OK */
set_bit(R1BIO_FailFast, &r1_bio->state);
- nonrot = blk_queue_nonrot(bdev_get_queue(rdev->bdev));
+ nonrot = bdev_nonrot(rdev->bdev);
has_nonrot_disk |= nonrot;
pending = atomic_read(&rdev->nr_pending);
dist = abs(this_sector - conf->mirrors[disk].head_position);
@@ -802,7 +806,7 @@ static void flush_bio_list(struct r1conf *conf, struct bio *bio)
if (test_bit(Faulty, &rdev->flags)) {
bio_io_error(bio);
} else if (unlikely((bio_op(bio) == REQ_OP_DISCARD) &&
- !blk_queue_discard(bio->bi_bdev->bd_disk->queue)))
+ !bdev_max_discard_sectors(bio->bi_bdev)))
/* Just ignore it */
bio_endio(bio);
else
@@ -1637,30 +1641,39 @@ static void raid1_status(struct seq_file *seq, struct mddev *mddev)
seq_printf(seq, "]");
}
+/**
+ * raid1_error() - RAID1 error handler.
+ * @mddev: affected md device.
+ * @rdev: member device to fail.
+ *
+ * The routine acknowledges &rdev failure and determines new @mddev state.
+ * If it failed, then:
+ * - &MD_BROKEN flag is set in &mddev->flags.
+ * - recovery is disabled.
+ * Otherwise, it must be degraded:
+ * - recovery is interrupted.
+ * - &mddev->degraded is bumped.
+ *
+ * @rdev is marked as &Faulty excluding case when array is failed and
+ * &mddev->fail_last_dev is off.
+ */
static void raid1_error(struct mddev *mddev, struct md_rdev *rdev)
{
char b[BDEVNAME_SIZE];
struct r1conf *conf = mddev->private;
unsigned long flags;
- /*
- * If it is not operational, then we have already marked it as dead
- * else if it is the last working disks with "fail_last_dev == false",
- * ignore the error, let the next level up know.
- * else mark the drive as failed
- */
spin_lock_irqsave(&conf->device_lock, flags);
- if (test_bit(In_sync, &rdev->flags) && !mddev->fail_last_dev
- && (conf->raid_disks - mddev->degraded) == 1) {
- /*
- * Don't fail the drive, act as though we were just a
- * normal single drive.
- * However don't try a recovery from this drive as
- * it is very likely to fail.
- */
- conf->recovery_disabled = mddev->recovery_disabled;
- spin_unlock_irqrestore(&conf->device_lock, flags);
- return;
+
+ if (test_bit(In_sync, &rdev->flags) &&
+ (conf->raid_disks - mddev->degraded) == 1) {
+ set_bit(MD_BROKEN, &mddev->flags);
+
+ if (!mddev->fail_last_dev) {
+ conf->recovery_disabled = mddev->recovery_disabled;
+ spin_unlock_irqrestore(&conf->device_lock, flags);
+ return;
+ }
}
set_bit(Blocked, &rdev->flags);
if (test_and_clear_bit(In_sync, &rdev->flags))
@@ -1826,8 +1839,6 @@ static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev)
break;
}
}
- if (mddev->queue && blk_queue_discard(bdev_get_queue(rdev->bdev)))
- blk_queue_flag_set(QUEUE_FLAG_DISCARD, mddev->queue);
print_conf(conf);
return err;
}
@@ -3106,7 +3117,6 @@ static int raid1_run(struct mddev *mddev)
int i;
struct md_rdev *rdev;
int ret;
- bool discard_supported = false;
if (mddev->level != 1) {
pr_warn("md/raid1:%s: raid level not set to mirroring (%d)\n",
@@ -3141,8 +3151,6 @@ static int raid1_run(struct mddev *mddev)
continue;
disk_stack_limits(mddev->gendisk, rdev->bdev,
rdev->data_offset << 9);
- if (blk_queue_discard(bdev_get_queue(rdev->bdev)))
- discard_supported = true;
}
mddev->degraded = 0;
@@ -3179,15 +3187,6 @@ static int raid1_run(struct mddev *mddev)
md_set_array_sectors(mddev, raid1_size(mddev, 0, 0));
- if (mddev->queue) {
- if (discard_supported)
- blk_queue_flag_set(QUEUE_FLAG_DISCARD,
- mddev->queue);
- else
- blk_queue_flag_clear(QUEUE_FLAG_DISCARD,
- mddev->queue);
- }
-
ret = md_integrity_register(mddev);
if (ret) {
md_unregister_thread(&mddev->thread);
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index dfe7d62d3fbd..dfa576cdf11c 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -145,15 +145,17 @@ static void * r10buf_pool_alloc(gfp_t gfp_flags, void *data)
* Allocate bios.
*/
for (j = nalloc ; j-- ; ) {
- bio = bio_kmalloc(gfp_flags, RESYNC_PAGES);
+ bio = bio_kmalloc(RESYNC_PAGES, gfp_flags);
if (!bio)
goto out_free_bio;
+ bio_init(bio, NULL, bio->bi_inline_vecs, RESYNC_PAGES, 0);
r10_bio->devs[j].bio = bio;
if (!conf->have_replacement)
continue;
- bio = bio_kmalloc(gfp_flags, RESYNC_PAGES);
+ bio = bio_kmalloc(RESYNC_PAGES, gfp_flags);
if (!bio)
goto out_free_bio;
+ bio_init(bio, NULL, bio->bi_inline_vecs, RESYNC_PAGES, 0);
r10_bio->devs[j].repl_bio = bio;
}
/*
@@ -197,9 +199,11 @@ out_free_pages:
out_free_bio:
for ( ; j < nalloc; j++) {
if (r10_bio->devs[j].bio)
- bio_put(r10_bio->devs[j].bio);
+ bio_uninit(r10_bio->devs[j].bio);
+ kfree(r10_bio->devs[j].bio);
if (r10_bio->devs[j].repl_bio)
- bio_put(r10_bio->devs[j].repl_bio);
+ bio_uninit(r10_bio->devs[j].repl_bio);
+ kfree(r10_bio->devs[j].repl_bio);
}
kfree(rps);
out_free_r10bio:
@@ -220,12 +224,15 @@ static void r10buf_pool_free(void *__r10_bio, void *data)
if (bio) {
rp = get_resync_pages(bio);
resync_free_pages(rp);
- bio_put(bio);
+ bio_uninit(bio);
+ kfree(bio);
}
bio = r10bio->devs[j].repl_bio;
- if (bio)
- bio_put(bio);
+ if (bio) {
+ bio_uninit(bio);
+ kfree(bio);
+ }
}
/* resync pages array stored in the 1st bio's .bi_private */
@@ -796,7 +803,7 @@ static struct md_rdev *read_balance(struct r10conf *conf,
if (!do_balance)
break;
- nonrot = blk_queue_nonrot(bdev_get_queue(rdev->bdev));
+ nonrot = bdev_nonrot(rdev->bdev);
has_nonrot_disk |= nonrot;
pending = atomic_read(&rdev->nr_pending);
if (min_pending > pending && nonrot) {
@@ -888,7 +895,7 @@ static void flush_pending_writes(struct r10conf *conf)
if (test_bit(Faulty, &rdev->flags)) {
bio_io_error(bio);
} else if (unlikely((bio_op(bio) == REQ_OP_DISCARD) &&
- !blk_queue_discard(bio->bi_bdev->bd_disk->queue)))
+ !bdev_max_discard_sectors(bio->bi_bdev)))
/* Just ignore it */
bio_endio(bio);
else
@@ -1083,7 +1090,7 @@ static void raid10_unplug(struct blk_plug_cb *cb, bool from_schedule)
if (test_bit(Faulty, &rdev->flags)) {
bio_io_error(bio);
} else if (unlikely((bio_op(bio) == REQ_OP_DISCARD) &&
- !blk_queue_discard(bio->bi_bdev->bd_disk->queue)))
+ !bdev_max_discard_sectors(bio->bi_bdev)))
/* Just ignore it */
bio_endio(bio);
else
@@ -1963,32 +1970,40 @@ static int enough(struct r10conf *conf, int ignore)
_enough(conf, 1, ignore);
}
+/**
+ * raid10_error() - RAID10 error handler.
+ * @mddev: affected md device.
+ * @rdev: member device to fail.
+ *
+ * The routine acknowledges &rdev failure and determines new @mddev state.
+ * If it failed, then:
+ * - &MD_BROKEN flag is set in &mddev->flags.
+ * Otherwise, it must be degraded:
+ * - recovery is interrupted.
+ * - &mddev->degraded is bumped.
+
+ * @rdev is marked as &Faulty excluding case when array is failed and
+ * &mddev->fail_last_dev is off.
+ */
static void raid10_error(struct mddev *mddev, struct md_rdev *rdev)
{
char b[BDEVNAME_SIZE];
struct r10conf *conf = mddev->private;
unsigned long flags;
- /*
- * If it is not operational, then we have already marked it as dead
- * else if it is the last working disks with "fail_last_dev == false",
- * ignore the error, let the next level up know.
- * else mark the drive as failed
- */
spin_lock_irqsave(&conf->device_lock, flags);
- if (test_bit(In_sync, &rdev->flags) && !mddev->fail_last_dev
- && !enough(conf, rdev->raid_disk)) {
- /*
- * Don't fail the drive, just return an IO error.
- */
- spin_unlock_irqrestore(&conf->device_lock, flags);
- return;
+
+ if (test_bit(In_sync, &rdev->flags) && !enough(conf, rdev->raid_disk)) {
+ set_bit(MD_BROKEN, &mddev->flags);
+
+ if (!mddev->fail_last_dev) {
+ spin_unlock_irqrestore(&conf->device_lock, flags);
+ return;
+ }
}
if (test_and_clear_bit(In_sync, &rdev->flags))
mddev->degraded++;
- /*
- * If recovery is running, make sure it aborts.
- */
+
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
set_bit(Blocked, &rdev->flags);
set_bit(Faulty, &rdev->flags);
@@ -2144,8 +2159,6 @@ static int raid10_add_disk(struct mddev *mddev, struct md_rdev *rdev)
rcu_assign_pointer(p->rdev, rdev);
break;
}
- if (mddev->queue && blk_queue_discard(bdev_get_queue(rdev->bdev)))
- blk_queue_flag_set(QUEUE_FLAG_DISCARD, mddev->queue);
print_conf(conf);
return err;
@@ -4069,7 +4082,6 @@ static int raid10_run(struct mddev *mddev)
sector_t size;
sector_t min_offset_diff = 0;
int first = 1;
- bool discard_supported = false;
if (mddev_init_writes_pending(mddev) < 0)
return -ENOMEM;
@@ -4140,20 +4152,9 @@ static int raid10_run(struct mddev *mddev)
rdev->data_offset << 9);
disk->head_position = 0;
-
- if (blk_queue_discard(bdev_get_queue(rdev->bdev)))
- discard_supported = true;
first = 0;
}
- if (mddev->queue) {
- if (discard_supported)
- blk_queue_flag_set(QUEUE_FLAG_DISCARD,
- mddev->queue);
- else
- blk_queue_flag_clear(QUEUE_FLAG_DISCARD,
- mddev->queue);
- }
/* need to check that every block has at least one working mirror */
if (!enough(conf, -1)) {
pr_err("md/raid10:%s: not enough operational mirrors.\n",
diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c
index a7d50ff9020a..094a4042589e 100644
--- a/drivers/md/raid5-cache.c
+++ b/drivers/md/raid5-cache.c
@@ -1318,7 +1318,7 @@ static void r5l_write_super_and_discard_space(struct r5l_log *log,
r5l_write_super(log, end);
- if (!blk_queue_discard(bdev_get_queue(bdev)))
+ if (!bdev_max_discard_sectors(bdev))
return;
mddev = log->rdev->mddev;
@@ -1344,14 +1344,14 @@ static void r5l_write_super_and_discard_space(struct r5l_log *log,
if (log->last_checkpoint < end) {
blkdev_issue_discard(bdev,
log->last_checkpoint + log->rdev->data_offset,
- end - log->last_checkpoint, GFP_NOIO, 0);
+ end - log->last_checkpoint, GFP_NOIO);
} else {
blkdev_issue_discard(bdev,
log->last_checkpoint + log->rdev->data_offset,
log->device_size - log->last_checkpoint,
- GFP_NOIO, 0);
+ GFP_NOIO);
blkdev_issue_discard(bdev, log->rdev->data_offset, end,
- GFP_NOIO, 0);
+ GFP_NOIO);
}
}
diff --git a/drivers/md/raid5-ppl.c b/drivers/md/raid5-ppl.c
index d3962d92df18..55d065a87b89 100644
--- a/drivers/md/raid5-ppl.c
+++ b/drivers/md/raid5-ppl.c
@@ -883,7 +883,9 @@ static int ppl_recover_entry(struct ppl_log *log, struct ppl_header_entry *e,
(unsigned long long)r_sector, dd_idx,
(unsigned long long)sector);
- rdev = conf->disks[dd_idx].rdev;
+ /* Array has not started so rcu dereference is safe */
+ rdev = rcu_dereference_protected(
+ conf->disks[dd_idx].rdev, 1);
if (!rdev || (!test_bit(In_sync, &rdev->flags) &&
sector >= rdev->recovery_offset)) {
pr_debug("%s:%*s data member disk %d missing\n",
@@ -934,7 +936,10 @@ static int ppl_recover_entry(struct ppl_log *log, struct ppl_header_entry *e,
parity_sector = raid5_compute_sector(conf, r_sector_first + i,
0, &disk, &sh);
BUG_ON(sh.pd_idx != le32_to_cpu(e->parity_disk));
- parity_rdev = conf->disks[sh.pd_idx].rdev;
+
+ /* Array has not started so rcu dereference is safe */
+ parity_rdev = rcu_dereference_protected(
+ conf->disks[sh.pd_idx].rdev, 1);
BUG_ON(parity_rdev->bdev->bd_dev != log->rdev->bdev->bd_dev);
pr_debug("%s:%*s write parity at sector %llu, disk %s\n",
@@ -1404,7 +1409,9 @@ int ppl_init_log(struct r5conf *conf)
for (i = 0; i < ppl_conf->count; i++) {
struct ppl_log *log = &ppl_conf->child_logs[i];
- struct md_rdev *rdev = conf->disks[i].rdev;
+ /* Array has not started so rcu dereference is safe */
+ struct md_rdev *rdev =
+ rcu_dereference_protected(conf->disks[i].rdev, 1);
mutex_init(&log->io_mutex);
spin_lock_init(&log->io_list_lock);
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 351d341a1ffa..39038fa8b1c8 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -79,18 +79,21 @@ static inline int stripe_hash_locks_hash(struct r5conf *conf, sector_t sect)
}
static inline void lock_device_hash_lock(struct r5conf *conf, int hash)
+ __acquires(&conf->device_lock)
{
spin_lock_irq(conf->hash_locks + hash);
spin_lock(&conf->device_lock);
}
static inline void unlock_device_hash_lock(struct r5conf *conf, int hash)
+ __releases(&conf->device_lock)
{
spin_unlock(&conf->device_lock);
spin_unlock_irq(conf->hash_locks + hash);
}
static inline void lock_all_device_hash_locks_irq(struct r5conf *conf)
+ __acquires(&conf->device_lock)
{
int i;
spin_lock_irq(conf->hash_locks);
@@ -100,6 +103,7 @@ static inline void lock_all_device_hash_locks_irq(struct r5conf *conf)
}
static inline void unlock_all_device_hash_locks_irq(struct r5conf *conf)
+ __releases(&conf->device_lock)
{
int i;
spin_unlock(&conf->device_lock);
@@ -164,6 +168,7 @@ static bool stripe_is_lowprio(struct stripe_head *sh)
}
static void raid5_wakeup_stripe_thread(struct stripe_head *sh)
+ __must_hold(&sh->raid_conf->device_lock)
{
struct r5conf *conf = sh->raid_conf;
struct r5worker_group *group;
@@ -211,6 +216,7 @@ static void raid5_wakeup_stripe_thread(struct stripe_head *sh)
static void do_release_stripe(struct r5conf *conf, struct stripe_head *sh,
struct list_head *temp_inactive_list)
+ __must_hold(&conf->device_lock)
{
int i;
int injournal = 0; /* number of date pages with R5_InJournal */
@@ -296,6 +302,7 @@ static void do_release_stripe(struct r5conf *conf, struct stripe_head *sh,
static void __release_stripe(struct r5conf *conf, struct stripe_head *sh,
struct list_head *temp_inactive_list)
+ __must_hold(&conf->device_lock)
{
if (atomic_dec_and_test(&sh->count))
do_release_stripe(conf, sh, temp_inactive_list);
@@ -350,9 +357,9 @@ static void release_inactive_stripe_list(struct r5conf *conf,
}
}
-/* should hold conf->device_lock already */
static int release_stripe_list(struct r5conf *conf,
struct list_head *temp_inactive_list)
+ __must_hold(&conf->device_lock)
{
struct stripe_head *sh, *t;
int count = 0;
@@ -629,6 +636,10 @@ static struct stripe_head *__find_stripe(struct r5conf *conf, sector_t sector,
* This is because some failed devices may only affect one
* of the two sections, and some non-in_sync devices may
* be insync in the section most affected by failed devices.
+ *
+ * Most calls to this function hold &conf->device_lock. Calls
+ * in raid5_run() do not require the lock as no other threads
+ * have been started yet.
*/
int raid5_calc_degraded(struct r5conf *conf)
{
@@ -686,17 +697,17 @@ int raid5_calc_degraded(struct r5conf *conf)
return degraded;
}
-static int has_failed(struct r5conf *conf)
+static bool has_failed(struct r5conf *conf)
{
- int degraded;
+ int degraded = conf->mddev->degraded;
- if (conf->mddev->reshape_position == MaxSector)
- return conf->mddev->degraded > conf->max_degraded;
+ if (test_bit(MD_BROKEN, &conf->mddev->flags))
+ return true;
- degraded = raid5_calc_degraded(conf);
- if (degraded > conf->max_degraded)
- return 1;
- return 0;
+ if (conf->mddev->reshape_position != MaxSector)
+ degraded = raid5_calc_degraded(conf);
+
+ return degraded > conf->max_degraded;
}
struct stripe_head *
@@ -2648,6 +2659,28 @@ static void shrink_stripes(struct r5conf *conf)
conf->slab_cache = NULL;
}
+/*
+ * This helper wraps rcu_dereference_protected() and can be used when
+ * it is known that the nr_pending of the rdev is elevated.
+ */
+static struct md_rdev *rdev_pend_deref(struct md_rdev __rcu *rdev)
+{
+ return rcu_dereference_protected(rdev,
+ atomic_read(&rcu_access_pointer(rdev)->nr_pending));
+}
+
+/*
+ * This helper wraps rcu_dereference_protected() and should be used
+ * when it is known that the mddev_lock() is held. This is safe
+ * seeing raid5_remove_disk() has the same lock held.
+ */
+static struct md_rdev *rdev_mdlock_deref(struct mddev *mddev,
+ struct md_rdev __rcu *rdev)
+{
+ return rcu_dereference_protected(rdev,
+ lockdep_is_held(&mddev->reconfig_mutex));
+}
+
static void raid5_end_read_request(struct bio * bi)
{
struct stripe_head *sh = bi->bi_private;
@@ -2674,9 +2707,9 @@ static void raid5_end_read_request(struct bio * bi)
* In that case it moved down to 'rdev'.
* rdev is not removed until all requests are finished.
*/
- rdev = conf->disks[i].replacement;
+ rdev = rdev_pend_deref(conf->disks[i].replacement);
if (!rdev)
- rdev = conf->disks[i].rdev;
+ rdev = rdev_pend_deref(conf->disks[i].rdev);
if (use_new_offset(conf, sh))
s = sh->sector + rdev->new_data_offset;
@@ -2790,11 +2823,11 @@ static void raid5_end_write_request(struct bio *bi)
for (i = 0 ; i < disks; i++) {
if (bi == &sh->dev[i].req) {
- rdev = conf->disks[i].rdev;
+ rdev = rdev_pend_deref(conf->disks[i].rdev);
break;
}
if (bi == &sh->dev[i].rreq) {
- rdev = conf->disks[i].replacement;
+ rdev = rdev_pend_deref(conf->disks[i].replacement);
if (rdev)
replacement = 1;
else
@@ -2802,7 +2835,7 @@ static void raid5_end_write_request(struct bio *bi)
* replaced it. rdev is not removed
* until all requests are finished.
*/
- rdev = conf->disks[i].rdev;
+ rdev = rdev_pend_deref(conf->disks[i].rdev);
break;
}
}
@@ -2863,34 +2896,31 @@ static void raid5_error(struct mddev *mddev, struct md_rdev *rdev)
unsigned long flags;
pr_debug("raid456: error called\n");
+ pr_crit("md/raid:%s: Disk failure on %s, disabling device.\n",
+ mdname(mddev), bdevname(rdev->bdev, b));
+
spin_lock_irqsave(&conf->device_lock, flags);
+ set_bit(Faulty, &rdev->flags);
+ clear_bit(In_sync, &rdev->flags);
+ mddev->degraded = raid5_calc_degraded(conf);
- if (test_bit(In_sync, &rdev->flags) &&
- mddev->degraded == conf->max_degraded) {
- /*
- * Don't allow to achieve failed state
- * Don't try to recover this device
- */
+ if (has_failed(conf)) {
+ set_bit(MD_BROKEN, &conf->mddev->flags);
conf->recovery_disabled = mddev->recovery_disabled;
- spin_unlock_irqrestore(&conf->device_lock, flags);
- return;
+
+ pr_crit("md/raid:%s: Cannot continue operation (%d/%d failed).\n",
+ mdname(mddev), mddev->degraded, conf->raid_disks);
+ } else {
+ pr_crit("md/raid:%s: Operation continuing on %d devices.\n",
+ mdname(mddev), conf->raid_disks - mddev->degraded);
}
- set_bit(Faulty, &rdev->flags);
- clear_bit(In_sync, &rdev->flags);
- mddev->degraded = raid5_calc_degraded(conf);
spin_unlock_irqrestore(&conf->device_lock, flags);
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
set_bit(Blocked, &rdev->flags);
set_mask_bits(&mddev->sb_flags, 0,
BIT(MD_SB_CHANGE_DEVS) | BIT(MD_SB_CHANGE_PENDING));
- pr_crit("md/raid:%s: Disk failure on %s, disabling device.\n"
- "md/raid:%s: Operation continuing on %d devices.\n",
- mdname(mddev),
- bdevname(rdev->bdev, b),
- mdname(mddev),
- conf->raid_disks - mddev->degraded);
r5c_update_on_rdev_error(mddev, rdev);
}
@@ -5213,23 +5243,23 @@ finish:
struct r5dev *dev = &sh->dev[i];
if (test_and_clear_bit(R5_WriteError, &dev->flags)) {
/* We own a safe reference to the rdev */
- rdev = conf->disks[i].rdev;
+ rdev = rdev_pend_deref(conf->disks[i].rdev);
if (!rdev_set_badblocks(rdev, sh->sector,
RAID5_STRIPE_SECTORS(conf), 0))
md_error(conf->mddev, rdev);
rdev_dec_pending(rdev, conf->mddev);
}
if (test_and_clear_bit(R5_MadeGood, &dev->flags)) {
- rdev = conf->disks[i].rdev;
+ rdev = rdev_pend_deref(conf->disks[i].rdev);
rdev_clear_badblocks(rdev, sh->sector,
RAID5_STRIPE_SECTORS(conf), 0);
rdev_dec_pending(rdev, conf->mddev);
}
if (test_and_clear_bit(R5_MadeGoodRepl, &dev->flags)) {
- rdev = conf->disks[i].replacement;
+ rdev = rdev_pend_deref(conf->disks[i].replacement);
if (!rdev)
/* rdev have been moved down */
- rdev = conf->disks[i].rdev;
+ rdev = rdev_pend_deref(conf->disks[i].rdev);
rdev_clear_badblocks(rdev, sh->sector,
RAID5_STRIPE_SECTORS(conf), 0);
rdev_dec_pending(rdev, conf->mddev);
@@ -5256,6 +5286,7 @@ finish:
}
static void raid5_activate_delayed(struct r5conf *conf)
+ __must_hold(&conf->device_lock)
{
if (atomic_read(&conf->preread_active_stripes) < IO_THRESHOLD) {
while (!list_empty(&conf->delayed_list)) {
@@ -5273,9 +5304,9 @@ static void raid5_activate_delayed(struct r5conf *conf)
}
static void activate_bit_delay(struct r5conf *conf,
- struct list_head *temp_inactive_list)
+ struct list_head *temp_inactive_list)
+ __must_hold(&conf->device_lock)
{
- /* device_lock is held */
struct list_head head;
list_add(&head, &conf->bitmap_list);
list_del_init(&conf->bitmap_list);
@@ -5500,6 +5531,7 @@ static struct bio *chunk_aligned_read(struct mddev *mddev, struct bio *raid_bio)
* handle_list.
*/
static struct stripe_head *__get_priority_stripe(struct r5conf *conf, int group)
+ __must_hold(&conf->device_lock)
{
struct stripe_head *sh, *tmp;
struct list_head *handle_list = NULL;
@@ -6288,7 +6320,7 @@ static inline sector_t raid5_sync_request(struct mddev *mddev, sector_t sector_n
*/
rcu_read_lock();
for (i = 0; i < conf->raid_disks; i++) {
- struct md_rdev *rdev = READ_ONCE(conf->disks[i].rdev);
+ struct md_rdev *rdev = rcu_dereference(conf->disks[i].rdev);
if (rdev == NULL || test_bit(Faulty, &rdev->flags))
still_degraded = 1;
@@ -6371,8 +6403,7 @@ static int retry_aligned_read(struct r5conf *conf, struct bio *raid_bio,
static int handle_active_stripes(struct r5conf *conf, int group,
struct r5worker *worker,
struct list_head *temp_inactive_list)
- __releases(&conf->device_lock)
- __acquires(&conf->device_lock)
+ __must_hold(&conf->device_lock)
{
struct stripe_head *batch[MAX_STRIPE_BATCH], *sh;
int i, batch_size = 0, hash;
@@ -7166,7 +7197,7 @@ static struct r5conf *setup_conf(struct mddev *mddev)
int i;
int group_cnt;
struct r5worker_group *new_group;
- int ret;
+ int ret = -ENOMEM;
if (mddev->new_level != 5
&& mddev->new_level != 4
@@ -7225,6 +7256,7 @@ static struct r5conf *setup_conf(struct mddev *mddev)
spin_lock_init(&conf->device_lock);
seqcount_spinlock_init(&conf->gen_lock, &conf->device_lock);
mutex_init(&conf->cache_size_mutex);
+
init_waitqueue_head(&conf->wait_for_quiescent);
init_waitqueue_head(&conf->wait_for_stripe);
init_waitqueue_head(&conf->wait_for_overlap);
@@ -7242,7 +7274,7 @@ static struct r5conf *setup_conf(struct mddev *mddev)
rdev_for_each(rdev, mddev) {
if (test_bit(Journal, &rdev->flags))
continue;
- if (blk_queue_nonrot(bdev_get_queue(rdev->bdev))) {
+ if (bdev_nonrot(rdev->bdev)) {
conf->batch_bio_dispatch = false;
break;
}
@@ -7302,11 +7334,13 @@ static struct r5conf *setup_conf(struct mddev *mddev)
conf->level = mddev->new_level;
conf->chunk_sectors = mddev->new_chunk_sectors;
- if (raid5_alloc_percpu(conf) != 0)
+ ret = raid5_alloc_percpu(conf);
+ if (ret)
goto abort;
pr_debug("raid456: run(%s) called.\n", mdname(mddev));
+ ret = -EIO;
rdev_for_each(rdev, mddev) {
raid_disk = rdev->raid_disk;
if (raid_disk >= max_disks
@@ -7317,11 +7351,11 @@ static struct r5conf *setup_conf(struct mddev *mddev)
if (test_bit(Replacement, &rdev->flags)) {
if (disk->replacement)
goto abort;
- disk->replacement = rdev;
+ RCU_INIT_POINTER(disk->replacement, rdev);
} else {
if (disk->rdev)
goto abort;
- disk->rdev = rdev;
+ RCU_INIT_POINTER(disk->rdev, rdev);
}
if (test_bit(In_sync, &rdev->flags)) {
@@ -7370,6 +7404,7 @@ static struct r5conf *setup_conf(struct mddev *mddev)
if (grow_stripes(conf, conf->min_nr_stripes)) {
pr_warn("md/raid:%s: couldn't allocate %dkB for buffers\n",
mdname(mddev), memory);
+ ret = -ENOMEM;
goto abort;
} else
pr_debug("md/raid:%s: allocated %dkB\n", mdname(mddev), memory);
@@ -7383,7 +7418,8 @@ static struct r5conf *setup_conf(struct mddev *mddev)
conf->shrinker.count_objects = raid5_cache_count;
conf->shrinker.batch = 128;
conf->shrinker.flags = 0;
- if (register_shrinker(&conf->shrinker)) {
+ ret = register_shrinker(&conf->shrinker);
+ if (ret) {
pr_warn("md/raid:%s: couldn't register shrinker.\n",
mdname(mddev));
goto abort;
@@ -7394,17 +7430,16 @@ static struct r5conf *setup_conf(struct mddev *mddev)
if (!conf->thread) {
pr_warn("md/raid:%s: couldn't allocate thread.\n",
mdname(mddev));
+ ret = -ENOMEM;
goto abort;
}
return conf;
abort:
- if (conf) {
+ if (conf)
free_conf(conf);
- return ERR_PTR(-EIO);
- } else
- return ERR_PTR(-ENOMEM);
+ return ERR_PTR(ret);
}
static int only_parity(int raid_disk, int algo, int raid_disks, int max_degraded)
@@ -7621,17 +7656,18 @@ static int raid5_run(struct mddev *mddev)
for (i = 0; i < conf->raid_disks && conf->previous_raid_disks;
i++) {
- rdev = conf->disks[i].rdev;
+ rdev = rdev_mdlock_deref(mddev, conf->disks[i].rdev);
if (!rdev && conf->disks[i].replacement) {
/* The replacement is all we have yet */
- rdev = conf->disks[i].replacement;
+ rdev = rdev_mdlock_deref(mddev,
+ conf->disks[i].replacement);
conf->disks[i].replacement = NULL;
clear_bit(Replacement, &rdev->flags);
- conf->disks[i].rdev = rdev;
+ rcu_assign_pointer(conf->disks[i].rdev, rdev);
}
if (!rdev)
continue;
- if (conf->disks[i].replacement &&
+ if (rcu_access_pointer(conf->disks[i].replacement) &&
conf->reshape_progress != MaxSector) {
/* replacements and reshape simply do not mix. */
pr_warn("md: cannot handle concurrent replacement and reshape.\n");
@@ -7749,7 +7785,6 @@ static int raid5_run(struct mddev *mddev)
*/
stripe = stripe * PAGE_SIZE;
stripe = roundup_pow_of_two(stripe);
- mddev->queue->limits.discard_alignment = stripe;
mddev->queue->limits.discard_granularity = stripe;
blk_queue_max_write_zeroes_sectors(mddev->queue, 0);
@@ -7776,14 +7811,10 @@ static int raid5_run(struct mddev *mddev)
* A better idea might be to turn DISCARD into WRITE_ZEROES
* requests, as that is required to be safe.
*/
- if (devices_handle_discard_safely &&
- mddev->queue->limits.max_discard_sectors >= (stripe >> 9) &&
- mddev->queue->limits.discard_granularity >= stripe)
- blk_queue_flag_set(QUEUE_FLAG_DISCARD,
- mddev->queue);
- else
- blk_queue_flag_clear(QUEUE_FLAG_DISCARD,
- mddev->queue);
+ if (!devices_handle_discard_safely ||
+ mddev->queue->limits.max_discard_sectors < (stripe >> 9) ||
+ mddev->queue->limits.discard_granularity < stripe)
+ blk_queue_max_discard_sectors(mddev->queue, 0);
blk_queue_max_hw_sectors(mddev->queue, UINT_MAX);
}
@@ -7832,8 +7863,8 @@ static void raid5_status(struct seq_file *seq, struct mddev *mddev)
static void print_raid5_conf (struct r5conf *conf)
{
+ struct md_rdev *rdev;
int i;
- struct disk_info *tmp;
pr_debug("RAID conf printout:\n");
if (!conf) {
@@ -7844,50 +7875,54 @@ static void print_raid5_conf (struct r5conf *conf)
conf->raid_disks,
conf->raid_disks - conf->mddev->degraded);
+ rcu_read_lock();
for (i = 0; i < conf->raid_disks; i++) {
char b[BDEVNAME_SIZE];
- tmp = conf->disks + i;
- if (tmp->rdev)
+ rdev = rcu_dereference(conf->disks[i].rdev);
+ if (rdev)
pr_debug(" disk %d, o:%d, dev:%s\n",
- i, !test_bit(Faulty, &tmp->rdev->flags),
- bdevname(tmp->rdev->bdev, b));
+ i, !test_bit(Faulty, &rdev->flags),
+ bdevname(rdev->bdev, b));
}
+ rcu_read_unlock();
}
static int raid5_spare_active(struct mddev *mddev)
{
int i;
struct r5conf *conf = mddev->private;
- struct disk_info *tmp;
+ struct md_rdev *rdev, *replacement;
int count = 0;
unsigned long flags;
for (i = 0; i < conf->raid_disks; i++) {
- tmp = conf->disks + i;
- if (tmp->replacement
- && tmp->replacement->recovery_offset == MaxSector
- && !test_bit(Faulty, &tmp->replacement->flags)
- && !test_and_set_bit(In_sync, &tmp->replacement->flags)) {
+ rdev = rdev_mdlock_deref(mddev, conf->disks[i].rdev);
+ replacement = rdev_mdlock_deref(mddev,
+ conf->disks[i].replacement);
+ if (replacement
+ && replacement->recovery_offset == MaxSector
+ && !test_bit(Faulty, &replacement->flags)
+ && !test_and_set_bit(In_sync, &replacement->flags)) {
/* Replacement has just become active. */
- if (!tmp->rdev
- || !test_and_clear_bit(In_sync, &tmp->rdev->flags))
+ if (!rdev
+ || !test_and_clear_bit(In_sync, &rdev->flags))
count++;
- if (tmp->rdev) {
+ if (rdev) {
/* Replaced device not technically faulty,
* but we need to be sure it gets removed
* and never re-added.
*/
- set_bit(Faulty, &tmp->rdev->flags);
+ set_bit(Faulty, &rdev->flags);
sysfs_notify_dirent_safe(
- tmp->rdev->sysfs_state);
+ rdev->sysfs_state);
}
- sysfs_notify_dirent_safe(tmp->replacement->sysfs_state);
- } else if (tmp->rdev
- && tmp->rdev->recovery_offset == MaxSector
- && !test_bit(Faulty, &tmp->rdev->flags)
- && !test_and_set_bit(In_sync, &tmp->rdev->flags)) {
+ sysfs_notify_dirent_safe(replacement->sysfs_state);
+ } else if (rdev
+ && rdev->recovery_offset == MaxSector
+ && !test_bit(Faulty, &rdev->flags)
+ && !test_and_set_bit(In_sync, &rdev->flags)) {
count++;
- sysfs_notify_dirent_safe(tmp->rdev->sysfs_state);
+ sysfs_notify_dirent_safe(rdev->sysfs_state);
}
}
spin_lock_irqsave(&conf->device_lock, flags);
@@ -7902,8 +7937,9 @@ static int raid5_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
struct r5conf *conf = mddev->private;
int err = 0;
int number = rdev->raid_disk;
- struct md_rdev **rdevp;
+ struct md_rdev __rcu **rdevp;
struct disk_info *p = conf->disks + number;
+ struct md_rdev *tmp;
print_raid5_conf(conf);
if (test_bit(Journal, &rdev->flags) && conf->log) {
@@ -7921,9 +7957,9 @@ static int raid5_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
log_exit(conf);
return 0;
}
- if (rdev == p->rdev)
+ if (rdev == rcu_access_pointer(p->rdev))
rdevp = &p->rdev;
- else if (rdev == p->replacement)
+ else if (rdev == rcu_access_pointer(p->replacement))
rdevp = &p->replacement;
else
return 0;
@@ -7943,18 +7979,20 @@ static int raid5_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
if (!test_bit(Faulty, &rdev->flags) &&
mddev->recovery_disabled != conf->recovery_disabled &&
!has_failed(conf) &&
- (!p->replacement || p->replacement == rdev) &&
+ (!rcu_access_pointer(p->replacement) ||
+ rcu_access_pointer(p->replacement) == rdev) &&
number < conf->raid_disks) {
err = -EBUSY;
goto abort;
}
*rdevp = NULL;
if (!test_bit(RemoveSynchronized, &rdev->flags)) {
+ lockdep_assert_held(&mddev->reconfig_mutex);
synchronize_rcu();
if (atomic_read(&rdev->nr_pending)) {
/* lost the race, try later */
err = -EBUSY;
- *rdevp = rdev;
+ rcu_assign_pointer(*rdevp, rdev);
}
}
if (!err) {
@@ -7962,17 +8000,19 @@ static int raid5_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
if (err)
goto abort;
}
- if (p->replacement) {
+
+ tmp = rcu_access_pointer(p->replacement);
+ if (tmp) {
/* We must have just cleared 'rdev' */
- p->rdev = p->replacement;
- clear_bit(Replacement, &p->replacement->flags);
+ rcu_assign_pointer(p->rdev, tmp);
+ clear_bit(Replacement, &tmp->flags);
smp_mb(); /* Make sure other CPUs may see both as identical
* but will never see neither - if they are careful
*/
- p->replacement = NULL;
+ rcu_assign_pointer(p->replacement, NULL);
if (!err)
- err = log_modify(conf, p->rdev, true);
+ err = log_modify(conf, tmp, true);
}
clear_bit(WantReplacement, &rdev->flags);
@@ -7988,6 +8028,7 @@ static int raid5_add_disk(struct mddev *mddev, struct md_rdev *rdev)
int ret, err = -EEXIST;
int disk;
struct disk_info *p;
+ struct md_rdev *tmp;
int first = 0;
int last = conf->raid_disks - 1;
@@ -8045,7 +8086,8 @@ static int raid5_add_disk(struct mddev *mddev, struct md_rdev *rdev)
}
for (disk = first; disk <= last; disk++) {
p = conf->disks + disk;
- if (test_bit(WantReplacement, &p->rdev->flags) &&
+ tmp = rdev_mdlock_deref(mddev, p->rdev);
+ if (test_bit(WantReplacement, &tmp->flags) &&
p->replacement == NULL) {
clear_bit(In_sync, &rdev->flags);
set_bit(Replacement, &rdev->flags);
@@ -8336,6 +8378,7 @@ static void end_reshape(struct r5conf *conf)
static void raid5_finish_reshape(struct mddev *mddev)
{
struct r5conf *conf = mddev->private;
+ struct md_rdev *rdev;
if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
@@ -8347,10 +8390,12 @@ static void raid5_finish_reshape(struct mddev *mddev)
for (d = conf->raid_disks ;
d < conf->raid_disks - mddev->delta_disks;
d++) {
- struct md_rdev *rdev = conf->disks[d].rdev;
+ rdev = rdev_mdlock_deref(mddev,
+ conf->disks[d].rdev);
if (rdev)
clear_bit(In_sync, &rdev->flags);
- rdev = conf->disks[d].replacement;
+ rdev = rdev_mdlock_deref(mddev,
+ conf->disks[d].replacement);
if (rdev)
clear_bit(In_sync, &rdev->flags);
}
diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h
index 9e8486a9e445..638d29863503 100644
--- a/drivers/md/raid5.h
+++ b/drivers/md/raid5.h
@@ -473,7 +473,8 @@ enum {
*/
struct disk_info {
- struct md_rdev *rdev, *replacement;
+ struct md_rdev __rcu *rdev;
+ struct md_rdev __rcu *replacement;
struct page *extra_page; /* extra page to use in prexor */
};
@@ -560,6 +561,16 @@ struct r5pending_data {
struct bio_list bios;
};
+struct raid5_percpu {
+ struct page *spare_page; /* Used when checking P/Q in raid6 */
+ void *scribble; /* space for constructing buffer
+ * lists and performing address
+ * conversions
+ */
+ int scribble_obj_size;
+ local_lock_t lock;
+};
+
struct r5conf {
struct hlist_head *stripe_hashtbl;
/* only protect corresponding hash list and inactive_list */
@@ -635,15 +646,7 @@ struct r5conf {
*/
int recovery_disabled;
/* per cpu variables */
- struct raid5_percpu {
- struct page *spare_page; /* Used when checking P/Q in raid6 */
- void *scribble; /* space for constructing buffer
- * lists and performing address
- * conversions
- */
- int scribble_obj_size;
- local_lock_t lock;
- } __percpu *percpu;
+ struct raid5_percpu __percpu *percpu;
int scribble_disks;
int scribble_sectors;
struct hlist_node node;
diff --git a/drivers/memory/renesas-rpc-if.c b/drivers/memory/renesas-rpc-if.c
index 2e545f473cc6..019a0822bde0 100644
--- a/drivers/memory/renesas-rpc-if.c
+++ b/drivers/memory/renesas-rpc-if.c
@@ -164,25 +164,39 @@ static const struct regmap_access_table rpcif_volatile_table = {
/*
- * Custom accessor functions to ensure SMRDR0 and SMWDR0 are always accessed
- * with proper width. Requires SMENR_SPIDE to be correctly set before!
+ * Custom accessor functions to ensure SM[RW]DR[01] are always accessed with
+ * proper width. Requires rpcif.xfer_size to be correctly set before!
*/
static int rpcif_reg_read(void *context, unsigned int reg, unsigned int *val)
{
struct rpcif *rpc = context;
- if (reg == RPCIF_SMRDR0 || reg == RPCIF_SMWDR0) {
- u32 spide = readl(rpc->base + RPCIF_SMENR) & RPCIF_SMENR_SPIDE(0xF);
-
- if (spide == 0x8) {
+ switch (reg) {
+ case RPCIF_SMRDR0:
+ case RPCIF_SMWDR0:
+ switch (rpc->xfer_size) {
+ case 1:
*val = readb(rpc->base + reg);
return 0;
- } else if (spide == 0xC) {
+
+ case 2:
*val = readw(rpc->base + reg);
return 0;
- } else if (spide != 0xF) {
+
+ case 4:
+ case 8:
+ *val = readl(rpc->base + reg);
+ return 0;
+
+ default:
return -EILSEQ;
}
+
+ case RPCIF_SMRDR1:
+ case RPCIF_SMWDR1:
+ if (rpc->xfer_size != 8)
+ return -EILSEQ;
+ break;
}
*val = readl(rpc->base + reg);
@@ -193,18 +207,34 @@ static int rpcif_reg_write(void *context, unsigned int reg, unsigned int val)
{
struct rpcif *rpc = context;
- if (reg == RPCIF_SMRDR0 || reg == RPCIF_SMWDR0) {
- u32 spide = readl(rpc->base + RPCIF_SMENR) & RPCIF_SMENR_SPIDE(0xF);
-
- if (spide == 0x8) {
+ switch (reg) {
+ case RPCIF_SMWDR0:
+ switch (rpc->xfer_size) {
+ case 1:
writeb(val, rpc->base + reg);
return 0;
- } else if (spide == 0xC) {
+
+ case 2:
writew(val, rpc->base + reg);
return 0;
- } else if (spide != 0xF) {
+
+ case 4:
+ case 8:
+ writel(val, rpc->base + reg);
+ return 0;
+
+ default:
return -EILSEQ;
}
+
+ case RPCIF_SMWDR1:
+ if (rpc->xfer_size != 8)
+ return -EILSEQ;
+ break;
+
+ case RPCIF_SMRDR0:
+ case RPCIF_SMRDR1:
+ return -EPERM;
}
writel(val, rpc->base + reg);
@@ -469,6 +499,7 @@ int rpcif_manual_xfer(struct rpcif *rpc)
smenr |= RPCIF_SMENR_SPIDE(rpcif_bits_set(rpc, nbytes));
regmap_write(rpc->regmap, RPCIF_SMENR, smenr);
+ rpc->xfer_size = nbytes;
memcpy(data, rpc->buffer + pos, nbytes);
if (nbytes == 8) {
@@ -533,6 +564,7 @@ int rpcif_manual_xfer(struct rpcif *rpc)
regmap_write(rpc->regmap, RPCIF_SMENR, smenr);
regmap_write(rpc->regmap, RPCIF_SMCR,
rpc->smcr | RPCIF_SMCR_SPIE);
+ rpc->xfer_size = nbytes;
ret = wait_msg_xfer_end(rpc);
if (ret)
goto err_out;
diff --git a/drivers/misc/eeprom/at25.c b/drivers/misc/eeprom/at25.c
index 91f96abbb3f9..8d169a35cf13 100644
--- a/drivers/misc/eeprom/at25.c
+++ b/drivers/misc/eeprom/at25.c
@@ -31,6 +31,8 @@
*/
#define FM25_SN_LEN 8 /* serial number length */
+#define EE_MAXADDRLEN 3 /* 24 bit addresses, up to 2 MBytes */
+
struct at25_data {
struct spi_eeprom chip;
struct spi_device *spi;
@@ -39,6 +41,7 @@ struct at25_data {
struct nvmem_config nvmem_config;
struct nvmem_device *nvmem;
u8 sernum[FM25_SN_LEN];
+ u8 command[EE_MAXADDRLEN + 1];
};
#define AT25_WREN 0x06 /* latch the write enable */
@@ -61,8 +64,6 @@ struct at25_data {
#define FM25_ID_LEN 9 /* ID length */
-#define EE_MAXADDRLEN 3 /* 24 bit addresses, up to 2 MBytes */
-
/*
* Specs often allow 5ms for a page write, sometimes 20ms;
* it's important to recover from write timeouts.
@@ -78,7 +79,6 @@ static int at25_ee_read(void *priv, unsigned int offset,
{
struct at25_data *at25 = priv;
char *buf = val;
- u8 command[EE_MAXADDRLEN + 1];
u8 *cp;
ssize_t status;
struct spi_transfer t[2];
@@ -92,12 +92,15 @@ static int at25_ee_read(void *priv, unsigned int offset,
if (unlikely(!count))
return -EINVAL;
- cp = command;
+ cp = at25->command;
instr = AT25_READ;
if (at25->chip.flags & EE_INSTR_BIT3_IS_ADDR)
if (offset >= BIT(at25->addrlen * 8))
instr |= AT25_INSTR_BIT3;
+
+ mutex_lock(&at25->lock);
+
*cp++ = instr;
/* 8/16/24-bit address is written MSB first */
@@ -116,7 +119,7 @@ static int at25_ee_read(void *priv, unsigned int offset,
spi_message_init(&m);
memset(t, 0, sizeof(t));
- t[0].tx_buf = command;
+ t[0].tx_buf = at25->command;
t[0].len = at25->addrlen + 1;
spi_message_add_tail(&t[0], &m);
@@ -124,8 +127,6 @@ static int at25_ee_read(void *priv, unsigned int offset,
t[1].len = count;
spi_message_add_tail(&t[1], &m);
- mutex_lock(&at25->lock);
-
/*
* Read it all at once.
*
@@ -152,7 +153,7 @@ static int fm25_aux_read(struct at25_data *at25, u8 *buf, uint8_t command,
spi_message_init(&m);
memset(t, 0, sizeof(t));
- t[0].tx_buf = &command;
+ t[0].tx_buf = at25->command;
t[0].len = 1;
spi_message_add_tail(&t[0], &m);
@@ -162,6 +163,8 @@ static int fm25_aux_read(struct at25_data *at25, u8 *buf, uint8_t command,
mutex_lock(&at25->lock);
+ at25->command[0] = command;
+
status = spi_sync(at25->spi, &m);
dev_dbg(&at25->spi->dev, "read %d aux bytes --> %d\n", len, status);
diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c
index e7ea45386c22..efa95dc4fc4e 100644
--- a/drivers/mmc/core/mmc.c
+++ b/drivers/mmc/core/mmc.c
@@ -1384,13 +1384,17 @@ static int mmc_select_hs400es(struct mmc_card *card)
goto out_err;
}
+ /*
+ * Bump to HS timing and frequency. Some cards don't handle
+ * SEND_STATUS reliably at the initial frequency.
+ */
mmc_set_timing(host, MMC_TIMING_MMC_HS);
+ mmc_set_bus_speed(card);
+
err = mmc_switch_status(card, true);
if (err)
goto out_err;
- mmc_set_clock(host, card->ext_csd.hs_max_dtr);
-
/* Switch card to DDR with strobe bit */
val = EXT_CSD_DDR_BUS_WIDTH_8 | EXT_CSD_BUS_WIDTH_STROBE;
err = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL,
@@ -1448,7 +1452,7 @@ out_err:
static int mmc_select_hs200(struct mmc_card *card)
{
struct mmc_host *host = card->host;
- unsigned int old_timing, old_signal_voltage;
+ unsigned int old_timing, old_signal_voltage, old_clock;
int err = -EINVAL;
u8 val;
@@ -1479,8 +1483,17 @@ static int mmc_select_hs200(struct mmc_card *card)
false, true, MMC_CMD_RETRIES);
if (err)
goto err;
+
+ /*
+ * Bump to HS timing and frequency. Some cards don't handle
+ * SEND_STATUS reliably at the initial frequency.
+ * NB: We can't move to full (HS200) speeds until after we've
+ * successfully switched over.
+ */
old_timing = host->ios.timing;
+ old_clock = host->ios.clock;
mmc_set_timing(host, MMC_TIMING_MMC_HS200);
+ mmc_set_clock(card->host, card->ext_csd.hs_max_dtr);
/*
* For HS200, CRC errors are not a reliable way to know the
@@ -1493,8 +1506,10 @@ static int mmc_select_hs200(struct mmc_card *card)
* mmc_select_timing() assumes timing has not changed if
* it is a switch error.
*/
- if (err == -EBADMSG)
+ if (err == -EBADMSG) {
+ mmc_set_clock(host, old_clock);
mmc_set_timing(host, old_timing);
+ }
}
err:
if (err) {
diff --git a/drivers/mmc/core/mmc_ops.c b/drivers/mmc/core/mmc_ops.c
index 180d7e9d3400..81c55bfd6e0c 100644
--- a/drivers/mmc/core/mmc_ops.c
+++ b/drivers/mmc/core/mmc_ops.c
@@ -21,7 +21,7 @@
#define MMC_BKOPS_TIMEOUT_MS (120 * 1000) /* 120s */
#define MMC_SANITIZE_TIMEOUT_MS (240 * 1000) /* 240s */
-#define MMC_OP_COND_PERIOD_US (1 * 1000) /* 1ms */
+#define MMC_OP_COND_PERIOD_US (4 * 1000) /* 4ms */
#define MMC_OP_COND_TIMEOUT_MS 1000 /* 1s */
static const u8 tuning_blk_pattern_4bit[] = {
diff --git a/drivers/mmc/core/queue.c b/drivers/mmc/core/queue.c
index c69b2d9df6f1..a3d446005571 100644
--- a/drivers/mmc/core/queue.c
+++ b/drivers/mmc/core/queue.c
@@ -183,14 +183,13 @@ static void mmc_queue_setup_discard(struct request_queue *q,
if (!max_discard)
return;
- blk_queue_flag_set(QUEUE_FLAG_DISCARD, q);
blk_queue_max_discard_sectors(q, max_discard);
q->limits.discard_granularity = card->pref_erase << 9;
/* granularity must not be greater than max. discard */
if (card->pref_erase > max_discard)
q->limits.discard_granularity = SECTOR_SIZE;
if (mmc_can_secure_erase_trim(card))
- blk_queue_flag_set(QUEUE_FLAG_SECERASE, q);
+ blk_queue_max_secure_erase_sectors(q, max_discard);
}
static unsigned short mmc_get_max_segments(struct mmc_host *host)
diff --git a/drivers/mmc/host/sdhci-msm.c b/drivers/mmc/host/sdhci-msm.c
index 50c71e0ba5e4..ff9f5b63c337 100644
--- a/drivers/mmc/host/sdhci-msm.c
+++ b/drivers/mmc/host/sdhci-msm.c
@@ -17,6 +17,7 @@
#include <linux/regulator/consumer.h>
#include <linux/interconnect.h>
#include <linux/pinctrl/consumer.h>
+#include <linux/reset.h>
#include "sdhci-pltfm.h"
#include "cqhci.h"
@@ -2482,6 +2483,43 @@ static inline void sdhci_msm_get_of_property(struct platform_device *pdev,
of_property_read_u32(node, "qcom,dll-config", &msm_host->dll_config);
}
+static int sdhci_msm_gcc_reset(struct device *dev, struct sdhci_host *host)
+{
+ struct reset_control *reset;
+ int ret = 0;
+
+ reset = reset_control_get_optional_exclusive(dev, NULL);
+ if (IS_ERR(reset))
+ return dev_err_probe(dev, PTR_ERR(reset),
+ "unable to acquire core_reset\n");
+
+ if (!reset)
+ return ret;
+
+ ret = reset_control_assert(reset);
+ if (ret) {
+ reset_control_put(reset);
+ return dev_err_probe(dev, ret, "core_reset assert failed\n");
+ }
+
+ /*
+ * The hardware requirement for delay between assert/deassert
+ * is at least 3-4 sleep clock (32.7KHz) cycles, which comes to
+ * ~125us (4/32768). To be on the safe side add 200us delay.
+ */
+ usleep_range(200, 210);
+
+ ret = reset_control_deassert(reset);
+ if (ret) {
+ reset_control_put(reset);
+ return dev_err_probe(dev, ret, "core_reset deassert failed\n");
+ }
+
+ usleep_range(200, 210);
+ reset_control_put(reset);
+
+ return ret;
+}
static int sdhci_msm_probe(struct platform_device *pdev)
{
@@ -2529,6 +2567,10 @@ static int sdhci_msm_probe(struct platform_device *pdev)
msm_host->saved_tuning_phase = INVALID_TUNING_PHASE;
+ ret = sdhci_msm_gcc_reset(&pdev->dev, host);
+ if (ret)
+ goto pltfm_free;
+
/* Setup SDCC bus voter clock. */
msm_host->bus_clk = devm_clk_get(&pdev->dev, "bus");
if (!IS_ERR(msm_host->bus_clk)) {
diff --git a/drivers/mmc/host/sunxi-mmc.c b/drivers/mmc/host/sunxi-mmc.c
index c62afd212692..46f9e2923d86 100644
--- a/drivers/mmc/host/sunxi-mmc.c
+++ b/drivers/mmc/host/sunxi-mmc.c
@@ -377,8 +377,9 @@ static void sunxi_mmc_init_idma_des(struct sunxi_mmc_host *host,
pdes[i].buf_addr_ptr1 =
cpu_to_le32(sg_dma_address(&data->sg[i]) >>
host->cfg->idma_des_shift);
- pdes[i].buf_addr_ptr2 = cpu_to_le32((u32)next_desc >>
- host->cfg->idma_des_shift);
+ pdes[i].buf_addr_ptr2 =
+ cpu_to_le32(next_desc >>
+ host->cfg->idma_des_shift);
}
pdes[0].config |= cpu_to_le32(SDXC_IDMAC_DES0_FD);
diff --git a/drivers/mtd/mtd_blkdevs.c b/drivers/mtd/mtd_blkdevs.c
index 64d2b093f114..f73172111465 100644
--- a/drivers/mtd/mtd_blkdevs.c
+++ b/drivers/mtd/mtd_blkdevs.c
@@ -377,7 +377,6 @@ int add_mtd_blktrans_dev(struct mtd_blktrans_dev *new)
blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, new->rq);
if (tr->discard) {
- blk_queue_flag_set(QUEUE_FLAG_DISCARD, new->rq);
blk_queue_max_discard_sectors(new->rq, UINT_MAX);
new->rq->limits.discard_granularity = tr->blksize;
}
diff --git a/drivers/mtd/nand/raw/mtk_ecc.c b/drivers/mtd/nand/raw/mtk_ecc.c
index e7df3dac705e..49ab3448b9b1 100644
--- a/drivers/mtd/nand/raw/mtk_ecc.c
+++ b/drivers/mtd/nand/raw/mtk_ecc.c
@@ -43,6 +43,7 @@
struct mtk_ecc_caps {
u32 err_mask;
+ u32 err_shift;
const u8 *ecc_strength;
const u32 *ecc_regs;
u8 num_ecc_strength;
@@ -76,7 +77,7 @@ static const u8 ecc_strength_mt2712[] = {
};
static const u8 ecc_strength_mt7622[] = {
- 4, 6, 8, 10, 12, 14, 16
+ 4, 6, 8, 10, 12
};
enum mtk_ecc_regs {
@@ -221,7 +222,7 @@ void mtk_ecc_get_stats(struct mtk_ecc *ecc, struct mtk_ecc_stats *stats,
for (i = 0; i < sectors; i++) {
offset = (i >> 2) << 2;
err = readl(ecc->regs + ECC_DECENUM0 + offset);
- err = err >> ((i % 4) * 8);
+ err = err >> ((i % 4) * ecc->caps->err_shift);
err &= ecc->caps->err_mask;
if (err == ecc->caps->err_mask) {
/* uncorrectable errors */
@@ -449,6 +450,7 @@ EXPORT_SYMBOL(mtk_ecc_get_parity_bits);
static const struct mtk_ecc_caps mtk_ecc_caps_mt2701 = {
.err_mask = 0x3f,
+ .err_shift = 8,
.ecc_strength = ecc_strength_mt2701,
.ecc_regs = mt2701_ecc_regs,
.num_ecc_strength = 20,
@@ -459,6 +461,7 @@ static const struct mtk_ecc_caps mtk_ecc_caps_mt2701 = {
static const struct mtk_ecc_caps mtk_ecc_caps_mt2712 = {
.err_mask = 0x7f,
+ .err_shift = 8,
.ecc_strength = ecc_strength_mt2712,
.ecc_regs = mt2712_ecc_regs,
.num_ecc_strength = 23,
@@ -468,10 +471,11 @@ static const struct mtk_ecc_caps mtk_ecc_caps_mt2712 = {
};
static const struct mtk_ecc_caps mtk_ecc_caps_mt7622 = {
- .err_mask = 0x3f,
+ .err_mask = 0x1f,
+ .err_shift = 5,
.ecc_strength = ecc_strength_mt7622,
.ecc_regs = mt7622_ecc_regs,
- .num_ecc_strength = 7,
+ .num_ecc_strength = 5,
.ecc_mode_shift = 4,
.parity_bits = 13,
.pg_irq_sel = 0,
diff --git a/drivers/mtd/nand/raw/qcom_nandc.c b/drivers/mtd/nand/raw/qcom_nandc.c
index 1a77542c6d67..048b255faa76 100644
--- a/drivers/mtd/nand/raw/qcom_nandc.c
+++ b/drivers/mtd/nand/raw/qcom_nandc.c
@@ -2651,10 +2651,23 @@ static int qcom_nand_attach_chip(struct nand_chip *chip)
ecc->engine_type = NAND_ECC_ENGINE_TYPE_ON_HOST;
mtd_set_ooblayout(mtd, &qcom_nand_ooblayout_ops);
+ /* Free the initially allocated BAM transaction for reading the ONFI params */
+ if (nandc->props->is_bam)
+ free_bam_transaction(nandc);
nandc->max_cwperpage = max_t(unsigned int, nandc->max_cwperpage,
cwperpage);
+ /* Now allocate the BAM transaction based on updated max_cwperpage */
+ if (nandc->props->is_bam) {
+ nandc->bam_txn = alloc_bam_transaction(nandc);
+ if (!nandc->bam_txn) {
+ dev_err(nandc->dev,
+ "failed to allocate bam transaction\n");
+ return -ENOMEM;
+ }
+ }
+
/*
* DATA_UD_BYTES varies based on whether the read/write command protects
* spare data with ECC too. We protect spare data by default, so we set
@@ -2955,17 +2968,6 @@ static int qcom_nand_host_init_and_register(struct qcom_nand_controller *nandc,
if (ret)
return ret;
- if (nandc->props->is_bam) {
- free_bam_transaction(nandc);
- nandc->bam_txn = alloc_bam_transaction(nandc);
- if (!nandc->bam_txn) {
- dev_err(nandc->dev,
- "failed to allocate bam transaction\n");
- nand_cleanup(chip);
- return -ENOMEM;
- }
- }
-
ret = mtd_device_parse_register(mtd, probes, NULL, NULL, 0);
if (ret)
nand_cleanup(chip);
diff --git a/drivers/mtd/nand/raw/sh_flctl.c b/drivers/mtd/nand/raw/sh_flctl.c
index b85b9c6fcc42..a278829469d6 100644
--- a/drivers/mtd/nand/raw/sh_flctl.c
+++ b/drivers/mtd/nand/raw/sh_flctl.c
@@ -384,7 +384,8 @@ static int flctl_dma_fifo0_transfer(struct sh_flctl *flctl, unsigned long *buf,
dma_addr_t dma_addr;
dma_cookie_t cookie;
uint32_t reg;
- int ret;
+ int ret = 0;
+ unsigned long time_left;
if (dir == DMA_FROM_DEVICE) {
chan = flctl->chan_fifo0_rx;
@@ -425,13 +426,14 @@ static int flctl_dma_fifo0_transfer(struct sh_flctl *flctl, unsigned long *buf,
goto out;
}
- ret =
+ time_left =
wait_for_completion_timeout(&flctl->dma_complete,
msecs_to_jiffies(3000));
- if (ret <= 0) {
+ if (time_left == 0) {
dmaengine_terminate_all(chan);
dev_err(&flctl->pdev->dev, "wait_for_completion_timeout\n");
+ ret = -ETIMEDOUT;
}
out:
@@ -441,7 +443,7 @@ out:
dma_unmap_single(chan->device->dev, dma_addr, len, dir);
- /* ret > 0 is success */
+ /* ret == 0 is success */
return ret;
}
@@ -465,7 +467,7 @@ static void read_fiforeg(struct sh_flctl *flctl, int rlen, int offset)
/* initiate DMA transfer */
if (flctl->chan_fifo0_rx && rlen >= 32 &&
- flctl_dma_fifo0_transfer(flctl, buf, rlen, DMA_FROM_DEVICE) > 0)
+ !flctl_dma_fifo0_transfer(flctl, buf, rlen, DMA_FROM_DEVICE))
goto convert; /* DMA success */
/* do polling transfer */
@@ -524,7 +526,7 @@ static void write_ec_fiforeg(struct sh_flctl *flctl, int rlen,
/* initiate DMA transfer */
if (flctl->chan_fifo0_tx && rlen >= 32 &&
- flctl_dma_fifo0_transfer(flctl, buf, rlen, DMA_TO_DEVICE) > 0)
+ !flctl_dma_fifo0_transfer(flctl, buf, rlen, DMA_TO_DEVICE))
return; /* DMA success */
/* do polling transfer */
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 15eddca7b4b6..38e152548126 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -4027,14 +4027,19 @@ static bool bond_flow_dissect(struct bonding *bond, struct sk_buff *skb, const v
return true;
}
-static u32 bond_ip_hash(u32 hash, struct flow_keys *flow)
+static u32 bond_ip_hash(u32 hash, struct flow_keys *flow, int xmit_policy)
{
hash ^= (__force u32)flow_get_u32_dst(flow) ^
(__force u32)flow_get_u32_src(flow);
hash ^= (hash >> 16);
hash ^= (hash >> 8);
+
/* discard lowest hash bit to deal with the common even ports pattern */
- return hash >> 1;
+ if (xmit_policy == BOND_XMIT_POLICY_LAYER34 ||
+ xmit_policy == BOND_XMIT_POLICY_ENCAP34)
+ return hash >> 1;
+
+ return hash;
}
/* Generate hash based on xmit policy. If @skb is given it is used to linearize
@@ -4064,7 +4069,7 @@ static u32 __bond_xmit_hash(struct bonding *bond, struct sk_buff *skb, const voi
memcpy(&hash, &flow.ports.ports, sizeof(hash));
}
- return bond_ip_hash(hash, &flow);
+ return bond_ip_hash(hash, &flow, bond->params.xmit_policy);
}
/**
@@ -5259,7 +5264,7 @@ static u32 bond_sk_hash_l34(struct sock *sk)
/* L4 */
memcpy(&hash, &flow.ports.ports, sizeof(hash));
/* L3 */
- return bond_ip_hash(hash, &flow);
+ return bond_ip_hash(hash, &flow, BOND_XMIT_POLICY_LAYER34);
}
static struct net_device *__bond_sk_get_lower_dev(struct bonding *bond,
diff --git a/drivers/net/can/grcan.c b/drivers/net/can/grcan.c
index d0c5a7a60daf..5215bd9b2c80 100644
--- a/drivers/net/can/grcan.c
+++ b/drivers/net/can/grcan.c
@@ -241,13 +241,14 @@ struct grcan_device_config {
.rxsize = GRCAN_DEFAULT_BUFFER_SIZE, \
}
-#define GRCAN_TXBUG_SAFE_GRLIB_VERSION 0x4100
+#define GRCAN_TXBUG_SAFE_GRLIB_VERSION 4100
#define GRLIB_VERSION_MASK 0xffff
/* GRCAN private data structure */
struct grcan_priv {
struct can_priv can; /* must be the first member */
struct net_device *dev;
+ struct device *ofdev_dev;
struct napi_struct napi;
struct grcan_registers __iomem *regs; /* ioremap'ed registers */
@@ -921,7 +922,7 @@ static void grcan_free_dma_buffers(struct net_device *dev)
struct grcan_priv *priv = netdev_priv(dev);
struct grcan_dma *dma = &priv->dma;
- dma_free_coherent(&dev->dev, dma->base_size, dma->base_buf,
+ dma_free_coherent(priv->ofdev_dev, dma->base_size, dma->base_buf,
dma->base_handle);
memset(dma, 0, sizeof(*dma));
}
@@ -946,7 +947,7 @@ static int grcan_allocate_dma_buffers(struct net_device *dev,
/* Extra GRCAN_BUFFER_ALIGNMENT to allow for alignment */
dma->base_size = lsize + ssize + GRCAN_BUFFER_ALIGNMENT;
- dma->base_buf = dma_alloc_coherent(&dev->dev,
+ dma->base_buf = dma_alloc_coherent(priv->ofdev_dev,
dma->base_size,
&dma->base_handle,
GFP_KERNEL);
@@ -1102,8 +1103,10 @@ static int grcan_close(struct net_device *dev)
priv->closing = true;
if (priv->need_txbug_workaround) {
+ spin_unlock_irqrestore(&priv->lock, flags);
del_timer_sync(&priv->hang_timer);
del_timer_sync(&priv->rr_timer);
+ spin_lock_irqsave(&priv->lock, flags);
}
netif_stop_queue(dev);
grcan_stop_hardware(dev);
@@ -1122,7 +1125,7 @@ static int grcan_close(struct net_device *dev)
return 0;
}
-static int grcan_transmit_catch_up(struct net_device *dev, int budget)
+static void grcan_transmit_catch_up(struct net_device *dev)
{
struct grcan_priv *priv = netdev_priv(dev);
unsigned long flags;
@@ -1130,7 +1133,7 @@ static int grcan_transmit_catch_up(struct net_device *dev, int budget)
spin_lock_irqsave(&priv->lock, flags);
- work_done = catch_up_echo_skb(dev, budget, true);
+ work_done = catch_up_echo_skb(dev, -1, true);
if (work_done) {
if (!priv->resetting && !priv->closing &&
!(priv->can.ctrlmode & CAN_CTRLMODE_LISTENONLY))
@@ -1144,8 +1147,6 @@ static int grcan_transmit_catch_up(struct net_device *dev, int budget)
}
spin_unlock_irqrestore(&priv->lock, flags);
-
- return work_done;
}
static int grcan_receive(struct net_device *dev, int budget)
@@ -1227,19 +1228,13 @@ static int grcan_poll(struct napi_struct *napi, int budget)
struct net_device *dev = priv->dev;
struct grcan_registers __iomem *regs = priv->regs;
unsigned long flags;
- int tx_work_done, rx_work_done;
- int rx_budget = budget / 2;
- int tx_budget = budget - rx_budget;
+ int work_done;
- /* Half of the budget for receiving messages */
- rx_work_done = grcan_receive(dev, rx_budget);
+ work_done = grcan_receive(dev, budget);
- /* Half of the budget for transmitting messages as that can trigger echo
- * frames being received
- */
- tx_work_done = grcan_transmit_catch_up(dev, tx_budget);
+ grcan_transmit_catch_up(dev);
- if (rx_work_done < rx_budget && tx_work_done < tx_budget) {
+ if (work_done < budget) {
napi_complete(napi);
/* Guarantee no interference with a running reset that otherwise
@@ -1256,7 +1251,7 @@ static int grcan_poll(struct napi_struct *napi, int budget)
spin_unlock_irqrestore(&priv->lock, flags);
}
- return rx_work_done + tx_work_done;
+ return work_done;
}
/* Work tx bug by waiting while for the risky situation to clear. If that fails,
@@ -1587,6 +1582,7 @@ static int grcan_setup_netdev(struct platform_device *ofdev,
memcpy(&priv->config, &grcan_module_config,
sizeof(struct grcan_device_config));
priv->dev = dev;
+ priv->ofdev_dev = &ofdev->dev;
priv->regs = base;
priv->can.bittiming_const = &grcan_bittiming_const;
priv->can.do_set_bittiming = grcan_set_bittiming;
@@ -1639,6 +1635,7 @@ exit_free_candev:
static int grcan_probe(struct platform_device *ofdev)
{
struct device_node *np = ofdev->dev.of_node;
+ struct device_node *sysid_parent;
u32 sysid, ambafreq;
int irq, err;
void __iomem *base;
@@ -1647,10 +1644,15 @@ static int grcan_probe(struct platform_device *ofdev)
/* Compare GRLIB version number with the first that does not
* have the tx bug (see start_xmit)
*/
- err = of_property_read_u32(np, "systemid", &sysid);
- if (!err && ((sysid & GRLIB_VERSION_MASK)
- >= GRCAN_TXBUG_SAFE_GRLIB_VERSION))
- txbug = false;
+ sysid_parent = of_find_node_by_path("/ambapp0");
+ if (sysid_parent) {
+ of_node_get(sysid_parent);
+ err = of_property_read_u32(sysid_parent, "systemid", &sysid);
+ if (!err && ((sysid & GRLIB_VERSION_MASK) >=
+ GRCAN_TXBUG_SAFE_GRLIB_VERSION))
+ txbug = false;
+ of_node_put(sysid_parent);
+ }
err = of_property_read_u32(np, "freq", &ambafreq);
if (err) {
diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c
index b3b5bc1c803b..088bb1bcf1ef 100644
--- a/drivers/net/can/m_can/m_can.c
+++ b/drivers/net/can/m_can/m_can.c
@@ -1495,34 +1495,22 @@ static int m_can_dev_setup(struct m_can_classdev *cdev)
err = can_set_static_ctrlmode(dev, CAN_CTRLMODE_FD_NON_ISO);
if (err)
return err;
- cdev->can.bittiming_const = cdev->bit_timing ?
- cdev->bit_timing : &m_can_bittiming_const_30X;
-
- cdev->can.data_bittiming_const = cdev->data_timing ?
- cdev->data_timing :
- &m_can_data_bittiming_const_30X;
+ cdev->can.bittiming_const = &m_can_bittiming_const_30X;
+ cdev->can.data_bittiming_const = &m_can_data_bittiming_const_30X;
break;
case 31:
/* CAN_CTRLMODE_FD_NON_ISO is fixed with M_CAN IP v3.1.x */
err = can_set_static_ctrlmode(dev, CAN_CTRLMODE_FD_NON_ISO);
if (err)
return err;
- cdev->can.bittiming_const = cdev->bit_timing ?
- cdev->bit_timing : &m_can_bittiming_const_31X;
-
- cdev->can.data_bittiming_const = cdev->data_timing ?
- cdev->data_timing :
- &m_can_data_bittiming_const_31X;
+ cdev->can.bittiming_const = &m_can_bittiming_const_31X;
+ cdev->can.data_bittiming_const = &m_can_data_bittiming_const_31X;
break;
case 32:
case 33:
/* Support both MCAN version v3.2.x and v3.3.0 */
- cdev->can.bittiming_const = cdev->bit_timing ?
- cdev->bit_timing : &m_can_bittiming_const_31X;
-
- cdev->can.data_bittiming_const = cdev->data_timing ?
- cdev->data_timing :
- &m_can_data_bittiming_const_31X;
+ cdev->can.bittiming_const = &m_can_bittiming_const_31X;
+ cdev->can.data_bittiming_const = &m_can_data_bittiming_const_31X;
cdev->can.ctrlmode_supported |=
(m_can_niso_supported(cdev) ?
diff --git a/drivers/net/can/m_can/m_can.h b/drivers/net/can/m_can/m_can.h
index 2c5d40997168..d18b515e6ccc 100644
--- a/drivers/net/can/m_can/m_can.h
+++ b/drivers/net/can/m_can/m_can.h
@@ -85,9 +85,6 @@ struct m_can_classdev {
struct sk_buff *tx_skb;
struct phy *transceiver;
- const struct can_bittiming_const *bit_timing;
- const struct can_bittiming_const *data_timing;
-
struct m_can_ops *ops;
int version;
diff --git a/drivers/net/can/m_can/m_can_pci.c b/drivers/net/can/m_can/m_can_pci.c
index b56a54d6c5a9..8f184a852a0a 100644
--- a/drivers/net/can/m_can/m_can_pci.c
+++ b/drivers/net/can/m_can/m_can_pci.c
@@ -18,14 +18,9 @@
#define M_CAN_PCI_MMIO_BAR 0
+#define M_CAN_CLOCK_FREQ_EHL 200000000
#define CTL_CSR_INT_CTL_OFFSET 0x508
-struct m_can_pci_config {
- const struct can_bittiming_const *bit_timing;
- const struct can_bittiming_const *data_timing;
- unsigned int clock_freq;
-};
-
struct m_can_pci_priv {
struct m_can_classdev cdev;
@@ -89,40 +84,9 @@ static struct m_can_ops m_can_pci_ops = {
.read_fifo = iomap_read_fifo,
};
-static const struct can_bittiming_const m_can_bittiming_const_ehl = {
- .name = KBUILD_MODNAME,
- .tseg1_min = 2, /* Time segment 1 = prop_seg + phase_seg1 */
- .tseg1_max = 64,
- .tseg2_min = 1, /* Time segment 2 = phase_seg2 */
- .tseg2_max = 128,
- .sjw_max = 128,
- .brp_min = 1,
- .brp_max = 512,
- .brp_inc = 1,
-};
-
-static const struct can_bittiming_const m_can_data_bittiming_const_ehl = {
- .name = KBUILD_MODNAME,
- .tseg1_min = 2, /* Time segment 1 = prop_seg + phase_seg1 */
- .tseg1_max = 16,
- .tseg2_min = 1, /* Time segment 2 = phase_seg2 */
- .tseg2_max = 8,
- .sjw_max = 4,
- .brp_min = 1,
- .brp_max = 32,
- .brp_inc = 1,
-};
-
-static const struct m_can_pci_config m_can_pci_ehl = {
- .bit_timing = &m_can_bittiming_const_ehl,
- .data_timing = &m_can_data_bittiming_const_ehl,
- .clock_freq = 200000000,
-};
-
static int m_can_pci_probe(struct pci_dev *pci, const struct pci_device_id *id)
{
struct device *dev = &pci->dev;
- const struct m_can_pci_config *cfg;
struct m_can_classdev *mcan_class;
struct m_can_pci_priv *priv;
void __iomem *base;
@@ -150,8 +114,6 @@ static int m_can_pci_probe(struct pci_dev *pci, const struct pci_device_id *id)
if (!mcan_class)
return -ENOMEM;
- cfg = (const struct m_can_pci_config *)id->driver_data;
-
priv = cdev_to_priv(mcan_class);
priv->base = base;
@@ -163,9 +125,7 @@ static int m_can_pci_probe(struct pci_dev *pci, const struct pci_device_id *id)
mcan_class->dev = &pci->dev;
mcan_class->net->irq = pci_irq_vector(pci, 0);
mcan_class->pm_clock_support = 1;
- mcan_class->bit_timing = cfg->bit_timing;
- mcan_class->data_timing = cfg->data_timing;
- mcan_class->can.clock.freq = cfg->clock_freq;
+ mcan_class->can.clock.freq = id->driver_data;
mcan_class->ops = &m_can_pci_ops;
pci_set_drvdata(pci, mcan_class);
@@ -218,8 +178,8 @@ static SIMPLE_DEV_PM_OPS(m_can_pci_pm_ops,
m_can_pci_suspend, m_can_pci_resume);
static const struct pci_device_id m_can_pci_id_table[] = {
- { PCI_VDEVICE(INTEL, 0x4bc1), (kernel_ulong_t)&m_can_pci_ehl, },
- { PCI_VDEVICE(INTEL, 0x4bc2), (kernel_ulong_t)&m_can_pci_ehl, },
+ { PCI_VDEVICE(INTEL, 0x4bc1), M_CAN_CLOCK_FREQ_EHL, },
+ { PCI_VDEVICE(INTEL, 0x4bc2), M_CAN_CLOCK_FREQ_EHL, },
{ } /* Terminating Entry */
};
MODULE_DEVICE_TABLE(pci, m_can_pci_id_table);
diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c
index 77501f9c5915..fbb32aa49b24 100644
--- a/drivers/net/dsa/b53/b53_common.c
+++ b/drivers/net/dsa/b53/b53_common.c
@@ -1354,46 +1354,25 @@ static void b53_phylink_get_caps(struct dsa_switch *ds, int port,
config->legacy_pre_march2020 = false;
}
-int b53_phylink_mac_link_state(struct dsa_switch *ds, int port,
- struct phylink_link_state *state)
+static struct phylink_pcs *b53_phylink_mac_select_pcs(struct dsa_switch *ds,
+ int port,
+ phy_interface_t interface)
{
struct b53_device *dev = ds->priv;
- int ret = -EOPNOTSUPP;
- if ((phy_interface_mode_is_8023z(state->interface) ||
- state->interface == PHY_INTERFACE_MODE_SGMII) &&
- dev->ops->serdes_link_state)
- ret = dev->ops->serdes_link_state(dev, port, state);
+ if (!dev->ops->phylink_mac_select_pcs)
+ return NULL;
- return ret;
+ return dev->ops->phylink_mac_select_pcs(dev, port, interface);
}
-EXPORT_SYMBOL(b53_phylink_mac_link_state);
void b53_phylink_mac_config(struct dsa_switch *ds, int port,
unsigned int mode,
const struct phylink_link_state *state)
{
- struct b53_device *dev = ds->priv;
-
- if (mode == MLO_AN_PHY || mode == MLO_AN_FIXED)
- return;
-
- if ((phy_interface_mode_is_8023z(state->interface) ||
- state->interface == PHY_INTERFACE_MODE_SGMII) &&
- dev->ops->serdes_config)
- dev->ops->serdes_config(dev, port, mode, state);
}
EXPORT_SYMBOL(b53_phylink_mac_config);
-void b53_phylink_mac_an_restart(struct dsa_switch *ds, int port)
-{
- struct b53_device *dev = ds->priv;
-
- if (dev->ops->serdes_an_restart)
- dev->ops->serdes_an_restart(dev, port);
-}
-EXPORT_SYMBOL(b53_phylink_mac_an_restart);
-
void b53_phylink_mac_link_down(struct dsa_switch *ds, int port,
unsigned int mode,
phy_interface_t interface)
@@ -2269,9 +2248,8 @@ static const struct dsa_switch_ops b53_switch_ops = {
.phy_write = b53_phy_write16,
.adjust_link = b53_adjust_link,
.phylink_get_caps = b53_phylink_get_caps,
- .phylink_mac_link_state = b53_phylink_mac_link_state,
+ .phylink_mac_select_pcs = b53_phylink_mac_select_pcs,
.phylink_mac_config = b53_phylink_mac_config,
- .phylink_mac_an_restart = b53_phylink_mac_an_restart,
.phylink_mac_link_down = b53_phylink_mac_link_down,
.phylink_mac_link_up = b53_phylink_mac_link_up,
.port_enable = b53_enable_port,
diff --git a/drivers/net/dsa/b53/b53_priv.h b/drivers/net/dsa/b53/b53_priv.h
index 3085b6cc7d40..795cbffd5c2b 100644
--- a/drivers/net/dsa/b53/b53_priv.h
+++ b/drivers/net/dsa/b53/b53_priv.h
@@ -21,7 +21,7 @@
#include <linux/kernel.h>
#include <linux/mutex.h>
-#include <linux/phy.h>
+#include <linux/phylink.h>
#include <linux/etherdevice.h>
#include <net/dsa.h>
@@ -29,7 +29,6 @@
struct b53_device;
struct net_device;
-struct phylink_link_state;
struct b53_io_ops {
int (*read8)(struct b53_device *dev, u8 page, u8 reg, u8 *value);
@@ -48,13 +47,10 @@ struct b53_io_ops {
void (*irq_disable)(struct b53_device *dev, int port);
void (*phylink_get_caps)(struct b53_device *dev, int port,
struct phylink_config *config);
+ struct phylink_pcs *(*phylink_mac_select_pcs)(struct b53_device *dev,
+ int port,
+ phy_interface_t interface);
u8 (*serdes_map_lane)(struct b53_device *dev, int port);
- int (*serdes_link_state)(struct b53_device *dev, int port,
- struct phylink_link_state *state);
- void (*serdes_config)(struct b53_device *dev, int port,
- unsigned int mode,
- const struct phylink_link_state *state);
- void (*serdes_an_restart)(struct b53_device *dev, int port);
void (*serdes_link_set)(struct b53_device *dev, int port,
unsigned int mode, phy_interface_t interface,
bool link_up);
@@ -85,8 +81,15 @@ enum {
BCM7278_DEVICE_ID = 0x7278,
};
+struct b53_pcs {
+ struct phylink_pcs pcs;
+ struct b53_device *dev;
+ u8 lane;
+};
+
#define B53_N_PORTS 9
#define B53_N_PORTS_25 6
+#define B53_N_PCS 2
struct b53_port {
u16 vlan_ctl_mask;
@@ -143,6 +146,8 @@ struct b53_device {
bool vlan_enabled;
unsigned int num_ports;
struct b53_port *ports;
+
+ struct b53_pcs pcs[B53_N_PCS];
};
#define b53_for_each_port(dev, i) \
@@ -336,12 +341,9 @@ int b53_br_flags(struct dsa_switch *ds, int port,
struct netlink_ext_ack *extack);
int b53_setup_devlink_resources(struct dsa_switch *ds);
void b53_port_event(struct dsa_switch *ds, int port);
-int b53_phylink_mac_link_state(struct dsa_switch *ds, int port,
- struct phylink_link_state *state);
void b53_phylink_mac_config(struct dsa_switch *ds, int port,
unsigned int mode,
const struct phylink_link_state *state);
-void b53_phylink_mac_an_restart(struct dsa_switch *ds, int port);
void b53_phylink_mac_link_down(struct dsa_switch *ds, int port,
unsigned int mode,
phy_interface_t interface);
diff --git a/drivers/net/dsa/b53/b53_serdes.c b/drivers/net/dsa/b53/b53_serdes.c
index 555e5b372321..0690210770ff 100644
--- a/drivers/net/dsa/b53/b53_serdes.c
+++ b/drivers/net/dsa/b53/b53_serdes.c
@@ -17,6 +17,11 @@
#include "b53_serdes.h"
#include "b53_regs.h"
+static inline struct b53_pcs *pcs_to_b53_pcs(struct phylink_pcs *pcs)
+{
+ return container_of(pcs, struct b53_pcs, pcs);
+}
+
static void b53_serdes_write_blk(struct b53_device *dev, u8 offset, u16 block,
u16 value)
{
@@ -60,51 +65,47 @@ static u16 b53_serdes_read(struct b53_device *dev, u8 lane,
return b53_serdes_read_blk(dev, offset, block);
}
-void b53_serdes_config(struct b53_device *dev, int port, unsigned int mode,
- const struct phylink_link_state *state)
+static int b53_serdes_config(struct phylink_pcs *pcs, unsigned int mode,
+ phy_interface_t interface,
+ const unsigned long *advertising,
+ bool permit_pause_to_mac)
{
- u8 lane = b53_serdes_map_lane(dev, port);
+ struct b53_device *dev = pcs_to_b53_pcs(pcs)->dev;
+ u8 lane = pcs_to_b53_pcs(pcs)->lane;
u16 reg;
- if (lane == B53_INVALID_LANE)
- return;
-
reg = b53_serdes_read(dev, lane, B53_SERDES_DIGITAL_CONTROL(1),
SERDES_DIGITAL_BLK);
- if (state->interface == PHY_INTERFACE_MODE_1000BASEX)
+ if (interface == PHY_INTERFACE_MODE_1000BASEX)
reg |= FIBER_MODE_1000X;
else
reg &= ~FIBER_MODE_1000X;
b53_serdes_write(dev, lane, B53_SERDES_DIGITAL_CONTROL(1),
SERDES_DIGITAL_BLK, reg);
+
+ return 0;
}
-EXPORT_SYMBOL(b53_serdes_config);
-void b53_serdes_an_restart(struct b53_device *dev, int port)
+static void b53_serdes_an_restart(struct phylink_pcs *pcs)
{
- u8 lane = b53_serdes_map_lane(dev, port);
+ struct b53_device *dev = pcs_to_b53_pcs(pcs)->dev;
+ u8 lane = pcs_to_b53_pcs(pcs)->lane;
u16 reg;
- if (lane == B53_INVALID_LANE)
- return;
-
reg = b53_serdes_read(dev, lane, B53_SERDES_MII_REG(MII_BMCR),
SERDES_MII_BLK);
reg |= BMCR_ANRESTART;
b53_serdes_write(dev, lane, B53_SERDES_MII_REG(MII_BMCR),
SERDES_MII_BLK, reg);
}
-EXPORT_SYMBOL(b53_serdes_an_restart);
-int b53_serdes_link_state(struct b53_device *dev, int port,
- struct phylink_link_state *state)
+static void b53_serdes_get_state(struct phylink_pcs *pcs,
+ struct phylink_link_state *state)
{
- u8 lane = b53_serdes_map_lane(dev, port);
+ struct b53_device *dev = pcs_to_b53_pcs(pcs)->dev;
+ u8 lane = pcs_to_b53_pcs(pcs)->lane;
u16 dig, bmsr;
- if (lane == B53_INVALID_LANE)
- return 1;
-
dig = b53_serdes_read(dev, lane, B53_SERDES_DIGITAL_STATUS,
SERDES_DIGITAL_BLK);
bmsr = b53_serdes_read(dev, lane, B53_SERDES_MII_REG(MII_BMSR),
@@ -133,10 +134,7 @@ int b53_serdes_link_state(struct b53_device *dev, int port,
state->pause |= MLO_PAUSE_RX;
if (dig & PAUSE_RESOLUTION_TX_SIDE)
state->pause |= MLO_PAUSE_TX;
-
- return 0;
}
-EXPORT_SYMBOL(b53_serdes_link_state);
void b53_serdes_link_set(struct b53_device *dev, int port, unsigned int mode,
phy_interface_t interface, bool link_up)
@@ -158,6 +156,12 @@ void b53_serdes_link_set(struct b53_device *dev, int port, unsigned int mode,
}
EXPORT_SYMBOL(b53_serdes_link_set);
+static const struct phylink_pcs_ops b53_pcs_ops = {
+ .pcs_get_state = b53_serdes_get_state,
+ .pcs_config = b53_serdes_config,
+ .pcs_an_restart = b53_serdes_an_restart,
+};
+
void b53_serdes_phylink_get_caps(struct b53_device *dev, int port,
struct phylink_config *config)
{
@@ -187,9 +191,28 @@ void b53_serdes_phylink_get_caps(struct b53_device *dev, int port,
}
EXPORT_SYMBOL(b53_serdes_phylink_get_caps);
+struct phylink_pcs *b53_serdes_phylink_mac_select_pcs(struct b53_device *dev,
+ int port,
+ phy_interface_t interface)
+{
+ u8 lane = b53_serdes_map_lane(dev, port);
+
+ if (lane == B53_INVALID_LANE || lane >= B53_N_PCS ||
+ !dev->pcs[lane].dev)
+ return NULL;
+
+ if (!phy_interface_mode_is_8023z(interface) &&
+ interface != PHY_INTERFACE_MODE_SGMII)
+ return NULL;
+
+ return &dev->pcs[lane].pcs;
+}
+EXPORT_SYMBOL(b53_serdes_phylink_mac_select_pcs);
+
int b53_serdes_init(struct b53_device *dev, int port)
{
u8 lane = b53_serdes_map_lane(dev, port);
+ struct b53_pcs *pcs;
u16 id0, msb, lsb;
if (lane == B53_INVALID_LANE)
@@ -212,6 +235,11 @@ int b53_serdes_init(struct b53_device *dev, int port)
(id0 >> SERDES_ID0_REV_NUM_SHIFT) & SERDES_ID0_REV_NUM_MASK,
(u32)msb << 16 | lsb);
+ pcs = &dev->pcs[lane];
+ pcs->dev = dev;
+ pcs->lane = lane;
+ pcs->pcs.ops = &b53_pcs_ops;
+
return 0;
}
EXPORT_SYMBOL(b53_serdes_init);
diff --git a/drivers/net/dsa/b53/b53_serdes.h b/drivers/net/dsa/b53/b53_serdes.h
index f47d5caa7557..ef81f5da5f81 100644
--- a/drivers/net/dsa/b53/b53_serdes.h
+++ b/drivers/net/dsa/b53/b53_serdes.h
@@ -107,14 +107,11 @@ static inline u8 b53_serdes_map_lane(struct b53_device *dev, int port)
return dev->ops->serdes_map_lane(dev, port);
}
-int b53_serdes_get_link(struct b53_device *dev, int port);
-int b53_serdes_link_state(struct b53_device *dev, int port,
- struct phylink_link_state *state);
-void b53_serdes_config(struct b53_device *dev, int port, unsigned int mode,
- const struct phylink_link_state *state);
-void b53_serdes_an_restart(struct b53_device *dev, int port);
void b53_serdes_link_set(struct b53_device *dev, int port, unsigned int mode,
phy_interface_t interface, bool link_up);
+struct phylink_pcs *b53_serdes_phylink_mac_select_pcs(struct b53_device *dev,
+ int port,
+ phy_interface_t interface);
void b53_serdes_phylink_get_caps(struct b53_device *dev, int port,
struct phylink_config *config);
#if IS_ENABLED(CONFIG_B53_SERDES)
diff --git a/drivers/net/dsa/b53/b53_srab.c b/drivers/net/dsa/b53/b53_srab.c
index c51b716657db..da0b889880f6 100644
--- a/drivers/net/dsa/b53/b53_srab.c
+++ b/drivers/net/dsa/b53/b53_srab.c
@@ -491,10 +491,8 @@ static const struct b53_io_ops b53_srab_ops = {
.irq_disable = b53_srab_irq_disable,
.phylink_get_caps = b53_srab_phylink_get_caps,
#if IS_ENABLED(CONFIG_B53_SERDES)
+ .phylink_mac_select_pcs = b53_serdes_phylink_mac_select_pcs,
.serdes_map_lane = b53_srab_serdes_map_lane,
- .serdes_link_state = b53_serdes_link_state,
- .serdes_config = b53_serdes_config,
- .serdes_an_restart = b53_serdes_an_restart,
.serdes_link_set = b53_serdes_link_set,
#endif
};
diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c
index cf82b1fa9725..87e81c636339 100644
--- a/drivers/net/dsa/bcm_sf2.c
+++ b/drivers/net/dsa/bcm_sf2.c
@@ -809,6 +809,9 @@ static void bcm_sf2_sw_mac_link_down(struct dsa_switch *ds, int port,
struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds);
u32 reg, offset;
+ if (priv->wol_ports_mask & BIT(port))
+ return;
+
if (port != core_readl(priv, CORE_IMP0_PRT_ID)) {
if (priv->type == BCM4908_DEVICE_ID ||
priv->type == BCM7445_DEVICE_ID)
diff --git a/drivers/net/dsa/lantiq_gswip.c b/drivers/net/dsa/lantiq_gswip.c
index a416240d001b..12c15da55664 100644
--- a/drivers/net/dsa/lantiq_gswip.c
+++ b/drivers/net/dsa/lantiq_gswip.c
@@ -1681,9 +1681,6 @@ static void gswip_phylink_mac_config(struct dsa_switch *ds, int port,
break;
case PHY_INTERFACE_MODE_RMII:
miicfg |= GSWIP_MII_CFG_MODE_RMIIM;
-
- /* Configure the RMII clock as output: */
- miicfg |= GSWIP_MII_CFG_RMII_CLK;
break;
case PHY_INTERFACE_MODE_RGMII:
case PHY_INTERFACE_MODE_RGMII_ID:
diff --git a/drivers/net/dsa/microchip/ksz9477.c b/drivers/net/dsa/microchip/ksz9477.c
index 8222c8a6c5ec..7310d19d1f06 100644
--- a/drivers/net/dsa/microchip/ksz9477.c
+++ b/drivers/net/dsa/microchip/ksz9477.c
@@ -1021,14 +1021,32 @@ static int ksz9477_port_mirror_add(struct dsa_switch *ds, int port,
bool ingress, struct netlink_ext_ack *extack)
{
struct ksz_device *dev = ds->priv;
+ u8 data;
+ int p;
+
+ /* Limit to one sniffer port
+ * Check if any of the port is already set for sniffing
+ * If yes, instruct the user to remove the previous entry & exit
+ */
+ for (p = 0; p < dev->port_cnt; p++) {
+ /* Skip the current sniffing port */
+ if (p == mirror->to_local_port)
+ continue;
+
+ ksz_pread8(dev, p, P_MIRROR_CTRL, &data);
+
+ if (data & PORT_MIRROR_SNIFFER) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Sniffer port is already configured, delete existing rules & retry");
+ return -EBUSY;
+ }
+ }
if (ingress)
ksz_port_cfg(dev, port, P_MIRROR_CTRL, PORT_MIRROR_RX, true);
else
ksz_port_cfg(dev, port, P_MIRROR_CTRL, PORT_MIRROR_TX, true);
- ksz_port_cfg(dev, port, P_MIRROR_CTRL, PORT_MIRROR_SNIFFER, false);
-
/* configure mirror port */
ksz_port_cfg(dev, mirror->to_local_port, P_MIRROR_CTRL,
PORT_MIRROR_SNIFFER, true);
@@ -1042,16 +1060,28 @@ static void ksz9477_port_mirror_del(struct dsa_switch *ds, int port,
struct dsa_mall_mirror_tc_entry *mirror)
{
struct ksz_device *dev = ds->priv;
+ bool in_use = false;
u8 data;
+ int p;
if (mirror->ingress)
ksz_port_cfg(dev, port, P_MIRROR_CTRL, PORT_MIRROR_RX, false);
else
ksz_port_cfg(dev, port, P_MIRROR_CTRL, PORT_MIRROR_TX, false);
- ksz_pread8(dev, port, P_MIRROR_CTRL, &data);
- if (!(data & (PORT_MIRROR_RX | PORT_MIRROR_TX)))
+ /* Check if any of the port is still referring to sniffer port */
+ for (p = 0; p < dev->port_cnt; p++) {
+ ksz_pread8(dev, p, P_MIRROR_CTRL, &data);
+
+ if ((data & (PORT_MIRROR_RX | PORT_MIRROR_TX))) {
+ in_use = true;
+ break;
+ }
+ }
+
+ /* delete sniffing if there are no other mirroring rules */
+ if (!in_use)
ksz_port_cfg(dev, mirror->to_local_port, P_MIRROR_CTRL,
PORT_MIRROR_SNIFFER, false);
}
diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c
index 19f0035d4410..fe3cb26f4287 100644
--- a/drivers/net/dsa/mt7530.c
+++ b/drivers/net/dsa/mt7530.c
@@ -2229,6 +2229,7 @@ mt7530_setup(struct dsa_switch *ds)
ret = of_get_phy_mode(mac_np, &interface);
if (ret && ret != -ENODEV) {
of_node_put(mac_np);
+ of_node_put(phy_node);
return ret;
}
id = of_mdio_parse_addr(ds->dev, phy_node);
diff --git a/drivers/net/dsa/mv88e6xxx/port_hidden.c b/drivers/net/dsa/mv88e6xxx/port_hidden.c
index b49d05f0e117..7a9f9ff6dedf 100644
--- a/drivers/net/dsa/mv88e6xxx/port_hidden.c
+++ b/drivers/net/dsa/mv88e6xxx/port_hidden.c
@@ -40,8 +40,9 @@ int mv88e6xxx_port_hidden_wait(struct mv88e6xxx_chip *chip)
{
int bit = __bf_shf(MV88E6XXX_PORT_RESERVED_1A_BUSY);
- return mv88e6xxx_wait_bit(chip, MV88E6XXX_PORT_RESERVED_1A_CTRL_PORT,
- MV88E6XXX_PORT_RESERVED_1A, bit, 0);
+ return mv88e6xxx_port_wait_bit(chip,
+ MV88E6XXX_PORT_RESERVED_1A_CTRL_PORT,
+ MV88E6XXX_PORT_RESERVED_1A, bit, 0);
}
int mv88e6xxx_port_hidden_read(struct mv88e6xxx_chip *chip, int block, int port,
diff --git a/drivers/net/dsa/ocelot/felix.c b/drivers/net/dsa/ocelot/felix.c
index 9e28219b223d..faccfb3f0158 100644
--- a/drivers/net/dsa/ocelot/felix.c
+++ b/drivers/net/dsa/ocelot/felix.c
@@ -403,6 +403,7 @@ static int felix_update_trapping_destinations(struct dsa_switch *ds,
{
struct ocelot *ocelot = ds->priv;
struct felix *felix = ocelot_to_felix(ocelot);
+ struct ocelot_vcap_block *block_vcap_is2;
struct ocelot_vcap_filter *trap;
enum ocelot_mask_mode mask_mode;
unsigned long port_mask;
@@ -422,9 +423,13 @@ static int felix_update_trapping_destinations(struct dsa_switch *ds,
/* We are sure that "cpu" was found, otherwise
* dsa_tree_setup_default_cpu() would have failed earlier.
*/
+ block_vcap_is2 = &ocelot->block[VCAP_IS2];
/* Make sure all traps are set up for that destination */
- list_for_each_entry(trap, &ocelot->traps, trap_list) {
+ list_for_each_entry(trap, &block_vcap_is2->rules, list) {
+ if (!trap->is_trap)
+ continue;
+
/* Figure out the current trapping destination */
if (using_tag_8021q) {
/* Redirect to the tag_8021q CPU port. If timestamps
diff --git a/drivers/net/dsa/realtek/realtek-mdio.c b/drivers/net/dsa/realtek/realtek-mdio.c
index 31e1f100e48e..c58f49d558d2 100644
--- a/drivers/net/dsa/realtek/realtek-mdio.c
+++ b/drivers/net/dsa/realtek/realtek-mdio.c
@@ -267,7 +267,6 @@ static const struct of_device_id realtek_mdio_of_match[] = {
#endif
#if IS_ENABLED(CONFIG_NET_DSA_REALTEK_RTL8365MB)
{ .compatible = "realtek,rtl8365mb", .data = &rtl8365mb_variant, },
- { .compatible = "realtek,rtl8367s", .data = &rtl8365mb_variant, },
#endif
{ /* sentinel */ },
};
diff --git a/drivers/net/dsa/realtek/realtek-smi.c b/drivers/net/dsa/realtek/realtek-smi.c
index 6cec559c90ce..45992f79ec8d 100644
--- a/drivers/net/dsa/realtek/realtek-smi.c
+++ b/drivers/net/dsa/realtek/realtek-smi.c
@@ -551,10 +551,6 @@ static const struct of_device_id realtek_smi_of_match[] = {
.compatible = "realtek,rtl8365mb",
.data = &rtl8365mb_variant,
},
- {
- .compatible = "realtek,rtl8367s",
- .data = &rtl8365mb_variant,
- },
#endif
{ /* sentinel */ },
};
diff --git a/drivers/net/ethernet/Kconfig b/drivers/net/ethernet/Kconfig
index bd4cb9d7c35d..827993022386 100644
--- a/drivers/net/ethernet/Kconfig
+++ b/drivers/net/ethernet/Kconfig
@@ -35,15 +35,6 @@ source "drivers/net/ethernet/aquantia/Kconfig"
source "drivers/net/ethernet/arc/Kconfig"
source "drivers/net/ethernet/asix/Kconfig"
source "drivers/net/ethernet/atheros/Kconfig"
-source "drivers/net/ethernet/broadcom/Kconfig"
-source "drivers/net/ethernet/brocade/Kconfig"
-source "drivers/net/ethernet/cadence/Kconfig"
-source "drivers/net/ethernet/calxeda/Kconfig"
-source "drivers/net/ethernet/cavium/Kconfig"
-source "drivers/net/ethernet/chelsio/Kconfig"
-source "drivers/net/ethernet/cirrus/Kconfig"
-source "drivers/net/ethernet/cisco/Kconfig"
-source "drivers/net/ethernet/cortina/Kconfig"
config CX_ECAT
tristate "Beckhoff CX5020 EtherCAT master support"
@@ -57,6 +48,14 @@ config CX_ECAT
To compile this driver as a module, choose M here. The module
will be called ec_bhf.
+source "drivers/net/ethernet/broadcom/Kconfig"
+source "drivers/net/ethernet/cadence/Kconfig"
+source "drivers/net/ethernet/calxeda/Kconfig"
+source "drivers/net/ethernet/cavium/Kconfig"
+source "drivers/net/ethernet/chelsio/Kconfig"
+source "drivers/net/ethernet/cirrus/Kconfig"
+source "drivers/net/ethernet/cisco/Kconfig"
+source "drivers/net/ethernet/cortina/Kconfig"
source "drivers/net/ethernet/davicom/Kconfig"
config DNET
@@ -85,7 +84,6 @@ source "drivers/net/ethernet/huawei/Kconfig"
source "drivers/net/ethernet/i825xx/Kconfig"
source "drivers/net/ethernet/ibm/Kconfig"
source "drivers/net/ethernet/intel/Kconfig"
-source "drivers/net/ethernet/microsoft/Kconfig"
source "drivers/net/ethernet/xscale/Kconfig"
config JME
@@ -128,8 +126,9 @@ source "drivers/net/ethernet/mediatek/Kconfig"
source "drivers/net/ethernet/mellanox/Kconfig"
source "drivers/net/ethernet/micrel/Kconfig"
source "drivers/net/ethernet/microchip/Kconfig"
-source "drivers/net/ethernet/moxa/Kconfig"
source "drivers/net/ethernet/mscc/Kconfig"
+source "drivers/net/ethernet/microsoft/Kconfig"
+source "drivers/net/ethernet/moxa/Kconfig"
source "drivers/net/ethernet/myricom/Kconfig"
config FEALNX
@@ -141,10 +140,10 @@ config FEALNX
Say Y here to support the Myson MTD-800 family of PCI-based Ethernet
cards. <http://www.myson.com.tw/>
+source "drivers/net/ethernet/ni/Kconfig"
source "drivers/net/ethernet/natsemi/Kconfig"
source "drivers/net/ethernet/neterion/Kconfig"
source "drivers/net/ethernet/netronome/Kconfig"
-source "drivers/net/ethernet/ni/Kconfig"
source "drivers/net/ethernet/8390/Kconfig"
source "drivers/net/ethernet/nvidia/Kconfig"
source "drivers/net/ethernet/nxp/Kconfig"
@@ -164,6 +163,7 @@ source "drivers/net/ethernet/packetengines/Kconfig"
source "drivers/net/ethernet/pasemi/Kconfig"
source "drivers/net/ethernet/pensando/Kconfig"
source "drivers/net/ethernet/qlogic/Kconfig"
+source "drivers/net/ethernet/brocade/Kconfig"
source "drivers/net/ethernet/qualcomm/Kconfig"
source "drivers/net/ethernet/rdc/Kconfig"
source "drivers/net/ethernet/realtek/Kconfig"
@@ -171,10 +171,10 @@ source "drivers/net/ethernet/renesas/Kconfig"
source "drivers/net/ethernet/rocker/Kconfig"
source "drivers/net/ethernet/samsung/Kconfig"
source "drivers/net/ethernet/seeq/Kconfig"
-source "drivers/net/ethernet/sfc/Kconfig"
source "drivers/net/ethernet/sgi/Kconfig"
source "drivers/net/ethernet/silan/Kconfig"
source "drivers/net/ethernet/sis/Kconfig"
+source "drivers/net/ethernet/sfc/Kconfig"
source "drivers/net/ethernet/smsc/Kconfig"
source "drivers/net/ethernet/socionext/Kconfig"
source "drivers/net/ethernet/stmicro/Kconfig"
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c b/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c
index 797a95142d1f..831833911a52 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c
@@ -444,7 +444,7 @@ err_exit:
static int aq_pm_freeze(struct device *dev)
{
- return aq_suspend_common(dev, false);
+ return aq_suspend_common(dev, true);
}
static int aq_pm_suspend_poweroff(struct device *dev)
@@ -454,7 +454,7 @@ static int aq_pm_suspend_poweroff(struct device *dev)
static int aq_pm_thaw(struct device *dev)
{
- return atl_resume_common(dev, false);
+ return atl_resume_common(dev, true);
}
static int aq_pm_resume_restore(struct device *dev)
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c
index 77e76c9efd32..8201ce7adb77 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c
@@ -346,7 +346,6 @@ int aq_ring_rx_clean(struct aq_ring_s *self,
int budget)
{
struct net_device *ndev = aq_nic_get_ndev(self->aq_nic);
- bool is_rsc_completed = true;
int err = 0;
for (; (self->sw_head != self->hw_head) && budget;
@@ -364,12 +363,17 @@ int aq_ring_rx_clean(struct aq_ring_s *self,
continue;
if (!buff->is_eop) {
+ unsigned int frag_cnt = 0U;
buff_ = buff;
do {
+ bool is_rsc_completed = true;
+
if (buff_->next >= self->size) {
err = -EIO;
goto err_exit;
}
+
+ frag_cnt++;
next_ = buff_->next,
buff_ = &self->buff_ring[next_];
is_rsc_completed =
@@ -377,18 +381,17 @@ int aq_ring_rx_clean(struct aq_ring_s *self,
next_,
self->hw_head);
- if (unlikely(!is_rsc_completed))
- break;
+ if (unlikely(!is_rsc_completed) ||
+ frag_cnt > MAX_SKB_FRAGS) {
+ err = 0;
+ goto err_exit;
+ }
buff->is_error |= buff_->is_error;
buff->is_cso_err |= buff_->is_cso_err;
} while (!buff_->is_eop);
- if (!is_rsc_completed) {
- err = 0;
- goto err_exit;
- }
if (buff->is_error ||
(buff->is_lro && buff->is_cso_err)) {
buff_ = buff;
@@ -446,7 +449,7 @@ int aq_ring_rx_clean(struct aq_ring_s *self,
ALIGN(hdr_len, sizeof(long)));
if (buff->len - hdr_len > 0) {
- skb_add_rx_frag(skb, 0, buff->rxdata.page,
+ skb_add_rx_frag(skb, i++, buff->rxdata.page,
buff->rxdata.pg_off + hdr_len,
buff->len - hdr_len,
AQ_CFG_RX_FRAME_MAX);
@@ -455,7 +458,6 @@ int aq_ring_rx_clean(struct aq_ring_s *self,
if (!buff->is_eop) {
buff_ = buff;
- i = 1U;
do {
next_ = buff_->next;
buff_ = &self->buff_ring[next_];
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c
index d875ce3ec759..15ede7285fb5 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c
@@ -889,6 +889,13 @@ int hw_atl_b0_hw_ring_tx_head_update(struct aq_hw_s *self,
err = -ENXIO;
goto err_exit;
}
+
+ /* Validate that the new hw_head_ is reasonable. */
+ if (hw_head_ >= ring->size) {
+ err = -ENXIO;
+ goto err_exit;
+ }
+
ring->hw_head = hw_head_;
err = aq_hw_err_from_flags(self);
diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c
index 60dde29974bf..df51be3cbe06 100644
--- a/drivers/net/ethernet/broadcom/bcmsysport.c
+++ b/drivers/net/ethernet/broadcom/bcmsysport.c
@@ -2585,8 +2585,10 @@ static int bcm_sysport_probe(struct platform_device *pdev)
device_set_wakeup_capable(&pdev->dev, 1);
priv->wol_clk = devm_clk_get_optional(&pdev->dev, "sw_sysportwol");
- if (IS_ERR(priv->wol_clk))
- return PTR_ERR(priv->wol_clk);
+ if (IS_ERR(priv->wol_clk)) {
+ ret = PTR_ERR(priv->wol_clk);
+ goto err_deregister_fixed_link;
+ }
/* Set the needed headroom once and for all */
BUILD_BUG_ON(sizeof(struct bcm_tsb) != 8);
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
index c19b072f3a23..962253db25b8 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
@@ -14153,10 +14153,6 @@ static int bnx2x_eeh_nic_unload(struct bnx2x *bp)
/* Stop Tx */
bnx2x_tx_disable(bp);
- /* Delete all NAPI objects */
- bnx2x_del_all_napi(bp);
- if (CNIC_LOADED(bp))
- bnx2x_del_all_napi_cnic(bp);
netdev_reset_tc(bp->dev);
del_timer_sync(&bp->timer);
@@ -14261,6 +14257,11 @@ static pci_ers_result_t bnx2x_io_slot_reset(struct pci_dev *pdev)
bnx2x_drain_tx_queues(bp);
bnx2x_send_unload_req(bp, UNLOAD_RECOVERY);
bnx2x_netif_stop(bp, 1);
+ bnx2x_del_all_napi(bp);
+
+ if (CNIC_LOADED(bp))
+ bnx2x_del_all_napi_cnic(bp);
+
bnx2x_free_irq(bp);
/* Report UNLOAD_DONE to MCP */
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 874fad0a5cf8..1d69fe0737a1 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -2707,6 +2707,10 @@ static int bnxt_poll_p5(struct napi_struct *napi, int budget)
u32 idx = le32_to_cpu(nqcmp->cq_handle_low);
struct bnxt_cp_ring_info *cpr2;
+ /* No more budget for RX work */
+ if (budget && work_done >= budget && idx == BNXT_RX_HDL)
+ break;
+
cpr2 = cpr->cp_ring_arr[idx];
work_done += __bnxt_poll_work(bp, cpr2,
budget - work_done);
@@ -10983,7 +10987,7 @@ static bool bnxt_rfs_capable(struct bnxt *bp)
if (bp->flags & BNXT_FLAG_CHIP_P5)
return bnxt_rfs_supported(bp);
- if (!(bp->flags & BNXT_FLAG_MSIX_CAP) || !bnxt_can_reserve_rings(bp))
+ if (!(bp->flags & BNXT_FLAG_MSIX_CAP) || !bnxt_can_reserve_rings(bp) || !bp->rx_nr_rings)
return false;
vnics = 1 + bp->rx_nr_rings;
@@ -13234,10 +13238,9 @@ static int bnxt_init_dflt_ring_mode(struct bnxt *bp)
goto init_dflt_ring_err;
bp->tx_nr_rings_per_tc = bp->tx_nr_rings;
- if (bnxt_rfs_supported(bp) && bnxt_rfs_capable(bp)) {
- bp->flags |= BNXT_FLAG_RFS;
- bp->dev->features |= NETIF_F_NTUPLE;
- }
+
+ bnxt_set_dflt_rfs(bp);
+
init_dflt_ring_err:
bnxt_ulp_irq_restart(bp, rc);
return rc;
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c
index 9c2ad5e67a5d..00f2f80c0073 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c
@@ -846,13 +846,6 @@ int bnxt_ptp_init(struct bnxt *bp, bool phc_cfg)
if (rc)
return rc;
- if (bp->fw_cap & BNXT_FW_CAP_PTP_RTC) {
- bnxt_ptp_timecounter_init(bp, false);
- rc = bnxt_ptp_init_rtc(bp, phc_cfg);
- if (rc)
- goto out;
- }
-
if (ptp->ptp_clock && bnxt_pps_config_ok(bp))
return 0;
@@ -861,8 +854,14 @@ int bnxt_ptp_init(struct bnxt *bp, bool phc_cfg)
atomic_set(&ptp->tx_avail, BNXT_MAX_TX_TS);
spin_lock_init(&ptp->ptp_lock);
- if (!(bp->fw_cap & BNXT_FW_CAP_PTP_RTC))
+ if (bp->fw_cap & BNXT_FW_CAP_PTP_RTC) {
+ bnxt_ptp_timecounter_init(bp, false);
+ rc = bnxt_ptp_init_rtc(bp, phc_cfg);
+ if (rc)
+ goto out;
+ } else {
bnxt_ptp_timecounter_init(bp, true);
+ }
ptp->ptp_info = bnxt_ptp_caps;
if ((bp->fw_cap & BNXT_FW_CAP_PTP_PPS)) {
diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
index 9a41145dadfc..e87e46c47387 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
@@ -2035,6 +2035,11 @@ static struct sk_buff *bcmgenet_add_tsb(struct net_device *dev,
return skb;
}
+static void bcmgenet_hide_tsb(struct sk_buff *skb)
+{
+ __skb_pull(skb, sizeof(struct status_64));
+}
+
static netdev_tx_t bcmgenet_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct bcmgenet_priv *priv = netdev_priv(dev);
@@ -2141,6 +2146,8 @@ static netdev_tx_t bcmgenet_xmit(struct sk_buff *skb, struct net_device *dev)
}
GENET_CB(skb)->last_cb = tx_cb_ptr;
+
+ bcmgenet_hide_tsb(skb);
skb_tx_timestamp(skb);
/* Decrement total BD count and advance our write pointer */
@@ -3992,6 +3999,10 @@ static int bcmgenet_probe(struct platform_device *pdev)
goto err;
}
priv->wol_irq = platform_get_irq_optional(pdev, 2);
+ if (priv->wol_irq == -EPROBE_DEFER) {
+ err = priv->wol_irq;
+ goto err;
+ }
priv->base = devm_platform_ioremap_resource(pdev, 0);
if (IS_ERR(priv->base)) {
diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c
index e475be29845c..61284baa0496 100644
--- a/drivers/net/ethernet/cadence/macb_main.c
+++ b/drivers/net/ethernet/cadence/macb_main.c
@@ -1219,7 +1219,6 @@ static void gem_rx_refill(struct macb_queue *queue)
/* Make hw descriptor updates visible to CPU */
rmb();
- queue->rx_prepared_head++;
desc = macb_rx_desc(queue, entry);
if (!queue->rx_skbuff[entry]) {
@@ -1258,6 +1257,7 @@ static void gem_rx_refill(struct macb_queue *queue)
dma_wmb();
desc->addr &= ~MACB_BIT(RX_USED);
}
+ queue->rx_prepared_head++;
}
/* Make descriptor updates visible to hardware */
diff --git a/drivers/net/ethernet/cavium/thunder/nic_main.c b/drivers/net/ethernet/cavium/thunder/nic_main.c
index f2f1ce81fd9c..0ec65ec634df 100644
--- a/drivers/net/ethernet/cavium/thunder/nic_main.c
+++ b/drivers/net/ethernet/cavium/thunder/nic_main.c
@@ -59,7 +59,7 @@ struct nicpf {
/* MSI-X */
u8 num_vec;
- bool irq_allocated[NIC_PF_MSIX_VECTORS];
+ unsigned int irq_allocated[NIC_PF_MSIX_VECTORS];
char irq_name[NIC_PF_MSIX_VECTORS][20];
};
@@ -1150,7 +1150,7 @@ static irqreturn_t nic_mbx_intr_handler(int irq, void *nic_irq)
u64 intr;
u8 vf;
- if (irq == pci_irq_vector(nic->pdev, NIC_PF_INTR_ID_MBOX0))
+ if (irq == nic->irq_allocated[NIC_PF_INTR_ID_MBOX0])
mbx = 0;
else
mbx = 1;
@@ -1176,14 +1176,14 @@ static void nic_free_all_interrupts(struct nicpf *nic)
for (irq = 0; irq < nic->num_vec; irq++) {
if (nic->irq_allocated[irq])
- free_irq(pci_irq_vector(nic->pdev, irq), nic);
- nic->irq_allocated[irq] = false;
+ free_irq(nic->irq_allocated[irq], nic);
+ nic->irq_allocated[irq] = 0;
}
}
static int nic_register_interrupts(struct nicpf *nic)
{
- int i, ret;
+ int i, ret, irq;
nic->num_vec = pci_msix_vec_count(nic->pdev);
/* Enable MSI-X */
@@ -1201,13 +1201,13 @@ static int nic_register_interrupts(struct nicpf *nic)
sprintf(nic->irq_name[i],
"NICPF Mbox%d", (i - NIC_PF_INTR_ID_MBOX0));
- ret = request_irq(pci_irq_vector(nic->pdev, i),
- nic_mbx_intr_handler, 0,
+ irq = pci_irq_vector(nic->pdev, i);
+ ret = request_irq(irq, nic_mbx_intr_handler, 0,
nic->irq_name[i], nic);
if (ret)
goto fail;
- nic->irq_allocated[i] = true;
+ nic->irq_allocated[i] = irq;
}
/* Enable mailbox interrupt */
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
index e7b4e3ed056c..8d719f82854a 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
@@ -2793,14 +2793,14 @@ int t4_get_raw_vpd_params(struct adapter *adapter, struct vpd_params *p)
goto out;
na = ret;
- memcpy(p->id, vpd + id, min_t(int, id_len, ID_LEN));
+ memcpy(p->id, vpd + id, min_t(unsigned int, id_len, ID_LEN));
strim(p->id);
- memcpy(p->sn, vpd + sn, min_t(int, sn_len, SERNUM_LEN));
+ memcpy(p->sn, vpd + sn, min_t(unsigned int, sn_len, SERNUM_LEN));
strim(p->sn);
- memcpy(p->pn, vpd + pn, min_t(int, pn_len, PN_LEN));
+ memcpy(p->pn, vpd + pn, min_t(unsigned int, pn_len, PN_LEN));
strim(p->pn);
- memcpy(p->na, vpd + na, min_t(int, na_len, MACADDR_LEN));
- strim((char *)p->na);
+ memcpy(p->na, vpd + na, min_t(unsigned int, na_len, MACADDR_LEN));
+ strim(p->na);
out:
vfree(vpd);
diff --git a/drivers/net/ethernet/dec/tulip/tulip_core.c b/drivers/net/ethernet/dec/tulip/tulip_core.c
index 79df5a72877b..0040dcaab945 100644
--- a/drivers/net/ethernet/dec/tulip/tulip_core.c
+++ b/drivers/net/ethernet/dec/tulip/tulip_core.c
@@ -1399,8 +1399,10 @@ static int tulip_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
/* alloc_etherdev ensures aligned and zeroed private structures */
dev = alloc_etherdev (sizeof (*tp));
- if (!dev)
+ if (!dev) {
+ pci_disable_device(pdev);
return -ENOMEM;
+ }
SET_NETDEV_DEV(dev, &pdev->dev);
if (pci_resource_len (pdev, 0) < tulip_tbl[chip_idx].io_size) {
@@ -1785,6 +1787,7 @@ err_out_free_res:
err_out_free_netdev:
free_netdev (dev);
+ pci_disable_device(pdev);
return -ENODEV;
}
diff --git a/drivers/net/ethernet/faraday/ftgmac100.c b/drivers/net/ethernet/faraday/ftgmac100.c
index caf48023f8ea..5231818943c6 100644
--- a/drivers/net/ethernet/faraday/ftgmac100.c
+++ b/drivers/net/ethernet/faraday/ftgmac100.c
@@ -1928,6 +1928,11 @@ static int ftgmac100_probe(struct platform_device *pdev)
/* AST2400 doesn't have working HW checksum generation */
if (np && (of_device_is_compatible(np, "aspeed,ast2400-mac")))
netdev->hw_features &= ~NETIF_F_HW_CSUM;
+
+ /* AST2600 tx checksum with NCSI is broken */
+ if (priv->use_ncsi && of_device_is_compatible(np, "aspeed,ast2600-mac"))
+ netdev->hw_features &= ~NETIF_F_HW_CSUM;
+
if (np && of_get_property(np, "no-hw-checksum", NULL))
netdev->hw_features &= ~(NETIF_F_HW_CSUM | NETIF_F_RXCSUM);
netdev->features |= netdev->hw_features;
diff --git a/drivers/net/ethernet/freescale/enetc/enetc_qos.c b/drivers/net/ethernet/freescale/enetc/enetc_qos.c
index 79afb1d7289b..9182631856d5 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc_qos.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc_qos.c
@@ -297,10 +297,6 @@ int enetc_setup_tc_txtime(struct net_device *ndev, void *type_data)
if (tc < 0 || tc >= priv->num_tx_rings)
return -EINVAL;
- /* Do not support TXSTART and TX CSUM offload simutaniously */
- if (ndev->features & NETIF_F_CSUM_MASK)
- return -EBUSY;
-
/* TSD and Qbv are mutually exclusive in hardware */
if (enetc_rd(&priv->si->hw, ENETC_QBV_PTGCR_OFFSET) & ENETC_QBV_TGE)
return -EBUSY;
diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index 11227f51404c..9f33ec838b52 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -3731,7 +3731,7 @@ static int fec_enet_init_stop_mode(struct fec_enet_private *fep,
ARRAY_SIZE(out_val));
if (ret) {
dev_dbg(&fep->pdev->dev, "no stop mode property\n");
- return ret;
+ goto out;
}
fep->stop_gpr.gpr = syscon_node_to_regmap(gpr_np);
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c
index 7edf8569514c..928d934cb21a 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c
@@ -1065,19 +1065,23 @@ int hns_mac_init(struct dsaf_device *dsaf_dev)
device_for_each_child_node(dsaf_dev->dev, child) {
ret = fwnode_property_read_u32(child, "reg", &port_id);
if (ret) {
+ fwnode_handle_put(child);
dev_err(dsaf_dev->dev,
"get reg fail, ret=%d!\n", ret);
return ret;
}
if (port_id >= max_port_num) {
+ fwnode_handle_put(child);
dev_err(dsaf_dev->dev,
"reg(%u) out of range!\n", port_id);
return -EINVAL;
}
mac_cb = devm_kzalloc(dsaf_dev->dev, sizeof(*mac_cb),
GFP_KERNEL);
- if (!mac_cb)
+ if (!mac_cb) {
+ fwnode_handle_put(child);
return -ENOMEM;
+ }
mac_cb->fw_port = child;
mac_cb->mac_id = (u8)port_id;
dsaf_dev->mac_cb[port_id] = mac_cb;
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_tqp_stats.c b/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_tqp_stats.c
index 0c60f41fca8a..f3c9395d8351 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_tqp_stats.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_tqp_stats.c
@@ -75,7 +75,7 @@ int hclge_comm_tqps_update_stats(struct hnae3_handle *handle,
ret = hclge_comm_cmd_send(hw, &desc, 1);
if (ret) {
dev_err(&hw->cmq.csq.pdev->dev,
- "failed to get tqp stat, ret = %d, tx = %u.\n",
+ "failed to get tqp stat, ret = %d, rx = %u.\n",
ret, i);
return ret;
}
@@ -89,7 +89,7 @@ int hclge_comm_tqps_update_stats(struct hnae3_handle *handle,
ret = hclge_comm_cmd_send(hw, &desc, 1);
if (ret) {
dev_err(&hw->cmq.csq.pdev->dev,
- "failed to get tqp stat, ret = %d, rx = %u.\n",
+ "failed to get tqp stat, ret = %d, tx = %u.\n",
ret, i);
return ret;
}
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
index 44d9b560b337..93aeb615191d 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
@@ -562,12 +562,12 @@ static void hns3_dbg_tx_spare_info(struct hns3_enet_ring *ring, char *buf,
for (i = 0; i < ring_num; i++) {
j = 0;
- sprintf(result[j++], "%8u", i);
- sprintf(result[j++], "%9u", ring->tx_copybreak);
- sprintf(result[j++], "%3u", tx_spare->len);
- sprintf(result[j++], "%3u", tx_spare->next_to_use);
- sprintf(result[j++], "%3u", tx_spare->next_to_clean);
- sprintf(result[j++], "%3u", tx_spare->last_to_clean);
+ sprintf(result[j++], "%u", i);
+ sprintf(result[j++], "%u", ring->tx_copybreak);
+ sprintf(result[j++], "%u", tx_spare->len);
+ sprintf(result[j++], "%u", tx_spare->next_to_use);
+ sprintf(result[j++], "%u", tx_spare->next_to_clean);
+ sprintf(result[j++], "%u", tx_spare->last_to_clean);
sprintf(result[j++], "%pad", &tx_spare->dma);
hns3_dbg_fill_content(content, sizeof(content),
tx_spare_info_items,
@@ -598,35 +598,35 @@ static void hns3_dump_rx_queue_info(struct hns3_enet_ring *ring,
u32 base_add_l, base_add_h;
u32 j = 0;
- sprintf(result[j++], "%8u", index);
+ sprintf(result[j++], "%u", index);
- sprintf(result[j++], "%6u", readl_relaxed(ring->tqp->io_base +
+ sprintf(result[j++], "%u", readl_relaxed(ring->tqp->io_base +
HNS3_RING_RX_RING_BD_NUM_REG));
- sprintf(result[j++], "%6u", readl_relaxed(ring->tqp->io_base +
+ sprintf(result[j++], "%u", readl_relaxed(ring->tqp->io_base +
HNS3_RING_RX_RING_BD_LEN_REG));
- sprintf(result[j++], "%4u", readl_relaxed(ring->tqp->io_base +
+ sprintf(result[j++], "%u", readl_relaxed(ring->tqp->io_base +
HNS3_RING_RX_RING_TAIL_REG));
- sprintf(result[j++], "%4u", readl_relaxed(ring->tqp->io_base +
+ sprintf(result[j++], "%u", readl_relaxed(ring->tqp->io_base +
HNS3_RING_RX_RING_HEAD_REG));
- sprintf(result[j++], "%6u", readl_relaxed(ring->tqp->io_base +
+ sprintf(result[j++], "%u", readl_relaxed(ring->tqp->io_base +
HNS3_RING_RX_RING_FBDNUM_REG));
- sprintf(result[j++], "%6u", readl_relaxed(ring->tqp->io_base +
+ sprintf(result[j++], "%u", readl_relaxed(ring->tqp->io_base +
HNS3_RING_RX_RING_PKTNUM_RECORD_REG));
- sprintf(result[j++], "%9u", ring->rx_copybreak);
+ sprintf(result[j++], "%u", ring->rx_copybreak);
- sprintf(result[j++], "%7s", readl_relaxed(ring->tqp->io_base +
+ sprintf(result[j++], "%s", readl_relaxed(ring->tqp->io_base +
HNS3_RING_EN_REG) ? "on" : "off");
if (hnae3_ae_dev_tqp_txrx_indep_supported(ae_dev))
- sprintf(result[j++], "%10s", readl_relaxed(ring->tqp->io_base +
+ sprintf(result[j++], "%s", readl_relaxed(ring->tqp->io_base +
HNS3_RING_RX_EN_REG) ? "on" : "off");
else
- sprintf(result[j++], "%10s", "NA");
+ sprintf(result[j++], "%s", "NA");
base_add_h = readl_relaxed(ring->tqp->io_base +
HNS3_RING_RX_RING_BASEADDR_H_REG);
@@ -700,36 +700,36 @@ static void hns3_dump_tx_queue_info(struct hns3_enet_ring *ring,
u32 base_add_l, base_add_h;
u32 j = 0;
- sprintf(result[j++], "%8u", index);
- sprintf(result[j++], "%6u", readl_relaxed(ring->tqp->io_base +
+ sprintf(result[j++], "%u", index);
+ sprintf(result[j++], "%u", readl_relaxed(ring->tqp->io_base +
HNS3_RING_TX_RING_BD_NUM_REG));
- sprintf(result[j++], "%2u", readl_relaxed(ring->tqp->io_base +
+ sprintf(result[j++], "%u", readl_relaxed(ring->tqp->io_base +
HNS3_RING_TX_RING_TC_REG));
- sprintf(result[j++], "%4u", readl_relaxed(ring->tqp->io_base +
+ sprintf(result[j++], "%u", readl_relaxed(ring->tqp->io_base +
HNS3_RING_TX_RING_TAIL_REG));
- sprintf(result[j++], "%4u", readl_relaxed(ring->tqp->io_base +
+ sprintf(result[j++], "%u", readl_relaxed(ring->tqp->io_base +
HNS3_RING_TX_RING_HEAD_REG));
- sprintf(result[j++], "%6u", readl_relaxed(ring->tqp->io_base +
+ sprintf(result[j++], "%u", readl_relaxed(ring->tqp->io_base +
HNS3_RING_TX_RING_FBDNUM_REG));
- sprintf(result[j++], "%6u", readl_relaxed(ring->tqp->io_base +
+ sprintf(result[j++], "%u", readl_relaxed(ring->tqp->io_base +
HNS3_RING_TX_RING_OFFSET_REG));
- sprintf(result[j++], "%6u", readl_relaxed(ring->tqp->io_base +
+ sprintf(result[j++], "%u", readl_relaxed(ring->tqp->io_base +
HNS3_RING_TX_RING_PKTNUM_RECORD_REG));
- sprintf(result[j++], "%7s", readl_relaxed(ring->tqp->io_base +
+ sprintf(result[j++], "%s", readl_relaxed(ring->tqp->io_base +
HNS3_RING_EN_REG) ? "on" : "off");
if (hnae3_ae_dev_tqp_txrx_indep_supported(ae_dev))
- sprintf(result[j++], "%10s", readl_relaxed(ring->tqp->io_base +
+ sprintf(result[j++], "%s", readl_relaxed(ring->tqp->io_base +
HNS3_RING_TX_EN_REG) ? "on" : "off");
else
- sprintf(result[j++], "%10s", "NA");
+ sprintf(result[j++], "%s", "NA");
base_add_h = readl_relaxed(ring->tqp->io_base +
HNS3_RING_TX_RING_BASEADDR_H_REG);
@@ -848,15 +848,15 @@ static void hns3_dump_rx_bd_info(struct hns3_nic_priv *priv,
{
unsigned int j = 0;
- sprintf(result[j++], "%5d", idx);
+ sprintf(result[j++], "%d", idx);
sprintf(result[j++], "%#x", le32_to_cpu(desc->rx.l234_info));
- sprintf(result[j++], "%7u", le16_to_cpu(desc->rx.pkt_len));
- sprintf(result[j++], "%4u", le16_to_cpu(desc->rx.size));
+ sprintf(result[j++], "%u", le16_to_cpu(desc->rx.pkt_len));
+ sprintf(result[j++], "%u", le16_to_cpu(desc->rx.size));
sprintf(result[j++], "%#x", le32_to_cpu(desc->rx.rss_hash));
- sprintf(result[j++], "%5u", le16_to_cpu(desc->rx.fd_id));
- sprintf(result[j++], "%8u", le16_to_cpu(desc->rx.vlan_tag));
- sprintf(result[j++], "%15u", le16_to_cpu(desc->rx.o_dm_vlan_id_fb));
- sprintf(result[j++], "%11u", le16_to_cpu(desc->rx.ot_vlan_tag));
+ sprintf(result[j++], "%u", le16_to_cpu(desc->rx.fd_id));
+ sprintf(result[j++], "%u", le16_to_cpu(desc->rx.vlan_tag));
+ sprintf(result[j++], "%u", le16_to_cpu(desc->rx.o_dm_vlan_id_fb));
+ sprintf(result[j++], "%u", le16_to_cpu(desc->rx.ot_vlan_tag));
sprintf(result[j++], "%#x", le32_to_cpu(desc->rx.bd_base_info));
if (test_bit(HNS3_NIC_STATE_RXD_ADV_LAYOUT_ENABLE, &priv->state)) {
u32 ol_info = le32_to_cpu(desc->rx.ol_info);
@@ -930,19 +930,19 @@ static void hns3_dump_tx_bd_info(struct hns3_nic_priv *priv,
{
unsigned int j = 0;
- sprintf(result[j++], "%6d", idx);
+ sprintf(result[j++], "%d", idx);
sprintf(result[j++], "%#llx", le64_to_cpu(desc->addr));
- sprintf(result[j++], "%5u", le16_to_cpu(desc->tx.vlan_tag));
- sprintf(result[j++], "%5u", le16_to_cpu(desc->tx.send_size));
+ sprintf(result[j++], "%u", le16_to_cpu(desc->tx.vlan_tag));
+ sprintf(result[j++], "%u", le16_to_cpu(desc->tx.send_size));
sprintf(result[j++], "%#x",
le32_to_cpu(desc->tx.type_cs_vlan_tso_len));
- sprintf(result[j++], "%5u", le16_to_cpu(desc->tx.outer_vlan_tag));
- sprintf(result[j++], "%5u", le16_to_cpu(desc->tx.tv));
- sprintf(result[j++], "%10u",
+ sprintf(result[j++], "%u", le16_to_cpu(desc->tx.outer_vlan_tag));
+ sprintf(result[j++], "%u", le16_to_cpu(desc->tx.tv));
+ sprintf(result[j++], "%u",
le32_to_cpu(desc->tx.ol_type_vlan_len_msec));
sprintf(result[j++], "%#x", le32_to_cpu(desc->tx.paylen_ol4cs));
sprintf(result[j++], "%#x", le16_to_cpu(desc->tx.bdtp_fe_sc_vld_ra_ri));
- sprintf(result[j++], "%5u", le16_to_cpu(desc->tx.mss_hw_csum));
+ sprintf(result[j++], "%u", le16_to_cpu(desc->tx.mss_hw_csum));
}
static int hns3_dbg_tx_bd_info(struct hns3_dbg_data *d, char *buf, int len)
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
index 14dc12c2155d..a3ee7875d6a7 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
@@ -5203,6 +5203,13 @@ static void hns3_state_init(struct hnae3_handle *handle)
set_bit(HNS3_NIC_STATE_RXD_ADV_LAYOUT_ENABLE, &priv->state);
}
+static void hns3_state_uninit(struct hnae3_handle *handle)
+{
+ struct hns3_nic_priv *priv = handle->priv;
+
+ clear_bit(HNS3_NIC_STATE_INITED, &priv->state);
+}
+
static int hns3_client_init(struct hnae3_handle *handle)
{
struct pci_dev *pdev = handle->pdev;
@@ -5320,7 +5327,9 @@ static int hns3_client_init(struct hnae3_handle *handle)
return ret;
out_reg_netdev_fail:
+ hns3_state_uninit(handle);
hns3_dbg_uninit(handle);
+ hns3_client_stop(handle);
out_client_start:
hns3_free_rx_cpu_rmap(netdev);
hns3_nic_uninit_irq(priv);
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
index 6799d16de34b..7998ca617a92 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
@@ -94,6 +94,13 @@ static int hclge_send_mbx_msg(struct hclge_vport *vport, u8 *msg, u16 msg_len,
enum hclge_comm_cmd_status status;
struct hclge_desc desc;
+ if (msg_len > HCLGE_MBX_MAX_MSG_SIZE) {
+ dev_err(&hdev->pdev->dev,
+ "msg data length(=%u) exceeds maximum(=%u)\n",
+ msg_len, HCLGE_MBX_MAX_MSG_SIZE);
+ return -EMSGSIZE;
+ }
+
resp_pf_to_vf = (struct hclge_mbx_pf_to_vf_cmd *)desc.data;
hclge_cmd_setup_basic_desc(&desc, HCLGEVF_OPC_MBX_PF_TO_VF, false);
@@ -176,7 +183,7 @@ static int hclge_get_ring_chain_from_mbx(
ring_num = req->msg.ring_num;
if (ring_num > HCLGE_MBX_MAX_RING_CHAIN_PARAM_NUM)
- return -ENOMEM;
+ return -EINVAL;
for (i = 0; i < ring_num; i++) {
if (req->msg.param[i].tqp_index >= vport->nic.kinfo.rss_size) {
@@ -587,9 +594,9 @@ static int hclge_set_vf_mtu(struct hclge_vport *vport,
return hclge_set_vport_mtu(vport, mtu);
}
-static void hclge_get_queue_id_in_pf(struct hclge_vport *vport,
- struct hclge_mbx_vf_to_pf_cmd *mbx_req,
- struct hclge_respond_to_vf_msg *resp_msg)
+static int hclge_get_queue_id_in_pf(struct hclge_vport *vport,
+ struct hclge_mbx_vf_to_pf_cmd *mbx_req,
+ struct hclge_respond_to_vf_msg *resp_msg)
{
struct hnae3_handle *handle = &vport->nic;
struct hclge_dev *hdev = vport->back;
@@ -599,17 +606,18 @@ static void hclge_get_queue_id_in_pf(struct hclge_vport *vport,
if (queue_id >= handle->kinfo.num_tqps) {
dev_err(&hdev->pdev->dev, "Invalid queue id(%u) from VF %u\n",
queue_id, mbx_req->mbx_src_vfid);
- return;
+ return -EINVAL;
}
qid_in_pf = hclge_covert_handle_qid_global(&vport->nic, queue_id);
memcpy(resp_msg->data, &qid_in_pf, sizeof(qid_in_pf));
resp_msg->len = sizeof(qid_in_pf);
+ return 0;
}
-static void hclge_get_rss_key(struct hclge_vport *vport,
- struct hclge_mbx_vf_to_pf_cmd *mbx_req,
- struct hclge_respond_to_vf_msg *resp_msg)
+static int hclge_get_rss_key(struct hclge_vport *vport,
+ struct hclge_mbx_vf_to_pf_cmd *mbx_req,
+ struct hclge_respond_to_vf_msg *resp_msg)
{
#define HCLGE_RSS_MBX_RESP_LEN 8
struct hclge_dev *hdev = vport->back;
@@ -627,13 +635,14 @@ static void hclge_get_rss_key(struct hclge_vport *vport,
dev_warn(&hdev->pdev->dev,
"failed to get the rss hash key, the index(%u) invalid !\n",
index);
- return;
+ return -EINVAL;
}
memcpy(resp_msg->data,
&rss_cfg->rss_hash_key[index * HCLGE_RSS_MBX_RESP_LEN],
HCLGE_RSS_MBX_RESP_LEN);
resp_msg->len = HCLGE_RSS_MBX_RESP_LEN;
+ return 0;
}
static void hclge_link_fail_parse(struct hclge_dev *hdev, u8 link_fail_code)
@@ -809,10 +818,10 @@ void hclge_mbx_handler(struct hclge_dev *hdev)
"VF fail(%d) to set mtu\n", ret);
break;
case HCLGE_MBX_GET_QID_IN_PF:
- hclge_get_queue_id_in_pf(vport, req, &resp_msg);
+ ret = hclge_get_queue_id_in_pf(vport, req, &resp_msg);
break;
case HCLGE_MBX_GET_RSS_KEY:
- hclge_get_rss_key(vport, req, &resp_msg);
+ ret = hclge_get_rss_key(vport, req, &resp_msg);
break;
case HCLGE_MBX_GET_LINK_MODE:
hclge_get_link_mode(vport, req);
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_wq.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_wq.c
index 2d9b06d7caad..f7dc7d825f63 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_wq.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_wq.c
@@ -771,7 +771,7 @@ struct hinic_hw_wqe *hinic_get_wqe(struct hinic_wq *wq, unsigned int wqe_size,
/* If we only have one page, still need to get shadown wqe when
* wqe rolling-over page
*/
- if (curr_pg != end_pg || MASKED_WQE_IDX(wq, end_prod_idx) < *prod_idx) {
+ if (curr_pg != end_pg || end_prod_idx < *prod_idx) {
void *shadow_addr = &wq->shadow_wqe[curr_pg * wq->max_wqe_size];
copy_wqe_to_shadow(wq, shadow_addr, num_wqebbs, *prod_idx);
@@ -841,7 +841,10 @@ struct hinic_hw_wqe *hinic_read_wqe(struct hinic_wq *wq, unsigned int wqe_size,
*cons_idx = curr_cons_idx;
- if (curr_pg != end_pg) {
+ /* If we only have one page, still need to get shadown wqe when
+ * wqe rolling-over page
+ */
+ if (curr_pg != end_pg || end_cons_idx < curr_cons_idx) {
void *shadow_addr = &wq->shadow_wqe[curr_pg * wq->max_wqe_size];
copy_wqe_to_shadow(wq, shadow_addr, num_wqebbs, *cons_idx);
diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index 77683909ca3d..5c5931dba51d 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -3210,13 +3210,8 @@ static void ibmvnic_get_ringparam(struct net_device *netdev,
{
struct ibmvnic_adapter *adapter = netdev_priv(netdev);
- if (adapter->priv_flags & IBMVNIC_USE_SERVER_MAXES) {
- ring->rx_max_pending = adapter->max_rx_add_entries_per_subcrq;
- ring->tx_max_pending = adapter->max_tx_entries_per_subcrq;
- } else {
- ring->rx_max_pending = IBMVNIC_MAX_QUEUE_SZ;
- ring->tx_max_pending = IBMVNIC_MAX_QUEUE_SZ;
- }
+ ring->rx_max_pending = adapter->max_rx_add_entries_per_subcrq;
+ ring->tx_max_pending = adapter->max_tx_entries_per_subcrq;
ring->rx_mini_max_pending = 0;
ring->rx_jumbo_max_pending = 0;
ring->rx_pending = adapter->req_rx_add_entries_per_subcrq;
@@ -3231,23 +3226,21 @@ static int ibmvnic_set_ringparam(struct net_device *netdev,
struct netlink_ext_ack *extack)
{
struct ibmvnic_adapter *adapter = netdev_priv(netdev);
- int ret;
- ret = 0;
+ if (ring->rx_pending > adapter->max_rx_add_entries_per_subcrq ||
+ ring->tx_pending > adapter->max_tx_entries_per_subcrq) {
+ netdev_err(netdev, "Invalid request.\n");
+ netdev_err(netdev, "Max tx buffers = %llu\n",
+ adapter->max_rx_add_entries_per_subcrq);
+ netdev_err(netdev, "Max rx buffers = %llu\n",
+ adapter->max_tx_entries_per_subcrq);
+ return -EINVAL;
+ }
+
adapter->desired.rx_entries = ring->rx_pending;
adapter->desired.tx_entries = ring->tx_pending;
- ret = wait_for_reset(adapter);
-
- if (!ret &&
- (adapter->req_rx_add_entries_per_subcrq != ring->rx_pending ||
- adapter->req_tx_entries_per_subcrq != ring->tx_pending))
- netdev_info(netdev,
- "Could not match full ringsize request. Requested: RX %d, TX %d; Allowed: RX %llu, TX %llu\n",
- ring->rx_pending, ring->tx_pending,
- adapter->req_rx_add_entries_per_subcrq,
- adapter->req_tx_entries_per_subcrq);
- return ret;
+ return wait_for_reset(adapter);
}
static void ibmvnic_get_channels(struct net_device *netdev,
@@ -3255,14 +3248,8 @@ static void ibmvnic_get_channels(struct net_device *netdev,
{
struct ibmvnic_adapter *adapter = netdev_priv(netdev);
- if (adapter->priv_flags & IBMVNIC_USE_SERVER_MAXES) {
- channels->max_rx = adapter->max_rx_queues;
- channels->max_tx = adapter->max_tx_queues;
- } else {
- channels->max_rx = IBMVNIC_MAX_QUEUES;
- channels->max_tx = IBMVNIC_MAX_QUEUES;
- }
-
+ channels->max_rx = adapter->max_rx_queues;
+ channels->max_tx = adapter->max_tx_queues;
channels->max_other = 0;
channels->max_combined = 0;
channels->rx_count = adapter->req_rx_queues;
@@ -3275,22 +3262,11 @@ static int ibmvnic_set_channels(struct net_device *netdev,
struct ethtool_channels *channels)
{
struct ibmvnic_adapter *adapter = netdev_priv(netdev);
- int ret;
- ret = 0;
adapter->desired.rx_queues = channels->rx_count;
adapter->desired.tx_queues = channels->tx_count;
- ret = wait_for_reset(adapter);
-
- if (!ret &&
- (adapter->req_rx_queues != channels->rx_count ||
- adapter->req_tx_queues != channels->tx_count))
- netdev_info(netdev,
- "Could not match full channels request. Requested: RX %d, TX %d; Allowed: RX %llu, TX %llu\n",
- channels->rx_count, channels->tx_count,
- adapter->req_rx_queues, adapter->req_tx_queues);
- return ret;
+ return wait_for_reset(adapter);
}
static void ibmvnic_get_strings(struct net_device *dev, u32 stringset, u8 *data)
@@ -3298,43 +3274,32 @@ static void ibmvnic_get_strings(struct net_device *dev, u32 stringset, u8 *data)
struct ibmvnic_adapter *adapter = netdev_priv(dev);
int i;
- switch (stringset) {
- case ETH_SS_STATS:
- for (i = 0; i < ARRAY_SIZE(ibmvnic_stats);
- i++, data += ETH_GSTRING_LEN)
- memcpy(data, ibmvnic_stats[i].name, ETH_GSTRING_LEN);
+ if (stringset != ETH_SS_STATS)
+ return;
- for (i = 0; i < adapter->req_tx_queues; i++) {
- snprintf(data, ETH_GSTRING_LEN, "tx%d_packets", i);
- data += ETH_GSTRING_LEN;
+ for (i = 0; i < ARRAY_SIZE(ibmvnic_stats); i++, data += ETH_GSTRING_LEN)
+ memcpy(data, ibmvnic_stats[i].name, ETH_GSTRING_LEN);
- snprintf(data, ETH_GSTRING_LEN, "tx%d_bytes", i);
- data += ETH_GSTRING_LEN;
+ for (i = 0; i < adapter->req_tx_queues; i++) {
+ snprintf(data, ETH_GSTRING_LEN, "tx%d_packets", i);
+ data += ETH_GSTRING_LEN;
- snprintf(data, ETH_GSTRING_LEN,
- "tx%d_dropped_packets", i);
- data += ETH_GSTRING_LEN;
- }
+ snprintf(data, ETH_GSTRING_LEN, "tx%d_bytes", i);
+ data += ETH_GSTRING_LEN;
- for (i = 0; i < adapter->req_rx_queues; i++) {
- snprintf(data, ETH_GSTRING_LEN, "rx%d_packets", i);
- data += ETH_GSTRING_LEN;
+ snprintf(data, ETH_GSTRING_LEN, "tx%d_dropped_packets", i);
+ data += ETH_GSTRING_LEN;
+ }
- snprintf(data, ETH_GSTRING_LEN, "rx%d_bytes", i);
- data += ETH_GSTRING_LEN;
+ for (i = 0; i < adapter->req_rx_queues; i++) {
+ snprintf(data, ETH_GSTRING_LEN, "rx%d_packets", i);
+ data += ETH_GSTRING_LEN;
- snprintf(data, ETH_GSTRING_LEN, "rx%d_interrupts", i);
- data += ETH_GSTRING_LEN;
- }
- break;
+ snprintf(data, ETH_GSTRING_LEN, "rx%d_bytes", i);
+ data += ETH_GSTRING_LEN;
- case ETH_SS_PRIV_FLAGS:
- for (i = 0; i < ARRAY_SIZE(ibmvnic_priv_flags); i++)
- strcpy(data + i * ETH_GSTRING_LEN,
- ibmvnic_priv_flags[i]);
- break;
- default:
- return;
+ snprintf(data, ETH_GSTRING_LEN, "rx%d_interrupts", i);
+ data += ETH_GSTRING_LEN;
}
}
@@ -3347,8 +3312,6 @@ static int ibmvnic_get_sset_count(struct net_device *dev, int sset)
return ARRAY_SIZE(ibmvnic_stats) +
adapter->req_tx_queues * NUM_TX_STATS +
adapter->req_rx_queues * NUM_RX_STATS;
- case ETH_SS_PRIV_FLAGS:
- return ARRAY_SIZE(ibmvnic_priv_flags);
default:
return -EOPNOTSUPP;
}
@@ -3401,26 +3364,6 @@ static void ibmvnic_get_ethtool_stats(struct net_device *dev,
}
}
-static u32 ibmvnic_get_priv_flags(struct net_device *netdev)
-{
- struct ibmvnic_adapter *adapter = netdev_priv(netdev);
-
- return adapter->priv_flags;
-}
-
-static int ibmvnic_set_priv_flags(struct net_device *netdev, u32 flags)
-{
- struct ibmvnic_adapter *adapter = netdev_priv(netdev);
- bool which_maxes = !!(flags & IBMVNIC_USE_SERVER_MAXES);
-
- if (which_maxes)
- adapter->priv_flags |= IBMVNIC_USE_SERVER_MAXES;
- else
- adapter->priv_flags &= ~IBMVNIC_USE_SERVER_MAXES;
-
- return 0;
-}
-
static const struct ethtool_ops ibmvnic_ethtool_ops = {
.get_drvinfo = ibmvnic_get_drvinfo,
.get_msglevel = ibmvnic_get_msglevel,
@@ -3434,8 +3377,6 @@ static const struct ethtool_ops ibmvnic_ethtool_ops = {
.get_sset_count = ibmvnic_get_sset_count,
.get_ethtool_stats = ibmvnic_get_ethtool_stats,
.get_link_ksettings = ibmvnic_get_link_ksettings,
- .get_priv_flags = ibmvnic_get_priv_flags,
- .set_priv_flags = ibmvnic_set_priv_flags,
};
/* Routines for managing CRQs/sCRQs */
diff --git a/drivers/net/ethernet/ibm/ibmvnic.h b/drivers/net/ethernet/ibm/ibmvnic.h
index 8f5cefb932dd..1310c861bf83 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.h
+++ b/drivers/net/ethernet/ibm/ibmvnic.h
@@ -41,11 +41,6 @@
#define IBMVNIC_RESET_DELAY 100
-static const char ibmvnic_priv_flags[][ETH_GSTRING_LEN] = {
-#define IBMVNIC_USE_SERVER_MAXES 0x1
- "use-server-maxes"
-};
-
struct ibmvnic_login_buffer {
__be32 len;
__be32 version;
@@ -883,7 +878,6 @@ struct ibmvnic_adapter {
struct ibmvnic_control_ip_offload_buffer ip_offload_ctrl;
dma_addr_t ip_offload_ctrl_tok;
u32 msg_enable;
- u32 priv_flags;
/* Vital Product Data (VPD) */
struct ibmvnic_vpd *vpd;
diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c
index d60e2016d03c..e6c8e6d5234f 100644
--- a/drivers/net/ethernet/intel/e1000e/ich8lan.c
+++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c
@@ -1009,8 +1009,8 @@ static s32 e1000_platform_pm_pch_lpt(struct e1000_hw *hw, bool link)
{
u32 reg = link << (E1000_LTRV_REQ_SHIFT + E1000_LTRV_NOSNOOP_SHIFT) |
link << E1000_LTRV_REQ_SHIFT | E1000_LTRV_SEND;
- u16 max_ltr_enc_d = 0; /* maximum LTR decoded by platform */
- u16 lat_enc_d = 0; /* latency decoded */
+ u32 max_ltr_enc_d = 0; /* maximum LTR decoded by platform */
+ u32 lat_enc_d = 0; /* latency decoded */
u16 lat_enc = 0; /* latency encoded */
if (link) {
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 6778df2177a1..98871f014994 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -7549,42 +7549,43 @@ static void i40e_free_macvlan_channels(struct i40e_vsi *vsi)
static int i40e_fwd_ring_up(struct i40e_vsi *vsi, struct net_device *vdev,
struct i40e_fwd_adapter *fwd)
{
+ struct i40e_channel *ch = NULL, *ch_tmp, *iter;
int ret = 0, num_tc = 1, i, aq_err;
- struct i40e_channel *ch, *ch_tmp;
struct i40e_pf *pf = vsi->back;
struct i40e_hw *hw = &pf->hw;
- if (list_empty(&vsi->macvlan_list))
- return -EINVAL;
-
/* Go through the list and find an available channel */
- list_for_each_entry_safe(ch, ch_tmp, &vsi->macvlan_list, list) {
- if (!i40e_is_channel_macvlan(ch)) {
- ch->fwd = fwd;
+ list_for_each_entry_safe(iter, ch_tmp, &vsi->macvlan_list, list) {
+ if (!i40e_is_channel_macvlan(iter)) {
+ iter->fwd = fwd;
/* record configuration for macvlan interface in vdev */
for (i = 0; i < num_tc; i++)
netdev_bind_sb_channel_queue(vsi->netdev, vdev,
i,
- ch->num_queue_pairs,
- ch->base_queue);
- for (i = 0; i < ch->num_queue_pairs; i++) {
+ iter->num_queue_pairs,
+ iter->base_queue);
+ for (i = 0; i < iter->num_queue_pairs; i++) {
struct i40e_ring *tx_ring, *rx_ring;
u16 pf_q;
- pf_q = ch->base_queue + i;
+ pf_q = iter->base_queue + i;
/* Get to TX ring ptr */
tx_ring = vsi->tx_rings[pf_q];
- tx_ring->ch = ch;
+ tx_ring->ch = iter;
/* Get the RX ring ptr */
rx_ring = vsi->rx_rings[pf_q];
- rx_ring->ch = ch;
+ rx_ring->ch = iter;
}
+ ch = iter;
break;
}
}
+ if (!ch)
+ return -EINVAL;
+
/* Guarantee all rings are updated before we update the
* MAC address filter.
*/
diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h
index 8ed3c9ab7ff7..a895e3a8e988 100644
--- a/drivers/net/ethernet/intel/ice/ice.h
+++ b/drivers/net/ethernet/intel/ice/ice.h
@@ -540,6 +540,7 @@ struct ice_pf {
struct mutex avail_q_mutex; /* protects access to avail_[rx|tx]qs */
struct mutex sw_mutex; /* lock for protecting VSI alloc flow */
struct mutex tc_mutex; /* lock to protect TC changes */
+ struct mutex adev_mutex; /* lock to protect aux device access */
u32 msg_enable;
struct ice_ptp ptp;
struct tty_driver *ice_gnss_tty_driver;
diff --git a/drivers/net/ethernet/intel/ice/ice_eswitch.c b/drivers/net/ethernet/intel/ice/ice_eswitch.c
index 9a84d746a6c4..6a463b242c7d 100644
--- a/drivers/net/ethernet/intel/ice/ice_eswitch.c
+++ b/drivers/net/ethernet/intel/ice/ice_eswitch.c
@@ -361,7 +361,8 @@ ice_eswitch_port_start_xmit(struct sk_buff *skb, struct net_device *netdev)
np = netdev_priv(netdev);
vsi = np->vsi;
- if (ice_is_reset_in_progress(vsi->back->state))
+ if (ice_is_reset_in_progress(vsi->back->state) ||
+ test_bit(ICE_VF_DIS, vsi->back->state))
return NETDEV_TX_BUSY;
repr = ice_netdev_to_repr(netdev);
diff --git a/drivers/net/ethernet/intel/ice/ice_eswitch.h b/drivers/net/ethernet/intel/ice/ice_eswitch.h
index bd58d9d2e565..6a413331572b 100644
--- a/drivers/net/ethernet/intel/ice/ice_eswitch.h
+++ b/drivers/net/ethernet/intel/ice/ice_eswitch.h
@@ -52,7 +52,7 @@ static inline void ice_eswitch_update_repr(struct ice_vsi *vsi) { }
static inline int ice_eswitch_configure(struct ice_pf *pf)
{
- return -EOPNOTSUPP;
+ return 0;
}
static inline int ice_eswitch_rebuild(struct ice_pf *pf)
diff --git a/drivers/net/ethernet/intel/ice/ice_idc.c b/drivers/net/ethernet/intel/ice/ice_idc.c
index 25a436d342c2..3e3b2ed4cd5d 100644
--- a/drivers/net/ethernet/intel/ice/ice_idc.c
+++ b/drivers/net/ethernet/intel/ice/ice_idc.c
@@ -37,14 +37,17 @@ void ice_send_event_to_aux(struct ice_pf *pf, struct iidc_event *event)
if (WARN_ON_ONCE(!in_task()))
return;
+ mutex_lock(&pf->adev_mutex);
if (!pf->adev)
- return;
+ goto finish;
device_lock(&pf->adev->dev);
iadrv = ice_get_auxiliary_drv(pf);
if (iadrv && iadrv->event_handler)
iadrv->event_handler(pf, event);
device_unlock(&pf->adev->dev);
+finish:
+ mutex_unlock(&pf->adev_mutex);
}
/**
@@ -290,7 +293,6 @@ int ice_plug_aux_dev(struct ice_pf *pf)
return -ENOMEM;
adev = &iadev->adev;
- pf->adev = adev;
iadev->pf = pf;
adev->id = pf->aux_idx;
@@ -300,18 +302,20 @@ int ice_plug_aux_dev(struct ice_pf *pf)
ret = auxiliary_device_init(adev);
if (ret) {
- pf->adev = NULL;
kfree(iadev);
return ret;
}
ret = auxiliary_device_add(adev);
if (ret) {
- pf->adev = NULL;
auxiliary_device_uninit(adev);
return ret;
}
+ mutex_lock(&pf->adev_mutex);
+ pf->adev = adev;
+ mutex_unlock(&pf->adev_mutex);
+
return 0;
}
@@ -320,12 +324,17 @@ int ice_plug_aux_dev(struct ice_pf *pf)
*/
void ice_unplug_aux_dev(struct ice_pf *pf)
{
- if (!pf->adev)
- return;
+ struct auxiliary_device *adev;
- auxiliary_device_delete(pf->adev);
- auxiliary_device_uninit(pf->adev);
+ mutex_lock(&pf->adev_mutex);
+ adev = pf->adev;
pf->adev = NULL;
+ mutex_unlock(&pf->adev_mutex);
+
+ if (adev) {
+ auxiliary_device_delete(adev);
+ auxiliary_device_uninit(adev);
+ }
}
/**
diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c
index 6d19c58ccacd..454e01ae09b9 100644
--- a/drivers/net/ethernet/intel/ice/ice_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_lib.c
@@ -3043,8 +3043,8 @@ ice_vsi_rebuild_get_coalesce(struct ice_vsi *vsi,
ice_for_each_q_vector(vsi, i) {
struct ice_q_vector *q_vector = vsi->q_vectors[i];
- coalesce[i].itr_tx = q_vector->tx.itr_setting;
- coalesce[i].itr_rx = q_vector->rx.itr_setting;
+ coalesce[i].itr_tx = q_vector->tx.itr_settings;
+ coalesce[i].itr_rx = q_vector->rx.itr_settings;
coalesce[i].intrl = q_vector->intrl;
if (i < vsi->num_txq)
@@ -3100,21 +3100,21 @@ ice_vsi_rebuild_set_coalesce(struct ice_vsi *vsi,
*/
if (i < vsi->alloc_rxq && coalesce[i].rx_valid) {
rc = &vsi->q_vectors[i]->rx;
- rc->itr_setting = coalesce[i].itr_rx;
+ rc->itr_settings = coalesce[i].itr_rx;
ice_write_itr(rc, rc->itr_setting);
} else if (i < vsi->alloc_rxq) {
rc = &vsi->q_vectors[i]->rx;
- rc->itr_setting = coalesce[0].itr_rx;
+ rc->itr_settings = coalesce[0].itr_rx;
ice_write_itr(rc, rc->itr_setting);
}
if (i < vsi->alloc_txq && coalesce[i].tx_valid) {
rc = &vsi->q_vectors[i]->tx;
- rc->itr_setting = coalesce[i].itr_tx;
+ rc->itr_settings = coalesce[i].itr_tx;
ice_write_itr(rc, rc->itr_setting);
} else if (i < vsi->alloc_txq) {
rc = &vsi->q_vectors[i]->tx;
- rc->itr_setting = coalesce[0].itr_tx;
+ rc->itr_settings = coalesce[0].itr_tx;
ice_write_itr(rc, rc->itr_setting);
}
@@ -3128,12 +3128,12 @@ ice_vsi_rebuild_set_coalesce(struct ice_vsi *vsi,
for (; i < vsi->num_q_vectors; i++) {
/* transmit */
rc = &vsi->q_vectors[i]->tx;
- rc->itr_setting = coalesce[0].itr_tx;
+ rc->itr_settings = coalesce[0].itr_tx;
ice_write_itr(rc, rc->itr_setting);
/* receive */
rc = &vsi->q_vectors[i]->rx;
- rc->itr_setting = coalesce[0].itr_rx;
+ rc->itr_settings = coalesce[0].itr_rx;
ice_write_itr(rc, rc->itr_setting);
vsi->q_vectors[i]->intrl = coalesce[0].intrl;
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index 5b1198859da7..963a5f40e071 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -3769,6 +3769,7 @@ u16 ice_get_avail_rxq_count(struct ice_pf *pf)
static void ice_deinit_pf(struct ice_pf *pf)
{
ice_service_task_stop(pf);
+ mutex_destroy(&pf->adev_mutex);
mutex_destroy(&pf->sw_mutex);
mutex_destroy(&pf->tc_mutex);
mutex_destroy(&pf->avail_q_mutex);
@@ -3847,6 +3848,7 @@ static int ice_init_pf(struct ice_pf *pf)
mutex_init(&pf->sw_mutex);
mutex_init(&pf->tc_mutex);
+ mutex_init(&pf->adev_mutex);
INIT_HLIST_HEAD(&pf->aq_wait_list);
spin_lock_init(&pf->aq_wait_lock);
@@ -6170,9 +6172,10 @@ static int ice_up_complete(struct ice_vsi *vsi)
ice_ptp_link_change(pf, pf->hw.pf_id, true);
}
- /* clear this now, and the first stats read will be used as baseline */
- vsi->stat_offsets_loaded = false;
-
+ /* Perform an initial read of the statistics registers now to
+ * set the baseline so counters are ready when interface is up
+ */
+ ice_update_eth_stats(vsi);
ice_service_task_schedule(pf);
return 0;
@@ -6929,12 +6932,15 @@ static void ice_rebuild(struct ice_pf *pf, enum ice_reset_req reset_type)
dev_dbg(dev, "rebuilding PF after reset_type=%d\n", reset_type);
+#define ICE_EMP_RESET_SLEEP_MS 5000
if (reset_type == ICE_RESET_EMPR) {
/* If an EMP reset has occurred, any previously pending flash
* update will have completed. We no longer know whether or
* not the NVM update EMP reset is restricted.
*/
pf->fw_emp_reset_disabled = false;
+
+ msleep(ICE_EMP_RESET_SLEEP_MS);
}
err = ice_init_all_ctrlq(hw);
diff --git a/drivers/net/ethernet/intel/ice/ice_nvm.c b/drivers/net/ethernet/intel/ice/ice_nvm.c
index 4eb0599714f4..13cdb5ea594d 100644
--- a/drivers/net/ethernet/intel/ice/ice_nvm.c
+++ b/drivers/net/ethernet/intel/ice/ice_nvm.c
@@ -641,6 +641,7 @@ ice_get_orom_civd_data(struct ice_hw *hw, enum ice_bank_select bank,
status = ice_read_flash_module(hw, bank, ICE_SR_1ST_OROM_BANK_PTR, 0,
orom_data, hw->flash.banks.orom_size);
if (status) {
+ vfree(orom_data);
ice_debug(hw, ICE_DBG_NVM, "Unable to read Option ROM data\n");
return status;
}
diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.c b/drivers/net/ethernet/intel/ice/ice_ptp.c
index a1cd33273ca4..662947c882e8 100644
--- a/drivers/net/ethernet/intel/ice/ice_ptp.c
+++ b/drivers/net/ethernet/intel/ice/ice_ptp.c
@@ -500,12 +500,19 @@ ice_ptp_read_src_clk_reg(struct ice_pf *pf, struct ptp_system_timestamp *sts)
* This function must be called periodically to ensure that the cached value
* is never more than 2 seconds old. It must also be called whenever the PHC
* time has been changed.
+ *
+ * Return:
+ * * 0 - OK, successfully updated
+ * * -EAGAIN - PF was busy, need to reschedule the update
*/
-static void ice_ptp_update_cached_phctime(struct ice_pf *pf)
+static int ice_ptp_update_cached_phctime(struct ice_pf *pf)
{
u64 systime;
int i;
+ if (test_and_set_bit(ICE_CFG_BUSY, pf->state))
+ return -EAGAIN;
+
/* Read the current PHC time */
systime = ice_ptp_read_src_clk_reg(pf, NULL);
@@ -528,6 +535,9 @@ static void ice_ptp_update_cached_phctime(struct ice_pf *pf)
WRITE_ONCE(vsi->rx_rings[j]->cached_phctime, systime);
}
}
+ clear_bit(ICE_CFG_BUSY, pf->state);
+
+ return 0;
}
/**
@@ -2287,6 +2297,7 @@ ice_ptp_init_tx_e810(struct ice_pf *pf, struct ice_ptp_tx *tx)
/**
* ice_ptp_tx_tstamp_cleanup - Cleanup old timestamp requests that got dropped
+ * @hw: pointer to the hw struct
* @tx: PTP Tx tracker to clean up
*
* Loop through the Tx timestamp requests and see if any of them have been
@@ -2295,7 +2306,7 @@ ice_ptp_init_tx_e810(struct ice_pf *pf, struct ice_ptp_tx *tx)
* timestamp will never be captured. This might happen if the packet gets
* discarded before it reaches the PHY timestamping block.
*/
-static void ice_ptp_tx_tstamp_cleanup(struct ice_ptp_tx *tx)
+static void ice_ptp_tx_tstamp_cleanup(struct ice_hw *hw, struct ice_ptp_tx *tx)
{
u8 idx;
@@ -2304,11 +2315,16 @@ static void ice_ptp_tx_tstamp_cleanup(struct ice_ptp_tx *tx)
for_each_set_bit(idx, tx->in_use, tx->len) {
struct sk_buff *skb;
+ u64 raw_tstamp;
/* Check if this SKB has been waiting for too long */
if (time_is_after_jiffies(tx->tstamps[idx].start + 2 * HZ))
continue;
+ /* Read tstamp to be able to use this register again */
+ ice_read_phy_tstamp(hw, tx->quad, idx + tx->quad_offset,
+ &raw_tstamp);
+
spin_lock(&tx->lock);
skb = tx->tstamps[idx].skb;
tx->tstamps[idx].skb = NULL;
@@ -2324,17 +2340,18 @@ static void ice_ptp_periodic_work(struct kthread_work *work)
{
struct ice_ptp *ptp = container_of(work, struct ice_ptp, work.work);
struct ice_pf *pf = container_of(ptp, struct ice_pf, ptp);
+ int err;
if (!test_bit(ICE_FLAG_PTP, pf->flags))
return;
- ice_ptp_update_cached_phctime(pf);
+ err = ice_ptp_update_cached_phctime(pf);
- ice_ptp_tx_tstamp_cleanup(&pf->ptp.port.tx);
+ ice_ptp_tx_tstamp_cleanup(&pf->hw, &pf->ptp.port.tx);
- /* Run twice a second */
+ /* Run twice a second or reschedule if phc update failed */
kthread_queue_delayed_work(ptp->kworker, &ptp->work,
- msecs_to_jiffies(500));
+ msecs_to_jiffies(err ? 10 : 500));
}
/**
diff --git a/drivers/net/ethernet/intel/ice/ice_sriov.c b/drivers/net/ethernet/intel/ice/ice_sriov.c
index 8915a9d39e36..0c438219f7a3 100644
--- a/drivers/net/ethernet/intel/ice/ice_sriov.c
+++ b/drivers/net/ethernet/intel/ice/ice_sriov.c
@@ -1046,8 +1046,8 @@ int ice_sriov_configure(struct pci_dev *pdev, int num_vfs)
if (!num_vfs) {
if (!pci_vfs_assigned(pdev)) {
- ice_mbx_deinit_snapshot(&pf->hw);
ice_free_vfs(pf);
+ ice_mbx_deinit_snapshot(&pf->hw);
if (pf->lag)
ice_enable_lag(pf->lag);
return 0;
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h
index cead3eb149bd..ffb3f6a589da 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx.h
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.h
@@ -384,9 +384,14 @@ struct ice_ring_container {
/* this matches the maximum number of ITR bits, but in usec
* values, so it is shifted left one bit (bit zero is ignored)
*/
- u16 itr_setting:13;
- u16 itr_reserved:2;
- u16 itr_mode:1;
+ union {
+ struct {
+ u16 itr_setting:13;
+ u16 itr_reserved:2;
+ u16 itr_mode:1;
+ };
+ u16 itr_settings;
+ };
enum ice_container_type type;
};
diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl.c b/drivers/net/ethernet/intel/ice/ice_virtchnl.c
index 69ff4b929772..2889e050a4c9 100644
--- a/drivers/net/ethernet/intel/ice/ice_virtchnl.c
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl.c
@@ -1308,12 +1308,51 @@ error_param:
}
/**
+ * ice_vf_vsi_dis_single_txq - disable a single Tx queue
+ * @vf: VF to disable queue for
+ * @vsi: VSI for the VF
+ * @q_id: VF relative (0-based) queue ID
+ *
+ * Attempt to disable the Tx queue passed in. If the Tx queue was successfully
+ * disabled then clear q_id bit in the enabled queues bitmap and return
+ * success. Otherwise return error.
+ */
+static int
+ice_vf_vsi_dis_single_txq(struct ice_vf *vf, struct ice_vsi *vsi, u16 q_id)
+{
+ struct ice_txq_meta txq_meta = { 0 };
+ struct ice_tx_ring *ring;
+ int err;
+
+ if (!test_bit(q_id, vf->txq_ena))
+ dev_dbg(ice_pf_to_dev(vsi->back), "Queue %u on VSI %u is not enabled, but stopping it anyway\n",
+ q_id, vsi->vsi_num);
+
+ ring = vsi->tx_rings[q_id];
+ if (!ring)
+ return -EINVAL;
+
+ ice_fill_txq_meta(vsi, ring, &txq_meta);
+
+ err = ice_vsi_stop_tx_ring(vsi, ICE_NO_RESET, vf->vf_id, ring, &txq_meta);
+ if (err) {
+ dev_err(ice_pf_to_dev(vsi->back), "Failed to stop Tx ring %d on VSI %d\n",
+ q_id, vsi->vsi_num);
+ return err;
+ }
+
+ /* Clear enabled queues flag */
+ clear_bit(q_id, vf->txq_ena);
+
+ return 0;
+}
+
+/**
* ice_vc_dis_qs_msg
* @vf: pointer to the VF info
* @msg: pointer to the msg buffer
*
- * called from the VF to disable all or specific
- * queue(s)
+ * called from the VF to disable all or specific queue(s)
*/
static int ice_vc_dis_qs_msg(struct ice_vf *vf, u8 *msg)
{
@@ -1350,30 +1389,15 @@ static int ice_vc_dis_qs_msg(struct ice_vf *vf, u8 *msg)
q_map = vqs->tx_queues;
for_each_set_bit(vf_q_id, &q_map, ICE_MAX_RSS_QS_PER_VF) {
- struct ice_tx_ring *ring = vsi->tx_rings[vf_q_id];
- struct ice_txq_meta txq_meta = { 0 };
-
if (!ice_vc_isvalid_q_id(vf, vqs->vsi_id, vf_q_id)) {
v_ret = VIRTCHNL_STATUS_ERR_PARAM;
goto error_param;
}
- if (!test_bit(vf_q_id, vf->txq_ena))
- dev_dbg(ice_pf_to_dev(vsi->back), "Queue %u on VSI %u is not enabled, but stopping it anyway\n",
- vf_q_id, vsi->vsi_num);
-
- ice_fill_txq_meta(vsi, ring, &txq_meta);
-
- if (ice_vsi_stop_tx_ring(vsi, ICE_NO_RESET, vf->vf_id,
- ring, &txq_meta)) {
- dev_err(ice_pf_to_dev(vsi->back), "Failed to stop Tx ring %d on VSI %d\n",
- vf_q_id, vsi->vsi_num);
+ if (ice_vf_vsi_dis_single_txq(vf, vsi, vf_q_id)) {
v_ret = VIRTCHNL_STATUS_ERR_PARAM;
goto error_param;
}
-
- /* Clear enabled queues flag */
- clear_bit(vf_q_id, vf->txq_ena);
}
}
@@ -1622,6 +1646,14 @@ static int ice_vc_cfg_qs_msg(struct ice_vf *vf, u8 *msg)
if (qpi->txq.ring_len > 0) {
vsi->tx_rings[i]->dma = qpi->txq.dma_ring_addr;
vsi->tx_rings[i]->count = qpi->txq.ring_len;
+
+ /* Disable any existing queue first */
+ if (ice_vf_vsi_dis_single_txq(vf, vsi, q_idx)) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto error_param;
+ }
+
+ /* Configure a queue with the requested settings */
if (ice_vsi_cfg_single_txq(vsi, vsi->tx_rings, q_idx)) {
v_ret = VIRTCHNL_STATUS_ERR_PARAM;
goto error_param;
@@ -3625,6 +3657,8 @@ void ice_vc_process_vf_msg(struct ice_pf *pf, struct ice_rq_event_info *event)
return;
}
+ mutex_lock(&vf->cfg_lock);
+
/* Check if VF is disabled. */
if (test_bit(ICE_VF_STATE_DIS, vf->vf_states)) {
err = -EPERM;
@@ -3642,32 +3676,20 @@ void ice_vc_process_vf_msg(struct ice_pf *pf, struct ice_rq_event_info *event)
err = -EINVAL;
}
- if (!ice_vc_is_opcode_allowed(vf, v_opcode)) {
- ice_vc_send_msg_to_vf(vf, v_opcode,
- VIRTCHNL_STATUS_ERR_NOT_SUPPORTED, NULL,
- 0);
- ice_put_vf(vf);
- return;
- }
-
error_handler:
if (err) {
ice_vc_send_msg_to_vf(vf, v_opcode, VIRTCHNL_STATUS_ERR_PARAM,
NULL, 0);
dev_err(dev, "Invalid message from VF %d, opcode %d, len %d, error %d\n",
vf_id, v_opcode, msglen, err);
- ice_put_vf(vf);
- return;
+ goto finish;
}
- /* VF is being configured in another context that triggers a VFR, so no
- * need to process this message
- */
- if (!mutex_trylock(&vf->cfg_lock)) {
- dev_info(dev, "VF %u is being configured in another context that will trigger a VFR, so there is no need to handle this message\n",
- vf->vf_id);
- ice_put_vf(vf);
- return;
+ if (!ice_vc_is_opcode_allowed(vf, v_opcode)) {
+ ice_vc_send_msg_to_vf(vf, v_opcode,
+ VIRTCHNL_STATUS_ERR_NOT_SUPPORTED, NULL,
+ 0);
+ goto finish;
}
switch (v_opcode) {
@@ -3780,6 +3802,7 @@ error_handler:
vf_id, v_opcode, err);
}
+finish:
mutex_unlock(&vf->cfg_lock);
ice_put_vf(vf);
}
diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c
index 866ee4df9671..9dd38f667059 100644
--- a/drivers/net/ethernet/intel/ice/ice_xsk.c
+++ b/drivers/net/ethernet/intel/ice/ice_xsk.c
@@ -415,8 +415,8 @@ static u16 ice_fill_rx_descs(struct xsk_buff_pool *pool, struct xdp_buff **xdp,
*/
static bool __ice_alloc_rx_bufs_zc(struct ice_rx_ring *rx_ring, u16 count)
{
+ u32 nb_buffs_extra = 0, nb_buffs = 0;
union ice_32b_rx_flex_desc *rx_desc;
- u32 nb_buffs_extra = 0, nb_buffs;
u16 ntu = rx_ring->next_to_use;
u16 total_count = count;
struct xdp_buff **xdp;
@@ -428,6 +428,10 @@ static bool __ice_alloc_rx_bufs_zc(struct ice_rx_ring *rx_ring, u16 count)
nb_buffs_extra = ice_fill_rx_descs(rx_ring->xsk_pool, xdp,
rx_desc,
rx_ring->count - ntu);
+ if (nb_buffs_extra != rx_ring->count - ntu) {
+ ntu += nb_buffs_extra;
+ goto exit;
+ }
rx_desc = ICE_RX_DESC(rx_ring, 0);
xdp = ice_xdp_buf(rx_ring, 0);
ntu = 0;
@@ -441,6 +445,7 @@ static bool __ice_alloc_rx_bufs_zc(struct ice_rx_ring *rx_ring, u16 count)
if (ntu == rx_ring->count)
ntu = 0;
+exit:
if (rx_ring->next_to_use != ntu)
ice_release_rx_desc(rx_ring, ntu);
diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index 34b33b21e0dc..68be2976f539 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -5505,7 +5505,8 @@ static void igb_watchdog_task(struct work_struct *work)
break;
}
- if (adapter->link_speed != SPEED_1000)
+ if (adapter->link_speed != SPEED_1000 ||
+ !hw->phy.ops.read_reg)
goto no_wait;
/* wait for Remote receiver status OK */
diff --git a/drivers/net/ethernet/intel/igc/igc_i225.c b/drivers/net/ethernet/intel/igc/igc_i225.c
index 66ea566488d1..59d5c467ea6e 100644
--- a/drivers/net/ethernet/intel/igc/igc_i225.c
+++ b/drivers/net/ethernet/intel/igc/igc_i225.c
@@ -156,8 +156,15 @@ void igc_release_swfw_sync_i225(struct igc_hw *hw, u16 mask)
{
u32 swfw_sync;
- while (igc_get_hw_semaphore_i225(hw))
- ; /* Empty */
+ /* Releasing the resource requires first getting the HW semaphore.
+ * If we fail to get the semaphore, there is nothing we can do,
+ * except log an error and quit. We are not allowed to hang here
+ * indefinitely, as it may cause denial of service or system crash.
+ */
+ if (igc_get_hw_semaphore_i225(hw)) {
+ hw_dbg("Failed to release SW_FW_SYNC.\n");
+ return;
+ }
swfw_sync = rd32(IGC_SW_FW_SYNC);
swfw_sync &= ~mask;
diff --git a/drivers/net/ethernet/intel/igc/igc_phy.c b/drivers/net/ethernet/intel/igc/igc_phy.c
index 40dbf4b43234..6961f65d36b9 100644
--- a/drivers/net/ethernet/intel/igc/igc_phy.c
+++ b/drivers/net/ethernet/intel/igc/igc_phy.c
@@ -581,7 +581,7 @@ static s32 igc_read_phy_reg_mdic(struct igc_hw *hw, u32 offset, u16 *data)
* the lower time out
*/
for (i = 0; i < IGC_GEN_POLL_TIMEOUT; i++) {
- usleep_range(500, 1000);
+ udelay(50);
mdic = rd32(IGC_MDIC);
if (mdic & IGC_MDIC_READY)
break;
@@ -638,7 +638,7 @@ static s32 igc_write_phy_reg_mdic(struct igc_hw *hw, u32 offset, u16 data)
* the lower time out
*/
for (i = 0; i < IGC_GEN_POLL_TIMEOUT; i++) {
- usleep_range(500, 1000);
+ udelay(50);
mdic = rd32(IGC_MDIC);
if (mdic & IGC_MDIC_READY)
break;
diff --git a/drivers/net/ethernet/intel/igc/igc_ptp.c b/drivers/net/ethernet/intel/igc/igc_ptp.c
index 0d6e3215e98f..653e9f1e35b5 100644
--- a/drivers/net/ethernet/intel/igc/igc_ptp.c
+++ b/drivers/net/ethernet/intel/igc/igc_ptp.c
@@ -992,6 +992,17 @@ static void igc_ptp_time_restore(struct igc_adapter *adapter)
igc_ptp_write_i225(adapter, &ts);
}
+static void igc_ptm_stop(struct igc_adapter *adapter)
+{
+ struct igc_hw *hw = &adapter->hw;
+ u32 ctrl;
+
+ ctrl = rd32(IGC_PTM_CTRL);
+ ctrl &= ~IGC_PTM_CTRL_EN;
+
+ wr32(IGC_PTM_CTRL, ctrl);
+}
+
/**
* igc_ptp_suspend - Disable PTP work items and prepare for suspend
* @adapter: Board private structure
@@ -1009,8 +1020,10 @@ void igc_ptp_suspend(struct igc_adapter *adapter)
adapter->ptp_tx_skb = NULL;
clear_bit_unlock(__IGC_PTP_TX_IN_PROGRESS, &adapter->state);
- if (pci_device_is_present(adapter->pdev))
+ if (pci_device_is_present(adapter->pdev)) {
igc_ptp_time_save(adapter);
+ igc_ptm_stop(adapter);
+ }
}
/**
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c
index e596e1a9fc75..69d11ff7677d 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c
@@ -903,7 +903,8 @@ int ixgbe_ipsec_vf_add_sa(struct ixgbe_adapter *adapter, u32 *msgbuf, u32 vf)
/* Tx IPsec offload doesn't seem to work on this
* device, so block these requests for now.
*/
- if (!(sam->flags & XFRM_OFFLOAD_INBOUND)) {
+ sam->flags = sam->flags & ~XFRM_OFFLOAD_IPV6;
+ if (sam->flags != XFRM_OFFLOAD_INBOUND) {
err = -EOPNOTSUPP;
goto err_out;
}
diff --git a/drivers/net/ethernet/mediatek/mtk_ppe.c b/drivers/net/ethernet/mediatek/mtk_ppe.c
index 3ad10c793308..66298e2235c9 100644
--- a/drivers/net/ethernet/mediatek/mtk_ppe.c
+++ b/drivers/net/ethernet/mediatek/mtk_ppe.c
@@ -395,7 +395,7 @@ static void mtk_ppe_init_foe_table(struct mtk_ppe *ppe)
static const u8 skip[] = { 12, 25, 38, 51, 76, 89, 102 };
int i, k;
- memset(ppe->foe_table, 0, MTK_PPE_ENTRIES * sizeof(ppe->foe_table));
+ memset(ppe->foe_table, 0, MTK_PPE_ENTRIES * sizeof(*ppe->foe_table));
if (!IS_ENABLED(CONFIG_SOC_MT7621))
return;
diff --git a/drivers/net/ethernet/mediatek/mtk_sgmii.c b/drivers/net/ethernet/mediatek/mtk_sgmii.c
index 32d83421226a..5897940a418b 100644
--- a/drivers/net/ethernet/mediatek/mtk_sgmii.c
+++ b/drivers/net/ethernet/mediatek/mtk_sgmii.c
@@ -26,6 +26,7 @@ int mtk_sgmii_init(struct mtk_sgmii *ss, struct device_node *r, u32 ana_rgc3)
break;
ss->regmap[i] = syscon_node_to_regmap(np);
+ of_node_put(np);
if (IS_ERR(ss->regmap[i]))
return PTR_ERR(ss->regmap[i]);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/rsc_dump.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/rsc_dump.c
index 538adab6878b..c5b560a8b026 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/diag/rsc_dump.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/rsc_dump.c
@@ -31,6 +31,7 @@ static const char *const mlx5_rsc_sgmt_name[] = {
struct mlx5_rsc_dump {
u32 pdn;
u32 mkey;
+ u32 number_of_menu_items;
u16 fw_segment_type[MLX5_SGMT_TYPE_NUM];
};
@@ -50,21 +51,37 @@ static int mlx5_rsc_dump_sgmt_get_by_name(char *name)
return -EINVAL;
}
-static void mlx5_rsc_dump_read_menu_sgmt(struct mlx5_rsc_dump *rsc_dump, struct page *page)
+#define MLX5_RSC_DUMP_MENU_HEADER_SIZE (MLX5_ST_SZ_BYTES(resource_dump_info_segment) + \
+ MLX5_ST_SZ_BYTES(resource_dump_command_segment) + \
+ MLX5_ST_SZ_BYTES(resource_dump_menu_segment))
+
+static int mlx5_rsc_dump_read_menu_sgmt(struct mlx5_rsc_dump *rsc_dump, struct page *page,
+ int read_size, int start_idx)
{
void *data = page_address(page);
enum mlx5_sgmt_type sgmt_idx;
int num_of_items;
char *sgmt_name;
void *member;
+ int size = 0;
void *menu;
int i;
- menu = MLX5_ADDR_OF(menu_resource_dump_response, data, menu);
- num_of_items = MLX5_GET(resource_dump_menu_segment, menu, num_of_records);
+ if (!start_idx) {
+ menu = MLX5_ADDR_OF(menu_resource_dump_response, data, menu);
+ rsc_dump->number_of_menu_items = MLX5_GET(resource_dump_menu_segment, menu,
+ num_of_records);
+ size = MLX5_RSC_DUMP_MENU_HEADER_SIZE;
+ data += size;
+ }
+ num_of_items = rsc_dump->number_of_menu_items;
+
+ for (i = 0; start_idx + i < num_of_items; i++) {
+ size += MLX5_ST_SZ_BYTES(resource_dump_menu_record);
+ if (size >= read_size)
+ return start_idx + i;
- for (i = 0; i < num_of_items; i++) {
- member = MLX5_ADDR_OF(resource_dump_menu_segment, menu, record[i]);
+ member = data + MLX5_ST_SZ_BYTES(resource_dump_menu_record) * i;
sgmt_name = MLX5_ADDR_OF(resource_dump_menu_record, member, segment_name);
sgmt_idx = mlx5_rsc_dump_sgmt_get_by_name(sgmt_name);
if (sgmt_idx == -EINVAL)
@@ -72,6 +89,7 @@ static void mlx5_rsc_dump_read_menu_sgmt(struct mlx5_rsc_dump *rsc_dump, struct
rsc_dump->fw_segment_type[sgmt_idx] = MLX5_GET(resource_dump_menu_record,
member, segment_type);
}
+ return 0;
}
static int mlx5_rsc_dump_trigger(struct mlx5_core_dev *dev, struct mlx5_rsc_dump_cmd *cmd,
@@ -168,6 +186,7 @@ static int mlx5_rsc_dump_menu(struct mlx5_core_dev *dev)
struct mlx5_rsc_dump_cmd *cmd = NULL;
struct mlx5_rsc_key key = {};
struct page *page;
+ int start_idx = 0;
int size;
int err;
@@ -189,7 +208,7 @@ static int mlx5_rsc_dump_menu(struct mlx5_core_dev *dev)
if (err < 0)
goto destroy_cmd;
- mlx5_rsc_dump_read_menu_sgmt(dev->rsc_dump, page);
+ start_idx = mlx5_rsc_dump_read_menu_sgmt(dev->rsc_dump, page, size, start_idx);
} while (err > 0);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c b/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c
index 673f1c82d381..c9d5d8d93994 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c
@@ -309,8 +309,8 @@ int mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv,
if (err)
return err;
- err = update_buffer_lossy(max_mtu, curr_pfc_en, prio2buffer, port_buff_cell_sz,
- xoff, &port_buffer, &update_buffer);
+ err = update_buffer_lossy(max_mtu, curr_pfc_en, prio2buffer, xoff,
+ port_buff_cell_sz, &port_buffer, &update_buffer);
if (err)
return err;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.c
index af37a8d247a1..2755c25ba324 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.c
@@ -145,8 +145,7 @@ mlx5e_tc_act_post_parse(struct mlx5e_tc_act_parse_state *parse_state,
flow_action_for_each(i, act, flow_action) {
tc_act = mlx5e_tc_act_get(act->id, ns_type);
- if (!tc_act || !tc_act->post_parse ||
- !tc_act->can_offload(parse_state, act, i, attr))
+ if (!tc_act || !tc_act->post_parse)
continue;
err = tc_act->post_parse(parse_state, priv, attr);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ct.c
index b9d38fe807df..a829c94289c1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ct.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ct.c
@@ -45,12 +45,41 @@ tc_act_parse_ct(struct mlx5e_tc_act_parse_state *parse_state,
if (mlx5e_is_eswitch_flow(parse_state->flow))
attr->esw_attr->split_count = attr->esw_attr->out_count;
- if (!clear_action) {
+ if (clear_action) {
+ parse_state->ct_clear = true;
+ } else {
attr->flags |= MLX5_ATTR_FLAG_CT;
flow_flag_set(parse_state->flow, CT);
parse_state->ct = true;
}
- parse_state->ct_clear = clear_action;
+
+ return 0;
+}
+
+static int
+tc_act_post_parse_ct(struct mlx5e_tc_act_parse_state *parse_state,
+ struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr)
+{
+ struct mlx5e_tc_mod_hdr_acts *mod_acts = &attr->parse_attr->mod_hdr_acts;
+ int err;
+
+ /* If ct action exist, we can ignore previous ct_clear actions */
+ if (parse_state->ct)
+ return 0;
+
+ if (parse_state->ct_clear) {
+ err = mlx5_tc_ct_set_ct_clear_regs(parse_state->ct_priv, mod_acts);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(parse_state->extack,
+ "Failed to set registers for ct clear");
+ return err;
+ }
+ attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
+
+ /* Prevent handling of additional, redundant clear actions */
+ parse_state->ct_clear = false;
+ }
return 0;
}
@@ -70,5 +99,6 @@ struct mlx5e_tc_act mlx5e_tc_act_ct = {
.can_offload = tc_act_can_offload_ct,
.parse_action = tc_act_parse_ct,
.is_multi_table_act = tc_act_is_multi_table_act_ct,
+ .post_parse = tc_act_post_parse_ct,
};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs_smfs.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs_smfs.c
index 59988e24b704..bec9ed0103a9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs_smfs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs_smfs.c
@@ -23,7 +23,7 @@ struct mlx5_ct_fs_smfs_matcher {
};
struct mlx5_ct_fs_smfs_matchers {
- struct mlx5_ct_fs_smfs_matcher smfs_matchers[4];
+ struct mlx5_ct_fs_smfs_matcher smfs_matchers[6];
struct list_head used;
};
@@ -44,7 +44,8 @@ struct mlx5_ct_fs_smfs_rule {
};
static inline void
-mlx5_ct_fs_smfs_fill_mask(struct mlx5_ct_fs *fs, struct mlx5_flow_spec *spec, bool ipv4, bool tcp)
+mlx5_ct_fs_smfs_fill_mask(struct mlx5_ct_fs *fs, struct mlx5_flow_spec *spec, bool ipv4, bool tcp,
+ bool gre)
{
void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, outer_headers);
@@ -77,7 +78,7 @@ mlx5_ct_fs_smfs_fill_mask(struct mlx5_ct_fs *fs, struct mlx5_flow_spec *spec, bo
MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, tcp_dport);
MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_flags,
ntohs(MLX5_CT_TCP_FLAGS_MASK));
- } else {
+ } else if (!gre) {
MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, udp_sport);
MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, udp_dport);
}
@@ -87,7 +88,7 @@ mlx5_ct_fs_smfs_fill_mask(struct mlx5_ct_fs *fs, struct mlx5_flow_spec *spec, bo
static struct mlx5dr_matcher *
mlx5_ct_fs_smfs_matcher_create(struct mlx5_ct_fs *fs, struct mlx5dr_table *tbl, bool ipv4,
- bool tcp, u32 priority)
+ bool tcp, bool gre, u32 priority)
{
struct mlx5dr_matcher *dr_matcher;
struct mlx5_flow_spec *spec;
@@ -96,7 +97,7 @@ mlx5_ct_fs_smfs_matcher_create(struct mlx5_ct_fs *fs, struct mlx5dr_table *tbl,
if (!spec)
return ERR_PTR(-ENOMEM);
- mlx5_ct_fs_smfs_fill_mask(fs, spec, ipv4, tcp);
+ mlx5_ct_fs_smfs_fill_mask(fs, spec, ipv4, tcp, gre);
spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2 | MLX5_MATCH_OUTER_HEADERS;
dr_matcher = mlx5_smfs_matcher_create(tbl, priority, spec);
@@ -108,7 +109,7 @@ mlx5_ct_fs_smfs_matcher_create(struct mlx5_ct_fs *fs, struct mlx5dr_table *tbl,
}
static struct mlx5_ct_fs_smfs_matcher *
-mlx5_ct_fs_smfs_matcher_get(struct mlx5_ct_fs *fs, bool nat, bool ipv4, bool tcp)
+mlx5_ct_fs_smfs_matcher_get(struct mlx5_ct_fs *fs, bool nat, bool ipv4, bool tcp, bool gre)
{
struct mlx5_ct_fs_smfs *fs_smfs = mlx5_ct_fs_priv(fs);
struct mlx5_ct_fs_smfs_matcher *m, *smfs_matcher;
@@ -119,7 +120,7 @@ mlx5_ct_fs_smfs_matcher_get(struct mlx5_ct_fs *fs, bool nat, bool ipv4, bool tcp
int prio;
matchers = nat ? &fs_smfs->matchers_nat : &fs_smfs->matchers;
- smfs_matcher = &matchers->smfs_matchers[ipv4 * 2 + tcp];
+ smfs_matcher = &matchers->smfs_matchers[ipv4 * 3 + tcp * 2 + gre];
if (refcount_inc_not_zero(&smfs_matcher->ref))
return smfs_matcher;
@@ -145,11 +146,11 @@ mlx5_ct_fs_smfs_matcher_get(struct mlx5_ct_fs *fs, bool nat, bool ipv4, bool tcp
}
tbl = nat ? fs_smfs->ct_nat_tbl : fs_smfs->ct_tbl;
- dr_matcher = mlx5_ct_fs_smfs_matcher_create(fs, tbl, ipv4, tcp, prio);
+ dr_matcher = mlx5_ct_fs_smfs_matcher_create(fs, tbl, ipv4, tcp, gre, prio);
if (IS_ERR(dr_matcher)) {
netdev_warn(fs->netdev,
- "ct_fs_smfs: failed to create matcher (nat %d, ipv4 %d, tcp %d), err: %ld\n",
- nat, ipv4, tcp, PTR_ERR(dr_matcher));
+ "ct_fs_smfs: failed to create matcher (nat %d, ipv4 %d, tcp %d, gre %d), err: %ld\n",
+ nat, ipv4, tcp, gre, PTR_ERR(dr_matcher));
smfs_matcher = ERR_CAST(dr_matcher);
goto out_unlock;
@@ -222,16 +223,17 @@ mlx5_ct_fs_smfs_destroy(struct mlx5_ct_fs *fs)
static inline bool
mlx5_tc_ct_valid_used_dissector_keys(const u32 used_keys)
{
-#define DISSECTOR_BIT(name) BIT(FLOW_DISSECTOR_KEY_ ## name)
- const u32 basic_keys = DISSECTOR_BIT(BASIC) | DISSECTOR_BIT(CONTROL) |
- DISSECTOR_BIT(PORTS) | DISSECTOR_BIT(META);
- const u32 ipv4_tcp = basic_keys | DISSECTOR_BIT(IPV4_ADDRS) | DISSECTOR_BIT(TCP);
- const u32 ipv4_udp = basic_keys | DISSECTOR_BIT(IPV4_ADDRS);
- const u32 ipv6_tcp = basic_keys | DISSECTOR_BIT(IPV6_ADDRS) | DISSECTOR_BIT(TCP);
- const u32 ipv6_udp = basic_keys | DISSECTOR_BIT(IPV6_ADDRS);
+#define DISS_BIT(name) BIT(FLOW_DISSECTOR_KEY_ ## name)
+ const u32 basic_keys = DISS_BIT(BASIC) | DISS_BIT(CONTROL) | DISS_BIT(META);
+ const u32 ipv4_tcp = basic_keys | DISS_BIT(IPV4_ADDRS) | DISS_BIT(PORTS) | DISS_BIT(TCP);
+ const u32 ipv6_tcp = basic_keys | DISS_BIT(IPV6_ADDRS) | DISS_BIT(PORTS) | DISS_BIT(TCP);
+ const u32 ipv4_udp = basic_keys | DISS_BIT(IPV4_ADDRS) | DISS_BIT(PORTS);
+ const u32 ipv6_udp = basic_keys | DISS_BIT(IPV6_ADDRS) | DISS_BIT(PORTS);
+ const u32 ipv4_gre = basic_keys | DISS_BIT(IPV4_ADDRS);
+ const u32 ipv6_gre = basic_keys | DISS_BIT(IPV6_ADDRS);
return (used_keys == ipv4_tcp || used_keys == ipv4_udp || used_keys == ipv6_tcp ||
- used_keys == ipv6_udp);
+ used_keys == ipv6_udp || used_keys == ipv4_gre || used_keys == ipv6_gre);
}
static bool
@@ -254,20 +256,24 @@ mlx5_ct_fs_smfs_ct_validate_flow_rule(struct mlx5_ct_fs *fs, struct flow_rule *f
flow_rule_match_control(flow_rule, &control);
flow_rule_match_ipv4_addrs(flow_rule, &ipv4_addrs);
flow_rule_match_ipv6_addrs(flow_rule, &ipv6_addrs);
- flow_rule_match_ports(flow_rule, &ports);
- flow_rule_match_tcp(flow_rule, &tcp);
+ if (basic.key->ip_proto != IPPROTO_GRE)
+ flow_rule_match_ports(flow_rule, &ports);
+ if (basic.key->ip_proto == IPPROTO_TCP)
+ flow_rule_match_tcp(flow_rule, &tcp);
if (basic.mask->n_proto != htons(0xFFFF) ||
(basic.key->n_proto != htons(ETH_P_IP) && basic.key->n_proto != htons(ETH_P_IPV6)) ||
basic.mask->ip_proto != 0xFF ||
- (basic.key->ip_proto != IPPROTO_UDP && basic.key->ip_proto != IPPROTO_TCP)) {
+ (basic.key->ip_proto != IPPROTO_UDP && basic.key->ip_proto != IPPROTO_TCP &&
+ basic.key->ip_proto != IPPROTO_GRE)) {
ct_dbg("rule uses unexpected basic match (n_proto 0x%04x/0x%04x, ip_proto 0x%02x/0x%02x)",
ntohs(basic.key->n_proto), ntohs(basic.mask->n_proto),
basic.key->ip_proto, basic.mask->ip_proto);
return false;
}
- if (ports.mask->src != htons(0xFFFF) || ports.mask->dst != htons(0xFFFF)) {
+ if (basic.key->ip_proto != IPPROTO_GRE &&
+ (ports.mask->src != htons(0xFFFF) || ports.mask->dst != htons(0xFFFF))) {
ct_dbg("rule uses ports match (src 0x%04x, dst 0x%04x)",
ports.mask->src, ports.mask->dst);
return false;
@@ -291,7 +297,7 @@ mlx5_ct_fs_smfs_ct_rule_add(struct mlx5_ct_fs *fs, struct mlx5_flow_spec *spec,
struct mlx5dr_action *actions[5];
struct mlx5dr_rule *rule;
int num_actions = 0, err;
- bool nat, tcp, ipv4;
+ bool nat, tcp, ipv4, gre;
if (!mlx5_ct_fs_smfs_ct_validate_flow_rule(fs, flow_rule))
return ERR_PTR(-EOPNOTSUPP);
@@ -314,15 +320,17 @@ mlx5_ct_fs_smfs_ct_rule_add(struct mlx5_ct_fs *fs, struct mlx5_flow_spec *spec,
ipv4 = mlx5e_tc_get_ip_version(spec, true) == 4;
tcp = MLX5_GET(fte_match_param, spec->match_value,
outer_headers.ip_protocol) == IPPROTO_TCP;
+ gre = MLX5_GET(fte_match_param, spec->match_value,
+ outer_headers.ip_protocol) == IPPROTO_GRE;
- smfs_matcher = mlx5_ct_fs_smfs_matcher_get(fs, nat, ipv4, tcp);
+ smfs_matcher = mlx5_ct_fs_smfs_matcher_get(fs, nat, ipv4, tcp, gre);
if (IS_ERR(smfs_matcher)) {
err = PTR_ERR(smfs_matcher);
goto err_matcher;
}
rule = mlx5_smfs_rule_create(smfs_matcher->dr_matcher, spec, num_actions, actions,
- MLX5_FLOW_CONTEXT_FLOW_SOURCE_ANY_VPORT);
+ spec->flow_context.flow_source);
if (!rule) {
err = -EINVAL;
goto err_create;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
index e49f51124c74..ab4b0f3ee2a0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
@@ -582,6 +582,12 @@ mlx5_tc_ct_entry_set_registers(struct mlx5_tc_ct_priv *ct_priv,
return 0;
}
+int mlx5_tc_ct_set_ct_clear_regs(struct mlx5_tc_ct_priv *priv,
+ struct mlx5e_tc_mod_hdr_acts *mod_acts)
+{
+ return mlx5_tc_ct_entry_set_registers(priv, mod_acts, 0, 0, 0, 0);
+}
+
static int
mlx5_tc_ct_parse_mangle_to_mod_act(struct flow_action_entry *act,
char *modact)
@@ -1410,9 +1416,6 @@ mlx5_tc_ct_parse_action(struct mlx5_tc_ct_priv *priv,
const struct flow_action_entry *act,
struct netlink_ext_ack *extack)
{
- bool clear_action = act->ct.action & TCA_CT_ACT_CLEAR;
- int err;
-
if (!priv) {
NL_SET_ERR_MSG_MOD(extack,
"offload of ct action isn't available");
@@ -1423,17 +1426,6 @@ mlx5_tc_ct_parse_action(struct mlx5_tc_ct_priv *priv,
attr->ct_attr.ct_action = act->ct.action;
attr->ct_attr.nf_ft = act->ct.flow_table;
- if (!clear_action)
- goto out;
-
- err = mlx5_tc_ct_entry_set_registers(priv, mod_acts, 0, 0, 0, 0);
- if (err) {
- NL_SET_ERR_MSG_MOD(extack, "Failed to set registers for ct clear");
- return err;
- }
- attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
-
-out:
return 0;
}
@@ -1749,6 +1741,8 @@ mlx5_tc_ct_flush_ft_entry(void *ptr, void *arg)
static void
mlx5_tc_ct_del_ft_cb(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_ft *ft)
{
+ struct mlx5e_priv *priv;
+
if (!refcount_dec_and_test(&ft->refcount))
return;
@@ -1758,6 +1752,8 @@ mlx5_tc_ct_del_ft_cb(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_ft *ft)
rhashtable_free_and_destroy(&ft->ct_entries_ht,
mlx5_tc_ct_flush_ft_entry,
ct_priv);
+ priv = netdev_priv(ct_priv->netdev);
+ flush_workqueue(priv->wq);
mlx5_tc_ct_free_pre_ct_tables(ft);
mapping_remove(ct_priv->zone_mapping, ft->zone_restore_id);
kfree(ft);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h
index 36d3652bf829..00a3ba862afb 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h
@@ -129,6 +129,10 @@ bool
mlx5e_tc_ct_restore_flow(struct mlx5_tc_ct_priv *ct_priv,
struct sk_buff *skb, u8 zone_restore_id);
+int
+mlx5_tc_ct_set_ct_clear_regs(struct mlx5_tc_ct_priv *priv,
+ struct mlx5e_tc_mod_hdr_acts *mod_acts);
+
#else /* CONFIG_MLX5_TC_CT */
static inline struct mlx5_tc_ct_priv *
@@ -171,6 +175,13 @@ mlx5_tc_ct_add_no_trk_match(struct mlx5_flow_spec *spec)
}
static inline int
+mlx5_tc_ct_set_ct_clear_regs(struct mlx5_tc_ct_priv *priv,
+ struct mlx5e_tc_mod_hdr_acts *mod_acts)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline int
mlx5_tc_ct_parse_action(struct mlx5_tc_ct_priv *priv,
struct mlx5_flow_attr *attr,
struct mlx5e_tc_mod_hdr_acts *mod_acts,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c
index 378fc8e3bd97..d87bbb0be7c8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c
@@ -713,6 +713,7 @@ int mlx5e_tc_tun_route_lookup(struct mlx5e_priv *priv,
struct net_device *filter_dev)
{
struct mlx5_esw_flow_attr *esw_attr = flow_attr->esw_attr;
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
struct mlx5e_tc_int_port *int_port;
TC_TUN_ROUTE_ATTR_INIT(attr);
u16 vport_num;
@@ -747,7 +748,7 @@ int mlx5e_tc_tun_route_lookup(struct mlx5e_priv *priv,
esw_attr->rx_tun_attr->vni = MLX5_GET(fte_match_param, spec->match_value,
misc_parameters.vxlan_vni);
esw_attr->rx_tun_attr->decap_vport = vport_num;
- } else if (netif_is_ovs_master(attr.route_dev)) {
+ } else if (netif_is_ovs_master(attr.route_dev) && mlx5e_tc_int_port_supported(esw)) {
int_port = mlx5e_tc_int_port_get(mlx5e_get_int_port_priv(priv),
attr.route_dev->ifindex,
MLX5E_TC_INT_PORT_INGRESS);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c
index a55b066746cb..857840ab1e91 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c
@@ -14,19 +14,26 @@ static int mlx5e_trap_napi_poll(struct napi_struct *napi, int budget)
bool busy = false;
int work_done = 0;
+ rcu_read_lock();
+
ch_stats->poll++;
work_done = mlx5e_poll_rx_cq(&rq->cq, budget);
busy |= work_done == budget;
busy |= rq->post_wqes(rq);
- if (busy)
- return budget;
+ if (busy) {
+ work_done = budget;
+ goto out;
+ }
if (unlikely(!napi_complete_done(napi, work_done)))
- return work_done;
+ goto out;
mlx5e_cq_arm(&rq->cq);
+
+out:
+ rcu_read_unlock();
return work_done;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
index d659fe07d464..8ead2c82a52a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
@@ -1200,6 +1200,16 @@ static int mlx5e_trust_initialize(struct mlx5e_priv *priv)
return err;
WRITE_ONCE(priv->dcbx_dp.trust_state, trust_state);
+ if (priv->dcbx_dp.trust_state == MLX5_QPTS_TRUST_PCP && priv->dcbx.dscp_app_cnt) {
+ /*
+ * Align the driver state with the register state.
+ * Temporary state change is required to enable the app list reset.
+ */
+ priv->dcbx_dp.trust_state = MLX5_QPTS_TRUST_DSCP;
+ mlx5e_dcbnl_delete_app(priv);
+ priv->dcbx_dp.trust_state = MLX5_QPTS_TRUST_PCP;
+ }
+
mlx5e_params_calc_trust_tx_min_inline_mode(priv->mdev, &priv->channels.params,
priv->dcbx_dp.trust_state);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 2f1dedc721d1..fa229998606c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -3864,6 +3864,10 @@ static netdev_features_t mlx5e_fix_uplink_rep_features(struct net_device *netdev
if (netdev->features & NETIF_F_NTUPLE)
netdev_warn(netdev, "Disabling ntuple, not supported in switchdev mode\n");
+ features &= ~NETIF_F_GRO_HW;
+ if (netdev->features & NETIF_F_GRO_HW)
+ netdev_warn(netdev, "Disabling HW_GRO, not supported in switchdev mode\n");
+
return features;
}
@@ -3896,6 +3900,25 @@ static netdev_features_t mlx5e_fix_features(struct net_device *netdev,
}
}
+ if (params->xdp_prog) {
+ if (features & NETIF_F_LRO) {
+ netdev_warn(netdev, "LRO is incompatible with XDP\n");
+ features &= ~NETIF_F_LRO;
+ }
+ if (features & NETIF_F_GRO_HW) {
+ netdev_warn(netdev, "HW GRO is incompatible with XDP\n");
+ features &= ~NETIF_F_GRO_HW;
+ }
+ }
+
+ if (priv->xsk.refcnt) {
+ if (features & NETIF_F_GRO_HW) {
+ netdev_warn(netdev, "HW GRO is incompatible with AF_XDP (%u XSKs are active)\n",
+ priv->xsk.refcnt);
+ features &= ~NETIF_F_GRO_HW;
+ }
+ }
+
if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS)) {
features &= ~NETIF_F_RXHASH;
if (netdev->features & NETIF_F_RXHASH)
@@ -4850,10 +4873,6 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev)
netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_FILTER;
netdev->hw_features |= NETIF_F_HW_VLAN_STAG_TX;
- if (!!MLX5_CAP_GEN(mdev, shampo) &&
- mlx5e_check_fragmented_striding_rq_cap(mdev))
- netdev->hw_features |= NETIF_F_GRO_HW;
-
if (mlx5e_tunnel_any_tx_proto_supported(mdev)) {
netdev->hw_enc_features |= NETIF_F_HW_CSUM;
netdev->hw_enc_features |= NETIF_F_TSO;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index e3fc15ae7bb1..ac0f73074f7a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -2459,6 +2459,17 @@ static int __parse_cls_flower(struct mlx5e_priv *priv,
match.key->vlan_priority);
*match_level = MLX5_MATCH_L2;
+
+ if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CVLAN) &&
+ match.mask->vlan_eth_type &&
+ MLX5_CAP_FLOWTABLE_TYPE(priv->mdev,
+ ft_field_support.outer_second_vid,
+ fs_type)) {
+ MLX5_SET(fte_match_set_misc, misc_c,
+ outer_second_cvlan_tag, 1);
+ spec->match_criteria_enable |=
+ MLX5_MATCH_MISC_PARAMETERS;
+ }
}
} else if (*match_level != MLX5_MATCH_NONE) {
/* cvlan_tag enabled in match criteria and
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index 3f63df127091..3b151332e2f8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -139,7 +139,7 @@ mlx5_eswitch_set_rule_source_port(struct mlx5_eswitch *esw,
if (mlx5_esw_indir_table_decap_vport(attr))
vport = mlx5_esw_indir_table_decap_vport(attr);
- if (esw_attr->int_port)
+ if (attr && !attr->chain && esw_attr->int_port)
metadata =
mlx5e_tc_int_port_get_metadata_for_match(esw_attr->int_port);
else
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index 816d991f7621..3ad67e6b5586 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -2663,28 +2663,6 @@ static void cleanup_root_ns(struct mlx5_flow_root_namespace *root_ns)
clean_tree(&root_ns->ns.node);
}
-void mlx5_cleanup_fs(struct mlx5_core_dev *dev)
-{
- struct mlx5_flow_steering *steering = dev->priv.steering;
-
- cleanup_root_ns(steering->root_ns);
- cleanup_root_ns(steering->fdb_root_ns);
- steering->fdb_root_ns = NULL;
- kfree(steering->fdb_sub_ns);
- steering->fdb_sub_ns = NULL;
- cleanup_root_ns(steering->port_sel_root_ns);
- cleanup_root_ns(steering->sniffer_rx_root_ns);
- cleanup_root_ns(steering->sniffer_tx_root_ns);
- cleanup_root_ns(steering->rdma_rx_root_ns);
- cleanup_root_ns(steering->rdma_tx_root_ns);
- cleanup_root_ns(steering->egress_root_ns);
- mlx5_cleanup_fc_stats(dev);
- kmem_cache_destroy(steering->ftes_cache);
- kmem_cache_destroy(steering->fgs_cache);
- mlx5_ft_pool_destroy(dev);
- kfree(steering);
-}
-
static int init_sniffer_tx_root_ns(struct mlx5_flow_steering *steering)
{
struct fs_prio *prio;
@@ -3086,42 +3064,27 @@ cleanup:
return err;
}
-int mlx5_init_fs(struct mlx5_core_dev *dev)
+void mlx5_fs_core_cleanup(struct mlx5_core_dev *dev)
{
- struct mlx5_flow_steering *steering;
- int err = 0;
-
- err = mlx5_init_fc_stats(dev);
- if (err)
- return err;
-
- err = mlx5_ft_pool_init(dev);
- if (err)
- return err;
-
- steering = kzalloc(sizeof(*steering), GFP_KERNEL);
- if (!steering) {
- err = -ENOMEM;
- goto err;
- }
-
- steering->dev = dev;
- dev->priv.steering = steering;
+ struct mlx5_flow_steering *steering = dev->priv.steering;
- if (mlx5_fs_dr_is_supported(dev))
- steering->mode = MLX5_FLOW_STEERING_MODE_SMFS;
- else
- steering->mode = MLX5_FLOW_STEERING_MODE_DMFS;
+ cleanup_root_ns(steering->root_ns);
+ cleanup_root_ns(steering->fdb_root_ns);
+ steering->fdb_root_ns = NULL;
+ kfree(steering->fdb_sub_ns);
+ steering->fdb_sub_ns = NULL;
+ cleanup_root_ns(steering->port_sel_root_ns);
+ cleanup_root_ns(steering->sniffer_rx_root_ns);
+ cleanup_root_ns(steering->sniffer_tx_root_ns);
+ cleanup_root_ns(steering->rdma_rx_root_ns);
+ cleanup_root_ns(steering->rdma_tx_root_ns);
+ cleanup_root_ns(steering->egress_root_ns);
+}
- steering->fgs_cache = kmem_cache_create("mlx5_fs_fgs",
- sizeof(struct mlx5_flow_group), 0,
- 0, NULL);
- steering->ftes_cache = kmem_cache_create("mlx5_fs_ftes", sizeof(struct fs_fte), 0,
- 0, NULL);
- if (!steering->ftes_cache || !steering->fgs_cache) {
- err = -ENOMEM;
- goto err;
- }
+int mlx5_fs_core_init(struct mlx5_core_dev *dev)
+{
+ struct mlx5_flow_steering *steering = dev->priv.steering;
+ int err = 0;
if ((((MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_ETH) &&
(MLX5_CAP_GEN(dev, nic_flow_table))) ||
@@ -3180,8 +3143,64 @@ int mlx5_init_fs(struct mlx5_core_dev *dev)
}
return 0;
+
+err:
+ mlx5_fs_core_cleanup(dev);
+ return err;
+}
+
+void mlx5_fs_core_free(struct mlx5_core_dev *dev)
+{
+ struct mlx5_flow_steering *steering = dev->priv.steering;
+
+ kmem_cache_destroy(steering->ftes_cache);
+ kmem_cache_destroy(steering->fgs_cache);
+ kfree(steering);
+ mlx5_ft_pool_destroy(dev);
+ mlx5_cleanup_fc_stats(dev);
+}
+
+int mlx5_fs_core_alloc(struct mlx5_core_dev *dev)
+{
+ struct mlx5_flow_steering *steering;
+ int err = 0;
+
+ err = mlx5_init_fc_stats(dev);
+ if (err)
+ return err;
+
+ err = mlx5_ft_pool_init(dev);
+ if (err)
+ goto err;
+
+ steering = kzalloc(sizeof(*steering), GFP_KERNEL);
+ if (!steering) {
+ err = -ENOMEM;
+ goto err;
+ }
+
+ steering->dev = dev;
+ dev->priv.steering = steering;
+
+ if (mlx5_fs_dr_is_supported(dev))
+ steering->mode = MLX5_FLOW_STEERING_MODE_SMFS;
+ else
+ steering->mode = MLX5_FLOW_STEERING_MODE_DMFS;
+
+ steering->fgs_cache = kmem_cache_create("mlx5_fs_fgs",
+ sizeof(struct mlx5_flow_group), 0,
+ 0, NULL);
+ steering->ftes_cache = kmem_cache_create("mlx5_fs_ftes", sizeof(struct fs_fte), 0,
+ 0, NULL);
+ if (!steering->ftes_cache || !steering->fgs_cache) {
+ err = -ENOMEM;
+ goto err;
+ }
+
+ return 0;
+
err:
- mlx5_cleanup_fs(dev);
+ mlx5_fs_core_free(dev);
return err;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
index c488a7c5b07e..3f20523e514f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
@@ -298,8 +298,10 @@ int mlx5_flow_namespace_set_peer(struct mlx5_flow_root_namespace *ns,
int mlx5_flow_namespace_set_mode(struct mlx5_flow_namespace *ns,
enum mlx5_flow_steering_mode mode);
-int mlx5_init_fs(struct mlx5_core_dev *dev);
-void mlx5_cleanup_fs(struct mlx5_core_dev *dev);
+int mlx5_fs_core_alloc(struct mlx5_core_dev *dev);
+void mlx5_fs_core_free(struct mlx5_core_dev *dev);
+int mlx5_fs_core_init(struct mlx5_core_dev *dev);
+void mlx5_fs_core_cleanup(struct mlx5_core_dev *dev);
int mlx5_fs_egress_acls_init(struct mlx5_core_dev *dev, int total_vports);
void mlx5_fs_egress_acls_cleanup(struct mlx5_core_dev *dev);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c
index 4aa22dce9b77..81eb67fb95b0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c
@@ -8,7 +8,8 @@
enum {
MLX5_FW_RESET_FLAGS_RESET_REQUESTED,
MLX5_FW_RESET_FLAGS_NACK_RESET_REQUEST,
- MLX5_FW_RESET_FLAGS_PENDING_COMP
+ MLX5_FW_RESET_FLAGS_PENDING_COMP,
+ MLX5_FW_RESET_FLAGS_DROP_NEW_REQUESTS
};
struct mlx5_fw_reset {
@@ -155,6 +156,28 @@ static void mlx5_fw_reset_complete_reload(struct mlx5_core_dev *dev)
}
}
+static void mlx5_stop_sync_reset_poll(struct mlx5_core_dev *dev)
+{
+ struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset;
+
+ del_timer_sync(&fw_reset->timer);
+}
+
+static int mlx5_sync_reset_clear_reset_requested(struct mlx5_core_dev *dev, bool poll_health)
+{
+ struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset;
+
+ if (!test_and_clear_bit(MLX5_FW_RESET_FLAGS_RESET_REQUESTED, &fw_reset->reset_flags)) {
+ mlx5_core_warn(dev, "Reset request was already cleared\n");
+ return -EALREADY;
+ }
+
+ mlx5_stop_sync_reset_poll(dev);
+ if (poll_health)
+ mlx5_start_health_poll(dev);
+ return 0;
+}
+
static void mlx5_sync_reset_reload_work(struct work_struct *work)
{
struct mlx5_fw_reset *fw_reset = container_of(work, struct mlx5_fw_reset,
@@ -162,6 +185,7 @@ static void mlx5_sync_reset_reload_work(struct work_struct *work)
struct mlx5_core_dev *dev = fw_reset->dev;
int err;
+ mlx5_sync_reset_clear_reset_requested(dev, false);
mlx5_enter_error_state(dev, true);
mlx5_unload_one(dev);
err = mlx5_health_wait_pci_up(dev);
@@ -171,23 +195,6 @@ static void mlx5_sync_reset_reload_work(struct work_struct *work)
mlx5_fw_reset_complete_reload(dev);
}
-static void mlx5_stop_sync_reset_poll(struct mlx5_core_dev *dev)
-{
- struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset;
-
- del_timer_sync(&fw_reset->timer);
-}
-
-static void mlx5_sync_reset_clear_reset_requested(struct mlx5_core_dev *dev, bool poll_health)
-{
- struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset;
-
- mlx5_stop_sync_reset_poll(dev);
- clear_bit(MLX5_FW_RESET_FLAGS_RESET_REQUESTED, &fw_reset->reset_flags);
- if (poll_health)
- mlx5_start_health_poll(dev);
-}
-
#define MLX5_RESET_POLL_INTERVAL (HZ / 10)
static void poll_sync_reset(struct timer_list *t)
{
@@ -202,8 +209,10 @@ static void poll_sync_reset(struct timer_list *t)
if (fatal_error) {
mlx5_core_warn(dev, "Got Device Reset\n");
- mlx5_sync_reset_clear_reset_requested(dev, false);
- queue_work(fw_reset->wq, &fw_reset->reset_reload_work);
+ if (!test_bit(MLX5_FW_RESET_FLAGS_DROP_NEW_REQUESTS, &fw_reset->reset_flags))
+ queue_work(fw_reset->wq, &fw_reset->reset_reload_work);
+ else
+ mlx5_core_err(dev, "Device is being removed, Drop new reset work\n");
return;
}
@@ -229,13 +238,17 @@ static int mlx5_fw_reset_set_reset_sync_nack(struct mlx5_core_dev *dev)
return mlx5_reg_mfrl_set(dev, MLX5_MFRL_REG_RESET_LEVEL3, 0, 2, false);
}
-static void mlx5_sync_reset_set_reset_requested(struct mlx5_core_dev *dev)
+static int mlx5_sync_reset_set_reset_requested(struct mlx5_core_dev *dev)
{
struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset;
+ if (test_and_set_bit(MLX5_FW_RESET_FLAGS_RESET_REQUESTED, &fw_reset->reset_flags)) {
+ mlx5_core_warn(dev, "Reset request was already set\n");
+ return -EALREADY;
+ }
mlx5_stop_health_poll(dev, true);
- set_bit(MLX5_FW_RESET_FLAGS_RESET_REQUESTED, &fw_reset->reset_flags);
mlx5_start_sync_reset_poll(dev);
+ return 0;
}
static void mlx5_fw_live_patch_event(struct work_struct *work)
@@ -264,7 +277,9 @@ static void mlx5_sync_reset_request_event(struct work_struct *work)
err ? "Failed" : "Sent");
return;
}
- mlx5_sync_reset_set_reset_requested(dev);
+ if (mlx5_sync_reset_set_reset_requested(dev))
+ return;
+
err = mlx5_fw_reset_set_reset_sync_ack(dev);
if (err)
mlx5_core_warn(dev, "PCI Sync FW Update Reset Ack Failed. Error code: %d\n", err);
@@ -362,7 +377,8 @@ static void mlx5_sync_reset_now_event(struct work_struct *work)
struct mlx5_core_dev *dev = fw_reset->dev;
int err;
- mlx5_sync_reset_clear_reset_requested(dev, false);
+ if (mlx5_sync_reset_clear_reset_requested(dev, false))
+ return;
mlx5_core_warn(dev, "Sync Reset now. Device is going to reset.\n");
@@ -391,10 +407,8 @@ static void mlx5_sync_reset_abort_event(struct work_struct *work)
reset_abort_work);
struct mlx5_core_dev *dev = fw_reset->dev;
- if (!test_bit(MLX5_FW_RESET_FLAGS_RESET_REQUESTED, &fw_reset->reset_flags))
+ if (mlx5_sync_reset_clear_reset_requested(dev, true))
return;
-
- mlx5_sync_reset_clear_reset_requested(dev, true);
mlx5_core_warn(dev, "PCI Sync FW Update Reset Aborted.\n");
}
@@ -423,9 +437,12 @@ static int fw_reset_event_notifier(struct notifier_block *nb, unsigned long acti
struct mlx5_fw_reset *fw_reset = mlx5_nb_cof(nb, struct mlx5_fw_reset, nb);
struct mlx5_eqe *eqe = data;
+ if (test_bit(MLX5_FW_RESET_FLAGS_DROP_NEW_REQUESTS, &fw_reset->reset_flags))
+ return NOTIFY_DONE;
+
switch (eqe->sub_type) {
case MLX5_GENERAL_SUBTYPE_FW_LIVE_PATCH_EVENT:
- queue_work(fw_reset->wq, &fw_reset->fw_live_patch_work);
+ queue_work(fw_reset->wq, &fw_reset->fw_live_patch_work);
break;
case MLX5_GENERAL_SUBTYPE_PCI_SYNC_FOR_FW_UPDATE_EVENT:
mlx5_sync_reset_events_handle(fw_reset, eqe);
@@ -469,6 +486,18 @@ void mlx5_fw_reset_events_stop(struct mlx5_core_dev *dev)
mlx5_eq_notifier_unregister(dev, &dev->priv.fw_reset->nb);
}
+void mlx5_drain_fw_reset(struct mlx5_core_dev *dev)
+{
+ struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset;
+
+ set_bit(MLX5_FW_RESET_FLAGS_DROP_NEW_REQUESTS, &fw_reset->reset_flags);
+ cancel_work_sync(&fw_reset->fw_live_patch_work);
+ cancel_work_sync(&fw_reset->reset_request_work);
+ cancel_work_sync(&fw_reset->reset_reload_work);
+ cancel_work_sync(&fw_reset->reset_now_work);
+ cancel_work_sync(&fw_reset->reset_abort_work);
+}
+
int mlx5_fw_reset_init(struct mlx5_core_dev *dev)
{
struct mlx5_fw_reset *fw_reset = kzalloc(sizeof(*fw_reset), GFP_KERNEL);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.h b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.h
index 694fc7cb2684..dc141c7e641a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.h
@@ -16,6 +16,7 @@ int mlx5_fw_reset_set_live_patch(struct mlx5_core_dev *dev);
int mlx5_fw_reset_wait_reset_done(struct mlx5_core_dev *dev);
void mlx5_fw_reset_events_start(struct mlx5_core_dev *dev);
void mlx5_fw_reset_events_stop(struct mlx5_core_dev *dev);
+void mlx5_drain_fw_reset(struct mlx5_core_dev *dev);
int mlx5_fw_reset_init(struct mlx5_core_dev *dev);
void mlx5_fw_reset_cleanup(struct mlx5_core_dev *dev);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c
index 4a6ec15ef046..d6c3e6dfd71f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c
@@ -100,6 +100,14 @@ static void mlx5_lag_fib_event_flush(struct notifier_block *nb)
flush_workqueue(mp->wq);
}
+static void mlx5_lag_fib_set(struct lag_mp *mp, struct fib_info *fi, u32 dst, int dst_len)
+{
+ mp->fib.mfi = fi;
+ mp->fib.priority = fi->fib_priority;
+ mp->fib.dst = dst;
+ mp->fib.dst_len = dst_len;
+}
+
struct mlx5_fib_event_work {
struct work_struct work;
struct mlx5_lag *ldev;
@@ -110,10 +118,10 @@ struct mlx5_fib_event_work {
};
};
-static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev,
- unsigned long event,
- struct fib_info *fi)
+static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev, unsigned long event,
+ struct fib_entry_notifier_info *fen_info)
{
+ struct fib_info *fi = fen_info->fi;
struct lag_mp *mp = &ldev->lag_mp;
struct fib_nh *fib_nh0, *fib_nh1;
unsigned int nhs;
@@ -121,13 +129,15 @@ static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev,
/* Handle delete event */
if (event == FIB_EVENT_ENTRY_DEL) {
/* stop track */
- if (mp->mfi == fi)
- mp->mfi = NULL;
+ if (mp->fib.mfi == fi)
+ mp->fib.mfi = NULL;
return;
}
/* Handle multipath entry with lower priority value */
- if (mp->mfi && mp->mfi != fi && fi->fib_priority >= mp->mfi->fib_priority)
+ if (mp->fib.mfi && mp->fib.mfi != fi &&
+ (mp->fib.dst != fen_info->dst || mp->fib.dst_len != fen_info->dst_len) &&
+ fi->fib_priority >= mp->fib.priority)
return;
/* Handle add/replace event */
@@ -143,9 +153,9 @@ static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev,
i++;
mlx5_lag_set_port_affinity(ldev, i);
+ mlx5_lag_fib_set(mp, fi, fen_info->dst, fen_info->dst_len);
}
- mp->mfi = fi;
return;
}
@@ -165,7 +175,7 @@ static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev,
}
/* First time we see multipath route */
- if (!mp->mfi && !__mlx5_lag_is_active(ldev)) {
+ if (!mp->fib.mfi && !__mlx5_lag_is_active(ldev)) {
struct lag_tracker tracker;
tracker = ldev->tracker;
@@ -173,7 +183,7 @@ static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev,
}
mlx5_lag_set_port_affinity(ldev, MLX5_LAG_NORMAL_AFFINITY);
- mp->mfi = fi;
+ mlx5_lag_fib_set(mp, fi, fen_info->dst, fen_info->dst_len);
}
static void mlx5_lag_fib_nexthop_event(struct mlx5_lag *ldev,
@@ -184,7 +194,7 @@ static void mlx5_lag_fib_nexthop_event(struct mlx5_lag *ldev,
struct lag_mp *mp = &ldev->lag_mp;
/* Check the nh event is related to the route */
- if (!mp->mfi || mp->mfi != fi)
+ if (!mp->fib.mfi || mp->fib.mfi != fi)
return;
/* nh added/removed */
@@ -214,7 +224,7 @@ static void mlx5_lag_fib_update(struct work_struct *work)
case FIB_EVENT_ENTRY_REPLACE:
case FIB_EVENT_ENTRY_DEL:
mlx5_lag_fib_route_event(ldev, fib_work->event,
- fib_work->fen_info.fi);
+ &fib_work->fen_info);
fib_info_put(fib_work->fen_info.fi);
break;
case FIB_EVENT_NH_ADD:
@@ -313,7 +323,7 @@ void mlx5_lag_mp_reset(struct mlx5_lag *ldev)
/* Clear mfi, as it might become stale when a route delete event
* has been missed, see mlx5_lag_fib_route_event().
*/
- ldev->lag_mp.mfi = NULL;
+ ldev->lag_mp.fib.mfi = NULL;
}
int mlx5_lag_mp_init(struct mlx5_lag *ldev)
@@ -324,7 +334,7 @@ int mlx5_lag_mp_init(struct mlx5_lag *ldev)
/* always clear mfi, as it might become stale when a route delete event
* has been missed
*/
- mp->mfi = NULL;
+ mp->fib.mfi = NULL;
if (mp->fib_nb.notifier_call)
return 0;
@@ -354,5 +364,5 @@ void mlx5_lag_mp_cleanup(struct mlx5_lag *ldev)
unregister_fib_notifier(&init_net, &mp->fib_nb);
destroy_workqueue(mp->wq);
mp->fib_nb.notifier_call = NULL;
- mp->mfi = NULL;
+ mp->fib.mfi = NULL;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.h b/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.h
index 57af962cad29..056a066da604 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.h
@@ -15,7 +15,12 @@ enum mlx5_lag_port_affinity {
struct lag_mp {
struct notifier_block fib_nb;
- struct fib_info *mfi; /* used in tracking fib events */
+ struct {
+ const void *mfi; /* used in tracking fib events */
+ u32 priority;
+ u32 dst;
+ int dst_len;
+ } fib;
struct workqueue_struct *wq;
};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c
index a6592f9c3c05..5be322528279 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c
@@ -505,7 +505,7 @@ static int mlx5_lag_create_inner_ttc_table(struct mlx5_lag *ldev)
struct ttc_params ttc_params = {};
mlx5_lag_set_inner_ttc_params(ldev, &ttc_params);
- port_sel->inner.ttc = mlx5_create_ttc_table(dev, &ttc_params);
+ port_sel->inner.ttc = mlx5_create_inner_ttc_table(dev, &ttc_params);
if (IS_ERR(port_sel->inner.ttc))
return PTR_ERR(port_sel->inner.ttc);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.c
index b63dec24747a..b78f2ba25c19 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.c
@@ -408,6 +408,8 @@ static int mlx5_generate_inner_ttc_table_rules(struct mlx5_core_dev *dev,
for (tt = 0; tt < MLX5_NUM_TT; tt++) {
struct mlx5_ttc_rule *rule = &rules[tt];
+ if (test_bit(tt, params->ignore_dests))
+ continue;
rule->rule = mlx5_generate_inner_ttc_rule(dev, ft,
&params->dests[tt],
ttc_rules[tt].etype,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index 2589e39eb9c7..ef196cb764e2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -938,6 +938,12 @@ static int mlx5_init_once(struct mlx5_core_dev *dev)
goto err_sf_table_cleanup;
}
+ err = mlx5_fs_core_alloc(dev);
+ if (err) {
+ mlx5_core_err(dev, "Failed to alloc flow steering\n");
+ goto err_fs;
+ }
+
dev->dm = mlx5_dm_create(dev);
if (IS_ERR(dev->dm))
mlx5_core_warn(dev, "Failed to init device memory%d\n", err);
@@ -948,6 +954,8 @@ static int mlx5_init_once(struct mlx5_core_dev *dev)
return 0;
+err_fs:
+ mlx5_sf_table_cleanup(dev);
err_sf_table_cleanup:
mlx5_sf_hw_table_cleanup(dev);
err_sf_hw_table_cleanup:
@@ -985,6 +993,7 @@ static void mlx5_cleanup_once(struct mlx5_core_dev *dev)
mlx5_hv_vhca_destroy(dev->hv_vhca);
mlx5_fw_tracer_destroy(dev->tracer);
mlx5_dm_cleanup(dev);
+ mlx5_fs_core_free(dev);
mlx5_sf_table_cleanup(dev);
mlx5_sf_hw_table_cleanup(dev);
mlx5_vhca_event_cleanup(dev);
@@ -1191,7 +1200,7 @@ static int mlx5_load(struct mlx5_core_dev *dev)
goto err_tls_start;
}
- err = mlx5_init_fs(dev);
+ err = mlx5_fs_core_init(dev);
if (err) {
mlx5_core_err(dev, "Failed to init flow steering\n");
goto err_fs;
@@ -1236,7 +1245,7 @@ err_ec:
err_vhca:
mlx5_vhca_event_stop(dev);
err_set_hca:
- mlx5_cleanup_fs(dev);
+ mlx5_fs_core_cleanup(dev);
err_fs:
mlx5_accel_tls_cleanup(dev);
err_tls_start:
@@ -1265,7 +1274,7 @@ static void mlx5_unload(struct mlx5_core_dev *dev)
mlx5_ec_cleanup(dev);
mlx5_sf_hw_table_destroy(dev);
mlx5_vhca_event_stop(dev);
- mlx5_cleanup_fs(dev);
+ mlx5_fs_core_cleanup(dev);
mlx5_accel_ipsec_cleanup(dev);
mlx5_accel_tls_cleanup(dev);
mlx5_fpga_device_stop(dev);
@@ -1618,6 +1627,10 @@ static void remove_one(struct pci_dev *pdev)
struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
struct devlink *devlink = priv_to_devlink(dev);
+ /* mlx5_drain_fw_reset() is using devlink APIs. Hence, we must drain
+ * fw_reset before unregistering the devlink.
+ */
+ mlx5_drain_fw_reset(dev);
devlink_unregister(devlink);
mlx5_sriov_disable(pdev);
mlx5_crdump_disable(dev);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c
index 850937cd8bf9..1383550f44c1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c
@@ -530,6 +530,37 @@ static int dr_action_handle_cs_recalc(struct mlx5dr_domain *dmn,
return 0;
}
+static void dr_action_modify_ttl_adjust(struct mlx5dr_domain *dmn,
+ struct mlx5dr_ste_actions_attr *attr,
+ bool rx_rule,
+ bool *recalc_cs_required)
+{
+ *recalc_cs_required = false;
+
+ /* if device supports csum recalculation - no adjustment needed */
+ if (mlx5dr_ste_supp_ttl_cs_recalc(&dmn->info.caps))
+ return;
+
+ /* no adjustment needed on TX rules */
+ if (!rx_rule)
+ return;
+
+ if (!MLX5_CAP_ESW_FLOWTABLE(dmn->mdev, fdb_ipv4_ttl_modify)) {
+ /* Ignore the modify TTL action.
+ * It is always kept as last HW action.
+ */
+ attr->modify_actions--;
+ return;
+ }
+
+ if (dmn->type == MLX5DR_DOMAIN_TYPE_FDB)
+ /* Due to a HW bug on some devices, modifying TTL on RX flows
+ * will cause an incorrect checksum calculation. In such cases
+ * we will use a FW table to recalculate the checksum.
+ */
+ *recalc_cs_required = true;
+}
+
static void dr_action_print_sequence(struct mlx5dr_domain *dmn,
struct mlx5dr_action *actions[],
int last_idx)
@@ -650,8 +681,9 @@ int mlx5dr_actions_build_ste_arr(struct mlx5dr_matcher *matcher,
case DR_ACTION_TYP_MODIFY_HDR:
attr.modify_index = action->rewrite->index;
attr.modify_actions = action->rewrite->num_of_actions;
- recalc_cs_required = action->rewrite->modify_ttl &&
- !mlx5dr_ste_supp_ttl_cs_recalc(&dmn->info.caps);
+ if (action->rewrite->modify_ttl)
+ dr_action_modify_ttl_adjust(dmn, &attr, rx_rule,
+ &recalc_cs_required);
break;
case DR_ACTION_TYP_L2_TO_TNL_L2:
case DR_ACTION_TYP_L2_TO_TNL_L3:
@@ -732,12 +764,7 @@ int mlx5dr_actions_build_ste_arr(struct mlx5dr_matcher *matcher,
*new_hw_ste_arr_sz = nic_matcher->num_of_builders;
last_ste = ste_arr + DR_STE_SIZE * (nic_matcher->num_of_builders - 1);
- /* Due to a HW bug in some devices, modifying TTL on RX flows will
- * cause an incorrect checksum calculation. In this case we will
- * use a FW table to recalculate.
- */
- if (dmn->type == MLX5DR_DOMAIN_TYPE_FDB &&
- rx_rule && recalc_cs_required && dest_action) {
+ if (recalc_cs_required && dest_action) {
ret = dr_action_handle_cs_recalc(dmn, dest_action, &attr.final_icm_addr);
if (ret) {
mlx5dr_err(dmn,
@@ -842,7 +869,8 @@ struct mlx5dr_action *
mlx5dr_action_create_mult_dest_tbl(struct mlx5dr_domain *dmn,
struct mlx5dr_action_dest *dests,
u32 num_of_dests,
- bool ignore_flow_level)
+ bool ignore_flow_level,
+ u32 flow_source)
{
struct mlx5dr_cmd_flow_destination_hw_info *hw_dests;
struct mlx5dr_action **ref_actions;
@@ -914,7 +942,8 @@ mlx5dr_action_create_mult_dest_tbl(struct mlx5dr_domain *dmn,
reformat_req,
&action->dest_tbl->fw_tbl.id,
&action->dest_tbl->fw_tbl.group_id,
- ignore_flow_level);
+ ignore_flow_level,
+ flow_source);
if (ret)
goto free_action;
@@ -1556,12 +1585,6 @@ dr_action_modify_check_is_ttl_modify(const void *sw_action)
return sw_field == MLX5_ACTION_IN_FIELD_OUT_IP_TTL;
}
-static bool dr_action_modify_ttl_ignore(struct mlx5dr_domain *dmn)
-{
- return !mlx5dr_ste_supp_ttl_cs_recalc(&dmn->info.caps) &&
- !MLX5_CAP_ESW_FLOWTABLE(dmn->mdev, fdb_ipv4_ttl_modify);
-}
-
static int dr_actions_convert_modify_header(struct mlx5dr_action *action,
u32 max_hw_actions,
u32 num_sw_actions,
@@ -1573,6 +1596,7 @@ static int dr_actions_convert_modify_header(struct mlx5dr_action *action,
const struct mlx5dr_ste_action_modify_field *hw_dst_action_info;
const struct mlx5dr_ste_action_modify_field *hw_src_action_info;
struct mlx5dr_domain *dmn = action->rewrite->dmn;
+ __be64 *modify_ttl_sw_action = NULL;
int ret, i, hw_idx = 0;
__be64 *sw_action;
__be64 hw_action;
@@ -1585,8 +1609,14 @@ static int dr_actions_convert_modify_header(struct mlx5dr_action *action,
action->rewrite->allow_rx = 1;
action->rewrite->allow_tx = 1;
- for (i = 0; i < num_sw_actions; i++) {
- sw_action = &sw_actions[i];
+ for (i = 0; i < num_sw_actions || modify_ttl_sw_action; i++) {
+ /* modify TTL is handled separately, as a last action */
+ if (i == num_sw_actions) {
+ sw_action = modify_ttl_sw_action;
+ modify_ttl_sw_action = NULL;
+ } else {
+ sw_action = &sw_actions[i];
+ }
ret = dr_action_modify_check_field_limitation(action,
sw_action);
@@ -1595,10 +1625,9 @@ static int dr_actions_convert_modify_header(struct mlx5dr_action *action,
if (!(*modify_ttl) &&
dr_action_modify_check_is_ttl_modify(sw_action)) {
- if (dr_action_modify_ttl_ignore(dmn))
- continue;
-
+ modify_ttl_sw_action = sw_action;
*modify_ttl = true;
+ continue;
}
/* Convert SW action to HW action */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_fw.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_fw.c
index 68a4c32d5f34..f05ef0cd54ba 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_fw.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_fw.c
@@ -104,7 +104,8 @@ int mlx5dr_fw_create_md_tbl(struct mlx5dr_domain *dmn,
bool reformat_req,
u32 *tbl_id,
u32 *group_id,
- bool ignore_flow_level)
+ bool ignore_flow_level,
+ u32 flow_source)
{
struct mlx5dr_cmd_create_flow_table_attr ft_attr = {};
struct mlx5dr_cmd_fte_info fte_info = {};
@@ -139,6 +140,7 @@ int mlx5dr_fw_create_md_tbl(struct mlx5dr_domain *dmn,
fte_info.val = val;
fte_info.dest_arr = dest;
fte_info.ignore_flow_level = ignore_flow_level;
+ fte_info.flow_context.flow_source = flow_source;
ret = mlx5dr_cmd_set_fte(dmn->mdev, 0, 0, &ft_info, *group_id, &fte_info);
if (ret) {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v0.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v0.c
index 5a322335f204..2010d4ac6519 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v0.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v0.c
@@ -420,7 +420,7 @@ dr_ste_v0_set_actions_tx(struct mlx5dr_domain *dmn,
* encapsulation. The reason for that is that we support
* modify headers for outer headers only
*/
- if (action_type_set[DR_ACTION_TYP_MODIFY_HDR]) {
+ if (action_type_set[DR_ACTION_TYP_MODIFY_HDR] && attr->modify_actions) {
dr_ste_v0_set_entry_type(last_ste, DR_STE_TYPE_MODIFY_PKT);
dr_ste_v0_set_rewrite_actions(last_ste,
attr->modify_actions,
@@ -513,7 +513,7 @@ dr_ste_v0_set_actions_rx(struct mlx5dr_domain *dmn,
}
}
- if (action_type_set[DR_ACTION_TYP_MODIFY_HDR]) {
+ if (action_type_set[DR_ACTION_TYP_MODIFY_HDR] && attr->modify_actions) {
if (dr_ste_v0_get_entry_type(last_ste) == DR_STE_TYPE_MODIFY_PKT)
dr_ste_v0_arr_init_next(&last_ste,
added_stes,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h
index 46866a5fc5ca..98320e3945ad 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h
@@ -1461,7 +1461,8 @@ int mlx5dr_fw_create_md_tbl(struct mlx5dr_domain *dmn,
bool reformat_req,
u32 *tbl_id,
u32 *group_id,
- bool ignore_flow_level);
+ bool ignore_flow_level,
+ u32 flow_source);
void mlx5dr_fw_destroy_md_tbl(struct mlx5dr_domain *dmn, u32 tbl_id,
u32 group_id);
#endif /* _DR_TYPES_H_ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c
index 045b0cf90063..728f81882589 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c
@@ -520,6 +520,7 @@ static int mlx5_cmd_dr_create_fte(struct mlx5_flow_root_namespace *ns,
} else if (num_term_actions > 1) {
bool ignore_flow_level =
!!(fte->action.flags & FLOW_ACT_IGNORE_FLOW_LEVEL);
+ u32 flow_source = fte->flow_context.flow_source;
if (num_actions == MLX5_FLOW_CONTEXT_ACTION_MAX ||
fs_dr_num_actions == MLX5_FLOW_CONTEXT_ACTION_MAX) {
@@ -529,7 +530,8 @@ static int mlx5_cmd_dr_create_fte(struct mlx5_flow_root_namespace *ns,
tmp_action = mlx5dr_action_create_mult_dest_tbl(domain,
term_actions,
num_term_actions,
- ignore_flow_level);
+ ignore_flow_level,
+ flow_source);
if (!tmp_action) {
err = -EOPNOTSUPP;
goto free_actions;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h
index ec5cbec0d455..7626c85643b1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h
@@ -99,7 +99,8 @@ struct mlx5dr_action *
mlx5dr_action_create_mult_dest_tbl(struct mlx5dr_domain *dmn,
struct mlx5dr_action_dest *dests,
u32 num_of_dests,
- bool ignore_flow_level);
+ bool ignore_flow_level,
+ u32 flow_source);
struct mlx5dr_action *mlx5dr_action_create_drop(void);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c
index 01cf5a6a26bd..a2ee695a3f17 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c
@@ -568,10 +568,8 @@ static int
mlxsw_sp2_ipip_rem_addr_set_gre6(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_ipip_entry *ipip_entry)
{
- struct __ip6_tnl_parm parms6;
-
- parms6 = mlxsw_sp_ipip_netdev_parms6(ipip_entry->ol_dev);
- return mlxsw_sp_ipv6_addr_kvdl_index_get(mlxsw_sp, &parms6.raddr,
+ return mlxsw_sp_ipv6_addr_kvdl_index_get(mlxsw_sp,
+ &ipip_entry->parms.daddr.addr6,
&ipip_entry->dip_kvdl_index);
}
@@ -579,10 +577,7 @@ static void
mlxsw_sp2_ipip_rem_addr_unset_gre6(struct mlxsw_sp *mlxsw_sp,
const struct mlxsw_sp_ipip_entry *ipip_entry)
{
- struct __ip6_tnl_parm parms6;
-
- parms6 = mlxsw_sp_ipip_netdev_parms6(ipip_entry->ol_dev);
- mlxsw_sp_ipv6_addr_put(mlxsw_sp, &parms6.raddr);
+ mlxsw_sp_ipv6_addr_put(mlxsw_sp, &ipip_entry->parms.daddr.addr6);
}
static const struct mlxsw_sp_ipip_ops mlxsw_sp2_ipip_gre6_ops = {
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c
index b73466470f75..fe663b0ab708 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c
@@ -423,7 +423,7 @@ mlxsw_sp_span_gretap4_route(const struct net_device *to_dev,
parms = mlxsw_sp_ipip_netdev_parms4(to_dev);
ip_tunnel_init_flow(&fl4, parms.iph.protocol, *daddrp, *saddrp,
- 0, 0, parms.link, tun->fwmark, 0);
+ 0, 0, dev_net(to_dev), parms.link, tun->fwmark, 0);
rt = ip_route_output_key(tun->net, &fl4);
if (IS_ERR(rt))
diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_mac.c b/drivers/net/ethernet/microchip/lan966x/lan966x_mac.c
index 2679111ef669..005e56ea5da1 100644
--- a/drivers/net/ethernet/microchip/lan966x/lan966x_mac.c
+++ b/drivers/net/ethernet/microchip/lan966x/lan966x_mac.c
@@ -346,7 +346,7 @@ static void lan966x_mac_irq_process(struct lan966x *lan966x, u32 row,
lan966x_mac_process_raw_entry(&raw_entries[column],
mac, &vid, &dest_idx);
- if (WARN_ON(dest_idx > lan966x->num_phys_ports))
+ if (WARN_ON(dest_idx >= lan966x->num_phys_ports))
continue;
/* If the entry in SW is found, then there is nothing
@@ -393,7 +393,7 @@ static void lan966x_mac_irq_process(struct lan966x *lan966x, u32 row,
lan966x_mac_process_raw_entry(&raw_entries[column],
mac, &vid, &dest_idx);
- if (WARN_ON(dest_idx > lan966x->num_phys_ports))
+ if (WARN_ON(dest_idx >= lan966x->num_phys_ports))
continue;
mac_entry = lan966x_mac_alloc_entry(mac, vid, dest_idx);
diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_main.c b/drivers/net/ethernet/microchip/lan966x/lan966x_main.c
index 958e55596b82..05f6dcc9dfd5 100644
--- a/drivers/net/ethernet/microchip/lan966x/lan966x_main.c
+++ b/drivers/net/ethernet/microchip/lan966x/lan966x_main.c
@@ -103,6 +103,24 @@ static int lan966x_create_targets(struct platform_device *pdev,
return 0;
}
+static bool lan966x_port_unique_address(struct net_device *dev)
+{
+ struct lan966x_port *port = netdev_priv(dev);
+ struct lan966x *lan966x = port->lan966x;
+ int p;
+
+ for (p = 0; p < lan966x->num_phys_ports; ++p) {
+ port = lan966x->ports[p];
+ if (!port || port->dev == dev)
+ continue;
+
+ if (ether_addr_equal(dev->dev_addr, port->dev->dev_addr))
+ return false;
+ }
+
+ return true;
+}
+
static int lan966x_port_set_mac_address(struct net_device *dev, void *p)
{
struct lan966x_port *port = netdev_priv(dev);
@@ -110,16 +128,26 @@ static int lan966x_port_set_mac_address(struct net_device *dev, void *p)
const struct sockaddr *addr = p;
int ret;
+ if (ether_addr_equal(addr->sa_data, dev->dev_addr))
+ return 0;
+
/* Learn the new net device MAC address in the mac table. */
ret = lan966x_mac_cpu_learn(lan966x, addr->sa_data, HOST_PVID);
if (ret)
return ret;
+ /* If there is another port with the same address as the dev, then don't
+ * delete it from the MAC table
+ */
+ if (!lan966x_port_unique_address(dev))
+ goto out;
+
/* Then forget the previous one. */
ret = lan966x_mac_cpu_forget(lan966x, dev->dev_addr, HOST_PVID);
if (ret)
return ret;
+out:
eth_hw_addr_set(dev, addr->sa_data);
return ret;
}
@@ -671,6 +699,9 @@ static void lan966x_cleanup_ports(struct lan966x *lan966x)
disable_irq(lan966x->ana_irq);
lan966x->ana_irq = -ENXIO;
}
+
+ if (lan966x->ptp_irq)
+ devm_free_irq(lan966x->dev, lan966x->ptp_irq, lan966x);
}
static int lan966x_probe_port(struct lan966x *lan966x, u32 p,
diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c
index e443bd8b2d09..20ceac81a2c2 100644
--- a/drivers/net/ethernet/mscc/ocelot.c
+++ b/drivers/net/ethernet/mscc/ocelot.c
@@ -551,7 +551,7 @@ int ocelot_port_vlan_filtering(struct ocelot *ocelot, int port,
struct ocelot_vcap_block *block = &ocelot->block[VCAP_IS1];
struct ocelot_port *ocelot_port = ocelot->ports[port];
struct ocelot_vcap_filter *filter;
- int err;
+ int err = 0;
u32 val;
list_for_each_entry(filter, &block->rules, list) {
@@ -570,7 +570,7 @@ int ocelot_port_vlan_filtering(struct ocelot *ocelot, int port,
if (vlan_aware)
err = ocelot_del_vlan_unaware_pvid(ocelot, port,
ocelot_port->bridge);
- else
+ else if (ocelot_port->bridge)
err = ocelot_add_vlan_unaware_pvid(ocelot, port,
ocelot_port->bridge);
if (err)
@@ -629,6 +629,13 @@ int ocelot_vlan_add(struct ocelot *ocelot, int port, u16 vid, bool pvid,
{
int err;
+ /* Ignore VID 0 added to our RX filter by the 8021q module, since
+ * that collides with OCELOT_STANDALONE_PVID and changes it from
+ * egress-untagged to egress-tagged.
+ */
+ if (!vid)
+ return 0;
+
err = ocelot_vlan_member_add(ocelot, port, vid, untagged);
if (err)
return err;
@@ -651,6 +658,9 @@ int ocelot_vlan_del(struct ocelot *ocelot, int port, u16 vid)
bool del_pvid = false;
int err;
+ if (!vid)
+ return 0;
+
if (ocelot_port->pvid_vlan && ocelot_port->pvid_vlan->vid == vid)
del_pvid = true;
@@ -1612,7 +1622,7 @@ int ocelot_trap_add(struct ocelot *ocelot, int port,
trap->action.mask_mode = OCELOT_MASK_MODE_PERMIT_DENY;
trap->action.port_mask = 0;
trap->take_ts = take_ts;
- list_add_tail(&trap->trap_list, &ocelot->traps);
+ trap->is_trap = true;
new = true;
}
@@ -1624,10 +1634,8 @@ int ocelot_trap_add(struct ocelot *ocelot, int port,
err = ocelot_vcap_filter_replace(ocelot, trap);
if (err) {
trap->ingress_port_mask &= ~BIT(port);
- if (!trap->ingress_port_mask) {
- list_del(&trap->trap_list);
+ if (!trap->ingress_port_mask)
kfree(trap);
- }
return err;
}
@@ -1647,11 +1655,8 @@ int ocelot_trap_del(struct ocelot *ocelot, int port, unsigned long cookie)
return 0;
trap->ingress_port_mask &= ~BIT(port);
- if (!trap->ingress_port_mask) {
- list_del(&trap->trap_list);
-
+ if (!trap->ingress_port_mask)
return ocelot_vcap_filter_del(ocelot, trap);
- }
return ocelot_vcap_filter_replace(ocelot, trap);
}
@@ -2859,6 +2864,8 @@ static void ocelot_port_set_mcast_flood(struct ocelot *ocelot, int port,
val = BIT(port);
ocelot_rmw_rix(ocelot, val, BIT(port), ANA_PGID_PGID, PGID_MC);
+ ocelot_rmw_rix(ocelot, val, BIT(port), ANA_PGID_PGID, PGID_MCIPV4);
+ ocelot_rmw_rix(ocelot, val, BIT(port), ANA_PGID_PGID, PGID_MCIPV6);
}
static void ocelot_port_set_bcast_flood(struct ocelot *ocelot, int port,
diff --git a/drivers/net/ethernet/mscc/ocelot_flower.c b/drivers/net/ethernet/mscc/ocelot_flower.c
index 03b5e59d033e..51cf241ff7d0 100644
--- a/drivers/net/ethernet/mscc/ocelot_flower.c
+++ b/drivers/net/ethernet/mscc/ocelot_flower.c
@@ -280,9 +280,10 @@ static int ocelot_flower_parse_action(struct ocelot *ocelot, int port,
filter->type = OCELOT_VCAP_FILTER_OFFLOAD;
break;
case FLOW_ACTION_TRAP:
- if (filter->block_id != VCAP_IS2) {
+ if (filter->block_id != VCAP_IS2 ||
+ filter->lookup != 0) {
NL_SET_ERR_MSG_MOD(extack,
- "Trap action can only be offloaded to VCAP IS2");
+ "Trap action can only be offloaded to VCAP IS2 lookup 0");
return -EOPNOTSUPP;
}
if (filter->goto_target != -1) {
@@ -295,7 +296,7 @@ static int ocelot_flower_parse_action(struct ocelot *ocelot, int port,
filter->action.cpu_copy_ena = true;
filter->action.cpu_qu_num = 0;
filter->type = OCELOT_VCAP_FILTER_OFFLOAD;
- list_add_tail(&filter->trap_list, &ocelot->traps);
+ filter->is_trap = true;
break;
case FLOW_ACTION_POLICE:
if (filter->block_id == PSFP_BLOCK_ID) {
@@ -878,8 +879,6 @@ int ocelot_cls_flower_replace(struct ocelot *ocelot, int port,
ret = ocelot_flower_parse(ocelot, port, ingress, f, filter);
if (ret) {
- if (!list_empty(&filter->trap_list))
- list_del(&filter->trap_list);
kfree(filter);
return ret;
}
diff --git a/drivers/net/ethernet/mscc/ocelot_vcap.c b/drivers/net/ethernet/mscc/ocelot_vcap.c
index c8701ac955a8..eeb4cc07dd16 100644
--- a/drivers/net/ethernet/mscc/ocelot_vcap.c
+++ b/drivers/net/ethernet/mscc/ocelot_vcap.c
@@ -374,7 +374,6 @@ static void is2_entry_set(struct ocelot *ocelot, int ix,
OCELOT_VCAP_BIT_0);
vcap_key_set(vcap, &data, VCAP_IS2_HK_IGR_PORT_MASK, 0,
~filter->ingress_port_mask);
- vcap_key_bit_set(vcap, &data, VCAP_IS2_HK_FIRST, OCELOT_VCAP_BIT_ANY);
vcap_key_bit_set(vcap, &data, VCAP_IS2_HK_HOST_MATCH,
OCELOT_VCAP_BIT_ANY);
vcap_key_bit_set(vcap, &data, VCAP_IS2_HK_L2_MC, filter->dmac_mc);
@@ -1217,6 +1216,8 @@ int ocelot_vcap_filter_add(struct ocelot *ocelot,
struct ocelot_vcap_filter *tmp;
tmp = ocelot_vcap_block_find_filter_by_index(block, i);
+ /* Read back the filter's counters before moving it */
+ vcap_entry_get(ocelot, i - 1, tmp);
vcap_entry_set(ocelot, i, tmp);
}
@@ -1250,7 +1251,11 @@ int ocelot_vcap_filter_del(struct ocelot *ocelot,
struct ocelot_vcap_filter del_filter;
int i, index;
+ /* Need to inherit the block_id so that vcap_entry_set()
+ * does not get confused and knows where to install it.
+ */
memset(&del_filter, 0, sizeof(del_filter));
+ del_filter.block_id = filter->block_id;
/* Gets index of the filter */
index = ocelot_vcap_block_get_filter_index(block, filter);
@@ -1265,6 +1270,8 @@ int ocelot_vcap_filter_del(struct ocelot *ocelot,
struct ocelot_vcap_filter *tmp;
tmp = ocelot_vcap_block_find_filter_by_index(block, i);
+ /* Read back the filter's counters before moving it */
+ vcap_entry_get(ocelot, i + 1, tmp);
vcap_entry_set(ocelot, i, tmp);
}
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c b/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c
index 6ffc62c41165..0a7a757494bc 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c
@@ -256,7 +256,7 @@ static int ionic_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
err = ionic_map_bars(ionic);
if (err)
- goto err_out_pci_disable_device;
+ goto err_out_pci_release_regions;
/* Configure the device */
err = ionic_setup(ionic);
@@ -360,6 +360,7 @@ err_out_teardown:
err_out_unmap_bars:
ionic_unmap_bars(ionic);
+err_out_pci_release_regions:
pci_release_regions(pdev);
err_out_pci_disable_device:
pci_disable_device(pdev);
diff --git a/drivers/net/ethernet/qlogic/qla3xxx.c b/drivers/net/ethernet/qlogic/qla3xxx.c
index b30589a135c2..06f4d9a9e938 100644
--- a/drivers/net/ethernet/qlogic/qla3xxx.c
+++ b/drivers/net/ethernet/qlogic/qla3xxx.c
@@ -3614,7 +3614,8 @@ static void ql_reset_work(struct work_struct *work)
qdev->mem_map_registers;
unsigned long hw_flags;
- if (test_bit((QL_RESET_PER_SCSI | QL_RESET_START), &qdev->flags)) {
+ if (test_bit(QL_RESET_PER_SCSI, &qdev->flags) ||
+ test_bit(QL_RESET_START, &qdev->flags)) {
clear_bit(QL_LINK_MASTER, &qdev->flags);
/*
diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c
index 50d535981a35..f8edb3f1b73a 100644
--- a/drivers/net/ethernet/sfc/ef10.c
+++ b/drivers/net/ethernet/sfc/ef10.c
@@ -3579,6 +3579,11 @@ static int efx_ef10_mtd_probe(struct efx_nic *efx)
n_parts++;
}
+ if (!n_parts) {
+ kfree(parts);
+ return 0;
+ }
+
rc = efx_mtd_add(efx, &parts[0].common, n_parts, sizeof(*parts));
fail:
if (rc)
diff --git a/drivers/net/ethernet/sfc/efx_channels.c b/drivers/net/ethernet/sfc/efx_channels.c
index 377df8b7f015..40df910aa140 100644
--- a/drivers/net/ethernet/sfc/efx_channels.c
+++ b/drivers/net/ethernet/sfc/efx_channels.c
@@ -867,7 +867,9 @@ static void efx_set_xdp_channels(struct efx_nic *efx)
int efx_realloc_channels(struct efx_nic *efx, u32 rxq_entries, u32 txq_entries)
{
- struct efx_channel *other_channel[EFX_MAX_CHANNELS], *channel;
+ struct efx_channel *other_channel[EFX_MAX_CHANNELS], *channel,
+ *ptp_channel = efx_ptp_channel(efx);
+ struct efx_ptp_data *ptp_data = efx->ptp_data;
unsigned int i, next_buffer_table = 0;
u32 old_rxq_entries, old_txq_entries;
int rc, rc2;
@@ -938,6 +940,7 @@ int efx_realloc_channels(struct efx_nic *efx, u32 rxq_entries, u32 txq_entries)
efx_set_xdp_channels(efx);
out:
+ efx->ptp_data = NULL;
/* Destroy unused channel structures */
for (i = 0; i < efx->n_channels; i++) {
channel = other_channel[i];
@@ -948,6 +951,7 @@ out:
}
}
+ efx->ptp_data = ptp_data;
rc2 = efx_soft_enable_interrupts(efx);
if (rc2) {
rc = rc ? rc : rc2;
@@ -966,6 +970,7 @@ rollback:
efx->txq_entries = old_txq_entries;
for (i = 0; i < efx->n_channels; i++)
swap(efx->channel[i], other_channel[i]);
+ efx_ptp_update_channel(efx, ptp_channel);
goto out;
}
diff --git a/drivers/net/ethernet/sfc/ptp.c b/drivers/net/ethernet/sfc/ptp.c
index f0ef515e2ade..4625f85acab2 100644
--- a/drivers/net/ethernet/sfc/ptp.c
+++ b/drivers/net/ethernet/sfc/ptp.c
@@ -45,6 +45,7 @@
#include "farch_regs.h"
#include "tx.h"
#include "nic.h" /* indirectly includes ptp.h */
+#include "efx_channels.h"
/* Maximum number of events expected to make up a PTP event */
#define MAX_EVENT_FRAGS 3
@@ -541,6 +542,12 @@ struct efx_channel *efx_ptp_channel(struct efx_nic *efx)
return efx->ptp_data ? efx->ptp_data->channel : NULL;
}
+void efx_ptp_update_channel(struct efx_nic *efx, struct efx_channel *channel)
+{
+ if (efx->ptp_data)
+ efx->ptp_data->channel = channel;
+}
+
static u32 last_sync_timestamp_major(struct efx_nic *efx)
{
struct efx_channel *channel = efx_ptp_channel(efx);
@@ -1443,6 +1450,11 @@ int efx_ptp_probe(struct efx_nic *efx, struct efx_channel *channel)
int rc = 0;
unsigned int pos;
+ if (efx->ptp_data) {
+ efx->ptp_data->channel = channel;
+ return 0;
+ }
+
ptp = kzalloc(sizeof(struct efx_ptp_data), GFP_KERNEL);
efx->ptp_data = ptp;
if (!efx->ptp_data)
@@ -2176,7 +2188,7 @@ static const struct efx_channel_type efx_ptp_channel_type = {
.pre_probe = efx_ptp_probe_channel,
.post_remove = efx_ptp_remove_channel,
.get_name = efx_ptp_get_channel_name,
- /* no copy operation; there is no need to reallocate this channel */
+ .copy = efx_copy_channel,
.receive_skb = efx_ptp_rx,
.want_txqs = efx_ptp_want_txqs,
.keep_eventq = false,
diff --git a/drivers/net/ethernet/sfc/ptp.h b/drivers/net/ethernet/sfc/ptp.h
index 9855e8c9e544..7b1ef7002b3f 100644
--- a/drivers/net/ethernet/sfc/ptp.h
+++ b/drivers/net/ethernet/sfc/ptp.h
@@ -16,6 +16,7 @@ struct ethtool_ts_info;
int efx_ptp_probe(struct efx_nic *efx, struct efx_channel *channel);
void efx_ptp_defer_probe_with_channel(struct efx_nic *efx);
struct efx_channel *efx_ptp_channel(struct efx_nic *efx);
+void efx_ptp_update_channel(struct efx_nic *efx, struct efx_channel *channel);
void efx_ptp_remove(struct efx_nic *efx);
int efx_ptp_set_ts_config(struct efx_nic *efx, struct ifreq *ifr);
int efx_ptp_get_ts_config(struct efx_nic *efx, struct ifreq *ifr);
diff --git a/drivers/net/ethernet/smsc/smsc911x.c b/drivers/net/ethernet/smsc/smsc911x.c
index 7a50ba00f8ae..c854efdf1f25 100644
--- a/drivers/net/ethernet/smsc/smsc911x.c
+++ b/drivers/net/ethernet/smsc/smsc911x.c
@@ -2431,7 +2431,7 @@ static int smsc911x_drv_probe(struct platform_device *pdev)
if (irq == -EPROBE_DEFER) {
retval = -EPROBE_DEFER;
goto out_0;
- } else if (irq <= 0) {
+ } else if (irq < 0) {
pr_warn("Could not allocate irq resource\n");
retval = -ENODEV;
goto out_0;
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
index 63754a9c4ba7..0b0be0898ac5 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
@@ -454,6 +454,7 @@ static int intel_mgbe_common_data(struct pci_dev *pdev,
plat->has_gmac4 = 1;
plat->force_sf_dma_mode = 0;
plat->tso_en = 1;
+ plat->sph_disable = 1;
/* Multiplying factor to the clk_eee_i clock time
* period to make it closer to 100 ns. This value
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c
index ac9e6c7a33b5..6b447d8f0bd8 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c
@@ -65,8 +65,9 @@ static void socfpga_dwmac_fix_mac_speed(void *priv, unsigned int speed)
struct phy_device *phy_dev = ndev->phydev;
u32 val;
- writew(SGMII_ADAPTER_DISABLE,
- sgmii_adapter_base + SGMII_ADAPTER_CTRL_REG);
+ if (sgmii_adapter_base)
+ writew(SGMII_ADAPTER_DISABLE,
+ sgmii_adapter_base + SGMII_ADAPTER_CTRL_REG);
if (splitter_base) {
val = readl(splitter_base + EMAC_SPLITTER_CTRL_REG);
@@ -88,10 +89,11 @@ static void socfpga_dwmac_fix_mac_speed(void *priv, unsigned int speed)
writel(val, splitter_base + EMAC_SPLITTER_CTRL_REG);
}
- writew(SGMII_ADAPTER_ENABLE,
- sgmii_adapter_base + SGMII_ADAPTER_CTRL_REG);
- if (phy_dev)
+ if (phy_dev && sgmii_adapter_base) {
+ writew(SGMII_ADAPTER_ENABLE,
+ sgmii_adapter_base + SGMII_ADAPTER_CTRL_REG);
tse_pcs_fix_mac_speed(&dwmac->pcs, phy_dev, speed);
+ }
}
static int socfpga_dwmac_parse_data(struct socfpga_dwmac *dwmac, struct device *dev)
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
index f86cc83003f2..f834472599f7 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
@@ -907,6 +907,7 @@ static int sun8i_dwmac_register_mdio_mux(struct stmmac_priv *priv)
ret = mdio_mux_init(priv->device, mdio_mux, mdio_mux_syscon_switch_fn,
&gmac->mux_handle, priv, priv->mii);
+ of_node_put(mdio_mux);
return ret;
}
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c
index 22fea0f67245..92d32940aff0 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c
@@ -71,9 +71,9 @@ static int init_systime(void __iomem *ioaddr, u32 sec, u32 nsec)
writel(value, ioaddr + PTP_TCR);
/* wait for present system time initialize to complete */
- return readl_poll_timeout(ioaddr + PTP_TCR, value,
+ return readl_poll_timeout_atomic(ioaddr + PTP_TCR, value,
!(value & PTP_TCR_TSINIT),
- 10000, 100000);
+ 10, 100000);
}
static int config_addend(void __iomem *ioaddr, u32 addend)
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 4a4b3651ab3e..2525a80353b7 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -7021,7 +7021,7 @@ int stmmac_dvr_probe(struct device *device,
dev_info(priv->device, "TSO feature enabled\n");
}
- if (priv->dma_cap.sphen) {
+ if (priv->dma_cap.sphen && !priv->plat->sph_disable) {
ndev->hw_features |= NETIF_F_GRO;
priv->sph_cap = true;
priv->sph = priv->sph_cap;
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c
index fcf17d8a0494..644bb54f5f02 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c
@@ -181,7 +181,7 @@ static int stmmac_pci_probe(struct pci_dev *pdev,
return -ENOMEM;
/* Enable pci device */
- ret = pci_enable_device(pdev);
+ ret = pcim_enable_device(pdev);
if (ret) {
dev_err(&pdev->dev, "%s: ERROR: failed to enable device\n",
__func__);
@@ -241,8 +241,6 @@ static void stmmac_pci_remove(struct pci_dev *pdev)
pcim_iounmap_regions(pdev, BIT(i));
break;
}
-
- pci_disable_device(pdev);
}
static int __maybe_unused stmmac_pci_suspend(struct device *dev)
diff --git a/drivers/net/ethernet/ti/cpsw_new.c b/drivers/net/ethernet/ti/cpsw_new.c
index bd4b1528cf99..79e850fe4621 100644
--- a/drivers/net/ethernet/ti/cpsw_new.c
+++ b/drivers/net/ethernet/ti/cpsw_new.c
@@ -1246,8 +1246,10 @@ static int cpsw_probe_dt(struct cpsw_common *cpsw)
data->slave_data = devm_kcalloc(dev, CPSW_SLAVE_PORTS_NUM,
sizeof(struct cpsw_slave_data),
GFP_KERNEL);
- if (!data->slave_data)
+ if (!data->slave_data) {
+ of_node_put(tmp_node);
return -ENOMEM;
+ }
/* Populate all the child nodes here...
*/
@@ -1341,6 +1343,7 @@ static int cpsw_probe_dt(struct cpsw_common *cpsw)
err_node_put:
of_node_put(port_np);
+ of_node_put(tmp_node);
return ret;
}
diff --git a/drivers/net/ethernet/xilinx/xilinx_emaclite.c b/drivers/net/ethernet/xilinx/xilinx_emaclite.c
index 57a24f62e353..d770b3ac3f74 100644
--- a/drivers/net/ethernet/xilinx/xilinx_emaclite.c
+++ b/drivers/net/ethernet/xilinx/xilinx_emaclite.c
@@ -823,10 +823,10 @@ static int xemaclite_mdio_write(struct mii_bus *bus, int phy_id, int reg,
static int xemaclite_mdio_setup(struct net_local *lp, struct device *dev)
{
struct mii_bus *bus;
- int rc;
struct resource res;
struct device_node *np = of_get_parent(lp->phy_node);
struct device_node *npp;
+ int rc, ret;
/* Don't register the MDIO bus if the phy_node or its parent node
* can't be found.
@@ -836,8 +836,14 @@ static int xemaclite_mdio_setup(struct net_local *lp, struct device *dev)
return -ENODEV;
}
npp = of_get_parent(np);
-
- of_address_to_resource(npp, 0, &res);
+ ret = of_address_to_resource(npp, 0, &res);
+ of_node_put(npp);
+ if (ret) {
+ dev_err(dev, "%s resource error!\n",
+ dev->of_node->full_name);
+ of_node_put(np);
+ return ret;
+ }
if (lp->ndev->mem_start != res.start) {
struct phy_device *phydev;
phydev = of_phy_find_device(lp->phy_node);
@@ -846,6 +852,7 @@ static int xemaclite_mdio_setup(struct net_local *lp, struct device *dev)
"MDIO of the phy is not registered yet\n");
else
put_device(&phydev->mdio.dev);
+ of_node_put(np);
return 0;
}
@@ -858,6 +865,7 @@ static int xemaclite_mdio_setup(struct net_local *lp, struct device *dev)
bus = mdiobus_alloc();
if (!bus) {
dev_err(dev, "Failed to allocate mdiobus\n");
+ of_node_put(np);
return -ENOMEM;
}
@@ -870,6 +878,7 @@ static int xemaclite_mdio_setup(struct net_local *lp, struct device *dev)
bus->parent = dev;
rc = of_mdiobus_register(bus, np);
+ of_node_put(np);
if (rc) {
dev_err(dev, "Failed to register mdio bus.\n");
goto err_register;
@@ -926,8 +935,6 @@ static int xemaclite_open(struct net_device *dev)
xemaclite_disable_interrupts(lp);
if (lp->phy_node) {
- u32 bmcr;
-
lp->phy_dev = of_phy_connect(lp->ndev, lp->phy_node,
xemaclite_adjust_link, 0,
PHY_INTERFACE_MODE_MII);
@@ -938,19 +945,6 @@ static int xemaclite_open(struct net_device *dev)
/* EmacLite doesn't support giga-bit speeds */
phy_set_max_speed(lp->phy_dev, SPEED_100);
-
- /* Don't advertise 1000BASE-T Full/Half duplex speeds */
- phy_write(lp->phy_dev, MII_CTRL1000, 0);
-
- /* Advertise only 10 and 100mbps full/half duplex speeds */
- phy_write(lp->phy_dev, MII_ADVERTISE, ADVERTISE_ALL |
- ADVERTISE_CSMA);
-
- /* Restart auto negotiation */
- bmcr = phy_read(lp->phy_dev, MII_BMCR);
- bmcr |= (BMCR_ANENABLE | BMCR_ANRESTART);
- phy_write(lp->phy_dev, MII_BMCR, bmcr);
-
phy_start(lp->phy_dev);
}
diff --git a/drivers/net/hippi/rrunner.c b/drivers/net/hippi/rrunner.c
index 16105292b140..74e845fa2e07 100644
--- a/drivers/net/hippi/rrunner.c
+++ b/drivers/net/hippi/rrunner.c
@@ -1355,7 +1355,9 @@ static int rr_close(struct net_device *dev)
rrpriv->fw_running = 0;
+ spin_unlock_irqrestore(&rrpriv->lock, flags);
del_timer_sync(&rrpriv->timer);
+ spin_lock_irqsave(&rrpriv->lock, flags);
writel(0, &regs->TxPi);
writel(0, &regs->IpRxPi);
diff --git a/drivers/net/ipa/gsi.c b/drivers/net/ipa/gsi.c
index bc981043cc80..a701178a1d13 100644
--- a/drivers/net/ipa/gsi.c
+++ b/drivers/net/ipa/gsi.c
@@ -1367,9 +1367,10 @@ static void gsi_evt_ring_rx_update(struct gsi_evt_ring *evt_ring, u32 index)
struct gsi_event *event_done;
struct gsi_event *event;
struct gsi_trans *trans;
+ u32 trans_count = 0;
u32 byte_count = 0;
- u32 old_index;
u32 event_avail;
+ u32 old_index;
trans_info = &channel->trans_info;
@@ -1390,6 +1391,7 @@ static void gsi_evt_ring_rx_update(struct gsi_evt_ring *evt_ring, u32 index)
do {
trans->len = __le16_to_cpu(event->len);
byte_count += trans->len;
+ trans_count++;
/* Move on to the next event and transaction */
if (--event_avail)
@@ -1401,7 +1403,7 @@ static void gsi_evt_ring_rx_update(struct gsi_evt_ring *evt_ring, u32 index)
/* We record RX bytes when they are received */
channel->byte_count += byte_count;
- channel->trans_count++;
+ channel->trans_count += trans_count;
}
/* Initialize a ring, including allocating DMA memory for its entries */
diff --git a/drivers/net/ipa/ipa_endpoint.c b/drivers/net/ipa/ipa_endpoint.c
index 888e94278a84..cea7b2e2ce96 100644
--- a/drivers/net/ipa/ipa_endpoint.c
+++ b/drivers/net/ipa/ipa_endpoint.c
@@ -1150,13 +1150,12 @@ static void ipa_endpoint_skb_copy(struct ipa_endpoint *endpoint,
return;
skb = __dev_alloc_skb(len, GFP_ATOMIC);
- if (!skb)
- return;
-
- /* Copy the data into the socket buffer and receive it */
- skb_put(skb, len);
- memcpy(skb->data, data, len);
- skb->truesize += extra;
+ if (skb) {
+ /* Copy the data into the socket buffer and receive it */
+ skb_put(skb, len);
+ memcpy(skb->data, data, len);
+ skb->truesize += extra;
+ }
ipa_modem_skb_rx(endpoint->netdev, skb);
}
diff --git a/drivers/net/ipa/ipa_qmi.c b/drivers/net/ipa/ipa_qmi.c
index 90f3aec55b36..ec010cf2e816 100644
--- a/drivers/net/ipa/ipa_qmi.c
+++ b/drivers/net/ipa/ipa_qmi.c
@@ -125,7 +125,7 @@ static void ipa_qmi_indication(struct ipa_qmi *ipa_qmi)
*/
static void ipa_qmi_ready(struct ipa_qmi *ipa_qmi)
{
- struct ipa *ipa = container_of(ipa_qmi, struct ipa, qmi);
+ struct ipa *ipa;
int ret;
/* We aren't ready until the modem and microcontroller are */
diff --git a/drivers/net/mdio/mdio-mux-bcm6368.c b/drivers/net/mdio/mdio-mux-bcm6368.c
index 6dcbf987d61b..8b444a8eb6b5 100644
--- a/drivers/net/mdio/mdio-mux-bcm6368.c
+++ b/drivers/net/mdio/mdio-mux-bcm6368.c
@@ -115,7 +115,7 @@ static int bcm6368_mdiomux_probe(struct platform_device *pdev)
md->mii_bus = devm_mdiobus_alloc(&pdev->dev);
if (!md->mii_bus) {
dev_err(&pdev->dev, "mdiomux bus alloc failed\n");
- return ENOMEM;
+ return -ENOMEM;
}
bus = md->mii_bus;
diff --git a/drivers/net/phy/marvell10g.c b/drivers/net/phy/marvell10g.c
index b6fea119fe13..2b7d0720720b 100644
--- a/drivers/net/phy/marvell10g.c
+++ b/drivers/net/phy/marvell10g.c
@@ -880,7 +880,7 @@ static int mv3310_read_status_copper(struct phy_device *phydev)
cssr1 = phy_read_mmd(phydev, MDIO_MMD_PCS, MV_PCS_CSSR1);
if (cssr1 < 0)
- return val;
+ return cssr1;
/* If the link settings are not resolved, mark the link down */
if (!(cssr1 & MV_PCS_CSSR1_RESOLVED)) {
diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c
index fc53b71dc872..cd9aa353b653 100644
--- a/drivers/net/phy/micrel.c
+++ b/drivers/net/phy/micrel.c
@@ -1743,7 +1743,7 @@ static int ksz886x_cable_test_get_status(struct phy_device *phydev,
static int lanphy_read_page_reg(struct phy_device *phydev, int page, u32 addr)
{
- u32 data;
+ int data;
phy_lock_mdio_bus(phydev);
__phy_write(phydev, LAN_EXT_PAGE_ACCESS_CONTROL, page);
@@ -2444,8 +2444,7 @@ static int lan8804_config_init(struct phy_device *phydev)
static irqreturn_t lan8814_handle_interrupt(struct phy_device *phydev)
{
- u16 tsu_irq_status;
- int irq_status;
+ int irq_status, tsu_irq_status;
irq_status = phy_read(phydev, LAN8814_INTS);
if (irq_status > 0 && (irq_status & LAN8814_INT_LINK))
@@ -2657,6 +2656,7 @@ static struct phy_driver ksphy_driver[] = {
.name = "Micrel KS8737",
/* PHY_BASIC_FEATURES */
.driver_data = &ks8737_type,
+ .probe = kszphy_probe,
.config_init = kszphy_config_init,
.config_intr = kszphy_config_intr,
.handle_interrupt = kszphy_handle_interrupt,
@@ -2782,8 +2782,8 @@ static struct phy_driver ksphy_driver[] = {
.config_init = ksz8061_config_init,
.config_intr = kszphy_config_intr,
.handle_interrupt = kszphy_handle_interrupt,
- .suspend = kszphy_suspend,
- .resume = kszphy_resume,
+ .suspend = genphy_suspend,
+ .resume = genphy_resume,
}, {
.phy_id = PHY_ID_KSZ9021,
.phy_id_mask = 0x000ffffe,
diff --git a/drivers/net/phy/microchip_t1.c b/drivers/net/phy/microchip_t1.c
index 3f79bbbe62d3..c2c0e361fd3d 100644
--- a/drivers/net/phy/microchip_t1.c
+++ b/drivers/net/phy/microchip_t1.c
@@ -743,6 +743,7 @@ static struct phy_driver microchip_t1_phy_driver[] = {
{
PHY_ID_MATCH_MODEL(PHY_ID_LAN937X),
.name = "Microchip LAN937x T1",
+ .flags = PHY_POLL_CABLE_TEST,
.features = PHY_BASIC_T1_FEATURES,
.config_init = lan87xx_config_init,
.suspend = genphy_suspend,
diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c
index beb2b66da132..f122026c4682 100644
--- a/drivers/net/phy/phy.c
+++ b/drivers/net/phy/phy.c
@@ -970,8 +970,13 @@ static irqreturn_t phy_interrupt(int irq, void *phy_dat)
{
struct phy_device *phydev = phy_dat;
struct phy_driver *drv = phydev->drv;
+ irqreturn_t ret;
- return drv->handle_interrupt(phydev);
+ mutex_lock(&phydev->lock);
+ ret = drv->handle_interrupt(phydev);
+ mutex_unlock(&phydev->lock);
+
+ return ret;
}
/**
diff --git a/drivers/net/phy/sfp.c b/drivers/net/phy/sfp.c
index 4dfb79807823..9a5d5a10560f 100644
--- a/drivers/net/phy/sfp.c
+++ b/drivers/net/phy/sfp.c
@@ -250,6 +250,7 @@ struct sfp {
struct sfp_eeprom_id id;
unsigned int module_power_mW;
unsigned int module_t_start_up;
+ bool tx_fault_ignore;
#if IS_ENABLED(CONFIG_HWMON)
struct sfp_diag diag;
@@ -1956,6 +1957,12 @@ static int sfp_sm_mod_probe(struct sfp *sfp, bool report)
else
sfp->module_t_start_up = T_START_UP;
+ if (!memcmp(id.base.vendor_name, "HUAWEI ", 16) &&
+ !memcmp(id.base.vendor_pn, "MA5671A ", 16))
+ sfp->tx_fault_ignore = true;
+ else
+ sfp->tx_fault_ignore = false;
+
return 0;
}
@@ -2409,7 +2416,10 @@ static void sfp_check_state(struct sfp *sfp)
mutex_lock(&sfp->st_mutex);
state = sfp_get_state(sfp);
changed = state ^ sfp->state;
- changed &= SFP_F_PRESENT | SFP_F_LOS | SFP_F_TX_FAULT;
+ if (sfp->tx_fault_ignore)
+ changed &= SFP_F_PRESENT | SFP_F_LOS;
+ else
+ changed &= SFP_F_PRESENT | SFP_F_LOS | SFP_F_TX_FAULT;
for (i = 0; i < GPIO_MAX; i++)
if (changed & BIT(i))
diff --git a/drivers/net/ppp/pppoe.c b/drivers/net/ppp/pppoe.c
index 3619520340b7..e172743948ed 100644
--- a/drivers/net/ppp/pppoe.c
+++ b/drivers/net/ppp/pppoe.c
@@ -988,6 +988,7 @@ static int pppoe_fill_forward_path(struct net_device_path_ctx *ctx,
path->encap.proto = htons(ETH_P_PPP_SES);
path->encap.id = be16_to_cpu(po->num);
memcpy(path->encap.h_dest, po->pppoe_pa.remote, ETH_ALEN);
+ memcpy(ctx->daddr, po->pppoe_pa.remote, ETH_ALEN);
path->dev = ctx->dev;
ctx->dev = dev;
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 87838cbe38cf..cbba9d2e8f32 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -1005,6 +1005,24 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
* xdp.data_meta were adjusted
*/
len = xdp.data_end - xdp.data + vi->hdr_len + metasize;
+
+ /* recalculate headroom if xdp.data or xdp_data_meta
+ * were adjusted, note that offset should always point
+ * to the start of the reserved bytes for virtio_net
+ * header which are followed by xdp.data, that means
+ * that offset is equal to the headroom (when buf is
+ * starting at the beginning of the page, otherwise
+ * there is a base offset inside the page) but it's used
+ * with a different starting point (buf start) than
+ * xdp.data (buf start + vnet hdr size). If xdp.data or
+ * data_meta were adjusted by the xdp prog then the
+ * headroom size has changed and so has the offset, we
+ * can use data_hard_start, which points at buf start +
+ * vnet hdr size, to calculate the new headroom and use
+ * it later to compute buf start in page_to_skb()
+ */
+ headroom = xdp.data - xdp.data_hard_start - metasize;
+
/* We can only create skb based on xdp_page. */
if (unlikely(xdp_page != page)) {
rcu_read_unlock();
@@ -1012,7 +1030,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
head_skb = page_to_skb(vi, rq, xdp_page, offset,
len, PAGE_SIZE, false,
metasize,
- VIRTIO_XDP_HEADROOM);
+ headroom);
return head_skb;
}
break;
diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c
index d9d90baac72a..93e8d119d45f 100644
--- a/drivers/net/vmxnet3/vmxnet3_drv.c
+++ b/drivers/net/vmxnet3/vmxnet3_drv.c
@@ -589,6 +589,7 @@ vmxnet3_rq_alloc_rx_buf(struct vmxnet3_rx_queue *rq, u32 ring_idx,
if (dma_mapping_error(&adapter->pdev->dev,
rbi->dma_addr)) {
dev_kfree_skb_any(rbi->skb);
+ rbi->skb = NULL;
rq->stats.rx_buf_alloc_failure++;
break;
}
@@ -613,6 +614,7 @@ vmxnet3_rq_alloc_rx_buf(struct vmxnet3_rx_queue *rq, u32 ring_idx,
if (dma_mapping_error(&adapter->pdev->dev,
rbi->dma_addr)) {
put_page(rbi->page);
+ rbi->page = NULL;
rq->stats.rx_buf_alloc_failure++;
break;
}
@@ -1666,6 +1668,10 @@ vmxnet3_rq_cleanup(struct vmxnet3_rx_queue *rq,
u32 i, ring_idx;
struct Vmxnet3_RxDesc *rxd;
+ /* ring has already been cleaned up */
+ if (!rq->rx_ring[0].base)
+ return;
+
for (ring_idx = 0; ring_idx < 2; ring_idx++) {
for (i = 0; i < rq->rx_ring[ring_idx].size; i++) {
#ifdef __BIG_ENDIAN_BITFIELD
diff --git a/drivers/net/wan/cosa.c b/drivers/net/wan/cosa.c
index 23d2954d9747..1e5672019922 100644
--- a/drivers/net/wan/cosa.c
+++ b/drivers/net/wan/cosa.c
@@ -349,7 +349,7 @@ static int __init cosa_init(void)
}
} else {
cosa_major = register_chrdev(0, "cosa", &cosa_fops);
- if (!cosa_major) {
+ if (cosa_major < 0) {
pr_warn("unable to register chardev\n");
err = -EIO;
goto out;
diff --git a/drivers/net/wireguard/device.c b/drivers/net/wireguard/device.c
index 0fad1331303c..aa9a7a5970fd 100644
--- a/drivers/net/wireguard/device.c
+++ b/drivers/net/wireguard/device.c
@@ -19,6 +19,7 @@
#include <linux/if_arp.h>
#include <linux/icmp.h>
#include <linux/suspend.h>
+#include <net/dst_metadata.h>
#include <net/icmp.h>
#include <net/rtnetlink.h>
#include <net/ip_tunnels.h>
@@ -167,7 +168,7 @@ static netdev_tx_t wg_xmit(struct sk_buff *skb, struct net_device *dev)
goto err_peer;
}
- mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
+ mtu = skb_valid_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
__skb_queue_head_init(&packets);
if (!skb_is_gso(skb)) {
diff --git a/drivers/net/wireless/ath/ath11k/core.c b/drivers/net/wireless/ath/ath11k/core.c
index 71eb7d04c3bf..90a5df1fbdbd 100644
--- a/drivers/net/wireless/ath/ath11k/core.c
+++ b/drivers/net/wireless/ath/ath11k/core.c
@@ -1288,6 +1288,7 @@ static void ath11k_core_restart(struct work_struct *work)
ieee80211_stop_queues(ar->hw);
ath11k_mac_drain_tx(ar);
+ complete(&ar->completed_11d_scan);
complete(&ar->scan.started);
complete(&ar->scan.completed);
complete(&ar->peer_assoc_done);
diff --git a/drivers/net/wireless/ath/ath11k/core.h b/drivers/net/wireless/ath/ath11k/core.h
index c0228e91a596..b8634eddf49a 100644
--- a/drivers/net/wireless/ath/ath11k/core.h
+++ b/drivers/net/wireless/ath/ath11k/core.h
@@ -38,6 +38,8 @@
extern unsigned int ath11k_frame_mode;
+#define ATH11K_SCAN_TIMEOUT_HZ (20 * HZ)
+
#define ATH11K_MON_TIMER_INTERVAL 10
enum ath11k_supported_bw {
@@ -189,6 +191,12 @@ enum ath11k_scan_state {
ATH11K_SCAN_ABORTING,
};
+enum ath11k_11d_state {
+ ATH11K_11D_IDLE,
+ ATH11K_11D_PREPARING,
+ ATH11K_11D_RUNNING,
+};
+
enum ath11k_dev_flags {
ATH11K_CAC_RUNNING,
ATH11K_FLAG_CORE_REGISTERED,
@@ -607,9 +615,8 @@ struct ath11k {
bool dfs_block_radar_events;
struct ath11k_thermal thermal;
u32 vdev_id_11d_scan;
- struct completion finish_11d_scan;
- struct completion finish_11d_ch_list;
- bool pending_11d;
+ struct completion completed_11d_scan;
+ enum ath11k_11d_state state_11d;
bool regdom_set_by_user;
int hw_rate_code;
u8 twt_enabled;
diff --git a/drivers/net/wireless/ath/ath11k/mac.c b/drivers/net/wireless/ath/ath11k/mac.c
index e6b34b0d61bd..58ff761393db 100644
--- a/drivers/net/wireless/ath/ath11k/mac.c
+++ b/drivers/net/wireless/ath/ath11k/mac.c
@@ -3601,26 +3601,6 @@ static int ath11k_mac_op_hw_scan(struct ieee80211_hw *hw,
if (ret)
goto exit;
- /* Currently the pending_11d=true only happened 1 time while
- * wlan interface up in ath11k_mac_11d_scan_start(), it is called by
- * ath11k_mac_op_add_interface(), after wlan interface up,
- * pending_11d=false always.
- * If remove below wait, it always happened scan fail and lead connect
- * fail while wlan interface up, because it has a 11d scan which is running
- * in firmware, and lead this scan failed.
- */
- if (ar->pending_11d) {
- long time_left;
- unsigned long timeout = 5 * HZ;
-
- if (ar->supports_6ghz)
- timeout += 5 * HZ;
-
- time_left = wait_for_completion_timeout(&ar->finish_11d_ch_list, timeout);
- ath11k_dbg(ar->ab, ATH11K_DBG_MAC,
- "mac wait 11d channel list time left %ld\n", time_left);
- }
-
memset(&arg, 0, sizeof(arg));
ath11k_wmi_start_scan_init(ar, &arg);
arg.vdev_id = arvif->vdev_id;
@@ -3686,6 +3666,10 @@ exit:
kfree(arg.extraie.ptr);
mutex_unlock(&ar->conf_mutex);
+
+ if (ar->state_11d == ATH11K_11D_PREPARING)
+ ath11k_mac_11d_scan_start(ar, arvif->vdev_id);
+
return ret;
}
@@ -5814,7 +5798,7 @@ static int ath11k_mac_op_start(struct ieee80211_hw *hw)
/* TODO: Do we need to enable ANI? */
- ath11k_reg_update_chan_list(ar);
+ ath11k_reg_update_chan_list(ar, false);
ar->num_started_vdevs = 0;
ar->num_created_vdevs = 0;
@@ -5881,6 +5865,11 @@ static void ath11k_mac_op_stop(struct ieee80211_hw *hw)
cancel_work_sync(&ar->ab->update_11d_work);
cancel_work_sync(&ar->ab->rfkill_work);
+ if (ar->state_11d == ATH11K_11D_PREPARING) {
+ ar->state_11d = ATH11K_11D_IDLE;
+ complete(&ar->completed_11d_scan);
+ }
+
spin_lock_bh(&ar->data_lock);
list_for_each_entry_safe(ppdu_stats, tmp, &ar->ppdu_stats_info, list) {
list_del(&ppdu_stats->list);
@@ -6051,7 +6040,7 @@ static bool ath11k_mac_vif_ap_active_any(struct ath11k_base *ab)
return false;
}
-void ath11k_mac_11d_scan_start(struct ath11k *ar, u32 vdev_id, bool wait)
+void ath11k_mac_11d_scan_start(struct ath11k *ar, u32 vdev_id)
{
struct wmi_11d_scan_start_params param;
int ret;
@@ -6079,28 +6068,22 @@ void ath11k_mac_11d_scan_start(struct ath11k *ar, u32 vdev_id, bool wait)
ath11k_dbg(ar->ab, ATH11K_DBG_MAC, "mac start 11d scan\n");
- if (wait)
- reinit_completion(&ar->finish_11d_scan);
-
ret = ath11k_wmi_send_11d_scan_start_cmd(ar, &param);
if (ret) {
ath11k_warn(ar->ab, "failed to start 11d scan vdev %d ret: %d\n",
vdev_id, ret);
} else {
ar->vdev_id_11d_scan = vdev_id;
- if (wait) {
- ar->pending_11d = true;
- ret = wait_for_completion_timeout(&ar->finish_11d_scan,
- 5 * HZ);
- ath11k_dbg(ar->ab, ATH11K_DBG_MAC,
- "mac 11d scan left time %d\n", ret);
-
- if (!ret)
- ar->pending_11d = false;
- }
+ if (ar->state_11d == ATH11K_11D_PREPARING)
+ ar->state_11d = ATH11K_11D_RUNNING;
}
fin:
+ if (ar->state_11d == ATH11K_11D_PREPARING) {
+ ar->state_11d = ATH11K_11D_IDLE;
+ complete(&ar->completed_11d_scan);
+ }
+
mutex_unlock(&ar->ab->vdev_id_11d_lock);
}
@@ -6123,12 +6106,15 @@ void ath11k_mac_11d_scan_stop(struct ath11k *ar)
vdev_id = ar->vdev_id_11d_scan;
ret = ath11k_wmi_send_11d_scan_stop_cmd(ar, vdev_id);
- if (ret)
+ if (ret) {
ath11k_warn(ar->ab,
"failed to stopt 11d scan vdev %d ret: %d\n",
vdev_id, ret);
- else
+ } else {
ar->vdev_id_11d_scan = ATH11K_11D_INVALID_VDEV_ID;
+ ar->state_11d = ATH11K_11D_IDLE;
+ complete(&ar->completed_11d_scan);
+ }
}
mutex_unlock(&ar->ab->vdev_id_11d_lock);
}
@@ -6324,8 +6310,10 @@ static int ath11k_mac_op_add_interface(struct ieee80211_hw *hw,
goto err_peer_del;
}
- ath11k_mac_11d_scan_start(ar, arvif->vdev_id, true);
-
+ if (test_bit(WMI_TLV_SERVICE_11D_OFFLOAD, ab->wmi_ab.svc_map)) {
+ reinit_completion(&ar->completed_11d_scan);
+ ar->state_11d = ATH11K_11D_PREPARING;
+ }
break;
case WMI_VDEV_TYPE_MONITOR:
set_bit(ATH11K_FLAG_MONITOR_VDEV_CREATED, &ar->monitor_flags);
@@ -7190,7 +7178,7 @@ ath11k_mac_op_unassign_vif_chanctx(struct ieee80211_hw *hw,
}
if (arvif->vdev_type == WMI_VDEV_TYPE_STA)
- ath11k_mac_11d_scan_start(ar, arvif->vdev_id, false);
+ ath11k_mac_11d_scan_start(ar, arvif->vdev_id);
mutex_unlock(&ar->conf_mutex);
}
@@ -8671,8 +8659,7 @@ int ath11k_mac_allocate(struct ath11k_base *ab)
ar->monitor_vdev_id = -1;
clear_bit(ATH11K_FLAG_MONITOR_VDEV_CREATED, &ar->monitor_flags);
ar->vdev_id_11d_scan = ATH11K_11D_INVALID_VDEV_ID;
- init_completion(&ar->finish_11d_scan);
- init_completion(&ar->finish_11d_ch_list);
+ init_completion(&ar->completed_11d_scan);
}
return 0;
diff --git a/drivers/net/wireless/ath/ath11k/mac.h b/drivers/net/wireless/ath/ath11k/mac.h
index 0e6c870b09c8..29b523af66dd 100644
--- a/drivers/net/wireless/ath/ath11k/mac.h
+++ b/drivers/net/wireless/ath/ath11k/mac.h
@@ -130,7 +130,7 @@ extern const struct htt_rx_ring_tlv_filter ath11k_mac_mon_status_filter_default;
#define ATH11K_SCAN_11D_INTERVAL 600000
#define ATH11K_11D_INVALID_VDEV_ID 0xFFFF
-void ath11k_mac_11d_scan_start(struct ath11k *ar, u32 vdev_id, bool wait);
+void ath11k_mac_11d_scan_start(struct ath11k *ar, u32 vdev_id);
void ath11k_mac_11d_scan_stop(struct ath11k *ar);
void ath11k_mac_11d_scan_stop_all(struct ath11k_base *ab);
diff --git a/drivers/net/wireless/ath/ath11k/reg.c b/drivers/net/wireless/ath/ath11k/reg.c
index 81e11cde31d7..80a697771393 100644
--- a/drivers/net/wireless/ath/ath11k/reg.c
+++ b/drivers/net/wireless/ath/ath11k/reg.c
@@ -102,7 +102,7 @@ ath11k_reg_notifier(struct wiphy *wiphy, struct regulatory_request *request)
ar->regdom_set_by_user = true;
}
-int ath11k_reg_update_chan_list(struct ath11k *ar)
+int ath11k_reg_update_chan_list(struct ath11k *ar, bool wait)
{
struct ieee80211_supported_band **bands;
struct scan_chan_list_params *params;
@@ -111,7 +111,32 @@ int ath11k_reg_update_chan_list(struct ath11k *ar)
struct channel_param *ch;
enum nl80211_band band;
int num_channels = 0;
- int i, ret;
+ int i, ret, left;
+
+ if (wait && ar->state_11d != ATH11K_11D_IDLE) {
+ left = wait_for_completion_timeout(&ar->completed_11d_scan,
+ ATH11K_SCAN_TIMEOUT_HZ);
+ if (!left) {
+ ath11k_dbg(ar->ab, ATH11K_DBG_REG,
+ "failed to receive 11d scan complete: timed out\n");
+ ar->state_11d = ATH11K_11D_IDLE;
+ }
+ ath11k_dbg(ar->ab, ATH11K_DBG_REG,
+ "reg 11d scan wait left time %d\n", left);
+ }
+
+ if (wait &&
+ (ar->scan.state == ATH11K_SCAN_STARTING ||
+ ar->scan.state == ATH11K_SCAN_RUNNING)) {
+ left = wait_for_completion_timeout(&ar->scan.completed,
+ ATH11K_SCAN_TIMEOUT_HZ);
+ if (!left)
+ ath11k_dbg(ar->ab, ATH11K_DBG_REG,
+ "failed to receive hw scan complete: timed out\n");
+
+ ath11k_dbg(ar->ab, ATH11K_DBG_REG,
+ "reg hw scan wait left time %d\n", left);
+ }
bands = hw->wiphy->bands;
for (band = 0; band < NUM_NL80211_BANDS; band++) {
@@ -193,11 +218,6 @@ int ath11k_reg_update_chan_list(struct ath11k *ar)
ret = ath11k_wmi_send_scan_chan_list_cmd(ar, params);
kfree(params);
- if (ar->pending_11d) {
- complete(&ar->finish_11d_ch_list);
- ar->pending_11d = false;
- }
-
return ret;
}
@@ -263,15 +283,8 @@ int ath11k_regd_update(struct ath11k *ar)
goto err;
}
- if (ar->pending_11d)
- complete(&ar->finish_11d_scan);
-
rtnl_lock();
wiphy_lock(ar->hw->wiphy);
-
- if (ar->pending_11d)
- reinit_completion(&ar->finish_11d_ch_list);
-
ret = regulatory_set_wiphy_regd_sync(ar->hw->wiphy, regd_copy);
wiphy_unlock(ar->hw->wiphy);
rtnl_unlock();
@@ -282,7 +295,7 @@ int ath11k_regd_update(struct ath11k *ar)
goto err;
if (ar->state == ATH11K_STATE_ON) {
- ret = ath11k_reg_update_chan_list(ar);
+ ret = ath11k_reg_update_chan_list(ar, true);
if (ret)
goto err;
}
diff --git a/drivers/net/wireless/ath/ath11k/reg.h b/drivers/net/wireless/ath/ath11k/reg.h
index 5fb9dc03a74e..2f284f26378d 100644
--- a/drivers/net/wireless/ath/ath11k/reg.h
+++ b/drivers/net/wireless/ath/ath11k/reg.h
@@ -32,5 +32,5 @@ struct ieee80211_regdomain *
ath11k_reg_build_regd(struct ath11k_base *ab,
struct cur_regulatory_info *reg_info, bool intersect);
int ath11k_regd_update(struct ath11k *ar);
-int ath11k_reg_update_chan_list(struct ath11k *ar);
+int ath11k_reg_update_chan_list(struct ath11k *ar, bool wait);
#endif
diff --git a/drivers/net/wireless/ath/ath11k/wmi.c b/drivers/net/wireless/ath/ath11k/wmi.c
index b4f86c45d81f..2751fe8814df 100644
--- a/drivers/net/wireless/ath/ath11k/wmi.c
+++ b/drivers/net/wireless/ath/ath11k/wmi.c
@@ -2015,7 +2015,10 @@ void ath11k_wmi_start_scan_init(struct ath11k *ar,
{
/* setup commonly used values */
arg->scan_req_id = 1;
- arg->scan_priority = WMI_SCAN_PRIORITY_LOW;
+ if (ar->state_11d == ATH11K_11D_PREPARING)
+ arg->scan_priority = WMI_SCAN_PRIORITY_MEDIUM;
+ else
+ arg->scan_priority = WMI_SCAN_PRIORITY_LOW;
arg->dwell_time_active = 50;
arg->dwell_time_active_2g = 0;
arg->dwell_time_passive = 150;
@@ -6350,8 +6353,10 @@ static void ath11k_wmi_op_ep_tx_credits(struct ath11k_base *ab)
static int ath11k_reg_11d_new_cc_event(struct ath11k_base *ab, struct sk_buff *skb)
{
const struct wmi_11d_new_cc_ev *ev;
+ struct ath11k *ar;
+ struct ath11k_pdev *pdev;
const void **tb;
- int ret;
+ int ret, i;
tb = ath11k_wmi_tlv_parse_alloc(ab, skb->data, skb->len, GFP_ATOMIC);
if (IS_ERR(tb)) {
@@ -6377,6 +6382,13 @@ static int ath11k_reg_11d_new_cc_event(struct ath11k_base *ab, struct sk_buff *s
kfree(tb);
+ for (i = 0; i < ab->num_radios; i++) {
+ pdev = &ab->pdevs[i];
+ ar = pdev->ar;
+ ar->state_11d = ATH11K_11D_IDLE;
+ complete(&ar->completed_11d_scan);
+ }
+
queue_work(ab->workqueue, &ab->update_11d_work);
return 0;
diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c b/drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c
index 866a33f49915..3237d4b528b5 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c
@@ -371,7 +371,7 @@ void iwl_dbg_tlv_del_timers(struct iwl_trans *trans)
struct iwl_dbg_tlv_timer_node *node, *tmp;
list_for_each_entry_safe(node, tmp, timer_list, list) {
- del_timer(&node->timer);
+ del_timer_sync(&node->timer);
list_del(&node->list);
kfree(node);
}
diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c
index 28bfa7b7b73c..e9ec63e0e395 100644
--- a/drivers/net/wireless/mac80211_hwsim.c
+++ b/drivers/net/wireless/mac80211_hwsim.c
@@ -2202,11 +2202,14 @@ mac80211_hwsim_sta_rc_update(struct ieee80211_hw *hw,
if (!data->use_chanctx) {
confbw = data->bw;
} else {
- struct ieee80211_chanctx_conf *chanctx_conf =
- rcu_dereference(vif->chanctx_conf);
+ struct ieee80211_chanctx_conf *chanctx_conf;
+
+ rcu_read_lock();
+ chanctx_conf = rcu_dereference(vif->chanctx_conf);
if (!WARN_ON(!chanctx_conf))
confbw = chanctx_conf->def.width;
+ rcu_read_unlock();
}
WARN(bw > hwsim_get_chanwidth(confbw),
@@ -2475,11 +2478,13 @@ static void hw_scan_work(struct work_struct *work)
if (req->ie_len)
skb_put_data(probe, req->ie, req->ie_len);
+ rcu_read_lock();
if (!ieee80211_tx_prepare_skb(hwsim->hw,
hwsim->hw_scan_vif,
probe,
hwsim->tmp_chan->band,
NULL)) {
+ rcu_read_unlock();
kfree_skb(probe);
continue;
}
@@ -2487,6 +2492,7 @@ static void hw_scan_work(struct work_struct *work)
local_bh_disable();
mac80211_hwsim_tx_frame(hwsim->hw, probe,
hwsim->tmp_chan);
+ rcu_read_unlock();
local_bh_enable();
}
}
diff --git a/drivers/nfc/nfcmrvl/main.c b/drivers/nfc/nfcmrvl/main.c
index 2fcf545012b1..1a5284de4341 100644
--- a/drivers/nfc/nfcmrvl/main.c
+++ b/drivers/nfc/nfcmrvl/main.c
@@ -183,6 +183,7 @@ void nfcmrvl_nci_unregister_dev(struct nfcmrvl_private *priv)
{
struct nci_dev *ndev = priv->ndev;
+ nci_unregister_device(ndev);
if (priv->ndev->nfc_dev->fw_download_in_progress)
nfcmrvl_fw_dnld_abort(priv);
@@ -191,7 +192,6 @@ void nfcmrvl_nci_unregister_dev(struct nfcmrvl_private *priv)
if (gpio_is_valid(priv->config.reset_n_io))
gpio_free(priv->config.reset_n_io);
- nci_unregister_device(ndev);
nci_free_device(ndev);
kfree(priv);
}
diff --git a/drivers/nfc/pn533/pn533.c b/drivers/nfc/pn533/pn533.c
index a491db46e3bd..d9f6367b9993 100644
--- a/drivers/nfc/pn533/pn533.c
+++ b/drivers/nfc/pn533/pn533.c
@@ -2787,13 +2787,14 @@ void pn53x_common_clean(struct pn533 *priv)
{
struct pn533_cmd *cmd, *n;
+ /* delete the timer before cleanup the worker */
+ del_timer_sync(&priv->listen_timer);
+
flush_delayed_work(&priv->poll_work);
destroy_workqueue(priv->wq);
skb_queue_purge(&priv->resp_q);
- del_timer(&priv->listen_timer);
-
list_for_each_entry_safe(cmd, n, &priv->cmd_queue, queue) {
list_del(&cmd->queue);
kfree(cmd);
diff --git a/drivers/nvme/host/constants.c b/drivers/nvme/host/constants.c
index 7d49eb34b348..4910543f00ff 100644
--- a/drivers/nvme/host/constants.c
+++ b/drivers/nvme/host/constants.c
@@ -4,7 +4,6 @@
* Copyright (c) 2022, Oracle and/or its affiliates
*/
-#include <linux/blkdev.h>
#include "nvme.h"
#ifdef CONFIG_NVME_VERBOSE_ERRORS
@@ -92,6 +91,7 @@ static const char * const nvme_statuses[] = {
[NVME_SC_NS_WRITE_PROTECTED] = "Namespace is Write Protected",
[NVME_SC_CMD_INTERRUPTED] = "Command Interrupted",
[NVME_SC_TRANSIENT_TR_ERR] = "Transient Transport Error",
+ [NVME_SC_ADMIN_COMMAND_MEDIA_NOT_READY] = "Admin Command Media Not Ready",
[NVME_SC_INVALID_IO_CMD_SET] = "Invalid IO Command Set",
[NVME_SC_LBA_RANGE] = "LBA Out of Range",
[NVME_SC_CAP_EXCEEDED] = "Capacity Exceeded",
@@ -155,10 +155,13 @@ static const char * const nvme_statuses[] = {
[NVME_SC_COMPARE_FAILED] = "Compare Failure",
[NVME_SC_ACCESS_DENIED] = "Access Denied",
[NVME_SC_UNWRITTEN_BLOCK] = "Deallocated or Unwritten Logical Block",
+ [NVME_SC_INTERNAL_PATH_ERROR] = "Internal Pathing Error",
[NVME_SC_ANA_PERSISTENT_LOSS] = "Asymmetric Access Persistent Loss",
[NVME_SC_ANA_INACCESSIBLE] = "Asymmetric Access Inaccessible",
[NVME_SC_ANA_TRANSITION] = "Asymmetric Access Transition",
+ [NVME_SC_CTRL_PATH_ERROR] = "Controller Pathing Error",
[NVME_SC_HOST_PATH_ERROR] = "Host Pathing Error",
+ [NVME_SC_HOST_ABORTED_CMD] = "Host Aborted Command",
};
const unsigned char *nvme_get_error_status_str(u16 status)
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index e1846d04817f..72f7c955c707 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -1207,6 +1207,7 @@ static void nvme_keep_alive_work(struct work_struct *work)
rq->timeout = ctrl->kato * HZ;
rq->end_io_data = ctrl;
+ rq->rq_flags |= RQF_QUIET;
blk_execute_rq_nowait(rq, false, nvme_keep_alive_end_io);
}
@@ -1426,6 +1427,32 @@ out_free_id:
return error;
}
+static int nvme_identify_ns_cs_indep(struct nvme_ctrl *ctrl, unsigned nsid,
+ struct nvme_id_ns_cs_indep **id)
+{
+ struct nvme_command c = {
+ .identify.opcode = nvme_admin_identify,
+ .identify.nsid = cpu_to_le32(nsid),
+ .identify.cns = NVME_ID_CNS_NS_CS_INDEP,
+ };
+ int ret;
+
+ *id = kmalloc(sizeof(**id), GFP_KERNEL);
+ if (!*id)
+ return -ENOMEM;
+
+ ret = nvme_submit_sync_cmd(ctrl->admin_q, &c, *id, sizeof(**id));
+ if (ret) {
+ dev_warn(ctrl->device,
+ "Identify namespace (CS independent) failed (%d)\n",
+ ret);
+ kfree(*id);
+ return ret;
+ }
+
+ return 0;
+}
+
static int nvme_features(struct nvme_ctrl *dev, u8 op, unsigned int fid,
unsigned int dword11, void *buffer, size_t buflen, u32 *result)
{
@@ -1621,20 +1648,22 @@ static void nvme_config_discard(struct gendisk *disk, struct nvme_ns *ns)
u32 size = queue_logical_block_size(queue);
if (ctrl->max_discard_sectors == 0) {
- blk_queue_flag_clear(QUEUE_FLAG_DISCARD, queue);
+ blk_queue_max_discard_sectors(queue, 0);
return;
}
BUILD_BUG_ON(PAGE_SIZE / sizeof(struct nvme_dsm_range) <
NVME_DSM_MAX_RANGES);
- queue->limits.discard_alignment = 0;
queue->limits.discard_granularity = size;
/* If discard is already enabled, don't reset queue limits */
- if (blk_queue_flag_test_and_set(QUEUE_FLAG_DISCARD, queue))
+ if (queue->limits.max_discard_sectors)
return;
+ if (ctrl->dmrsl && ctrl->dmrsl <= nvme_sect_to_lba(ns, UINT_MAX))
+ ctrl->max_discard_sectors = nvme_lba_to_sect(ns, ctrl->dmrsl);
+
blk_queue_max_discard_sectors(queue, ctrl->max_discard_sectors);
blk_queue_max_discard_segments(queue, ctrl->max_discard_segments);
@@ -1771,7 +1800,7 @@ static void nvme_set_queue_limits(struct nvme_ctrl *ctrl,
blk_queue_max_segments(q, min_t(u32, max_segments, USHRT_MAX));
}
blk_queue_virt_boundary(q, NVME_CTRL_PAGE_SIZE - 1);
- blk_queue_dma_alignment(q, 7);
+ blk_queue_dma_alignment(q, 3);
blk_queue_write_cache(q, vwc, vwc);
}
@@ -2100,10 +2129,9 @@ static const struct block_device_operations nvme_bdev_ops = {
.pr_ops = &nvme_pr_ops,
};
-static int nvme_wait_ready(struct nvme_ctrl *ctrl, u64 cap, bool enabled)
+static int nvme_wait_ready(struct nvme_ctrl *ctrl, u32 timeout, bool enabled)
{
- unsigned long timeout =
- ((NVME_CAP_TIMEOUT(cap) + 1) * HZ / 2) + jiffies;
+ unsigned long timeout_jiffies = ((timeout + 1) * HZ / 2) + jiffies;
u32 csts, bit = enabled ? NVME_CSTS_RDY : 0;
int ret;
@@ -2116,7 +2144,7 @@ static int nvme_wait_ready(struct nvme_ctrl *ctrl, u64 cap, bool enabled)
usleep_range(1000, 2000);
if (fatal_signal_pending(current))
return -EINTR;
- if (time_after(jiffies, timeout)) {
+ if (time_after(jiffies, timeout_jiffies)) {
dev_err(ctrl->device,
"Device not ready; aborting %s, CSTS=0x%x\n",
enabled ? "initialisation" : "reset", csts);
@@ -2147,13 +2175,14 @@ int nvme_disable_ctrl(struct nvme_ctrl *ctrl)
if (ctrl->quirks & NVME_QUIRK_DELAY_BEFORE_CHK_RDY)
msleep(NVME_QUIRK_DELAY_AMOUNT);
- return nvme_wait_ready(ctrl, ctrl->cap, false);
+ return nvme_wait_ready(ctrl, NVME_CAP_TIMEOUT(ctrl->cap), false);
}
EXPORT_SYMBOL_GPL(nvme_disable_ctrl);
int nvme_enable_ctrl(struct nvme_ctrl *ctrl)
{
unsigned dev_page_min;
+ u32 timeout;
int ret;
ret = ctrl->ops->reg_read64(ctrl, NVME_REG_CAP, &ctrl->cap);
@@ -2174,6 +2203,27 @@ int nvme_enable_ctrl(struct nvme_ctrl *ctrl)
ctrl->ctrl_config = NVME_CC_CSS_CSI;
else
ctrl->ctrl_config = NVME_CC_CSS_NVM;
+
+ if (ctrl->cap & NVME_CAP_CRMS_CRWMS) {
+ u32 crto;
+
+ ret = ctrl->ops->reg_read32(ctrl, NVME_REG_CRTO, &crto);
+ if (ret) {
+ dev_err(ctrl->device, "Reading CRTO failed (%d)\n",
+ ret);
+ return ret;
+ }
+
+ if (ctrl->cap & NVME_CAP_CRMS_CRIMS) {
+ ctrl->ctrl_config |= NVME_CC_CRIME;
+ timeout = NVME_CRTO_CRIMT(crto);
+ } else {
+ timeout = NVME_CRTO_CRWMT(crto);
+ }
+ } else {
+ timeout = NVME_CAP_TIMEOUT(ctrl->cap);
+ }
+
ctrl->ctrl_config |= (NVME_CTRL_PAGE_SHIFT - 12) << NVME_CC_MPS_SHIFT;
ctrl->ctrl_config |= NVME_CC_AMS_RR | NVME_CC_SHN_NONE;
ctrl->ctrl_config |= NVME_CC_IOSQES | NVME_CC_IOCQES;
@@ -2182,7 +2232,7 @@ int nvme_enable_ctrl(struct nvme_ctrl *ctrl)
ret = ctrl->ops->reg_write32(ctrl, NVME_REG_CC, ctrl->ctrl_config);
if (ret)
return ret;
- return nvme_wait_ready(ctrl, ctrl->cap, true);
+ return nvme_wait_ready(ctrl, timeout, true);
}
EXPORT_SYMBOL_GPL(nvme_enable_ctrl);
@@ -2894,8 +2944,7 @@ static int nvme_init_non_mdts_limits(struct nvme_ctrl *ctrl)
if (id->dmrl)
ctrl->max_discard_segments = id->dmrl;
- if (id->dmrsl)
- ctrl->max_discard_sectors = le32_to_cpu(id->dmrsl);
+ ctrl->dmrsl = le32_to_cpu(id->dmrsl);
if (id->wzsl)
ctrl->max_zeroes_sectors = nvme_mps_to_sectors(ctrl, id->wzsl);
@@ -3080,10 +3129,6 @@ int nvme_init_ctrl_finish(struct nvme_ctrl *ctrl)
if (ret)
return ret;
- ret = nvme_init_non_mdts_limits(ctrl);
- if (ret < 0)
- return ret;
-
ret = nvme_configure_apst(ctrl);
if (ret < 0)
return ret;
@@ -3146,6 +3191,7 @@ static const struct file_operations nvme_dev_fops = {
.release = nvme_dev_release,
.unlocked_ioctl = nvme_dev_ioctl,
.compat_ioctl = compat_ptr_ioctl,
+ .uring_cmd = nvme_dev_uring_cmd,
};
static ssize_t nvme_sysfs_reset(struct device *dev,
@@ -3699,6 +3745,7 @@ static const struct file_operations nvme_ns_chr_fops = {
.release = nvme_ns_chr_release,
.unlocked_ioctl = nvme_ns_chr_ioctl,
.compat_ioctl = compat_ptr_ioctl,
+ .uring_cmd = nvme_ns_chr_uring_cmd,
};
static int nvme_add_ns_cdev(struct nvme_ns *ns)
@@ -4090,11 +4137,26 @@ out:
static void nvme_validate_or_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
{
struct nvme_ns_ids ids = { };
+ struct nvme_id_ns_cs_indep *id;
struct nvme_ns *ns;
+ bool ready = true;
if (nvme_identify_ns_descs(ctrl, nsid, &ids))
return;
+ /*
+ * Check if the namespace is ready. If not ignore it, we will get an
+ * AEN once it becomes ready and restart the scan.
+ */
+ if ((ctrl->cap & NVME_CAP_CRMS_CRIMS) &&
+ !nvme_identify_ns_cs_indep(ctrl, nsid, &id)) {
+ ready = id->nstat & NVME_NSTAT_NRDY;
+ kfree(id);
+ }
+
+ if (!ready)
+ return;
+
ns = nvme_find_get_ns(ctrl, nsid);
if (ns) {
nvme_validate_ns(ns, &ids);
@@ -4237,11 +4299,26 @@ static void nvme_scan_work(struct work_struct *work)
{
struct nvme_ctrl *ctrl =
container_of(work, struct nvme_ctrl, scan_work);
+ int ret;
/* No tagset on a live ctrl means IO queues could not created */
if (ctrl->state != NVME_CTRL_LIVE || !ctrl->tagset)
return;
+ /*
+ * Identify controller limits can change at controller reset due to
+ * new firmware download, even though it is not common we cannot ignore
+ * such scenario. Controller's non-mdts limits are reported in the unit
+ * of logical blocks that is dependent on the format of attached
+ * namespace. Hence re-read the limits at the time of ns allocation.
+ */
+ ret = nvme_init_non_mdts_limits(ctrl);
+ if (ret < 0) {
+ dev_warn(ctrl->device,
+ "reading non-mdts-limits failed: %d\n", ret);
+ return;
+ }
+
if (test_and_clear_bit(NVME_AER_NOTICE_NS_CHANGED, &ctrl->events)) {
dev_info(ctrl->device, "rescanning namespaces.\n");
nvme_clear_changed_ns_log(ctrl);
@@ -4839,6 +4916,8 @@ static inline void _nvme_check_size(void)
BUILD_BUG_ON(sizeof(struct nvme_command) != 64);
BUILD_BUG_ON(sizeof(struct nvme_id_ctrl) != NVME_IDENTIFY_DATA_SIZE);
BUILD_BUG_ON(sizeof(struct nvme_id_ns) != NVME_IDENTIFY_DATA_SIZE);
+ BUILD_BUG_ON(sizeof(struct nvme_id_ns_cs_indep) !=
+ NVME_IDENTIFY_DATA_SIZE);
BUILD_BUG_ON(sizeof(struct nvme_id_ns_zns) != NVME_IDENTIFY_DATA_SIZE);
BUILD_BUG_ON(sizeof(struct nvme_id_ns_nvm) != NVME_IDENTIFY_DATA_SIZE);
BUILD_BUG_ON(sizeof(struct nvme_id_ctrl_zns) != NVME_IDENTIFY_DATA_SIZE);
diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h
index 1e3a09cad961..46d6e194ac2b 100644
--- a/drivers/nvme/host/fabrics.h
+++ b/drivers/nvme/host/fabrics.h
@@ -187,6 +187,14 @@ static inline char *nvmf_ctrl_subsysnqn(struct nvme_ctrl *ctrl)
return ctrl->subsys->subnqn;
}
+static inline void nvmf_complete_timed_out_request(struct request *rq)
+{
+ if (blk_mq_request_started(rq) && !blk_mq_request_completed(rq)) {
+ nvme_req(rq)->status = NVME_SC_HOST_ABORTED_CMD;
+ blk_mq_complete_request(rq);
+ }
+}
+
int nvmf_reg_read32(struct nvme_ctrl *ctrl, u32 off, u32 *val);
int nvmf_reg_read64(struct nvme_ctrl *ctrl, u32 off, u64 *val);
int nvmf_reg_write32(struct nvme_ctrl *ctrl, u32 off, u32 val);
diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index 080f85f4105f..7ae72c7a211b 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -3831,6 +3831,9 @@ process_local_list:
return count;
}
+static DEVICE_ATTR(nvme_discovery, 0200, NULL, nvme_fc_nvme_discovery_store);
+
+#ifdef CONFIG_BLK_CGROUP_FC_APPID
/* Parse the cgroup id from a buf and return the length of cgrpid */
static int fc_parse_cgrpid(const char *buf, u64 *id)
{
@@ -3854,12 +3857,10 @@ static int fc_parse_cgrpid(const char *buf, u64 *id)
}
/*
- * fc_update_appid: Parse and update the appid in the blkcg associated with
- * cgroupid.
- * @buf: buf contains both cgrpid and appid info
- * @count: size of the buffer
+ * Parse and update the appid in the blkcg associated with the cgroupid.
*/
-static int fc_update_appid(const char *buf, size_t count)
+static ssize_t fc_appid_store(struct device *dev,
+ struct device_attribute *attr, const char *buf, size_t count)
{
u64 cgrp_id;
int appid_len = 0;
@@ -3887,23 +3888,14 @@ static int fc_update_appid(const char *buf, size_t count)
return ret;
return count;
}
-
-static ssize_t fc_appid_store(struct device *dev,
- struct device_attribute *attr, const char *buf, size_t count)
-{
- int ret = 0;
-
- ret = fc_update_appid(buf, count);
- if (ret < 0)
- return -EINVAL;
- return count;
-}
-static DEVICE_ATTR(nvme_discovery, 0200, NULL, nvme_fc_nvme_discovery_store);
static DEVICE_ATTR(appid_store, 0200, NULL, fc_appid_store);
+#endif /* CONFIG_BLK_CGROUP_FC_APPID */
static struct attribute *nvme_fc_attrs[] = {
&dev_attr_nvme_discovery.attr,
+#ifdef CONFIG_BLK_CGROUP_FC_APPID
&dev_attr_appid_store.attr,
+#endif
NULL
};
diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c
index 554566371ffa..096b1b47d750 100644
--- a/drivers/nvme/host/ioctl.c
+++ b/drivers/nvme/host/ioctl.c
@@ -5,6 +5,7 @@
*/
#include <linux/ptrace.h> /* for force_successful_syscall_return */
#include <linux/nvme_ioctl.h>
+#include <linux/io_uring.h>
#include "nvme.h"
/*
@@ -53,10 +54,21 @@ out:
return ERR_PTR(ret);
}
-static int nvme_submit_user_cmd(struct request_queue *q,
+static int nvme_finish_user_metadata(struct request *req, void __user *ubuf,
+ void *meta, unsigned len, int ret)
+{
+ if (!ret && req_op(req) == REQ_OP_DRV_IN &&
+ copy_to_user(ubuf, meta, len))
+ ret = -EFAULT;
+ kfree(meta);
+ return ret;
+}
+
+static struct request *nvme_alloc_user_request(struct request_queue *q,
struct nvme_command *cmd, void __user *ubuffer,
unsigned bufflen, void __user *meta_buffer, unsigned meta_len,
- u32 meta_seed, u64 *result, unsigned timeout, bool vec)
+ u32 meta_seed, void **metap, unsigned timeout, bool vec,
+ unsigned int rq_flags, blk_mq_req_flags_t blk_flags)
{
bool write = nvme_is_write(cmd);
struct nvme_ns *ns = q->queuedata;
@@ -66,9 +78,9 @@ static int nvme_submit_user_cmd(struct request_queue *q,
void *meta = NULL;
int ret;
- req = blk_mq_alloc_request(q, nvme_req_op(cmd), 0);
+ req = blk_mq_alloc_request(q, nvme_req_op(cmd) | rq_flags, blk_flags);
if (IS_ERR(req))
- return PTR_ERR(req);
+ return req;
nvme_init_request(req, cmd);
if (timeout)
@@ -105,26 +117,50 @@ static int nvme_submit_user_cmd(struct request_queue *q,
goto out_unmap;
}
req->cmd_flags |= REQ_INTEGRITY;
+ *metap = meta;
}
}
+ return req;
+
+out_unmap:
+ if (bio)
+ blk_rq_unmap_user(bio);
+out:
+ blk_mq_free_request(req);
+ return ERR_PTR(ret);
+}
+
+static int nvme_submit_user_cmd(struct request_queue *q,
+ struct nvme_command *cmd, void __user *ubuffer,
+ unsigned bufflen, void __user *meta_buffer, unsigned meta_len,
+ u32 meta_seed, u64 *result, unsigned timeout, bool vec)
+{
+ struct request *req;
+ void *meta = NULL;
+ struct bio *bio;
+ int ret;
+
+ req = nvme_alloc_user_request(q, cmd, ubuffer, bufflen, meta_buffer,
+ meta_len, meta_seed, &meta, timeout, vec, 0, 0);
+ if (IS_ERR(req))
+ return PTR_ERR(req);
+
+ bio = req->bio;
+
ret = nvme_execute_passthru_rq(req);
+
if (result)
*result = le64_to_cpu(nvme_req(req)->result.u64);
- if (meta && !ret && !write) {
- if (copy_to_user(meta_buffer, meta, meta_len))
- ret = -EFAULT;
- }
- kfree(meta);
- out_unmap:
+ if (meta)
+ ret = nvme_finish_user_metadata(req, meta_buffer, meta,
+ meta_len, ret);
if (bio)
blk_rq_unmap_user(bio);
- out:
blk_mq_free_request(req);
return ret;
}
-
static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
{
struct nvme_user_io io;
@@ -296,6 +332,139 @@ static int nvme_user_cmd64(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
return status;
}
+struct nvme_uring_data {
+ __u64 metadata;
+ __u64 addr;
+ __u32 data_len;
+ __u32 metadata_len;
+ __u32 timeout_ms;
+};
+
+/*
+ * This overlays struct io_uring_cmd pdu.
+ * Expect build errors if this grows larger than that.
+ */
+struct nvme_uring_cmd_pdu {
+ union {
+ struct bio *bio;
+ struct request *req;
+ };
+ void *meta; /* kernel-resident buffer */
+ void __user *meta_buffer;
+ u32 meta_len;
+};
+
+static inline struct nvme_uring_cmd_pdu *nvme_uring_cmd_pdu(
+ struct io_uring_cmd *ioucmd)
+{
+ return (struct nvme_uring_cmd_pdu *)&ioucmd->pdu;
+}
+
+static void nvme_uring_task_cb(struct io_uring_cmd *ioucmd)
+{
+ struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd);
+ struct request *req = pdu->req;
+ struct bio *bio = req->bio;
+ int status;
+ u64 result;
+
+ if (nvme_req(req)->flags & NVME_REQ_CANCELLED)
+ status = -EINTR;
+ else
+ status = nvme_req(req)->status;
+
+ result = le64_to_cpu(nvme_req(req)->result.u64);
+
+ if (pdu->meta)
+ status = nvme_finish_user_metadata(req, pdu->meta_buffer,
+ pdu->meta, pdu->meta_len, status);
+ if (bio)
+ blk_rq_unmap_user(bio);
+ blk_mq_free_request(req);
+
+ io_uring_cmd_done(ioucmd, status, result);
+}
+
+static void nvme_uring_cmd_end_io(struct request *req, blk_status_t err)
+{
+ struct io_uring_cmd *ioucmd = req->end_io_data;
+ struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd);
+ /* extract bio before reusing the same field for request */
+ struct bio *bio = pdu->bio;
+
+ pdu->req = req;
+ req->bio = bio;
+ /* this takes care of moving rest of completion-work to task context */
+ io_uring_cmd_complete_in_task(ioucmd, nvme_uring_task_cb);
+}
+
+static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
+ struct io_uring_cmd *ioucmd, unsigned int issue_flags, bool vec)
+{
+ struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd);
+ const struct nvme_uring_cmd *cmd = ioucmd->cmd;
+ struct request_queue *q = ns ? ns->queue : ctrl->admin_q;
+ struct nvme_uring_data d;
+ struct nvme_command c;
+ struct request *req;
+ unsigned int rq_flags = 0;
+ blk_mq_req_flags_t blk_flags = 0;
+ void *meta = NULL;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EACCES;
+
+ c.common.opcode = READ_ONCE(cmd->opcode);
+ c.common.flags = READ_ONCE(cmd->flags);
+ if (c.common.flags)
+ return -EINVAL;
+
+ c.common.command_id = 0;
+ c.common.nsid = cpu_to_le32(cmd->nsid);
+ if (!nvme_validate_passthru_nsid(ctrl, ns, le32_to_cpu(c.common.nsid)))
+ return -EINVAL;
+
+ c.common.cdw2[0] = cpu_to_le32(READ_ONCE(cmd->cdw2));
+ c.common.cdw2[1] = cpu_to_le32(READ_ONCE(cmd->cdw3));
+ c.common.metadata = 0;
+ c.common.dptr.prp1 = c.common.dptr.prp2 = 0;
+ c.common.cdw10 = cpu_to_le32(READ_ONCE(cmd->cdw10));
+ c.common.cdw11 = cpu_to_le32(READ_ONCE(cmd->cdw11));
+ c.common.cdw12 = cpu_to_le32(READ_ONCE(cmd->cdw12));
+ c.common.cdw13 = cpu_to_le32(READ_ONCE(cmd->cdw13));
+ c.common.cdw14 = cpu_to_le32(READ_ONCE(cmd->cdw14));
+ c.common.cdw15 = cpu_to_le32(READ_ONCE(cmd->cdw15));
+
+ d.metadata = READ_ONCE(cmd->metadata);
+ d.addr = READ_ONCE(cmd->addr);
+ d.data_len = READ_ONCE(cmd->data_len);
+ d.metadata_len = READ_ONCE(cmd->metadata_len);
+ d.timeout_ms = READ_ONCE(cmd->timeout_ms);
+
+ if (issue_flags & IO_URING_F_NONBLOCK) {
+ rq_flags = REQ_NOWAIT;
+ blk_flags = BLK_MQ_REQ_NOWAIT;
+ }
+
+ req = nvme_alloc_user_request(q, &c, nvme_to_user_ptr(d.addr),
+ d.data_len, nvme_to_user_ptr(d.metadata),
+ d.metadata_len, 0, &meta, d.timeout_ms ?
+ msecs_to_jiffies(d.timeout_ms) : 0, vec, rq_flags,
+ blk_flags);
+ if (IS_ERR(req))
+ return PTR_ERR(req);
+ req->end_io_data = ioucmd;
+
+ /* to free bio on completion, as req->bio will be null at that time */
+ pdu->bio = req->bio;
+ pdu->meta = meta;
+ pdu->meta_buffer = nvme_to_user_ptr(d.metadata);
+ pdu->meta_len = d.metadata_len;
+
+ blk_execute_rq_nowait(req, 0, nvme_uring_cmd_end_io);
+ return -EIOCBQUEUED;
+}
+
static bool is_ctrl_ioctl(unsigned int cmd)
{
if (cmd == NVME_IOCTL_ADMIN_CMD || cmd == NVME_IOCTL_ADMIN64_CMD)
@@ -387,6 +556,53 @@ long nvme_ns_chr_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
return __nvme_ioctl(ns, cmd, (void __user *)arg);
}
+static int nvme_uring_cmd_checks(unsigned int issue_flags)
+{
+ /* IOPOLL not supported yet */
+ if (issue_flags & IO_URING_F_IOPOLL)
+ return -EOPNOTSUPP;
+
+ /* NVMe passthrough requires big SQE/CQE support */
+ if ((issue_flags & (IO_URING_F_SQE128|IO_URING_F_CQE32)) !=
+ (IO_URING_F_SQE128|IO_URING_F_CQE32))
+ return -EOPNOTSUPP;
+ return 0;
+}
+
+static int nvme_ns_uring_cmd(struct nvme_ns *ns, struct io_uring_cmd *ioucmd,
+ unsigned int issue_flags)
+{
+ struct nvme_ctrl *ctrl = ns->ctrl;
+ int ret;
+
+ BUILD_BUG_ON(sizeof(struct nvme_uring_cmd_pdu) > sizeof(ioucmd->pdu));
+
+ ret = nvme_uring_cmd_checks(issue_flags);
+ if (ret)
+ return ret;
+
+ switch (ioucmd->cmd_op) {
+ case NVME_URING_CMD_IO:
+ ret = nvme_uring_cmd_io(ctrl, ns, ioucmd, issue_flags, false);
+ break;
+ case NVME_URING_CMD_IO_VEC:
+ ret = nvme_uring_cmd_io(ctrl, ns, ioucmd, issue_flags, true);
+ break;
+ default:
+ ret = -ENOTTY;
+ }
+
+ return ret;
+}
+
+int nvme_ns_chr_uring_cmd(struct io_uring_cmd *ioucmd, unsigned int issue_flags)
+{
+ struct nvme_ns *ns = container_of(file_inode(ioucmd->file)->i_cdev,
+ struct nvme_ns, cdev);
+
+ return nvme_ns_uring_cmd(ns, ioucmd, issue_flags);
+}
+
#ifdef CONFIG_NVME_MULTIPATH
static int nvme_ns_head_ctrl_ioctl(struct nvme_ns *ns, unsigned int cmd,
void __user *argp, struct nvme_ns_head *head, int srcu_idx)
@@ -453,8 +669,46 @@ out_unlock:
srcu_read_unlock(&head->srcu, srcu_idx);
return ret;
}
+
+int nvme_ns_head_chr_uring_cmd(struct io_uring_cmd *ioucmd,
+ unsigned int issue_flags)
+{
+ struct cdev *cdev = file_inode(ioucmd->file)->i_cdev;
+ struct nvme_ns_head *head = container_of(cdev, struct nvme_ns_head, cdev);
+ int srcu_idx = srcu_read_lock(&head->srcu);
+ struct nvme_ns *ns = nvme_find_path(head);
+ int ret = -EINVAL;
+
+ if (ns)
+ ret = nvme_ns_uring_cmd(ns, ioucmd, issue_flags);
+ srcu_read_unlock(&head->srcu, srcu_idx);
+ return ret;
+}
#endif /* CONFIG_NVME_MULTIPATH */
+int nvme_dev_uring_cmd(struct io_uring_cmd *ioucmd, unsigned int issue_flags)
+{
+ struct nvme_ctrl *ctrl = ioucmd->file->private_data;
+ int ret;
+
+ ret = nvme_uring_cmd_checks(issue_flags);
+ if (ret)
+ return ret;
+
+ switch (ioucmd->cmd_op) {
+ case NVME_URING_CMD_ADMIN:
+ ret = nvme_uring_cmd_io(ctrl, NULL, ioucmd, issue_flags, false);
+ break;
+ case NVME_URING_CMD_ADMIN_VEC:
+ ret = nvme_uring_cmd_io(ctrl, NULL, ioucmd, issue_flags, true);
+ break;
+ default:
+ ret = -ENOTTY;
+ }
+
+ return ret;
+}
+
static int nvme_dev_user_cmd(struct nvme_ctrl *ctrl, void __user *argp)
{
struct nvme_ns *ns;
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index d464fdf978fb..d3e2440d8abb 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -437,6 +437,7 @@ static const struct file_operations nvme_ns_head_chr_fops = {
.release = nvme_ns_head_chr_release,
.unlocked_ioctl = nvme_ns_head_chr_ioctl,
.compat_ioctl = compat_ptr_ioctl,
+ .uring_cmd = nvme_ns_head_chr_uring_cmd,
};
static int nvme_add_ns_head_cdev(struct nvme_ns_head *head)
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index a2b53ca63335..9b72b6ecf33c 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -284,6 +284,7 @@ struct nvme_ctrl {
#endif
u16 crdt[3];
u16 oncs;
+ u32 dmrsl;
u16 oacs;
u16 sqsize;
u32 max_namespaces;
@@ -782,7 +783,12 @@ long nvme_ns_head_chr_ioctl(struct file *file, unsigned int cmd,
unsigned long arg);
long nvme_dev_ioctl(struct file *file, unsigned int cmd,
unsigned long arg);
+int nvme_ns_chr_uring_cmd(struct io_uring_cmd *ioucmd,
+ unsigned int issue_flags);
+int nvme_ns_head_chr_uring_cmd(struct io_uring_cmd *ioucmd,
+ unsigned int issue_flags);
int nvme_getgeo(struct block_device *bdev, struct hd_geometry *geo);
+int nvme_dev_uring_cmd(struct io_uring_cmd *ioucmd, unsigned int issue_flags);
extern const struct attribute_group *nvme_ns_id_attr_groups[];
extern const struct pr_ops nvme_pr_ops;
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 3aacf1c0d5a5..5a98a7de0964 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -1439,6 +1439,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
nvme_init_request(abort_req, &cmd);
abort_req->end_io_data = NULL;
+ abort_req->rq_flags |= RQF_QUIET;
blk_execute_rq_nowait(abort_req, false, abort_endio);
/*
@@ -1775,6 +1776,7 @@ static int nvme_alloc_admin_tags(struct nvme_dev *dev)
dev->ctrl.admin_q = blk_mq_init_queue(&dev->admin_tagset);
if (IS_ERR(dev->ctrl.admin_q)) {
blk_mq_free_tag_set(&dev->admin_tagset);
+ dev->ctrl.admin_q = NULL;
return -ENOMEM;
}
if (!blk_get_queue(dev->ctrl.admin_q)) {
@@ -2486,6 +2488,7 @@ static int nvme_delete_queue(struct nvme_queue *nvmeq, u8 opcode)
req->end_io_data = nvmeq;
init_completion(&nvmeq->delete_done);
+ req->rq_flags |= RQF_QUIET;
blk_execute_rq_nowait(req, false, opcode == nvme_admin_delete_cq ?
nvme_del_cq_end : nvme_del_queue_end);
return 0;
@@ -2675,7 +2678,7 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
struct pci_dev *pdev = to_pci_dev(dev->dev);
mutex_lock(&dev->shutdown_lock);
- if (pci_is_enabled(pdev)) {
+ if (pci_device_is_present(pdev) && pci_is_enabled(pdev)) {
u32 csts = readl(dev->bar + NVME_REG_CSTS);
if (dev->ctrl.state == NVME_CTRL_LIVE ||
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index d9f19d901313..b87c8ae41d9b 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -2010,10 +2010,7 @@ static void nvme_rdma_complete_timed_out(struct request *rq)
struct nvme_rdma_queue *queue = req->queue;
nvme_rdma_stop_queue(queue);
- if (blk_mq_request_started(rq) && !blk_mq_request_completed(rq)) {
- nvme_req(rq)->status = NVME_SC_HOST_ABORTED_CMD;
- blk_mq_complete_request(rq);
- }
+ nvmf_complete_timed_out_request(rq);
}
static enum blk_eh_timer_return
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index ad3a2bf2f1e9..bb67538d241b 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -2318,10 +2318,7 @@ static void nvme_tcp_complete_timed_out(struct request *rq)
struct nvme_ctrl *ctrl = &req->queue->ctrl->ctrl;
nvme_tcp_stop_queue(ctrl, nvme_tcp_queue_id(req->queue));
- if (blk_mq_request_started(rq) && !blk_mq_request_completed(rq)) {
- nvme_req(rq)->status = NVME_SC_HOST_ABORTED_CMD;
- blk_mq_complete_request(rq);
- }
+ nvmf_complete_timed_out_request(rq);
}
static enum blk_eh_timer_return
diff --git a/drivers/nvme/target/io-cmd-bdev.c b/drivers/nvme/target/io-cmd-bdev.c
index d886c2c59554..27a72504d31c 100644
--- a/drivers/nvme/target/io-cmd-bdev.c
+++ b/drivers/nvme/target/io-cmd-bdev.c
@@ -360,7 +360,7 @@ static u16 nvmet_bdev_discard_range(struct nvmet_req *req,
ret = __blkdev_issue_discard(ns->bdev,
nvmet_lba_to_sect(ns, range->slba),
le32_to_cpu(range->nlb) << (ns->blksize_shift - 9),
- GFP_KERNEL, 0, bio);
+ GFP_KERNEL, bio);
if (ret && ret != -EOPNOTSUPP) {
req->error_slba = le64_to_cpu(range->slba);
return errno_to_nvme_status(req, ret);
diff --git a/drivers/nvme/target/zns.c b/drivers/nvme/target/zns.c
index e34718b09550..82b61acf7a72 100644
--- a/drivers/nvme/target/zns.c
+++ b/drivers/nvme/target/zns.c
@@ -34,8 +34,7 @@ static int validate_conv_zones_cb(struct blk_zone *z,
bool nvmet_bdev_zns_enable(struct nvmet_ns *ns)
{
- struct request_queue *q = ns->bdev->bd_disk->queue;
- u8 zasl = nvmet_zasl(queue_max_zone_append_sectors(q));
+ u8 zasl = nvmet_zasl(bdev_max_zone_append_sectors(ns->bdev));
struct gendisk *bd_disk = ns->bdev->bd_disk;
int ret;
diff --git a/drivers/pci/controller/dwc/pcie-qcom.c b/drivers/pci/controller/dwc/pcie-qcom.c
index 6ab90891801d..816028c0f6ed 100644
--- a/drivers/pci/controller/dwc/pcie-qcom.c
+++ b/drivers/pci/controller/dwc/pcie-qcom.c
@@ -1550,6 +1550,11 @@ static const struct qcom_pcie_cfg sc7280_cfg = {
.pipe_clk_need_muxing = true,
};
+static const struct qcom_pcie_cfg sc8180x_cfg = {
+ .ops = &ops_1_9_0,
+ .has_tbu_clk = true,
+};
+
static const struct dw_pcie_ops dw_pcie_ops = {
.link_up = qcom_pcie_link_up,
.start_link = qcom_pcie_start_link,
@@ -1656,7 +1661,7 @@ static const struct of_device_id qcom_pcie_match[] = {
{ .compatible = "qcom,pcie-qcs404", .data = &ipq4019_cfg },
{ .compatible = "qcom,pcie-sdm845", .data = &sdm845_cfg },
{ .compatible = "qcom,pcie-sm8250", .data = &sm8250_cfg },
- { .compatible = "qcom,pcie-sc8180x", .data = &sm8250_cfg },
+ { .compatible = "qcom,pcie-sc8180x", .data = &sc8180x_cfg },
{ .compatible = "qcom,pcie-sm8450-pcie0", .data = &sm8450_pcie0_cfg },
{ .compatible = "qcom,pcie-sm8450-pcie1", .data = &sm8450_pcie1_cfg },
{ .compatible = "qcom,pcie-sc7280", .data = &sc7280_cfg },
diff --git a/drivers/pci/controller/pci-aardvark.c b/drivers/pci/controller/pci-aardvark.c
index 09d9bf465d72..ffec82c8a523 100644
--- a/drivers/pci/controller/pci-aardvark.c
+++ b/drivers/pci/controller/pci-aardvark.c
@@ -272,7 +272,6 @@ struct advk_pcie {
u32 actions;
} wins[OB_WIN_COUNT];
u8 wins_count;
- int irq;
struct irq_domain *rp_irq_domain;
struct irq_domain *irq_domain;
struct irq_chip irq_chip;
@@ -1570,26 +1569,21 @@ static void advk_pcie_handle_int(struct advk_pcie *pcie)
}
}
-static void advk_pcie_irq_handler(struct irq_desc *desc)
+static irqreturn_t advk_pcie_irq_handler(int irq, void *arg)
{
- struct advk_pcie *pcie = irq_desc_get_handler_data(desc);
- struct irq_chip *chip = irq_desc_get_chip(desc);
- u32 val, mask, status;
+ struct advk_pcie *pcie = arg;
+ u32 status;
- chained_irq_enter(chip, desc);
+ status = advk_readl(pcie, HOST_CTRL_INT_STATUS_REG);
+ if (!(status & PCIE_IRQ_CORE_INT))
+ return IRQ_NONE;
- val = advk_readl(pcie, HOST_CTRL_INT_STATUS_REG);
- mask = advk_readl(pcie, HOST_CTRL_INT_MASK_REG);
- status = val & ((~mask) & PCIE_IRQ_ALL_MASK);
+ advk_pcie_handle_int(pcie);
- if (status & PCIE_IRQ_CORE_INT) {
- advk_pcie_handle_int(pcie);
+ /* Clear interrupt */
+ advk_writel(pcie, PCIE_IRQ_CORE_INT, HOST_CTRL_INT_STATUS_REG);
- /* Clear interrupt */
- advk_writel(pcie, PCIE_IRQ_CORE_INT, HOST_CTRL_INT_STATUS_REG);
- }
-
- chained_irq_exit(chip, desc);
+ return IRQ_HANDLED;
}
static int advk_pcie_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
@@ -1669,7 +1663,7 @@ static int advk_pcie_probe(struct platform_device *pdev)
struct advk_pcie *pcie;
struct pci_host_bridge *bridge;
struct resource_entry *entry;
- int ret;
+ int ret, irq;
bridge = devm_pci_alloc_host_bridge(dev, sizeof(struct advk_pcie));
if (!bridge)
@@ -1755,9 +1749,17 @@ static int advk_pcie_probe(struct platform_device *pdev)
if (IS_ERR(pcie->base))
return PTR_ERR(pcie->base);
- pcie->irq = platform_get_irq(pdev, 0);
- if (pcie->irq < 0)
- return pcie->irq;
+ irq = platform_get_irq(pdev, 0);
+ if (irq < 0)
+ return irq;
+
+ ret = devm_request_irq(dev, irq, advk_pcie_irq_handler,
+ IRQF_SHARED | IRQF_NO_THREAD, "advk-pcie",
+ pcie);
+ if (ret) {
+ dev_err(dev, "Failed to register interrupt\n");
+ return ret;
+ }
pcie->reset_gpio = devm_gpiod_get_from_of_node(dev, dev->of_node,
"reset-gpios", 0,
@@ -1814,15 +1816,12 @@ static int advk_pcie_probe(struct platform_device *pdev)
return ret;
}
- irq_set_chained_handler_and_data(pcie->irq, advk_pcie_irq_handler, pcie);
-
bridge->sysdata = pcie;
bridge->ops = &advk_pcie_ops;
bridge->map_irq = advk_pcie_map_irq;
ret = pci_host_probe(bridge);
if (ret < 0) {
- irq_set_chained_handler_and_data(pcie->irq, NULL, NULL);
advk_pcie_remove_rp_irq_domain(pcie);
advk_pcie_remove_msi_irq_domain(pcie);
advk_pcie_remove_irq_domain(pcie);
@@ -1871,9 +1870,6 @@ static int advk_pcie_remove(struct platform_device *pdev)
advk_writel(pcie, PCIE_ISR1_ALL_MASK, PCIE_ISR1_REG);
advk_writel(pcie, PCIE_IRQ_ALL_MASK, HOST_CTRL_INT_STATUS_REG);
- /* Remove IRQ handler */
- irq_set_chained_handler_and_data(pcie->irq, NULL, NULL);
-
/* Remove IRQ domains */
advk_pcie_remove_rp_irq_domain(pcie);
advk_pcie_remove_msi_irq_domain(pcie);
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 9ecce435fb3f..d25122fbe98a 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -2920,6 +2920,16 @@ static const struct dmi_system_id bridge_d3_blacklist[] = {
DMI_MATCH(DMI_BOARD_VENDOR, "Gigabyte Technology Co., Ltd."),
DMI_MATCH(DMI_BOARD_NAME, "X299 DESIGNARE EX-CF"),
},
+ /*
+ * Downstream device is not accessible after putting a root port
+ * into D3cold and back into D0 on Elo i2.
+ */
+ .ident = "Elo i2",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Elo Touch Solutions"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "Elo i2"),
+ DMI_MATCH(DMI_PRODUCT_VERSION, "RevB"),
+ },
},
#endif
{ }
diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c
index 9694370651fa..59d3980b8ca2 100644
--- a/drivers/perf/arm_pmu.c
+++ b/drivers/perf/arm_pmu.c
@@ -400,6 +400,9 @@ validate_group(struct perf_event *event)
if (!validate_event(event->pmu, &fake_pmu, leader))
return -EINVAL;
+ if (event == leader)
+ return 0;
+
for_each_sibling_event(sibling, leader) {
if (!validate_event(event->pmu, &fake_pmu, sibling))
return -EINVAL;
@@ -489,12 +492,7 @@ __hw_perf_event_init(struct perf_event *event)
local64_set(&hwc->period_left, hwc->sample_period);
}
- if (event->group_leader != event) {
- if (validate_group(event) != 0)
- return -EINVAL;
- }
-
- return 0;
+ return validate_group(event);
}
static int armpmu_event_init(struct perf_event *event)
diff --git a/drivers/phy/amlogic/phy-meson-g12a-usb3-pcie.c b/drivers/phy/amlogic/phy-meson-g12a-usb3-pcie.c
index 5b471ab80fe2..54d65a6f0fcc 100644
--- a/drivers/phy/amlogic/phy-meson-g12a-usb3-pcie.c
+++ b/drivers/phy/amlogic/phy-meson-g12a-usb3-pcie.c
@@ -414,19 +414,19 @@ static int phy_g12a_usb3_pcie_probe(struct platform_device *pdev)
ret = clk_prepare_enable(priv->clk_ref);
if (ret)
- goto err_disable_clk_ref;
+ return ret;
priv->reset = devm_reset_control_array_get_exclusive(dev);
- if (IS_ERR(priv->reset))
- return PTR_ERR(priv->reset);
+ if (IS_ERR(priv->reset)) {
+ ret = PTR_ERR(priv->reset);
+ goto err_disable_clk_ref;
+ }
priv->phy = devm_phy_create(dev, np, &phy_g12a_usb3_pcie_ops);
if (IS_ERR(priv->phy)) {
ret = PTR_ERR(priv->phy);
- if (ret != -EPROBE_DEFER)
- dev_err(dev, "failed to create PHY\n");
-
- return ret;
+ dev_err_probe(dev, ret, "failed to create PHY\n");
+ goto err_disable_clk_ref;
}
phy_set_drvdata(priv->phy, priv);
@@ -434,8 +434,12 @@ static int phy_g12a_usb3_pcie_probe(struct platform_device *pdev)
phy_provider = devm_of_phy_provider_register(dev,
phy_g12a_usb3_pcie_xlate);
+ if (IS_ERR(phy_provider)) {
+ ret = PTR_ERR(phy_provider);
+ goto err_disable_clk_ref;
+ }
- return PTR_ERR_OR_ZERO(phy_provider);
+ return 0;
err_disable_clk_ref:
clk_disable_unprepare(priv->clk_ref);
diff --git a/drivers/phy/motorola/phy-mapphone-mdm6600.c b/drivers/phy/motorola/phy-mapphone-mdm6600.c
index 5172971f4c36..3cd4d51c247c 100644
--- a/drivers/phy/motorola/phy-mapphone-mdm6600.c
+++ b/drivers/phy/motorola/phy-mapphone-mdm6600.c
@@ -629,7 +629,8 @@ idle:
cleanup:
if (error < 0)
phy_mdm6600_device_power_off(ddata);
-
+ pm_runtime_disable(ddata->dev);
+ pm_runtime_dont_use_autosuspend(ddata->dev);
return error;
}
diff --git a/drivers/phy/samsung/phy-exynos5250-sata.c b/drivers/phy/samsung/phy-exynos5250-sata.c
index 9ec234243f7c..595adba5fb8f 100644
--- a/drivers/phy/samsung/phy-exynos5250-sata.c
+++ b/drivers/phy/samsung/phy-exynos5250-sata.c
@@ -187,6 +187,7 @@ static int exynos_sata_phy_probe(struct platform_device *pdev)
return -EINVAL;
sata_phy->client = of_find_i2c_device_by_node(node);
+ of_node_put(node);
if (!sata_phy->client)
return -EPROBE_DEFER;
@@ -195,20 +196,21 @@ static int exynos_sata_phy_probe(struct platform_device *pdev)
sata_phy->phyclk = devm_clk_get(dev, "sata_phyctrl");
if (IS_ERR(sata_phy->phyclk)) {
dev_err(dev, "failed to get clk for PHY\n");
- return PTR_ERR(sata_phy->phyclk);
+ ret = PTR_ERR(sata_phy->phyclk);
+ goto put_dev;
}
ret = clk_prepare_enable(sata_phy->phyclk);
if (ret < 0) {
dev_err(dev, "failed to enable source clk\n");
- return ret;
+ goto put_dev;
}
sata_phy->phy = devm_phy_create(dev, NULL, &exynos_sata_phy_ops);
if (IS_ERR(sata_phy->phy)) {
- clk_disable_unprepare(sata_phy->phyclk);
dev_err(dev, "failed to create PHY\n");
- return PTR_ERR(sata_phy->phy);
+ ret = PTR_ERR(sata_phy->phy);
+ goto clk_disable;
}
phy_set_drvdata(sata_phy->phy, sata_phy);
@@ -216,11 +218,18 @@ static int exynos_sata_phy_probe(struct platform_device *pdev)
phy_provider = devm_of_phy_provider_register(dev,
of_phy_simple_xlate);
if (IS_ERR(phy_provider)) {
- clk_disable_unprepare(sata_phy->phyclk);
- return PTR_ERR(phy_provider);
+ ret = PTR_ERR(phy_provider);
+ goto clk_disable;
}
return 0;
+
+clk_disable:
+ clk_disable_unprepare(sata_phy->phyclk);
+put_dev:
+ put_device(&sata_phy->client->dev);
+
+ return ret;
}
static const struct of_device_id exynos_sata_phy_of_match[] = {
diff --git a/drivers/phy/ti/phy-am654-serdes.c b/drivers/phy/ti/phy-am654-serdes.c
index c1211c4f863c..0be727bb9f79 100644
--- a/drivers/phy/ti/phy-am654-serdes.c
+++ b/drivers/phy/ti/phy-am654-serdes.c
@@ -838,7 +838,7 @@ static int serdes_am654_probe(struct platform_device *pdev)
clk_err:
of_clk_del_provider(node);
-
+ pm_runtime_disable(dev);
return ret;
}
diff --git a/drivers/phy/ti/phy-omap-usb2.c b/drivers/phy/ti/phy-omap-usb2.c
index 3a505fe5715a..31a775877f6e 100644
--- a/drivers/phy/ti/phy-omap-usb2.c
+++ b/drivers/phy/ti/phy-omap-usb2.c
@@ -215,7 +215,7 @@ static int omap_usb2_enable_clocks(struct omap_usb *phy)
return 0;
err1:
- clk_disable(phy->wkupclk);
+ clk_disable_unprepare(phy->wkupclk);
err0:
return ret;
diff --git a/drivers/phy/ti/phy-ti-pipe3.c b/drivers/phy/ti/phy-ti-pipe3.c
index 2cbc91e535d4..f502c36f3be5 100644
--- a/drivers/phy/ti/phy-ti-pipe3.c
+++ b/drivers/phy/ti/phy-ti-pipe3.c
@@ -696,6 +696,7 @@ static int ti_pipe3_get_sysctrl(struct ti_pipe3 *phy)
}
control_pdev = of_find_device_by_node(control_node);
+ of_node_put(control_node);
if (!control_pdev) {
dev_err(dev, "Failed to get control device\n");
return -EINVAL;
diff --git a/drivers/phy/ti/phy-tusb1210.c b/drivers/phy/ti/phy-tusb1210.c
index a0cdbcadf09e..c3ab4b69ea68 100644
--- a/drivers/phy/ti/phy-tusb1210.c
+++ b/drivers/phy/ti/phy-tusb1210.c
@@ -155,7 +155,7 @@ static int tusb1210_set_mode(struct phy *phy, enum phy_mode mode, int submode)
}
#ifdef CONFIG_POWER_SUPPLY
-const char * const tusb1210_chg_det_states[] = {
+static const char * const tusb1210_chg_det_states[] = {
"CHG_DET_CONNECTING",
"CHG_DET_START_DET",
"CHG_DET_READ_DET",
@@ -537,12 +537,18 @@ static int tusb1210_probe(struct ulpi *ulpi)
tusb1210_probe_charger_detect(tusb);
tusb->phy = ulpi_phy_create(ulpi, &phy_ops);
- if (IS_ERR(tusb->phy))
- return PTR_ERR(tusb->phy);
+ if (IS_ERR(tusb->phy)) {
+ ret = PTR_ERR(tusb->phy);
+ goto err_remove_charger;
+ }
phy_set_drvdata(tusb->phy, tusb);
ulpi_set_drvdata(ulpi, tusb);
return 0;
+
+err_remove_charger:
+ tusb1210_remove_charger_detect(tusb);
+ return ret;
}
static void tusb1210_remove(struct ulpi *ulpi)
diff --git a/drivers/pinctrl/aspeed/pinctrl-aspeed-g6.c b/drivers/pinctrl/aspeed/pinctrl-aspeed-g6.c
index a3fa03bcd9a3..80838dc54b3a 100644
--- a/drivers/pinctrl/aspeed/pinctrl-aspeed-g6.c
+++ b/drivers/pinctrl/aspeed/pinctrl-aspeed-g6.c
@@ -1236,18 +1236,17 @@ FUNC_GROUP_DECL(SALT8, AA12);
FUNC_GROUP_DECL(WDTRST4, AA12);
#define AE12 196
-SIG_EXPR_LIST_DECL_SEMG(AE12, FWSPIDQ2, FWQSPID, FWSPID,
- SIG_DESC_SET(SCU438, 4));
+SIG_EXPR_LIST_DECL_SESG(AE12, FWSPIQ2, FWQSPI, SIG_DESC_SET(SCU438, 4));
SIG_EXPR_LIST_DECL_SESG(AE12, GPIOY4, GPIOY4);
-PIN_DECL_(AE12, SIG_EXPR_LIST_PTR(AE12, FWSPIDQ2),
+PIN_DECL_(AE12, SIG_EXPR_LIST_PTR(AE12, FWSPIQ2),
SIG_EXPR_LIST_PTR(AE12, GPIOY4));
#define AF12 197
-SIG_EXPR_LIST_DECL_SEMG(AF12, FWSPIDQ3, FWQSPID, FWSPID,
- SIG_DESC_SET(SCU438, 5));
+SIG_EXPR_LIST_DECL_SESG(AF12, FWSPIQ3, FWQSPI, SIG_DESC_SET(SCU438, 5));
SIG_EXPR_LIST_DECL_SESG(AF12, GPIOY5, GPIOY5);
-PIN_DECL_(AF12, SIG_EXPR_LIST_PTR(AF12, FWSPIDQ3),
+PIN_DECL_(AF12, SIG_EXPR_LIST_PTR(AF12, FWSPIQ3),
SIG_EXPR_LIST_PTR(AF12, GPIOY5));
+FUNC_GROUP_DECL(FWQSPI, AE12, AF12);
#define AC12 198
SSSF_PIN_DECL(AC12, GPIOY6, FWSPIABR, SIG_DESC_SET(SCU438, 6));
@@ -1520,9 +1519,8 @@ SIG_EXPR_LIST_DECL_SEMG(Y4, EMMCDAT7, EMMCG8, EMMC, SIG_DESC_SET(SCU404, 3));
PIN_DECL_3(Y4, GPIO18E3, FWSPIDMISO, VBMISO, EMMCDAT7);
GROUP_DECL(FWSPID, Y1, Y2, Y3, Y4);
-GROUP_DECL(FWQSPID, Y1, Y2, Y3, Y4, AE12, AF12);
GROUP_DECL(EMMCG8, AB4, AA4, AC4, AA5, Y5, AB5, AB6, AC5, Y1, Y2, Y3, Y4);
-FUNC_DECL_2(FWSPID, FWSPID, FWQSPID);
+FUNC_DECL_1(FWSPID, FWSPID);
FUNC_GROUP_DECL(VB, Y1, Y2, Y3, Y4);
FUNC_DECL_3(EMMC, EMMCG1, EMMCG4, EMMCG8);
/*
@@ -1918,7 +1916,7 @@ static const struct aspeed_pin_group aspeed_g6_groups[] = {
ASPEED_PINCTRL_GROUP(FSI2),
ASPEED_PINCTRL_GROUP(FWSPIABR),
ASPEED_PINCTRL_GROUP(FWSPID),
- ASPEED_PINCTRL_GROUP(FWQSPID),
+ ASPEED_PINCTRL_GROUP(FWQSPI),
ASPEED_PINCTRL_GROUP(FWSPIWP),
ASPEED_PINCTRL_GROUP(GPIT0),
ASPEED_PINCTRL_GROUP(GPIT1),
@@ -2160,6 +2158,7 @@ static const struct aspeed_pin_function aspeed_g6_functions[] = {
ASPEED_PINCTRL_FUNC(FSI2),
ASPEED_PINCTRL_FUNC(FWSPIABR),
ASPEED_PINCTRL_FUNC(FWSPID),
+ ASPEED_PINCTRL_FUNC(FWQSPI),
ASPEED_PINCTRL_FUNC(FWSPIWP),
ASPEED_PINCTRL_FUNC(GPIT0),
ASPEED_PINCTRL_FUNC(GPIT1),
diff --git a/drivers/pinctrl/intel/pinctrl-alderlake.c b/drivers/pinctrl/intel/pinctrl-alderlake.c
index 32ba50efbceb..62dbd1e67513 100644
--- a/drivers/pinctrl/intel/pinctrl-alderlake.c
+++ b/drivers/pinctrl/intel/pinctrl-alderlake.c
@@ -14,11 +14,17 @@
#include "pinctrl-intel.h"
-#define ADL_PAD_OWN 0x0a0
-#define ADL_PADCFGLOCK 0x110
-#define ADL_HOSTSW_OWN 0x150
-#define ADL_GPI_IS 0x200
-#define ADL_GPI_IE 0x220
+#define ADL_N_PAD_OWN 0x020
+#define ADL_N_PADCFGLOCK 0x080
+#define ADL_N_HOSTSW_OWN 0x0b0
+#define ADL_N_GPI_IS 0x100
+#define ADL_N_GPI_IE 0x120
+
+#define ADL_S_PAD_OWN 0x0a0
+#define ADL_S_PADCFGLOCK 0x110
+#define ADL_S_HOSTSW_OWN 0x150
+#define ADL_S_GPI_IS 0x200
+#define ADL_S_GPI_IE 0x220
#define ADL_GPP(r, s, e, g) \
{ \
@@ -28,14 +34,28 @@
.gpio_base = (g), \
}
-#define ADL_COMMUNITY(b, s, e, g) \
+#define ADL_N_COMMUNITY(b, s, e, g) \
+ { \
+ .barno = (b), \
+ .padown_offset = ADL_N_PAD_OWN, \
+ .padcfglock_offset = ADL_N_PADCFGLOCK, \
+ .hostown_offset = ADL_N_HOSTSW_OWN, \
+ .is_offset = ADL_N_GPI_IS, \
+ .ie_offset = ADL_N_GPI_IE, \
+ .pin_base = (s), \
+ .npins = ((e) - (s) + 1), \
+ .gpps = (g), \
+ .ngpps = ARRAY_SIZE(g), \
+ }
+
+#define ADL_S_COMMUNITY(b, s, e, g) \
{ \
.barno = (b), \
- .padown_offset = ADL_PAD_OWN, \
- .padcfglock_offset = ADL_PADCFGLOCK, \
- .hostown_offset = ADL_HOSTSW_OWN, \
- .is_offset = ADL_GPI_IS, \
- .ie_offset = ADL_GPI_IE, \
+ .padown_offset = ADL_S_PAD_OWN, \
+ .padcfglock_offset = ADL_S_PADCFGLOCK, \
+ .hostown_offset = ADL_S_HOSTSW_OWN, \
+ .is_offset = ADL_S_GPI_IS, \
+ .ie_offset = ADL_S_GPI_IE, \
.pin_base = (s), \
.npins = ((e) - (s) + 1), \
.gpps = (g), \
@@ -342,10 +362,10 @@ static const struct intel_padgroup adln_community5_gpps[] = {
};
static const struct intel_community adln_communities[] = {
- ADL_COMMUNITY(0, 0, 66, adln_community0_gpps),
- ADL_COMMUNITY(1, 67, 168, adln_community1_gpps),
- ADL_COMMUNITY(2, 169, 248, adln_community4_gpps),
- ADL_COMMUNITY(3, 249, 256, adln_community5_gpps),
+ ADL_N_COMMUNITY(0, 0, 66, adln_community0_gpps),
+ ADL_N_COMMUNITY(1, 67, 168, adln_community1_gpps),
+ ADL_N_COMMUNITY(2, 169, 248, adln_community4_gpps),
+ ADL_N_COMMUNITY(3, 249, 256, adln_community5_gpps),
};
static const struct intel_pinctrl_soc_data adln_soc_data = {
@@ -713,11 +733,11 @@ static const struct intel_padgroup adls_community5_gpps[] = {
};
static const struct intel_community adls_communities[] = {
- ADL_COMMUNITY(0, 0, 94, adls_community0_gpps),
- ADL_COMMUNITY(1, 95, 150, adls_community1_gpps),
- ADL_COMMUNITY(2, 151, 199, adls_community3_gpps),
- ADL_COMMUNITY(3, 200, 269, adls_community4_gpps),
- ADL_COMMUNITY(4, 270, 303, adls_community5_gpps),
+ ADL_S_COMMUNITY(0, 0, 94, adls_community0_gpps),
+ ADL_S_COMMUNITY(1, 95, 150, adls_community1_gpps),
+ ADL_S_COMMUNITY(2, 151, 199, adls_community3_gpps),
+ ADL_S_COMMUNITY(3, 200, 269, adls_community4_gpps),
+ ADL_S_COMMUNITY(4, 270, 303, adls_community5_gpps),
};
static const struct intel_pinctrl_soc_data adls_soc_data = {
diff --git a/drivers/pinctrl/mediatek/Kconfig b/drivers/pinctrl/mediatek/Kconfig
index 8dca1ef04965..40accd110c3d 100644
--- a/drivers/pinctrl/mediatek/Kconfig
+++ b/drivers/pinctrl/mediatek/Kconfig
@@ -30,6 +30,7 @@ config PINCTRL_MTK_MOORE
select GENERIC_PINMUX_FUNCTIONS
select GPIOLIB
select OF_GPIO
+ select EINT_MTK
select PINCTRL_MTK_V2
config PINCTRL_MTK_PARIS
diff --git a/drivers/pinctrl/mediatek/pinctrl-mt8365.c b/drivers/pinctrl/mediatek/pinctrl-mt8365.c
index 727c65221aef..57f37a294063 100644
--- a/drivers/pinctrl/mediatek/pinctrl-mt8365.c
+++ b/drivers/pinctrl/mediatek/pinctrl-mt8365.c
@@ -259,7 +259,7 @@ static const struct mtk_pin_ies_smt_set mt8365_ies_set[] = {
MTK_PIN_IES_SMT_SPEC(104, 104, 0x420, 13),
MTK_PIN_IES_SMT_SPEC(105, 109, 0x420, 14),
MTK_PIN_IES_SMT_SPEC(110, 113, 0x420, 15),
- MTK_PIN_IES_SMT_SPEC(114, 112, 0x420, 16),
+ MTK_PIN_IES_SMT_SPEC(114, 116, 0x420, 16),
MTK_PIN_IES_SMT_SPEC(117, 119, 0x420, 17),
MTK_PIN_IES_SMT_SPEC(120, 122, 0x420, 18),
MTK_PIN_IES_SMT_SPEC(123, 125, 0x420, 19),
diff --git a/drivers/pinctrl/pinctrl-ocelot.c b/drivers/pinctrl/pinctrl-ocelot.c
index 003fb0e34153..6a956ee94494 100644
--- a/drivers/pinctrl/pinctrl-ocelot.c
+++ b/drivers/pinctrl/pinctrl-ocelot.c
@@ -129,6 +129,7 @@ enum {
FUNC_PTP1,
FUNC_PTP2,
FUNC_PTP3,
+ FUNC_PTPSYNC_0,
FUNC_PTPSYNC_1,
FUNC_PTPSYNC_2,
FUNC_PTPSYNC_3,
@@ -252,6 +253,7 @@ static const char *const ocelot_function_names[] = {
[FUNC_PTP1] = "ptp1",
[FUNC_PTP2] = "ptp2",
[FUNC_PTP3] = "ptp3",
+ [FUNC_PTPSYNC_0] = "ptpsync_0",
[FUNC_PTPSYNC_1] = "ptpsync_1",
[FUNC_PTPSYNC_2] = "ptpsync_2",
[FUNC_PTPSYNC_3] = "ptpsync_3",
@@ -983,7 +985,7 @@ LAN966X_P(31, GPIO, FC3_c, CAN1, NONE, OB_TRG, RECO_b, NON
LAN966X_P(32, GPIO, FC3_c, NONE, SGPIO_a, NONE, MIIM_Sa, NONE, R);
LAN966X_P(33, GPIO, FC1_b, NONE, SGPIO_a, NONE, MIIM_Sa, MIIM_b, R);
LAN966X_P(34, GPIO, FC1_b, NONE, SGPIO_a, NONE, MIIM_Sa, MIIM_b, R);
-LAN966X_P(35, GPIO, FC1_b, NONE, SGPIO_a, CAN0_b, NONE, NONE, R);
+LAN966X_P(35, GPIO, FC1_b, PTPSYNC_0, SGPIO_a, CAN0_b, NONE, NONE, R);
LAN966X_P(36, GPIO, NONE, PTPSYNC_1, NONE, CAN0_b, NONE, NONE, R);
LAN966X_P(37, GPIO, FC_SHRD0, PTPSYNC_2, TWI_SLC_GATE_AD, NONE, NONE, NONE, R);
LAN966X_P(38, GPIO, NONE, PTPSYNC_3, NONE, NONE, NONE, NONE, R);
diff --git a/drivers/pinctrl/pinctrl-pistachio.c b/drivers/pinctrl/pinctrl-pistachio.c
index 8d271c6b0ca4..5de691c630b4 100644
--- a/drivers/pinctrl/pinctrl-pistachio.c
+++ b/drivers/pinctrl/pinctrl-pistachio.c
@@ -1374,10 +1374,10 @@ static int pistachio_gpio_register(struct pistachio_pinctrl *pctl)
}
irq = irq_of_parse_and_map(child, 0);
- if (irq < 0) {
- dev_err(pctl->dev, "No IRQ for bank %u: %d\n", i, irq);
+ if (!irq) {
+ dev_err(pctl->dev, "No IRQ for bank %u\n", i);
of_node_put(child);
- ret = irq;
+ ret = -EINVAL;
goto err;
}
diff --git a/drivers/pinctrl/pinctrl-rockchip.c b/drivers/pinctrl/pinctrl-rockchip.c
index a1b598b86aa9..2cb79e649fcf 100644
--- a/drivers/pinctrl/pinctrl-rockchip.c
+++ b/drivers/pinctrl/pinctrl-rockchip.c
@@ -457,95 +457,110 @@ static struct rockchip_mux_recalced_data rk3128_mux_recalced_data[] = {
static struct rockchip_mux_recalced_data rk3308_mux_recalced_data[] = {
{
+ /* gpio1b6_sel */
.num = 1,
.pin = 14,
.reg = 0x28,
.bit = 12,
.mask = 0xf
}, {
+ /* gpio1b7_sel */
.num = 1,
.pin = 15,
.reg = 0x2c,
.bit = 0,
.mask = 0x3
}, {
+ /* gpio1c2_sel */
.num = 1,
.pin = 18,
.reg = 0x30,
.bit = 4,
.mask = 0xf
}, {
+ /* gpio1c3_sel */
.num = 1,
.pin = 19,
.reg = 0x30,
.bit = 8,
.mask = 0xf
}, {
+ /* gpio1c4_sel */
.num = 1,
.pin = 20,
.reg = 0x30,
.bit = 12,
.mask = 0xf
}, {
+ /* gpio1c5_sel */
.num = 1,
.pin = 21,
.reg = 0x34,
.bit = 0,
.mask = 0xf
}, {
+ /* gpio1c6_sel */
.num = 1,
.pin = 22,
.reg = 0x34,
.bit = 4,
.mask = 0xf
}, {
+ /* gpio1c7_sel */
.num = 1,
.pin = 23,
.reg = 0x34,
.bit = 8,
.mask = 0xf
}, {
- .num = 3,
- .pin = 12,
- .reg = 0x68,
- .bit = 8,
- .mask = 0xf
- }, {
- .num = 3,
- .pin = 13,
- .reg = 0x68,
- .bit = 12,
- .mask = 0xf
- }, {
+ /* gpio2a2_sel */
.num = 2,
.pin = 2,
- .reg = 0x608,
- .bit = 0,
- .mask = 0x7
+ .reg = 0x40,
+ .bit = 4,
+ .mask = 0x3
}, {
+ /* gpio2a3_sel */
.num = 2,
.pin = 3,
- .reg = 0x608,
- .bit = 4,
- .mask = 0x7
+ .reg = 0x40,
+ .bit = 6,
+ .mask = 0x3
}, {
+ /* gpio2c0_sel */
.num = 2,
.pin = 16,
- .reg = 0x610,
- .bit = 8,
- .mask = 0x7
+ .reg = 0x50,
+ .bit = 0,
+ .mask = 0x3
}, {
+ /* gpio3b2_sel */
.num = 3,
.pin = 10,
- .reg = 0x610,
- .bit = 0,
- .mask = 0x7
+ .reg = 0x68,
+ .bit = 4,
+ .mask = 0x3
}, {
+ /* gpio3b3_sel */
.num = 3,
.pin = 11,
- .reg = 0x610,
- .bit = 4,
- .mask = 0x7
+ .reg = 0x68,
+ .bit = 6,
+ .mask = 0x3
+ }, {
+ /* gpio3b4_sel */
+ .num = 3,
+ .pin = 12,
+ .reg = 0x68,
+ .bit = 8,
+ .mask = 0xf
+ }, {
+ /* gpio3b5_sel */
+ .num = 3,
+ .pin = 13,
+ .reg = 0x68,
+ .bit = 12,
+ .mask = 0xf
},
};
diff --git a/drivers/pinctrl/qcom/pinctrl-sm6350.c b/drivers/pinctrl/qcom/pinctrl-sm6350.c
index 4d37b817b232..a91a86628f2f 100644
--- a/drivers/pinctrl/qcom/pinctrl-sm6350.c
+++ b/drivers/pinctrl/qcom/pinctrl-sm6350.c
@@ -264,14 +264,14 @@ static const struct pinctrl_pin_desc sm6350_pins[] = {
PINCTRL_PIN(153, "GPIO_153"),
PINCTRL_PIN(154, "GPIO_154"),
PINCTRL_PIN(155, "GPIO_155"),
- PINCTRL_PIN(156, "SDC1_RCLK"),
- PINCTRL_PIN(157, "SDC1_CLK"),
- PINCTRL_PIN(158, "SDC1_CMD"),
- PINCTRL_PIN(159, "SDC1_DATA"),
- PINCTRL_PIN(160, "SDC2_CLK"),
- PINCTRL_PIN(161, "SDC2_CMD"),
- PINCTRL_PIN(162, "SDC2_DATA"),
- PINCTRL_PIN(163, "UFS_RESET"),
+ PINCTRL_PIN(156, "UFS_RESET"),
+ PINCTRL_PIN(157, "SDC1_RCLK"),
+ PINCTRL_PIN(158, "SDC1_CLK"),
+ PINCTRL_PIN(159, "SDC1_CMD"),
+ PINCTRL_PIN(160, "SDC1_DATA"),
+ PINCTRL_PIN(161, "SDC2_CLK"),
+ PINCTRL_PIN(162, "SDC2_CMD"),
+ PINCTRL_PIN(163, "SDC2_DATA"),
};
#define DECLARE_MSM_GPIO_PINS(pin) \
diff --git a/drivers/pinctrl/samsung/Kconfig b/drivers/pinctrl/samsung/Kconfig
index dfd805e76862..7b0576f71376 100644
--- a/drivers/pinctrl/samsung/Kconfig
+++ b/drivers/pinctrl/samsung/Kconfig
@@ -4,14 +4,13 @@
#
config PINCTRL_SAMSUNG
bool
- depends on OF_GPIO
+ select GPIOLIB
select PINMUX
select PINCONF
config PINCTRL_EXYNOS
bool "Pinctrl common driver part for Samsung Exynos SoCs"
- depends on OF_GPIO
- depends on ARCH_EXYNOS || ARCH_S5PV210 || COMPILE_TEST
+ depends on ARCH_EXYNOS || ARCH_S5PV210 || (COMPILE_TEST && OF)
select PINCTRL_SAMSUNG
select PINCTRL_EXYNOS_ARM if ARM && (ARCH_EXYNOS || ARCH_S5PV210)
select PINCTRL_EXYNOS_ARM64 if ARM64 && ARCH_EXYNOS
@@ -26,12 +25,10 @@ config PINCTRL_EXYNOS_ARM64
config PINCTRL_S3C24XX
bool "Samsung S3C24XX SoC pinctrl driver"
- depends on OF_GPIO
- depends on ARCH_S3C24XX || COMPILE_TEST
+ depends on ARCH_S3C24XX || (COMPILE_TEST && OF)
select PINCTRL_SAMSUNG
config PINCTRL_S3C64XX
bool "Samsung S3C64XX SoC pinctrl driver"
- depends on OF_GPIO
- depends on ARCH_S3C64XX || COMPILE_TEST
+ depends on ARCH_S3C64XX || (COMPILE_TEST && OF)
select PINCTRL_SAMSUNG
diff --git a/drivers/pinctrl/samsung/pinctrl-exynos-arm64.c b/drivers/pinctrl/samsung/pinctrl-exynos-arm64.c
index d291819c2f77..cb965cf93705 100644
--- a/drivers/pinctrl/samsung/pinctrl-exynos-arm64.c
+++ b/drivers/pinctrl/samsung/pinctrl-exynos-arm64.c
@@ -770,7 +770,7 @@ static const struct samsung_pin_bank_data fsd_pin_banks2[] __initconst = {
EXYNOS850_PIN_BANK_EINTN(3, 0x00, "gpq0"),
};
-const struct samsung_pin_ctrl fsd_pin_ctrl[] __initconst = {
+static const struct samsung_pin_ctrl fsd_pin_ctrl[] __initconst = {
{
/* pin-controller instance 0 FSYS0 data */
.pin_banks = fsd_pin_banks0,
diff --git a/drivers/pinctrl/stm32/pinctrl-stm32.c b/drivers/pinctrl/stm32/pinctrl-stm32.c
index 9ed764731570..f7c9459f6628 100644
--- a/drivers/pinctrl/stm32/pinctrl-stm32.c
+++ b/drivers/pinctrl/stm32/pinctrl-stm32.c
@@ -225,6 +225,13 @@ static void stm32_gpio_free(struct gpio_chip *chip, unsigned offset)
pinctrl_gpio_free(chip->base + offset);
}
+static int stm32_gpio_get_noclk(struct gpio_chip *chip, unsigned int offset)
+{
+ struct stm32_gpio_bank *bank = gpiochip_get_data(chip);
+
+ return !!(readl_relaxed(bank->base + STM32_GPIO_IDR) & BIT(offset));
+}
+
static int stm32_gpio_get(struct gpio_chip *chip, unsigned offset)
{
struct stm32_gpio_bank *bank = gpiochip_get_data(chip);
@@ -232,7 +239,7 @@ static int stm32_gpio_get(struct gpio_chip *chip, unsigned offset)
clk_enable(bank->clk);
- ret = !!(readl_relaxed(bank->base + STM32_GPIO_IDR) & BIT(offset));
+ ret = stm32_gpio_get_noclk(chip, offset);
clk_disable(bank->clk);
@@ -311,8 +318,12 @@ static void stm32_gpio_irq_trigger(struct irq_data *d)
struct stm32_gpio_bank *bank = d->domain->host_data;
int level;
+ /* Do not access the GPIO if this is not LEVEL triggered IRQ. */
+ if (!(bank->irq_type[d->hwirq] & IRQ_TYPE_LEVEL_MASK))
+ return;
+
/* If level interrupt type then retrig */
- level = stm32_gpio_get(&bank->gpio_chip, d->hwirq);
+ level = stm32_gpio_get_noclk(&bank->gpio_chip, d->hwirq);
if ((level == 0 && bank->irq_type[d->hwirq] == IRQ_TYPE_LEVEL_LOW) ||
(level == 1 && bank->irq_type[d->hwirq] == IRQ_TYPE_LEVEL_HIGH))
irq_chip_retrigger_hierarchy(d);
@@ -354,6 +365,7 @@ static int stm32_gpio_irq_request_resources(struct irq_data *irq_data)
{
struct stm32_gpio_bank *bank = irq_data->domain->host_data;
struct stm32_pinctrl *pctl = dev_get_drvdata(bank->gpio_chip.parent);
+ unsigned long flags;
int ret;
ret = stm32_gpio_direction_input(&bank->gpio_chip, irq_data->hwirq);
@@ -367,6 +379,10 @@ static int stm32_gpio_irq_request_resources(struct irq_data *irq_data)
return ret;
}
+ flags = irqd_get_trigger_type(irq_data);
+ if (flags & IRQ_TYPE_LEVEL_MASK)
+ clk_enable(bank->clk);
+
return 0;
}
@@ -374,6 +390,9 @@ static void stm32_gpio_irq_release_resources(struct irq_data *irq_data)
{
struct stm32_gpio_bank *bank = irq_data->domain->host_data;
+ if (bank->irq_type[irq_data->hwirq] & IRQ_TYPE_LEVEL_MASK)
+ clk_disable(bank->clk);
+
gpiochip_unlock_as_irq(&bank->gpio_chip, irq_data->hwirq);
}
diff --git a/drivers/pinctrl/sunplus/sppctl_sp7021.c b/drivers/pinctrl/sunplus/sppctl_sp7021.c
index 9748345b9298..cd657760a644 100644
--- a/drivers/pinctrl/sunplus/sppctl_sp7021.c
+++ b/drivers/pinctrl/sunplus/sppctl_sp7021.c
@@ -419,7 +419,15 @@ static const struct sppctl_grp sp7021grps_prbp[] = {
EGRP("PROBE_PORT2", 2, pins_prp2),
};
+/*
+ * Due to compatible reason, the first valid item should start at the third
+ * position of the array. Please keep the first two items of the table
+ * no use (dummy).
+ */
const struct sppctl_func sppctl_list_funcs[] = {
+ FNCN("", pinmux_type_fpmx, 0x00, 0, 0),
+ FNCN("", pinmux_type_fpmx, 0x00, 0, 0),
+
FNCN("L2SW_CLK_OUT", pinmux_type_fpmx, 0x00, 0, 7),
FNCN("L2SW_MAC_SMI_MDC", pinmux_type_fpmx, 0x00, 8, 7),
FNCN("L2SW_LED_FLASH0", pinmux_type_fpmx, 0x01, 0, 7),
diff --git a/drivers/pinctrl/sunxi/pinctrl-suniv-f1c100s.c b/drivers/pinctrl/sunxi/pinctrl-suniv-f1c100s.c
index 2801ca706273..b8fc88a23cf4 100644
--- a/drivers/pinctrl/sunxi/pinctrl-suniv-f1c100s.c
+++ b/drivers/pinctrl/sunxi/pinctrl-suniv-f1c100s.c
@@ -51,7 +51,7 @@ static const struct sunxi_desc_pin suniv_f1c100s_pins[] = {
SUNXI_FUNCTION(0x3, "pwm0"), /* PWM0 */
SUNXI_FUNCTION(0x4, "i2s"), /* IN */
SUNXI_FUNCTION(0x5, "uart1"), /* RX */
- SUNXI_FUNCTION(0x6, "spi1")), /* MOSI */
+ SUNXI_FUNCTION(0x6, "spi1")), /* CLK */
SUNXI_PIN(SUNXI_PINCTRL_PIN(A, 3),
SUNXI_FUNCTION(0x0, "gpio_in"),
SUNXI_FUNCTION(0x1, "gpio_out"),
@@ -204,7 +204,7 @@ static const struct sunxi_desc_pin suniv_f1c100s_pins[] = {
SUNXI_FUNCTION(0x0, "gpio_in"),
SUNXI_FUNCTION(0x1, "gpio_out"),
SUNXI_FUNCTION(0x2, "lcd"), /* D20 */
- SUNXI_FUNCTION(0x3, "lvds1"), /* RX */
+ SUNXI_FUNCTION(0x3, "uart2"), /* RX */
SUNXI_FUNCTION_IRQ_BANK(0x6, 0, 14)),
SUNXI_PIN(SUNXI_PINCTRL_PIN(D, 15),
SUNXI_FUNCTION(0x0, "gpio_in"),
diff --git a/drivers/platform/surface/aggregator/core.c b/drivers/platform/surface/aggregator/core.c
index d384d36098c2..a62c5dfe42d6 100644
--- a/drivers/platform/surface/aggregator/core.c
+++ b/drivers/platform/surface/aggregator/core.c
@@ -817,7 +817,7 @@ err_cpkg:
err_bus:
return status;
}
-module_init(ssam_core_init);
+subsys_initcall(ssam_core_init);
static void __exit ssam_core_exit(void)
{
diff --git a/drivers/platform/surface/surface_gpe.c b/drivers/platform/surface/surface_gpe.c
index c1775db29efb..ec66fde28e75 100644
--- a/drivers/platform/surface/surface_gpe.c
+++ b/drivers/platform/surface/surface_gpe.c
@@ -100,6 +100,14 @@ static const struct dmi_system_id dmi_lid_device_table[] = {
.driver_data = (void *)lid_device_props_l4D,
},
{
+ .ident = "Surface Pro 8",
+ .matches = {
+ DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Microsoft Corporation"),
+ DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Surface Pro 8"),
+ },
+ .driver_data = (void *)lid_device_props_l4B,
+ },
+ {
.ident = "Surface Book 1",
.matches = {
DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Microsoft Corporation"),
diff --git a/drivers/platform/x86/asus-wmi.c b/drivers/platform/x86/asus-wmi.c
index 2104a2621e50..0e7fbed8a50d 100644
--- a/drivers/platform/x86/asus-wmi.c
+++ b/drivers/platform/x86/asus-wmi.c
@@ -371,10 +371,14 @@ static int asus_wmi_evaluate_method_buf(u32 method_id,
switch (obj->type) {
case ACPI_TYPE_BUFFER:
- if (obj->buffer.length > size)
+ if (obj->buffer.length > size) {
err = -ENOSPC;
- if (obj->buffer.length == 0)
+ break;
+ }
+ if (obj->buffer.length == 0) {
err = -ENODATA;
+ break;
+ }
memcpy(ret_buffer, obj->buffer.pointer, obj->buffer.length);
break;
@@ -2223,9 +2227,10 @@ static int fan_curve_check_present(struct asus_wmi *asus, bool *available,
err = fan_curve_get_factory_default(asus, fan_dev);
if (err) {
- if (err == -ENODEV || err == -ENODATA)
- return 0;
- return err;
+ pr_debug("fan_curve_get_factory_default(0x%08x) failed: %d\n",
+ fan_dev, err);
+ /* Don't cause probe to fail on devices without fan-curves */
+ return 0;
}
*available = true;
diff --git a/drivers/platform/x86/dell/dell-laptop.c b/drivers/platform/x86/dell/dell-laptop.c
index 8230e7a68a5e..1321687d923e 100644
--- a/drivers/platform/x86/dell/dell-laptop.c
+++ b/drivers/platform/x86/dell/dell-laptop.c
@@ -80,6 +80,10 @@ static struct quirk_entry quirk_dell_inspiron_1012 = {
.kbd_led_not_present = true,
};
+static struct quirk_entry quirk_dell_latitude_7520 = {
+ .kbd_missing_ac_tag = true,
+};
+
static struct platform_driver platform_driver = {
.driver = {
.name = "dell-laptop",
@@ -336,6 +340,15 @@ static const struct dmi_system_id dell_quirks[] __initconst = {
},
.driver_data = &quirk_dell_inspiron_1012,
},
+ {
+ .callback = dmi_matched,
+ .ident = "Dell Latitude 7520",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+ DMI_MATCH(DMI_PRODUCT_NAME, "Latitude 7520"),
+ },
+ .driver_data = &quirk_dell_latitude_7520,
+ },
{ }
};
diff --git a/drivers/platform/x86/gigabyte-wmi.c b/drivers/platform/x86/gigabyte-wmi.c
index 658bab4b7964..e87a931eab1e 100644
--- a/drivers/platform/x86/gigabyte-wmi.c
+++ b/drivers/platform/x86/gigabyte-wmi.c
@@ -148,6 +148,7 @@ static const struct dmi_system_id gigabyte_wmi_known_working_platforms[] = {
DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B550I AORUS PRO AX"),
DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B550M AORUS PRO-P"),
DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B550M DS3H"),
+ DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B660 GAMING X DDR4"),
DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("Z390 I AORUS PRO WIFI-CF"),
DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("X570 AORUS ELITE"),
DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("X570 GAMING X"),
diff --git a/drivers/platform/x86/intel/pmc/core.h b/drivers/platform/x86/intel/pmc/core.h
index a46d3b53bf61..7a059e02c265 100644
--- a/drivers/platform/x86/intel/pmc/core.h
+++ b/drivers/platform/x86/intel/pmc/core.h
@@ -236,7 +236,7 @@ enum ppfear_regs {
#define ADL_LPM_STATUS_LATCH_EN_OFFSET 0x1704
#define ADL_LPM_LIVE_STATUS_OFFSET 0x1764
-const char *pmc_lpm_modes[] = {
+static const char *pmc_lpm_modes[] = {
"S0i2.0",
"S0i2.1",
"S0i2.2",
diff --git a/drivers/platform/x86/intel/pmt/telemetry.c b/drivers/platform/x86/intel/pmt/telemetry.c
index 6b6f3e2a617a..f73ecfd4a309 100644
--- a/drivers/platform/x86/intel/pmt/telemetry.c
+++ b/drivers/platform/x86/intel/pmt/telemetry.c
@@ -103,7 +103,7 @@ static int pmt_telem_probe(struct auxiliary_device *auxdev, const struct auxilia
auxiliary_set_drvdata(auxdev, priv);
for (i = 0; i < intel_vsec_dev->num_resources; i++) {
- struct intel_pmt_entry *entry = &priv->entry[i];
+ struct intel_pmt_entry *entry = &priv->entry[priv->num_entries];
ret = intel_pmt_dev_create(entry, &pmt_telem_ns, intel_vsec_dev, i);
if (ret < 0)
diff --git a/drivers/platform/x86/intel/sdsi.c b/drivers/platform/x86/intel/sdsi.c
index 11d14cc0ff0a..c830e98dfa38 100644
--- a/drivers/platform/x86/intel/sdsi.c
+++ b/drivers/platform/x86/intel/sdsi.c
@@ -51,6 +51,8 @@
#define MBOX_TIMEOUT_US 2000
#define MBOX_TIMEOUT_ACQUIRE_US 1000
#define MBOX_POLLING_PERIOD_US 100
+#define MBOX_ACQUIRE_NUM_RETRIES 5
+#define MBOX_ACQUIRE_RETRY_DELAY_MS 500
#define MBOX_MAX_PACKETS 4
#define MBOX_OWNER_NONE 0x00
@@ -81,7 +83,7 @@ enum sdsi_command {
struct sdsi_mbox_info {
u64 *payload;
- u64 *buffer;
+ void *buffer;
int size;
};
@@ -163,9 +165,7 @@ static int sdsi_mbox_cmd_read(struct sdsi_priv *priv, struct sdsi_mbox_info *inf
total = 0;
loop = 0;
do {
- int offset = SDSI_SIZE_MAILBOX * loop;
- void __iomem *addr = priv->mbox_addr + offset;
- u64 *buf = info->buffer + offset / SDSI_SIZE_CMD;
+ void *buf = info->buffer + (SDSI_SIZE_MAILBOX * loop);
u32 packet_size;
/* Poll on ready bit */
@@ -196,7 +196,7 @@ static int sdsi_mbox_cmd_read(struct sdsi_priv *priv, struct sdsi_mbox_info *inf
break;
}
- sdsi_memcpy64_fromio(buf, addr, round_up(packet_size, SDSI_SIZE_CMD));
+ sdsi_memcpy64_fromio(buf, priv->mbox_addr, round_up(packet_size, SDSI_SIZE_CMD));
total += packet_size;
@@ -243,8 +243,8 @@ static int sdsi_mbox_cmd_write(struct sdsi_priv *priv, struct sdsi_mbox_info *in
FIELD_PREP(CTRL_PACKET_SIZE, info->size);
writeq(control, priv->control_addr);
- /* Poll on run_busy bit */
- ret = readq_poll_timeout(priv->control_addr, control, !(control & CTRL_RUN_BUSY),
+ /* Poll on ready bit */
+ ret = readq_poll_timeout(priv->control_addr, control, control & CTRL_READY,
MBOX_POLLING_PERIOD_US, MBOX_TIMEOUT_US);
if (ret)
@@ -263,7 +263,7 @@ static int sdsi_mbox_acquire(struct sdsi_priv *priv, struct sdsi_mbox_info *info
{
u64 control;
u32 owner;
- int ret;
+ int ret, retries = 0;
lockdep_assert_held(&priv->mb_lock);
@@ -273,13 +273,29 @@ static int sdsi_mbox_acquire(struct sdsi_priv *priv, struct sdsi_mbox_info *info
if (owner != MBOX_OWNER_NONE)
return -EBUSY;
- /* Write first qword of payload */
- writeq(info->payload[0], priv->mbox_addr);
+ /*
+ * If there has been no recent transaction and no one owns the mailbox,
+ * we should acquire it in under 1ms. However, if we've accessed it
+ * recently it may take up to 2.1 seconds to acquire it again.
+ */
+ do {
+ /* Write first qword of payload */
+ writeq(info->payload[0], priv->mbox_addr);
+
+ /* Check for ownership */
+ ret = readq_poll_timeout(priv->control_addr, control,
+ FIELD_GET(CTRL_OWNER, control) == MBOX_OWNER_INBAND,
+ MBOX_POLLING_PERIOD_US, MBOX_TIMEOUT_ACQUIRE_US);
+
+ if (FIELD_GET(CTRL_OWNER, control) == MBOX_OWNER_NONE &&
+ retries++ < MBOX_ACQUIRE_NUM_RETRIES) {
+ msleep(MBOX_ACQUIRE_RETRY_DELAY_MS);
+ continue;
+ }
- /* Check for ownership */
- ret = readq_poll_timeout(priv->control_addr, control,
- FIELD_GET(CTRL_OWNER, control) & MBOX_OWNER_INBAND,
- MBOX_POLLING_PERIOD_US, MBOX_TIMEOUT_ACQUIRE_US);
+ /* Either we got it or someone else did. */
+ break;
+ } while (true);
return ret;
}
diff --git a/drivers/platform/x86/intel/uncore-frequency/uncore-frequency.c b/drivers/platform/x86/intel/uncore-frequency/uncore-frequency.c
index c61f804dd44e..8f9c571d7257 100644
--- a/drivers/platform/x86/intel/uncore-frequency/uncore-frequency.c
+++ b/drivers/platform/x86/intel/uncore-frequency/uncore-frequency.c
@@ -212,6 +212,9 @@ static int __init intel_uncore_init(void)
const struct x86_cpu_id *id;
int ret;
+ if (cpu_feature_enabled(X86_FEATURE_HYPERVISOR))
+ return -ENODEV;
+
id = x86_match_cpu(intel_uncore_cpu_ids);
if (!id)
return -ENODEV;
diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c
index c568fae56db2..e6cb4a14cdd4 100644
--- a/drivers/platform/x86/thinkpad_acpi.c
+++ b/drivers/platform/x86/thinkpad_acpi.c
@@ -309,6 +309,20 @@ struct ibm_init_struct {
struct ibm_struct *data;
};
+/* DMI Quirks */
+struct quirk_entry {
+ bool btusb_bug;
+ u32 s2idle_bug_mmio;
+};
+
+static struct quirk_entry quirk_btusb_bug = {
+ .btusb_bug = true,
+};
+
+static struct quirk_entry quirk_s2idle_bug = {
+ .s2idle_bug_mmio = 0xfed80380,
+};
+
static struct {
u32 bluetooth:1;
u32 hotkey:1;
@@ -338,6 +352,7 @@ static struct {
u32 hotkey_poll_active:1;
u32 has_adaptive_kbd:1;
u32 kbd_lang:1;
+ struct quirk_entry *quirks;
} tp_features;
static struct {
@@ -4359,9 +4374,10 @@ static void bluetooth_exit(void)
bluetooth_shutdown();
}
-static const struct dmi_system_id bt_fwbug_list[] __initconst = {
+static const struct dmi_system_id fwbug_list[] __initconst = {
{
.ident = "ThinkPad E485",
+ .driver_data = &quirk_btusb_bug,
.matches = {
DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
DMI_MATCH(DMI_BOARD_NAME, "20KU"),
@@ -4369,6 +4385,7 @@ static const struct dmi_system_id bt_fwbug_list[] __initconst = {
},
{
.ident = "ThinkPad E585",
+ .driver_data = &quirk_btusb_bug,
.matches = {
DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
DMI_MATCH(DMI_BOARD_NAME, "20KV"),
@@ -4376,6 +4393,7 @@ static const struct dmi_system_id bt_fwbug_list[] __initconst = {
},
{
.ident = "ThinkPad A285 - 20MW",
+ .driver_data = &quirk_btusb_bug,
.matches = {
DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
DMI_MATCH(DMI_BOARD_NAME, "20MW"),
@@ -4383,6 +4401,7 @@ static const struct dmi_system_id bt_fwbug_list[] __initconst = {
},
{
.ident = "ThinkPad A285 - 20MX",
+ .driver_data = &quirk_btusb_bug,
.matches = {
DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
DMI_MATCH(DMI_BOARD_NAME, "20MX"),
@@ -4390,6 +4409,7 @@ static const struct dmi_system_id bt_fwbug_list[] __initconst = {
},
{
.ident = "ThinkPad A485 - 20MU",
+ .driver_data = &quirk_btusb_bug,
.matches = {
DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
DMI_MATCH(DMI_BOARD_NAME, "20MU"),
@@ -4397,14 +4417,125 @@ static const struct dmi_system_id bt_fwbug_list[] __initconst = {
},
{
.ident = "ThinkPad A485 - 20MV",
+ .driver_data = &quirk_btusb_bug,
.matches = {
DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
DMI_MATCH(DMI_BOARD_NAME, "20MV"),
},
},
+ {
+ .ident = "L14 Gen2 AMD",
+ .driver_data = &quirk_s2idle_bug,
+ .matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "20X5"),
+ }
+ },
+ {
+ .ident = "T14s Gen2 AMD",
+ .driver_data = &quirk_s2idle_bug,
+ .matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "20XF"),
+ }
+ },
+ {
+ .ident = "X13 Gen2 AMD",
+ .driver_data = &quirk_s2idle_bug,
+ .matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "20XH"),
+ }
+ },
+ {
+ .ident = "T14 Gen2 AMD",
+ .driver_data = &quirk_s2idle_bug,
+ .matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "20XK"),
+ }
+ },
+ {
+ .ident = "T14 Gen1 AMD",
+ .driver_data = &quirk_s2idle_bug,
+ .matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "20UD"),
+ }
+ },
+ {
+ .ident = "T14 Gen1 AMD",
+ .driver_data = &quirk_s2idle_bug,
+ .matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "20UE"),
+ }
+ },
+ {
+ .ident = "T14s Gen1 AMD",
+ .driver_data = &quirk_s2idle_bug,
+ .matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "20UH"),
+ }
+ },
+ {
+ .ident = "P14s Gen1 AMD",
+ .driver_data = &quirk_s2idle_bug,
+ .matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "20Y1"),
+ }
+ },
+ {
+ .ident = "P14s Gen2 AMD",
+ .driver_data = &quirk_s2idle_bug,
+ .matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "21A0"),
+ }
+ },
{}
};
+#ifdef CONFIG_SUSPEND
+/*
+ * Lenovo laptops from a variety of generations run a SMI handler during the D3->D0
+ * transition that occurs specifically when exiting suspend to idle which can cause
+ * large delays during resume when the IOMMU translation layer is enabled (the default
+ * behavior) for NVME devices:
+ *
+ * To avoid this firmware problem, skip the SMI handler on these machines before the
+ * D0 transition occurs.
+ */
+static void thinkpad_acpi_amd_s2idle_restore(void)
+{
+ struct resource *res;
+ void __iomem *addr;
+ u8 val;
+
+ res = request_mem_region_muxed(tp_features.quirks->s2idle_bug_mmio, 1,
+ "thinkpad_acpi_pm80");
+ if (!res)
+ return;
+
+ addr = ioremap(tp_features.quirks->s2idle_bug_mmio, 1);
+ if (!addr)
+ goto cleanup_resource;
+
+ val = ioread8(addr);
+ iowrite8(val & ~BIT(0), addr);
+
+ iounmap(addr);
+cleanup_resource:
+ release_resource(res);
+}
+
+static struct acpi_s2idle_dev_ops thinkpad_acpi_s2idle_dev_ops = {
+ .restore = thinkpad_acpi_amd_s2idle_restore,
+};
+#endif
+
static const struct pci_device_id fwbug_cards_ids[] __initconst = {
{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x24F3) },
{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x24FD) },
@@ -4419,7 +4550,8 @@ static int __init have_bt_fwbug(void)
* Some AMD based ThinkPads have a firmware bug that calling
* "GBDC" will cause bluetooth on Intel wireless cards blocked
*/
- if (dmi_check_system(bt_fwbug_list) && pci_dev_present(fwbug_cards_ids)) {
+ if (tp_features.quirks && tp_features.quirks->btusb_bug &&
+ pci_dev_present(fwbug_cards_ids)) {
vdbg_printk(TPACPI_DBG_INIT | TPACPI_DBG_RFKILL,
FW_BUG "disable bluetooth subdriver for Intel cards\n");
return 1;
@@ -8748,24 +8880,27 @@ static int __init fan_init(struct ibm_init_struct *iibm)
fan_status_access_mode = TPACPI_FAN_RD_TPEC;
if (quirks & TPACPI_FAN_Q1)
fan_quirk1_setup();
- if (quirks & TPACPI_FAN_2FAN) {
- tp_features.second_fan = 1;
- pr_info("secondary fan support enabled\n");
- }
- if (quirks & TPACPI_FAN_2CTL) {
- tp_features.second_fan = 1;
- tp_features.second_fan_ctl = 1;
- pr_info("secondary fan control enabled\n");
- }
/* Try and probe the 2nd fan */
+ tp_features.second_fan = 1; /* needed for get_speed to work */
res = fan2_get_speed(&speed);
if (res >= 0) {
/* It responded - so let's assume it's there */
tp_features.second_fan = 1;
tp_features.second_fan_ctl = 1;
pr_info("secondary fan control detected & enabled\n");
+ } else {
+ /* Fan not auto-detected */
+ tp_features.second_fan = 0;
+ if (quirks & TPACPI_FAN_2FAN) {
+ tp_features.second_fan = 1;
+ pr_info("secondary fan support enabled\n");
+ }
+ if (quirks & TPACPI_FAN_2CTL) {
+ tp_features.second_fan = 1;
+ tp_features.second_fan_ctl = 1;
+ pr_info("secondary fan control enabled\n");
+ }
}
-
} else {
pr_err("ThinkPad ACPI EC access misbehaving, fan status and control unavailable\n");
return -ENODEV;
@@ -11455,6 +11590,10 @@ static void thinkpad_acpi_module_exit(void)
tpacpi_lifecycle = TPACPI_LIFE_EXITING;
+#ifdef CONFIG_SUSPEND
+ if (tp_features.quirks && tp_features.quirks->s2idle_bug_mmio)
+ acpi_unregister_lps0_dev(&thinkpad_acpi_s2idle_dev_ops);
+#endif
if (tpacpi_hwmon)
hwmon_device_unregister(tpacpi_hwmon);
if (tp_features.sensors_pdrv_registered)
@@ -11496,6 +11635,7 @@ static void thinkpad_acpi_module_exit(void)
static int __init thinkpad_acpi_module_init(void)
{
+ const struct dmi_system_id *dmi_id;
int ret, i;
tpacpi_lifecycle = TPACPI_LIFE_INIT;
@@ -11535,6 +11675,10 @@ static int __init thinkpad_acpi_module_init(void)
return -ENODEV;
}
+ dmi_id = dmi_first_match(fwbug_list);
+ if (dmi_id)
+ tp_features.quirks = dmi_id->driver_data;
+
/* Device initialization */
tpacpi_pdev = platform_device_register_simple(TPACPI_DRVR_NAME, -1,
NULL, 0);
@@ -11623,6 +11767,13 @@ static int __init thinkpad_acpi_module_init(void)
tp_features.input_device_registered = 1;
}
+#ifdef CONFIG_SUSPEND
+ if (tp_features.quirks && tp_features.quirks->s2idle_bug_mmio) {
+ if (!acpi_register_lps0_dev(&thinkpad_acpi_s2idle_dev_ops))
+ pr_info("Using s2idle quirk to avoid %s platform firmware bug\n",
+ (dmi_id && dmi_id->ident) ? dmi_id->ident : "");
+ }
+#endif
return 0;
}
diff --git a/drivers/ptp/ptp_ocp.c b/drivers/ptp/ptp_ocp.c
index 0feaa4b45317..860672d6a03c 100644
--- a/drivers/ptp/ptp_ocp.c
+++ b/drivers/ptp/ptp_ocp.c
@@ -300,7 +300,7 @@ struct ptp_ocp {
struct platform_device *spi_flash;
struct clk_hw *i2c_clk;
struct timer_list watchdog;
- const struct ocp_attr_group *attr_tbl;
+ const struct attribute_group **attr_group;
const struct ptp_ocp_eeprom_map *eeprom_map;
struct dentry *debug_root;
time64_t gnss_lost;
@@ -841,7 +841,7 @@ __ptp_ocp_adjtime_locked(struct ptp_ocp *bp, u32 adj_val)
}
static void
-ptp_ocp_adjtime_coarse(struct ptp_ocp *bp, u64 delta_ns)
+ptp_ocp_adjtime_coarse(struct ptp_ocp *bp, s64 delta_ns)
{
struct timespec64 ts;
unsigned long flags;
@@ -850,7 +850,8 @@ ptp_ocp_adjtime_coarse(struct ptp_ocp *bp, u64 delta_ns)
spin_lock_irqsave(&bp->lock, flags);
err = __ptp_ocp_gettime_locked(bp, &ts, NULL);
if (likely(!err)) {
- timespec64_add_ns(&ts, delta_ns);
+ set_normalized_timespec64(&ts, ts.tv_sec,
+ ts.tv_nsec + delta_ns);
__ptp_ocp_settime_locked(bp, &ts);
}
spin_unlock_irqrestore(&bp->lock, flags);
@@ -1557,7 +1558,7 @@ ptp_ocp_signal_set(struct ptp_ocp *bp, int gen, struct ptp_ocp_signal *s)
start_ns = ktime_set(ts.tv_sec, ts.tv_nsec) + NSEC_PER_MSEC;
if (!s->start) {
/* roundup() does not work on 32-bit systems */
- s->start = DIV_ROUND_UP_ULL(start_ns, s->period);
+ s->start = DIV64_U64_ROUND_UP(start_ns, s->period);
s->start = ktime_add(s->start, s->phase);
}
@@ -1836,6 +1837,42 @@ ptp_ocp_signal_init(struct ptp_ocp *bp)
}
static void
+ptp_ocp_attr_group_del(struct ptp_ocp *bp)
+{
+ sysfs_remove_groups(&bp->dev.kobj, bp->attr_group);
+ kfree(bp->attr_group);
+}
+
+static int
+ptp_ocp_attr_group_add(struct ptp_ocp *bp,
+ const struct ocp_attr_group *attr_tbl)
+{
+ int count, i;
+ int err;
+
+ count = 0;
+ for (i = 0; attr_tbl[i].cap; i++)
+ if (attr_tbl[i].cap & bp->fw_cap)
+ count++;
+
+ bp->attr_group = kcalloc(count + 1, sizeof(struct attribute_group *),
+ GFP_KERNEL);
+ if (!bp->attr_group)
+ return -ENOMEM;
+
+ count = 0;
+ for (i = 0; attr_tbl[i].cap; i++)
+ if (attr_tbl[i].cap & bp->fw_cap)
+ bp->attr_group[count++] = attr_tbl[i].group;
+
+ err = sysfs_create_groups(&bp->dev.kobj, bp->attr_group);
+ if (err)
+ bp->attr_group[0] = NULL;
+
+ return err;
+}
+
+static void
ptp_ocp_sma_init(struct ptp_ocp *bp)
{
u32 reg;
@@ -1904,7 +1941,6 @@ ptp_ocp_fb_board_init(struct ptp_ocp *bp, struct ocp_resource *r)
bp->flash_start = 1024 * 4096;
bp->eeprom_map = fb_eeprom_map;
bp->fw_version = ioread32(&bp->image->version);
- bp->attr_tbl = fb_timecard_groups;
bp->fw_cap = OCP_CAP_BASIC;
ver = bp->fw_version & 0xffff;
@@ -1918,6 +1954,10 @@ ptp_ocp_fb_board_init(struct ptp_ocp *bp, struct ocp_resource *r)
ptp_ocp_sma_init(bp);
ptp_ocp_signal_init(bp);
+ err = ptp_ocp_attr_group_add(bp, fb_timecard_groups);
+ if (err)
+ return err;
+
err = ptp_ocp_fb_set_pins(bp);
if (err)
return err;
@@ -3388,7 +3428,6 @@ ptp_ocp_complete(struct ptp_ocp *bp)
{
struct pps_device *pps;
char buf[32];
- int i, err;
if (bp->gnss_port != -1) {
sprintf(buf, "ttyS%d", bp->gnss_port);
@@ -3413,14 +3452,6 @@ ptp_ocp_complete(struct ptp_ocp *bp)
if (pps)
ptp_ocp_symlink(bp, pps->dev, "pps");
- for (i = 0; bp->attr_tbl[i].cap; i++) {
- if (!(bp->attr_tbl[i].cap & bp->fw_cap))
- continue;
- err = sysfs_create_group(&bp->dev.kobj, bp->attr_tbl[i].group);
- if (err)
- return err;
- }
-
ptp_ocp_debugfs_add_device(bp);
return 0;
@@ -3492,15 +3523,11 @@ static void
ptp_ocp_detach_sysfs(struct ptp_ocp *bp)
{
struct device *dev = &bp->dev;
- int i;
sysfs_remove_link(&dev->kobj, "ttyGNSS");
sysfs_remove_link(&dev->kobj, "ttyMAC");
sysfs_remove_link(&dev->kobj, "ptp");
sysfs_remove_link(&dev->kobj, "pps");
- if (bp->attr_tbl)
- for (i = 0; bp->attr_tbl[i].cap; i++)
- sysfs_remove_group(&dev->kobj, bp->attr_tbl[i].group);
}
static void
@@ -3510,6 +3537,7 @@ ptp_ocp_detach(struct ptp_ocp *bp)
ptp_ocp_debugfs_remove_device(bp);
ptp_ocp_detach_sysfs(bp);
+ ptp_ocp_attr_group_del(bp);
if (timer_pending(&bp->watchdog))
del_timer_sync(&bp->watchdog);
if (bp->ts0)
diff --git a/drivers/rtc/rtc-sun6i.c b/drivers/rtc/rtc-sun6i.c
index 5b3e4da63406..5252ce4cbda4 100644
--- a/drivers/rtc/rtc-sun6i.c
+++ b/drivers/rtc/rtc-sun6i.c
@@ -370,6 +370,23 @@ CLK_OF_DECLARE_DRIVER(sun8i_h3_rtc_clk, "allwinner,sun8i-h3-rtc",
CLK_OF_DECLARE_DRIVER(sun50i_h5_rtc_clk, "allwinner,sun50i-h5-rtc",
sun8i_h3_rtc_clk_init);
+static const struct sun6i_rtc_clk_data sun50i_h6_rtc_data = {
+ .rc_osc_rate = 16000000,
+ .fixed_prescaler = 32,
+ .has_prescaler = 1,
+ .has_out_clk = 1,
+ .export_iosc = 1,
+ .has_losc_en = 1,
+ .has_auto_swt = 1,
+};
+
+static void __init sun50i_h6_rtc_clk_init(struct device_node *node)
+{
+ sun6i_rtc_clk_init(node, &sun50i_h6_rtc_data);
+}
+CLK_OF_DECLARE_DRIVER(sun50i_h6_rtc_clk, "allwinner,sun50i-h6-rtc",
+ sun50i_h6_rtc_clk_init);
+
/*
* The R40 user manual is self-conflicting on whether the prescaler is
* fixed or configurable. The clock diagram shows it as fixed, but there
diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c
index 8e87a31e329d..ba6d78789660 100644
--- a/drivers/s390/block/dasd.c
+++ b/drivers/s390/block/dasd.c
@@ -1422,6 +1422,13 @@ int dasd_start_IO(struct dasd_ccw_req *cqr)
if (!cqr->lpm)
cqr->lpm = dasd_path_get_opm(device);
}
+ /*
+ * remember the amount of formatted tracks to prevent double format on
+ * ESE devices
+ */
+ if (cqr->block)
+ cqr->trkcount = atomic_read(&cqr->block->trkcount);
+
if (cqr->cpmode == 1) {
rc = ccw_device_tm_start(device->cdev, cqr->cpaddr,
(long) cqr, cqr->lpm);
@@ -1639,6 +1646,7 @@ void dasd_int_handler(struct ccw_device *cdev, unsigned long intparm,
unsigned long now;
int nrf_suppressed = 0;
int fp_suppressed = 0;
+ struct request *req;
u8 *sense = NULL;
int expires;
@@ -1739,7 +1747,12 @@ void dasd_int_handler(struct ccw_device *cdev, unsigned long intparm,
}
if (dasd_ese_needs_format(cqr->block, irb)) {
- if (rq_data_dir((struct request *)cqr->callback_data) == READ) {
+ req = dasd_get_callback_data(cqr);
+ if (!req) {
+ cqr->status = DASD_CQR_ERROR;
+ return;
+ }
+ if (rq_data_dir(req) == READ) {
device->discipline->ese_read(cqr, irb);
cqr->status = DASD_CQR_SUCCESS;
cqr->stopclk = now;
@@ -2765,8 +2778,7 @@ static void __dasd_cleanup_cqr(struct dasd_ccw_req *cqr)
* complete a request partially.
*/
if (proc_bytes) {
- blk_update_request(req, BLK_STS_OK,
- blk_rq_bytes(req) - proc_bytes);
+ blk_update_request(req, BLK_STS_OK, proc_bytes);
blk_mq_requeue_request(req, true);
} else if (likely(!blk_should_fake_timeout(req->q))) {
blk_mq_complete_request(req);
diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c
index 8410a25a65c1..836838f7d686 100644
--- a/drivers/s390/block/dasd_eckd.c
+++ b/drivers/s390/block/dasd_eckd.c
@@ -1480,7 +1480,7 @@ static int dasd_eckd_pe_handler(struct dasd_device *device,
{
struct pe_handler_work_data *data;
- data = kmalloc(sizeof(*data), GFP_ATOMIC | GFP_DMA);
+ data = kzalloc(sizeof(*data), GFP_ATOMIC | GFP_DMA);
if (!data) {
if (mutex_trylock(&dasd_pe_handler_mutex)) {
data = pe_handler_worker;
@@ -1488,9 +1488,6 @@ static int dasd_eckd_pe_handler(struct dasd_device *device,
} else {
return -ENOMEM;
}
- } else {
- memset(data, 0, sizeof(*data));
- data->isglobal = 0;
}
INIT_WORK(&data->worker, do_pe_handler_work);
dasd_get_device(device);
@@ -3083,13 +3080,24 @@ static int dasd_eckd_format_device(struct dasd_device *base,
}
static bool test_and_set_format_track(struct dasd_format_entry *to_format,
- struct dasd_block *block)
+ struct dasd_ccw_req *cqr)
{
+ struct dasd_block *block = cqr->block;
struct dasd_format_entry *format;
unsigned long flags;
bool rc = false;
spin_lock_irqsave(&block->format_lock, flags);
+ if (cqr->trkcount != atomic_read(&block->trkcount)) {
+ /*
+ * The number of formatted tracks has changed after request
+ * start and we can not tell if the current track was involved.
+ * To avoid data corruption treat it as if the current track is
+ * involved
+ */
+ rc = true;
+ goto out;
+ }
list_for_each_entry(format, &block->format_list, list) {
if (format->track == to_format->track) {
rc = true;
@@ -3109,6 +3117,7 @@ static void clear_format_track(struct dasd_format_entry *format,
unsigned long flags;
spin_lock_irqsave(&block->format_lock, flags);
+ atomic_inc(&block->trkcount);
list_del_init(&format->list);
spin_unlock_irqrestore(&block->format_lock, flags);
}
@@ -3145,7 +3154,7 @@ dasd_eckd_ese_format(struct dasd_device *startdev, struct dasd_ccw_req *cqr,
sector_t curr_trk;
int rc;
- req = cqr->callback_data;
+ req = dasd_get_callback_data(cqr);
block = cqr->block;
base = block->base;
private = base->private;
@@ -3170,8 +3179,11 @@ dasd_eckd_ese_format(struct dasd_device *startdev, struct dasd_ccw_req *cqr,
}
format->track = curr_trk;
/* test if track is already in formatting by another thread */
- if (test_and_set_format_track(format, block))
+ if (test_and_set_format_track(format, cqr)) {
+ /* this is no real error so do not count down retries */
+ cqr->retries++;
return ERR_PTR(-EEXIST);
+ }
fdata.start_unit = curr_trk;
fdata.stop_unit = curr_trk;
@@ -3270,12 +3282,11 @@ static int dasd_eckd_ese_read(struct dasd_ccw_req *cqr, struct irb *irb)
cqr->proc_bytes = blk_count * blksize;
return 0;
}
- if (dst && !skip_block) {
- dst += off;
+ if (dst && !skip_block)
memset(dst, 0, blksize);
- } else {
+ else
skip_block--;
- }
+ dst += blksize;
blk_count++;
}
}
diff --git a/drivers/s390/block/dasd_fba.c b/drivers/s390/block/dasd_fba.c
index e084f4dedddd..60be7f7bf2d1 100644
--- a/drivers/s390/block/dasd_fba.c
+++ b/drivers/s390/block/dasd_fba.c
@@ -782,7 +782,6 @@ static void dasd_fba_setup_blk_queue(struct dasd_block *block)
blk_queue_segment_boundary(q, PAGE_SIZE - 1);
q->limits.discard_granularity = logical_block_size;
- q->limits.discard_alignment = PAGE_SIZE;
/* Calculate max_discard_sectors and make it PAGE aligned */
max_bytes = USHRT_MAX * logical_block_size;
@@ -791,7 +790,6 @@ static void dasd_fba_setup_blk_queue(struct dasd_block *block)
blk_queue_max_discard_sectors(q, max_discard_sectors);
blk_queue_max_write_zeroes_sectors(q, max_discard_sectors);
- blk_queue_flag_set(QUEUE_FLAG_DISCARD, q);
}
static int dasd_fba_pe_handler(struct dasd_device *device,
diff --git a/drivers/s390/block/dasd_int.h b/drivers/s390/block/dasd_int.h
index 3b7af00a7825..83b918b84b4a 100644
--- a/drivers/s390/block/dasd_int.h
+++ b/drivers/s390/block/dasd_int.h
@@ -187,6 +187,7 @@ struct dasd_ccw_req {
void (*callback)(struct dasd_ccw_req *, void *data);
void *callback_data;
unsigned int proc_bytes; /* bytes for partial completion */
+ unsigned int trkcount; /* count formatted tracks */
};
/*
@@ -610,6 +611,7 @@ struct dasd_block {
struct list_head format_list;
spinlock_t format_lock;
+ atomic_t trkcount;
};
struct dasd_attention_data {
@@ -756,6 +758,18 @@ dasd_check_blocksize(int bsize)
return 0;
}
+/*
+ * return the callback data of the original request in case there are
+ * ERP requests build on top of it
+ */
+static inline void *dasd_get_callback_data(struct dasd_ccw_req *cqr)
+{
+ while (cqr->refers)
+ cqr = cqr->refers;
+
+ return cqr->callback_data;
+}
+
/* externals in dasd.c */
#define DASD_PROFILE_OFF 0
#define DASD_PROFILE_ON 1
diff --git a/drivers/s390/net/ctcm_mpc.c b/drivers/s390/net/ctcm_mpc.c
index 88abfb5e8045..8ac213a55141 100644
--- a/drivers/s390/net/ctcm_mpc.c
+++ b/drivers/s390/net/ctcm_mpc.c
@@ -626,8 +626,6 @@ static void mpc_rcvd_sweep_resp(struct mpcg_info *mpcginfo)
ctcm_clear_busy_do(dev);
}
- kfree(mpcginfo);
-
return;
}
@@ -1192,10 +1190,10 @@ static void ctcmpc_unpack_skb(struct channel *ch, struct sk_buff *pskb)
CTCM_FUNTAIL, dev->name);
priv->stats.rx_dropped++;
/* mpcginfo only used for non-data transfers */
- kfree(mpcginfo);
if (do_debug_data)
ctcmpc_dump_skb(pskb, -8);
}
+ kfree(mpcginfo);
}
done:
@@ -1977,7 +1975,6 @@ static void mpc_action_rcvd_xid0(fsm_instance *fsm, int event, void *arg)
}
break;
}
- kfree(mpcginfo);
CTCM_PR_DEBUG("ctcmpc:%s() %s xid2:%i xid7:%i xidt_p2:%i \n",
__func__, ch->id, grp->outstanding_xid2,
@@ -2038,7 +2035,6 @@ static void mpc_action_rcvd_xid7(fsm_instance *fsm, int event, void *arg)
mpc_validate_xid(mpcginfo);
break;
}
- kfree(mpcginfo);
return;
}
diff --git a/drivers/s390/net/ctcm_sysfs.c b/drivers/s390/net/ctcm_sysfs.c
index ded1930a00b2..e3813a7aa5e6 100644
--- a/drivers/s390/net/ctcm_sysfs.c
+++ b/drivers/s390/net/ctcm_sysfs.c
@@ -39,11 +39,12 @@ static ssize_t ctcm_buffer_write(struct device *dev,
struct ctcm_priv *priv = dev_get_drvdata(dev);
int rc;
- ndev = priv->channel[CTCM_READ]->netdev;
- if (!(priv && priv->channel[CTCM_READ] && ndev)) {
+ if (!(priv && priv->channel[CTCM_READ] &&
+ priv->channel[CTCM_READ]->netdev)) {
CTCM_DBF_TEXT(SETUP, CTC_DBF_ERROR, "bfnondev");
return -ENODEV;
}
+ ndev = priv->channel[CTCM_READ]->netdev;
rc = kstrtouint(buf, 0, &bs1);
if (rc)
diff --git a/drivers/s390/net/lcs.c b/drivers/s390/net/lcs.c
index bab9b34926c6..84c8981317b4 100644
--- a/drivers/s390/net/lcs.c
+++ b/drivers/s390/net/lcs.c
@@ -1736,10 +1736,11 @@ lcs_get_control(struct lcs_card *card, struct lcs_cmd *cmd)
lcs_schedule_recovery(card);
break;
case LCS_CMD_STOPLAN:
- pr_warn("Stoplan for %s initiated by LGW\n",
- card->dev->name);
- if (card->dev)
+ if (card->dev) {
+ pr_warn("Stoplan for %s initiated by LGW\n",
+ card->dev->name);
netif_carrier_off(card->dev);
+ }
break;
default:
LCS_DBF_TEXT(5, trace, "noLGWcmd");
diff --git a/drivers/scsi/device_handler/scsi_dh_alua.c b/drivers/scsi/device_handler/scsi_dh_alua.c
index 37d06f993b76..1d9be771f3ee 100644
--- a/drivers/scsi/device_handler/scsi_dh_alua.c
+++ b/drivers/scsi/device_handler/scsi_dh_alua.c
@@ -1172,9 +1172,8 @@ static blk_status_t alua_prep_fn(struct scsi_device *sdev, struct request *req)
case SCSI_ACCESS_STATE_OPTIMAL:
case SCSI_ACCESS_STATE_ACTIVE:
case SCSI_ACCESS_STATE_LBA:
- return BLK_STS_OK;
case SCSI_ACCESS_STATE_TRANSITIONING:
- return BLK_STS_AGAIN;
+ return BLK_STS_OK;
default:
req->rq_flags |= RQF_QUIET;
return BLK_STS_IOERR;
diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c
index ef6e8cd8c26a..872a26376ccb 100644
--- a/drivers/scsi/lpfc/lpfc_els.c
+++ b/drivers/scsi/lpfc/lpfc_els.c
@@ -1330,7 +1330,7 @@ lpfc_issue_els_flogi(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
if (bf_get(lpfc_sli_intf_if_type, &phba->sli4_hba.sli_intf) ==
LPFC_SLI_INTF_IF_TYPE_0) {
/* FLOGI needs to be 3 for WQE FCFI */
- ct = ((SLI4_CT_FCFI >> 1) & 1) | (SLI4_CT_FCFI & 1);
+ ct = SLI4_CT_FCFI;
bf_set(wqe_ct, &wqe->els_req.wqe_com, ct);
/* Set the fcfi to the fcfi we registered with */
diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c
index ba9dbb51b75f..f6b83853f7ee 100644
--- a/drivers/scsi/lpfc/lpfc_scsi.c
+++ b/drivers/scsi/lpfc/lpfc_scsi.c
@@ -5528,7 +5528,9 @@ static char *lpfc_is_command_vm_io(struct scsi_cmnd *cmd)
{
struct bio *bio = scsi_cmd_to_rq(cmd)->bio;
- return bio ? blkcg_get_fc_appid(bio) : NULL;
+ if (!IS_ENABLED(CONFIG_BLK_CGROUP_FC_APPID) || !bio)
+ return NULL;
+ return blkcg_get_fc_appid(bio);
}
/**
diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c
index bda2a7ba4e77..6adaf79e67cc 100644
--- a/drivers/scsi/lpfc/lpfc_sli.c
+++ b/drivers/scsi/lpfc/lpfc_sli.c
@@ -10720,10 +10720,10 @@ __lpfc_sli_prep_gen_req_s4(struct lpfc_iocbq *cmdiocbq, struct lpfc_dmabuf *bmp,
/* Words 0 - 2 */
bde = (struct ulp_bde64_le *)&cmdwqe->generic.bde;
- bde->addr_low = cpu_to_le32(putPaddrLow(bmp->phys));
- bde->addr_high = cpu_to_le32(putPaddrHigh(bmp->phys));
+ bde->addr_low = bpl->addr_low;
+ bde->addr_high = bpl->addr_high;
bde->type_size = cpu_to_le32(xmit_len);
- bde->type_size |= cpu_to_le32(ULP_BDE64_TYPE_BLP_64);
+ bde->type_size |= cpu_to_le32(ULP_BDE64_TYPE_BDE_64);
/* Word 3 */
cmdwqe->gen_req.request_payload_len = xmit_len;
diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c
index 85dbf81f3204..6dfcfd8e7337 100644
--- a/drivers/scsi/qla2xxx/qla_target.c
+++ b/drivers/scsi/qla2xxx/qla_target.c
@@ -3826,6 +3826,9 @@ int qlt_abort_cmd(struct qla_tgt_cmd *cmd)
spin_lock_irqsave(&cmd->cmd_lock, flags);
if (cmd->aborted) {
+ if (cmd->sg_mapped)
+ qlt_unmap_sg(vha, cmd);
+
spin_unlock_irqrestore(&cmd->cmd_lock, flags);
/*
* It's normal to see 2 calls in this path:
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index dc6e55761fd1..9694e2cfaf9a 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -797,7 +797,6 @@ static void sd_config_discard(struct scsi_disk *sdkp, unsigned int mode)
case SD_LBP_FULL:
case SD_LBP_DISABLE:
blk_queue_max_discard_sectors(q, 0);
- blk_queue_flag_clear(QUEUE_FLAG_DISCARD, q);
return;
case SD_LBP_UNMAP:
@@ -830,7 +829,6 @@ static void sd_config_discard(struct scsi_disk *sdkp, unsigned int mode)
}
blk_queue_max_discard_sectors(q, max_blocks * (logical_block_size >> 9));
- blk_queue_flag_set(QUEUE_FLAG_DISCARD, q);
}
static blk_status_t sd_setup_unmap_cmnd(struct scsi_cmnd *cmd)
diff --git a/drivers/scsi/sr_ioctl.c b/drivers/scsi/sr_ioctl.c
index ddd00efc4882..fbdb5124d7f7 100644
--- a/drivers/scsi/sr_ioctl.c
+++ b/drivers/scsi/sr_ioctl.c
@@ -41,7 +41,7 @@ static int sr_read_tochdr(struct cdrom_device_info *cdi,
int result;
unsigned char *buffer;
- buffer = kmalloc(32, GFP_KERNEL);
+ buffer = kzalloc(32, GFP_KERNEL);
if (!buffer)
return -ENOMEM;
@@ -55,10 +55,13 @@ static int sr_read_tochdr(struct cdrom_device_info *cdi,
cgc.data_direction = DMA_FROM_DEVICE;
result = sr_do_ioctl(cd, &cgc);
+ if (result)
+ goto err;
tochdr->cdth_trk0 = buffer[2];
tochdr->cdth_trk1 = buffer[3];
+err:
kfree(buffer);
return result;
}
@@ -71,7 +74,7 @@ static int sr_read_tocentry(struct cdrom_device_info *cdi,
int result;
unsigned char *buffer;
- buffer = kmalloc(32, GFP_KERNEL);
+ buffer = kzalloc(32, GFP_KERNEL);
if (!buffer)
return -ENOMEM;
@@ -86,6 +89,8 @@ static int sr_read_tocentry(struct cdrom_device_info *cdi,
cgc.data_direction = DMA_FROM_DEVICE;
result = sr_do_ioctl(cd, &cgc);
+ if (result)
+ goto err;
tocentry->cdte_ctrl = buffer[5] & 0xf;
tocentry->cdte_adr = buffer[5] >> 4;
@@ -98,6 +103,7 @@ static int sr_read_tocentry(struct cdrom_device_info *cdi,
tocentry->cdte_addr.lba = (((((buffer[8] << 8) + buffer[9]) << 8)
+ buffer[10]) << 8) + buffer[11];
+err:
kfree(buffer);
return result;
}
@@ -384,7 +390,7 @@ int sr_get_mcn(struct cdrom_device_info *cdi, struct cdrom_mcn *mcn)
{
Scsi_CD *cd = cdi->handle;
struct packet_command cgc;
- char *buffer = kmalloc(32, GFP_KERNEL);
+ char *buffer = kzalloc(32, GFP_KERNEL);
int result;
if (!buffer)
@@ -400,10 +406,13 @@ int sr_get_mcn(struct cdrom_device_info *cdi, struct cdrom_mcn *mcn)
cgc.data_direction = DMA_FROM_DEVICE;
cgc.timeout = IOCTL_TIMEOUT;
result = sr_do_ioctl(cd, &cgc);
+ if (result)
+ goto err;
memcpy(mcn->medium_catalog_number, buffer + 9, 13);
mcn->medium_catalog_number[13] = 0;
+err:
kfree(buffer);
return result;
}
diff --git a/drivers/scsi/ufs/ufshpb.c b/drivers/scsi/ufs/ufshpb.c
index 81099b68bbfb..588c0329b80c 100644
--- a/drivers/scsi/ufs/ufshpb.c
+++ b/drivers/scsi/ufs/ufshpb.c
@@ -1254,6 +1254,13 @@ void ufshpb_rsp_upiu(struct ufs_hba *hba, struct ufshcd_lrb *lrbp)
struct utp_hpb_rsp *rsp_field = &lrbp->ucd_rsp_ptr->hr;
int data_seg_len;
+ data_seg_len = be32_to_cpu(lrbp->ucd_rsp_ptr->header.dword_2)
+ & MASK_RSP_UPIU_DATA_SEG_LEN;
+
+ /* If data segment length is zero, rsp_field is not valid */
+ if (!data_seg_len)
+ return;
+
if (unlikely(lrbp->lun != rsp_field->lun)) {
struct scsi_device *sdev;
bool found = false;
@@ -1288,18 +1295,6 @@ void ufshpb_rsp_upiu(struct ufs_hba *hba, struct ufshcd_lrb *lrbp)
return;
}
- data_seg_len = be32_to_cpu(lrbp->ucd_rsp_ptr->header.dword_2)
- & MASK_RSP_UPIU_DATA_SEG_LEN;
-
- /* To flush remained rsp_list, we queue the map_work task */
- if (!data_seg_len) {
- if (!ufshpb_is_general_lun(hpb->lun))
- return;
-
- ufshpb_kick_map_work(hpb);
- return;
- }
-
BUILD_BUG_ON(sizeof(struct utp_hpb_rsp) != UTP_HPB_RSP_SIZE);
if (!ufshpb_is_hpb_rsp_valid(hba, lrbp, rsp_field))
diff --git a/drivers/slimbus/qcom-ctrl.c b/drivers/slimbus/qcom-ctrl.c
index f04b961b96cd..ec58091fc948 100644
--- a/drivers/slimbus/qcom-ctrl.c
+++ b/drivers/slimbus/qcom-ctrl.c
@@ -510,9 +510,9 @@ static int qcom_slim_probe(struct platform_device *pdev)
}
ctrl->irq = platform_get_irq(pdev, 0);
- if (!ctrl->irq) {
+ if (ctrl->irq < 0) {
dev_err(&pdev->dev, "no slimbus IRQ\n");
- return -ENODEV;
+ return ctrl->irq;
}
sctrl = &ctrl->ctrl;
diff --git a/drivers/soc/imx/imx8m-blk-ctrl.c b/drivers/soc/imx/imx8m-blk-ctrl.c
index 122f9c884b38..ccd0577a771e 100644
--- a/drivers/soc/imx/imx8m-blk-ctrl.c
+++ b/drivers/soc/imx/imx8m-blk-ctrl.c
@@ -50,7 +50,7 @@ struct imx8m_blk_ctrl_domain_data {
u32 mipi_phy_rst_mask;
};
-#define DOMAIN_MAX_CLKS 3
+#define DOMAIN_MAX_CLKS 4
struct imx8m_blk_ctrl_domain {
struct generic_pm_domain genpd;
diff --git a/drivers/spi/atmel-quadspi.c b/drivers/spi/atmel-quadspi.c
index 92d9610df1fd..938017a60c8e 100644
--- a/drivers/spi/atmel-quadspi.c
+++ b/drivers/spi/atmel-quadspi.c
@@ -277,6 +277,9 @@ static int atmel_qspi_find_mode(const struct spi_mem_op *op)
static bool atmel_qspi_supports_op(struct spi_mem *mem,
const struct spi_mem_op *op)
{
+ if (!spi_mem_default_supports_op(mem, op))
+ return false;
+
if (atmel_qspi_find_mode(op) < 0)
return false;
diff --git a/drivers/spi/spi-cadence-quadspi.c b/drivers/spi/spi-cadence-quadspi.c
index 616ada891974..19686fb47bb3 100644
--- a/drivers/spi/spi-cadence-quadspi.c
+++ b/drivers/spi/spi-cadence-quadspi.c
@@ -1415,9 +1415,24 @@ static bool cqspi_supports_mem_op(struct spi_mem *mem,
all_false = !op->cmd.dtr && !op->addr.dtr && !op->dummy.dtr &&
!op->data.dtr;
- /* Mixed DTR modes not supported. */
- if (!(all_true || all_false))
+ if (all_true) {
+ /* Right now we only support 8-8-8 DTR mode. */
+ if (op->cmd.nbytes && op->cmd.buswidth != 8)
+ return false;
+ if (op->addr.nbytes && op->addr.buswidth != 8)
+ return false;
+ if (op->data.nbytes && op->data.buswidth != 8)
+ return false;
+ } else if (all_false) {
+ /* Only 1-1-X ops are supported without DTR */
+ if (op->cmd.nbytes && op->cmd.buswidth > 1)
+ return false;
+ if (op->addr.nbytes && op->addr.buswidth > 1)
+ return false;
+ } else {
+ /* Mixed DTR modes are not supported. */
return false;
+ }
return spi_mem_default_supports_op(mem, op);
}
diff --git a/drivers/spi/spi-intel-pci.c b/drivers/spi/spi-intel-pci.c
index a5ef7a526a7f..f6eec7a869b6 100644
--- a/drivers/spi/spi-intel-pci.c
+++ b/drivers/spi/spi-intel-pci.c
@@ -72,6 +72,7 @@ static const struct pci_device_id intel_spi_pci_ids[] = {
{ PCI_VDEVICE(INTEL, 0x4da4), (unsigned long)&bxt_info },
{ PCI_VDEVICE(INTEL, 0x51a4), (unsigned long)&cnl_info },
{ PCI_VDEVICE(INTEL, 0x54a4), (unsigned long)&cnl_info },
+ { PCI_VDEVICE(INTEL, 0x7a24), (unsigned long)&cnl_info },
{ PCI_VDEVICE(INTEL, 0x7aa4), (unsigned long)&cnl_info },
{ PCI_VDEVICE(INTEL, 0xa0a4), (unsigned long)&bxt_info },
{ PCI_VDEVICE(INTEL, 0xa1a4), (unsigned long)&bxt_info },
diff --git a/drivers/spi/spi-mtk-nor.c b/drivers/spi/spi-mtk-nor.c
index 94fb09696677..d167699a1a96 100644
--- a/drivers/spi/spi-mtk-nor.c
+++ b/drivers/spi/spi-mtk-nor.c
@@ -960,7 +960,17 @@ static int __maybe_unused mtk_nor_suspend(struct device *dev)
static int __maybe_unused mtk_nor_resume(struct device *dev)
{
- return pm_runtime_force_resume(dev);
+ struct spi_controller *ctlr = dev_get_drvdata(dev);
+ struct mtk_nor *sp = spi_controller_get_devdata(ctlr);
+ int ret;
+
+ ret = pm_runtime_force_resume(dev);
+ if (ret)
+ return ret;
+
+ mtk_nor_init(sp);
+
+ return 0;
}
static const struct dev_pm_ops mtk_nor_pm_ops = {
diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c
index 6fe6a6bab3f4..ddf6c2a7212b 100644
--- a/drivers/target/iscsi/iscsi_target.c
+++ b/drivers/target/iscsi/iscsi_target.c
@@ -3596,10 +3596,7 @@ static int iscsit_send_reject(
void iscsit_thread_get_cpumask(struct iscsi_conn *conn)
{
int ord, cpu;
- cpumask_t conn_allowed_cpumask;
-
- cpumask_and(&conn_allowed_cpumask, iscsit_global->allowed_cpumask,
- cpu_online_mask);
+ cpumask_var_t conn_allowed_cpumask;
/*
* bitmap_id is assigned from iscsit_global->ts_bitmap from
@@ -3609,13 +3606,28 @@ void iscsit_thread_get_cpumask(struct iscsi_conn *conn)
* iSCSI connection's RX/TX threads will be scheduled to
* execute upon.
*/
- cpumask_clear(conn->conn_cpumask);
- ord = conn->bitmap_id % cpumask_weight(&conn_allowed_cpumask);
- for_each_cpu(cpu, &conn_allowed_cpumask) {
- if (ord-- == 0) {
- cpumask_set_cpu(cpu, conn->conn_cpumask);
- return;
+ if (!zalloc_cpumask_var(&conn_allowed_cpumask, GFP_KERNEL)) {
+ ord = conn->bitmap_id % cpumask_weight(cpu_online_mask);
+ for_each_online_cpu(cpu) {
+ if (ord-- == 0) {
+ cpumask_set_cpu(cpu, conn->conn_cpumask);
+ return;
+ }
+ }
+ } else {
+ cpumask_and(conn_allowed_cpumask, iscsit_global->allowed_cpumask,
+ cpu_online_mask);
+
+ cpumask_clear(conn->conn_cpumask);
+ ord = conn->bitmap_id % cpumask_weight(conn_allowed_cpumask);
+ for_each_cpu(cpu, conn_allowed_cpumask) {
+ if (ord-- == 0) {
+ cpumask_set_cpu(cpu, conn->conn_cpumask);
+ free_cpumask_var(conn_allowed_cpumask);
+ return;
+ }
}
+ free_cpumask_var(conn_allowed_cpumask);
}
/*
* This should never be reached..
diff --git a/drivers/target/iscsi/iscsi_target_configfs.c b/drivers/target/iscsi/iscsi_target_configfs.c
index 0cedcfe207b5..57b4fd56d92a 100644
--- a/drivers/target/iscsi/iscsi_target_configfs.c
+++ b/drivers/target/iscsi/iscsi_target_configfs.c
@@ -1137,23 +1137,27 @@ static ssize_t lio_target_wwn_cpus_allowed_list_show(
static ssize_t lio_target_wwn_cpus_allowed_list_store(
struct config_item *item, const char *page, size_t count)
{
- int ret;
+ int ret = -ENOMEM;
char *orig;
- cpumask_t new_allowed_cpumask;
+ cpumask_var_t new_allowed_cpumask;
+
+ if (!zalloc_cpumask_var(&new_allowed_cpumask, GFP_KERNEL))
+ goto out;
orig = kstrdup(page, GFP_KERNEL);
if (!orig)
- return -ENOMEM;
+ goto out_free_cpumask;
- cpumask_clear(&new_allowed_cpumask);
- ret = cpulist_parse(orig, &new_allowed_cpumask);
+ ret = cpulist_parse(orig, new_allowed_cpumask);
+ if (!ret)
+ cpumask_copy(iscsit_global->allowed_cpumask,
+ new_allowed_cpumask);
kfree(orig);
- if (ret != 0)
- return ret;
-
- cpumask_copy(iscsit_global->allowed_cpumask, &new_allowed_cpumask);
- return count;
+out_free_cpumask:
+ free_cpumask_var(new_allowed_cpumask);
+out:
+ return ret ? ret : count;
}
CONFIGFS_ATTR(lio_target_wwn_, cpus_allowed_list);
diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c
index 44bb380e7390..25f33eb25337 100644
--- a/drivers/target/target_core_device.c
+++ b/drivers/target/target_core_device.c
@@ -829,28 +829,26 @@ struct se_device *target_alloc_device(struct se_hba *hba, const char *name)
}
/*
- * Check if the underlying struct block_device request_queue supports
- * the QUEUE_FLAG_DISCARD bit for UNMAP/WRITE_SAME in SCSI + TRIM
- * in ATA and we need to set TPE=1
+ * Check if the underlying struct block_device supports discard and if yes
+ * configure the UNMAP parameters.
*/
bool target_configure_unmap_from_queue(struct se_dev_attrib *attrib,
- struct request_queue *q)
+ struct block_device *bdev)
{
- int block_size = queue_logical_block_size(q);
+ int block_size = bdev_logical_block_size(bdev);
- if (!blk_queue_discard(q))
+ if (!bdev_max_discard_sectors(bdev))
return false;
attrib->max_unmap_lba_count =
- q->limits.max_discard_sectors >> (ilog2(block_size) - 9);
+ bdev_max_discard_sectors(bdev) >> (ilog2(block_size) - 9);
/*
* Currently hardcoded to 1 in Linux/SCSI code..
*/
attrib->max_unmap_block_desc_count = 1;
- attrib->unmap_granularity = q->limits.discard_granularity / block_size;
- attrib->unmap_granularity_alignment = q->limits.discard_alignment /
- block_size;
- attrib->unmap_zeroes_data = !!(q->limits.max_write_zeroes_sectors);
+ attrib->unmap_granularity = bdev_discard_granularity(bdev) / block_size;
+ attrib->unmap_granularity_alignment =
+ bdev_discard_alignment(bdev) / block_size;
return true;
}
EXPORT_SYMBOL(target_configure_unmap_from_queue);
diff --git a/drivers/target/target_core_file.c b/drivers/target/target_core_file.c
index 8190b840065f..e68f1cc8ef98 100644
--- a/drivers/target/target_core_file.c
+++ b/drivers/target/target_core_file.c
@@ -134,10 +134,10 @@ static int fd_configure_device(struct se_device *dev)
*/
inode = file->f_mapping->host;
if (S_ISBLK(inode->i_mode)) {
- struct request_queue *q = bdev_get_queue(I_BDEV(inode));
+ struct block_device *bdev = I_BDEV(inode);
unsigned long long dev_size;
- fd_dev->fd_block_size = bdev_logical_block_size(I_BDEV(inode));
+ fd_dev->fd_block_size = bdev_logical_block_size(bdev);
/*
* Determine the number of bytes from i_size_read() minus
* one (1) logical sector from underlying struct block_device
@@ -150,7 +150,7 @@ static int fd_configure_device(struct se_device *dev)
dev_size, div_u64(dev_size, fd_dev->fd_block_size),
fd_dev->fd_block_size);
- if (target_configure_unmap_from_queue(&dev->dev_attrib, q))
+ if (target_configure_unmap_from_queue(&dev->dev_attrib, bdev))
pr_debug("IFILE: BLOCK Discard support available,"
" disabled by default\n");
/*
@@ -159,7 +159,7 @@ static int fd_configure_device(struct se_device *dev)
*/
dev->dev_attrib.max_write_same_len = 0xFFFF;
- if (blk_queue_nonrot(q))
+ if (bdev_nonrot(bdev))
dev->dev_attrib.is_nonrot = 1;
} else {
if (!(fd_dev->fbd_flags & FBDF_HAS_SIZE)) {
@@ -558,7 +558,7 @@ fd_execute_unmap(struct se_cmd *cmd, sector_t lba, sector_t nolb)
ret = blkdev_issue_discard(bdev,
target_to_linux_sector(dev, lba),
target_to_linux_sector(dev, nolb),
- GFP_KERNEL, 0);
+ GFP_KERNEL);
if (ret < 0) {
pr_warn("FILEIO: blkdev_issue_discard() failed: %d\n",
ret);
diff --git a/drivers/target/target_core_iblock.c b/drivers/target/target_core_iblock.c
index 87ede165ddba..378c80313a0f 100644
--- a/drivers/target/target_core_iblock.c
+++ b/drivers/target/target_core_iblock.c
@@ -119,7 +119,7 @@ static int iblock_configure_device(struct se_device *dev)
dev->dev_attrib.hw_max_sectors = queue_max_hw_sectors(q);
dev->dev_attrib.hw_queue_depth = q->nr_requests;
- if (target_configure_unmap_from_queue(&dev->dev_attrib, q))
+ if (target_configure_unmap_from_queue(&dev->dev_attrib, bd))
pr_debug("IBLOCK: BLOCK Discard support available,"
" disabled by default\n");
@@ -133,7 +133,7 @@ static int iblock_configure_device(struct se_device *dev)
else
dev->dev_attrib.max_write_same_len = 0xFFFF;
- if (blk_queue_nonrot(q))
+ if (bdev_nonrot(bd))
dev->dev_attrib.is_nonrot = 1;
bi = bdev_get_integrity(bd);
@@ -434,7 +434,7 @@ iblock_execute_unmap(struct se_cmd *cmd, sector_t lba, sector_t nolb)
ret = blkdev_issue_discard(bdev,
target_to_linux_sector(dev, lba),
target_to_linux_sector(dev, nolb),
- GFP_KERNEL, 0);
+ GFP_KERNEL);
if (ret < 0) {
pr_err("blkdev_issue_discard() failed: %d\n", ret);
return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
@@ -727,17 +727,16 @@ iblock_execute_rw(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents,
if (data_direction == DMA_TO_DEVICE) {
struct iblock_dev *ib_dev = IBLOCK_DEV(dev);
- struct request_queue *q = bdev_get_queue(ib_dev->ibd_bd);
/*
* Force writethrough using REQ_FUA if a volatile write cache
* is not enabled, or if initiator set the Force Unit Access bit.
*/
opf = REQ_OP_WRITE;
miter_dir = SG_MITER_TO_SG;
- if (test_bit(QUEUE_FLAG_FUA, &q->queue_flags)) {
+ if (bdev_fua(ib_dev->ibd_bd)) {
if (cmd->se_cmd_flags & SCF_FUA)
opf |= REQ_FUA;
- else if (!test_bit(QUEUE_FLAG_WC, &q->queue_flags))
+ else if (!bdev_write_cache(ib_dev->ibd_bd))
opf |= REQ_FUA;
}
} else {
@@ -886,11 +885,7 @@ iblock_parse_cdb(struct se_cmd *cmd)
static bool iblock_get_write_cache(struct se_device *dev)
{
- struct iblock_dev *ib_dev = IBLOCK_DEV(dev);
- struct block_device *bd = ib_dev->ibd_bd;
- struct request_queue *q = bdev_get_queue(bd);
-
- return test_bit(QUEUE_FLAG_WC, &q->queue_flags);
+ return bdev_write_cache(IBLOCK_DEV(dev)->ibd_bd);
}
static const struct target_backend_ops iblock_ops = {
diff --git a/drivers/target/target_core_pscsi.c b/drivers/target/target_core_pscsi.c
index ff292b75e23f..bb3fb18b2316 100644
--- a/drivers/target/target_core_pscsi.c
+++ b/drivers/target/target_core_pscsi.c
@@ -588,7 +588,7 @@ static void pscsi_destroy_device(struct se_device *dev)
}
static void pscsi_complete_cmd(struct se_cmd *cmd, u8 scsi_status,
- unsigned char *req_sense)
+ unsigned char *req_sense, int valid_data)
{
struct pscsi_dev_virt *pdv = PSCSI_DEV(cmd->se_dev);
struct scsi_device *sd = pdv->pdv_sd;
@@ -681,7 +681,7 @@ after_mode_select:
* back despite framework assumption that a
* check condition means there is no data
*/
- if (sd->type == TYPE_TAPE &&
+ if (sd->type == TYPE_TAPE && valid_data &&
cmd->data_direction == DMA_FROM_DEVICE) {
/*
* is sense data valid, fixed format,
@@ -818,24 +818,8 @@ static ssize_t pscsi_show_configfs_dev_params(struct se_device *dev, char *b)
static void pscsi_bi_endio(struct bio *bio)
{
- bio_put(bio);
-}
-
-static inline struct bio *pscsi_get_bio(int nr_vecs)
-{
- struct bio *bio;
- /*
- * Use bio_malloc() following the comment in for bio -> struct request
- * in block/blk-core.c:blk_make_request()
- */
- bio = bio_kmalloc(GFP_KERNEL, nr_vecs);
- if (!bio) {
- pr_err("PSCSI: bio_kmalloc() failed\n");
- return NULL;
- }
- bio->bi_end_io = pscsi_bi_endio;
-
- return bio;
+ bio_uninit(bio);
+ kfree(bio);
}
static sense_reason_t
@@ -878,15 +862,12 @@ pscsi_map_sg(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents,
if (!bio) {
new_bio:
nr_vecs = bio_max_segs(nr_pages);
- /*
- * Calls bio_kmalloc() and sets bio->bi_end_io()
- */
- bio = pscsi_get_bio(nr_vecs);
+ bio = bio_kmalloc(nr_vecs, GFP_KERNEL);
if (!bio)
goto fail;
-
- if (rw)
- bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
+ bio_init(bio, NULL, bio->bi_inline_vecs, nr_vecs,
+ rw ? REQ_OP_WRITE : REQ_OP_READ);
+ bio->bi_end_io = pscsi_bi_endio;
pr_debug("PSCSI: Allocated bio: %p,"
" dir: %s nr_vecs: %d\n", bio,
@@ -912,11 +893,6 @@ new_bio:
goto fail;
}
- /*
- * Clear the pointer so that another bio will
- * be allocated with pscsi_get_bio() above.
- */
- bio = NULL;
goto new_bio;
}
@@ -1032,6 +1008,7 @@ static void pscsi_req_done(struct request *req, blk_status_t status)
struct se_cmd *cmd = req->end_io_data;
struct scsi_cmnd *scmd = blk_mq_rq_to_pdu(req);
enum sam_status scsi_status = scmd->result & 0xff;
+ int valid_data = cmd->data_length - scmd->resid_len;
u8 *cdb = cmd->priv;
if (scsi_status != SAM_STAT_GOOD) {
@@ -1039,12 +1016,11 @@ static void pscsi_req_done(struct request *req, blk_status_t status)
" 0x%02x Result: 0x%08x\n", cmd, cdb[0], scmd->result);
}
- pscsi_complete_cmd(cmd, scsi_status, scmd->sense_buffer);
+ pscsi_complete_cmd(cmd, scsi_status, scmd->sense_buffer, valid_data);
switch (host_byte(scmd->result)) {
case DID_OK:
- target_complete_cmd_with_length(cmd, scsi_status,
- cmd->data_length - scmd->resid_len);
+ target_complete_cmd_with_length(cmd, scsi_status, valid_data);
break;
default:
pr_debug("PSCSI Host Byte exception at cmd: %p CDB:"
diff --git a/drivers/tee/optee/ffa_abi.c b/drivers/tee/optee/ffa_abi.c
index a5eb4ef46971..c9b3b2cfb2b2 100644
--- a/drivers/tee/optee/ffa_abi.c
+++ b/drivers/tee/optee/ffa_abi.c
@@ -865,6 +865,7 @@ err_rhashtable_free:
rhashtable_free_and_destroy(&optee->ffa.global_ids, rh_free_fn, NULL);
optee_supp_uninit(&optee->supp);
mutex_destroy(&optee->call_queue.mutex);
+ mutex_destroy(&optee->ffa.mutex);
err_unreg_supp_teedev:
tee_device_unregister(optee->supp_teedev);
err_unreg_teedev:
diff --git a/drivers/thermal/Kconfig b/drivers/thermal/Kconfig
index e37691e0bf20..0e5cc948373c 100644
--- a/drivers/thermal/Kconfig
+++ b/drivers/thermal/Kconfig
@@ -113,8 +113,10 @@ config THERMAL_DEFAULT_GOV_USER_SPACE
bool "user_space"
select THERMAL_GOV_USER_SPACE
help
- Select this if you want to let the user space manage the
- platform thermals.
+ The Userspace governor allows to get trip point crossed
+ notification from the kernel via uevents. It is recommended
+ to use the netlink interface instead which gives richer
+ information about the thermal framework events.
config THERMAL_DEFAULT_GOV_POWER_ALLOCATOR
bool "power_allocator"
diff --git a/drivers/thermal/gov_user_space.c b/drivers/thermal/gov_user_space.c
index 64a18e354a20..a62a4e90bd3f 100644
--- a/drivers/thermal/gov_user_space.c
+++ b/drivers/thermal/gov_user_space.c
@@ -17,8 +17,7 @@
static int user_space_bind(struct thermal_zone_device *tz)
{
- pr_warn_once("Userspace governor deprecated: use thermal netlink " \
- "notification instead\n");
+ pr_info_once("Consider using thermal netlink events interface\n");
return 0;
}
diff --git a/drivers/thermal/intel/int340x_thermal/int3400_thermal.c b/drivers/thermal/intel/int340x_thermal/int3400_thermal.c
index 4954800b9850..79931ddc582a 100644
--- a/drivers/thermal/intel/int340x_thermal/int3400_thermal.c
+++ b/drivers/thermal/intel/int340x_thermal/int3400_thermal.c
@@ -68,7 +68,7 @@ static int evaluate_odvp(struct int3400_thermal_priv *priv);
struct odvp_attr {
int odvp;
struct int3400_thermal_priv *priv;
- struct kobj_attribute attr;
+ struct device_attribute attr;
};
static ssize_t data_vault_read(struct file *file, struct kobject *kobj,
@@ -194,12 +194,31 @@ static int int3400_thermal_run_osc(acpi_handle handle, char *uuid_str, int *enab
return result;
}
+static int set_os_uuid_mask(struct int3400_thermal_priv *priv, u32 mask)
+{
+ int cap = 0;
+
+ /*
+ * Capability bits:
+ * Bit 0: set to 1 to indicate DPTF is active
+ * Bi1 1: set to 1 to active cooling is supported by user space daemon
+ * Bit 2: set to 1 to passive cooling is supported by user space daemon
+ * Bit 3: set to 1 to critical trip is handled by user space daemon
+ */
+ if (mask)
+ cap = (priv->os_uuid_mask << 1) | 0x01;
+
+ return int3400_thermal_run_osc(priv->adev->handle,
+ "b23ba85d-c8b7-3542-88de-8de2ffcfd698",
+ &cap);
+}
+
static ssize_t current_uuid_store(struct device *dev,
struct device_attribute *attr,
const char *buf, size_t count)
{
struct int3400_thermal_priv *priv = dev_get_drvdata(dev);
- int i;
+ int ret, i;
for (i = 0; i < INT3400_THERMAL_MAXIMUM_UUID; ++i) {
if (!strncmp(buf, int3400_thermal_uuids[i],
@@ -231,19 +250,7 @@ static ssize_t current_uuid_store(struct device *dev,
}
if (priv->os_uuid_mask) {
- int cap, ret;
-
- /*
- * Capability bits:
- * Bit 0: set to 1 to indicate DPTF is active
- * Bi1 1: set to 1 to active cooling is supported by user space daemon
- * Bit 2: set to 1 to passive cooling is supported by user space daemon
- * Bit 3: set to 1 to critical trip is handled by user space daemon
- */
- cap = ((priv->os_uuid_mask << 1) | 0x01);
- ret = int3400_thermal_run_osc(priv->adev->handle,
- "b23ba85d-c8b7-3542-88de-8de2ffcfd698",
- &cap);
+ ret = set_os_uuid_mask(priv, priv->os_uuid_mask);
if (ret)
return ret;
}
@@ -311,7 +318,7 @@ end:
return result;
}
-static ssize_t odvp_show(struct kobject *kobj, struct kobj_attribute *attr,
+static ssize_t odvp_show(struct device *dev, struct device_attribute *attr,
char *buf)
{
struct odvp_attr *odvp_attr;
@@ -469,17 +476,26 @@ static int int3400_thermal_change_mode(struct thermal_zone_device *thermal,
if (mode != thermal->mode) {
int enabled;
+ enabled = mode == THERMAL_DEVICE_ENABLED;
+
+ if (priv->os_uuid_mask) {
+ if (!enabled) {
+ priv->os_uuid_mask = 0;
+ result = set_os_uuid_mask(priv, priv->os_uuid_mask);
+ }
+ goto eval_odvp;
+ }
+
if (priv->current_uuid_index < 0 ||
priv->current_uuid_index >= INT3400_THERMAL_MAXIMUM_UUID)
return -EINVAL;
- enabled = (mode == THERMAL_DEVICE_ENABLED);
result = int3400_thermal_run_osc(priv->adev->handle,
int3400_thermal_uuids[priv->current_uuid_index],
&enabled);
}
-
+eval_odvp:
evaluate_odvp(priv);
return result;
diff --git a/drivers/thermal/thermal_sysfs.c b/drivers/thermal/thermal_sysfs.c
index f154bada2906..1c4aac8464a7 100644
--- a/drivers/thermal/thermal_sysfs.c
+++ b/drivers/thermal/thermal_sysfs.c
@@ -610,9 +610,6 @@ cur_state_store(struct device *dev, struct device_attribute *attr,
unsigned long state;
int result;
- dev_warn_once(&cdev->device,
- "Setting cooling device state is deprecated\n");
-
if (sscanf(buf, "%ld\n", &state) != 1)
return -EINVAL;
diff --git a/drivers/tty/n_gsm.c b/drivers/tty/n_gsm.c
index fa92f727fdf8..fd8b86dde525 100644
--- a/drivers/tty/n_gsm.c
+++ b/drivers/tty/n_gsm.c
@@ -73,6 +73,8 @@ module_param(debug, int, 0600);
*/
#define MAX_MRU 1500
#define MAX_MTU 1500
+/* SOF, ADDR, CTRL, LEN1, LEN2, ..., FCS, EOF */
+#define PROT_OVERHEAD 7
#define GSM_NET_TX_TIMEOUT (HZ*10)
/*
@@ -135,6 +137,7 @@ struct gsm_dlci {
int retries;
/* Uplink tty if active */
struct tty_port port; /* The tty bound to this DLCI if there is one */
+#define TX_SIZE 4096 /* Must be power of 2. */
struct kfifo fifo; /* Queue fifo for the DLCI */
int adaption; /* Adaption layer in use */
int prev_adaption;
@@ -219,7 +222,6 @@ struct gsm_mux {
int encoding;
u8 control;
u8 fcs;
- u8 received_fcs;
u8 *txframe; /* TX framing buffer */
/* Method for the receiver side */
@@ -231,6 +233,7 @@ struct gsm_mux {
int initiator; /* Did we initiate connection */
bool dead; /* Has the mux been shut down */
struct gsm_dlci *dlci[NUM_DLCI];
+ int old_c_iflag; /* termios c_iflag value before attach */
bool constipated; /* Asked by remote to shut up */
spinlock_t tx_lock;
@@ -271,10 +274,6 @@ static DEFINE_SPINLOCK(gsm_mux_lock);
static struct tty_driver *gsm_tty_driver;
-/* Save dlci open address */
-static int addr_open[256] = { 0 };
-/* Save dlci open count */
-static int addr_cnt;
/*
* This section of the driver logic implements the GSM encodings
* both the basic and the 'advanced'. Reliable transport is not
@@ -369,6 +368,7 @@ static const u8 gsm_fcs8[256] = {
#define GOOD_FCS 0xCF
static int gsmld_output(struct gsm_mux *gsm, u8 *data, int len);
+static int gsm_modem_update(struct gsm_dlci *dlci, u8 brk);
/**
* gsm_fcs_add - update FCS
@@ -832,7 +832,7 @@ static int gsm_dlci_data_output(struct gsm_mux *gsm, struct gsm_dlci *dlci)
break;
case 2: /* Unstructed with modem bits.
Always one byte as we never send inline break data */
- *dp++ = gsm_encode_modem(dlci);
+ *dp++ = (gsm_encode_modem(dlci) << 1) | EA;
break;
}
WARN_ON(kfifo_out_locked(&dlci->fifo, dp , len, &dlci->lock) != len);
@@ -917,6 +917,66 @@ static int gsm_dlci_data_output_framed(struct gsm_mux *gsm,
}
/**
+ * gsm_dlci_modem_output - try and push modem status out of a DLCI
+ * @gsm: mux
+ * @dlci: the DLCI to pull modem status from
+ * @brk: break signal
+ *
+ * Push an empty frame in to the transmit queue to update the modem status
+ * bits and to transmit an optional break.
+ *
+ * Caller must hold the tx_lock of the mux.
+ */
+
+static int gsm_dlci_modem_output(struct gsm_mux *gsm, struct gsm_dlci *dlci,
+ u8 brk)
+{
+ u8 *dp = NULL;
+ struct gsm_msg *msg;
+ int size = 0;
+
+ /* for modem bits without break data */
+ switch (dlci->adaption) {
+ case 1: /* Unstructured */
+ break;
+ case 2: /* Unstructured with modem bits. */
+ size++;
+ if (brk > 0)
+ size++;
+ break;
+ default:
+ pr_err("%s: unsupported adaption %d\n", __func__,
+ dlci->adaption);
+ return -EINVAL;
+ }
+
+ msg = gsm_data_alloc(gsm, dlci->addr, size, gsm->ftype);
+ if (!msg) {
+ pr_err("%s: gsm_data_alloc error", __func__);
+ return -ENOMEM;
+ }
+ dp = msg->data;
+ switch (dlci->adaption) {
+ case 1: /* Unstructured */
+ break;
+ case 2: /* Unstructured with modem bits. */
+ if (brk == 0) {
+ *dp++ = (gsm_encode_modem(dlci) << 1) | EA;
+ } else {
+ *dp++ = gsm_encode_modem(dlci) << 1;
+ *dp++ = (brk << 4) | 2 | EA; /* Length, Break, EA */
+ }
+ break;
+ default:
+ /* Handled above */
+ break;
+ }
+
+ __gsm_data_queue(dlci, msg);
+ return size;
+}
+
+/**
* gsm_dlci_data_sweep - look for data to send
* @gsm: the GSM mux
*
@@ -1093,7 +1153,6 @@ static void gsm_control_modem(struct gsm_mux *gsm, const u8 *data, int clen)
{
unsigned int addr = 0;
unsigned int modem = 0;
- unsigned int brk = 0;
struct gsm_dlci *dlci;
int len = clen;
int slen;
@@ -1123,17 +1182,8 @@ static void gsm_control_modem(struct gsm_mux *gsm, const u8 *data, int clen)
return;
}
len--;
- if (len > 0) {
- while (gsm_read_ea(&brk, *dp++) == 0) {
- len--;
- if (len == 0)
- return;
- }
- modem <<= 7;
- modem |= (brk & 0x7f);
- }
tty = tty_port_tty_get(&dlci->port);
- gsm_process_modem(tty, dlci, modem, slen);
+ gsm_process_modem(tty, dlci, modem, slen - len);
if (tty) {
tty_wakeup(tty);
tty_kref_put(tty);
@@ -1193,7 +1243,6 @@ static void gsm_control_rls(struct gsm_mux *gsm, const u8 *data, int clen)
}
static void gsm_dlci_begin_close(struct gsm_dlci *dlci);
-static void gsm_dlci_close(struct gsm_dlci *dlci);
/**
* gsm_control_message - DLCI 0 control processing
@@ -1212,28 +1261,15 @@ static void gsm_control_message(struct gsm_mux *gsm, unsigned int command,
{
u8 buf[1];
unsigned long flags;
- struct gsm_dlci *dlci;
- int i;
- int address;
switch (command) {
case CMD_CLD: {
- if (addr_cnt > 0) {
- for (i = 0; i < addr_cnt; i++) {
- address = addr_open[i];
- dlci = gsm->dlci[address];
- gsm_dlci_close(dlci);
- addr_open[i] = 0;
- }
- }
+ struct gsm_dlci *dlci = gsm->dlci[0];
/* Modem wishes to close down */
- dlci = gsm->dlci[0];
if (dlci) {
dlci->dead = true;
gsm->dead = true;
- gsm_dlci_close(dlci);
- addr_cnt = 0;
- gsm_response(gsm, 0, UA|PF);
+ gsm_dlci_begin_close(dlci);
}
}
break;
@@ -1326,11 +1362,12 @@ static void gsm_control_response(struct gsm_mux *gsm, unsigned int command,
static void gsm_control_transmit(struct gsm_mux *gsm, struct gsm_control *ctrl)
{
- struct gsm_msg *msg = gsm_data_alloc(gsm, 0, ctrl->len + 1, gsm->ftype);
+ struct gsm_msg *msg = gsm_data_alloc(gsm, 0, ctrl->len + 2, gsm->ftype);
if (msg == NULL)
return;
- msg->data[0] = (ctrl->cmd << 1) | 2 | EA; /* command */
- memcpy(msg->data + 1, ctrl->data, ctrl->len);
+ msg->data[0] = (ctrl->cmd << 1) | CR | EA; /* command */
+ msg->data[1] = (ctrl->len << 1) | EA;
+ memcpy(msg->data + 2, ctrl->data, ctrl->len);
gsm_data_queue(gsm->dlci[0], msg);
}
@@ -1353,7 +1390,6 @@ static void gsm_control_retransmit(struct timer_list *t)
spin_lock_irqsave(&gsm->control_lock, flags);
ctrl = gsm->pending_cmd;
if (ctrl) {
- gsm->cretries--;
if (gsm->cretries == 0) {
gsm->pending_cmd = NULL;
ctrl->error = -ETIMEDOUT;
@@ -1362,6 +1398,7 @@ static void gsm_control_retransmit(struct timer_list *t)
wake_up(&gsm->event);
return;
}
+ gsm->cretries--;
gsm_control_transmit(gsm, ctrl);
mod_timer(&gsm->t2_timer, jiffies + gsm->t2 * HZ / 100);
}
@@ -1402,7 +1439,7 @@ retry:
/* If DLCI0 is in ADM mode skip retries, it won't respond */
if (gsm->dlci[0]->mode == DLCI_MODE_ADM)
- gsm->cretries = 1;
+ gsm->cretries = 0;
else
gsm->cretries = gsm->n2;
@@ -1450,20 +1487,22 @@ static int gsm_control_wait(struct gsm_mux *gsm, struct gsm_control *control)
static void gsm_dlci_close(struct gsm_dlci *dlci)
{
+ unsigned long flags;
+
del_timer(&dlci->t1);
if (debug & 8)
pr_debug("DLCI %d goes closed.\n", dlci->addr);
dlci->state = DLCI_CLOSED;
if (dlci->addr != 0) {
tty_port_tty_hangup(&dlci->port, false);
+ spin_lock_irqsave(&dlci->lock, flags);
kfifo_reset(&dlci->fifo);
+ spin_unlock_irqrestore(&dlci->lock, flags);
/* Ensure that gsmtty_open() can return. */
tty_port_set_initialized(&dlci->port, 0);
wake_up_interruptible(&dlci->port.open_wait);
} else
dlci->gsm->dead = true;
- /* Unregister gsmtty driver,report gsmtty dev remove uevent for user */
- tty_unregister_device(gsm_tty_driver, dlci->addr);
wake_up(&dlci->gsm->event);
/* A DLCI 0 close is a MUX termination so we need to kick that
back to userspace somehow */
@@ -1485,8 +1524,9 @@ static void gsm_dlci_open(struct gsm_dlci *dlci)
dlci->state = DLCI_OPEN;
if (debug & 8)
pr_debug("DLCI %d goes open.\n", dlci->addr);
- /* Register gsmtty driver,report gsmtty dev add uevent for user */
- tty_register_device(gsm_tty_driver, dlci->addr, NULL);
+ /* Send current modem state */
+ if (dlci->addr)
+ gsm_modem_update(dlci, 0);
wake_up(&dlci->gsm->event);
}
@@ -1619,10 +1659,12 @@ static void gsm_dlci_data(struct gsm_dlci *dlci, const u8 *data, int clen)
if (len == 0)
return;
}
+ len--;
slen++;
tty = tty_port_tty_get(port);
if (tty) {
gsm_process_modem(tty, dlci, modem, slen);
+ tty_wakeup(tty);
tty_kref_put(tty);
}
fallthrough;
@@ -1690,7 +1732,7 @@ static struct gsm_dlci *gsm_dlci_alloc(struct gsm_mux *gsm, int addr)
return NULL;
spin_lock_init(&dlci->lock);
mutex_init(&dlci->mutex);
- if (kfifo_alloc(&dlci->fifo, 4096, GFP_KERNEL) < 0) {
+ if (kfifo_alloc(&dlci->fifo, TX_SIZE, GFP_KERNEL) < 0) {
kfree(dlci);
return NULL;
}
@@ -1793,19 +1835,7 @@ static void gsm_queue(struct gsm_mux *gsm)
struct gsm_dlci *dlci;
u8 cr;
int address;
- int i, j, k, address_tmp;
- /* We have to sneak a look at the packet body to do the FCS.
- A somewhat layering violation in the spec */
- if ((gsm->control & ~PF) == UI)
- gsm->fcs = gsm_fcs_add_block(gsm->fcs, gsm->buf, gsm->len);
- if (gsm->encoding == 0) {
- /* WARNING: gsm->received_fcs is used for
- gsm->encoding = 0 only.
- In this case it contain the last piece of data
- required to generate final CRC */
- gsm->fcs = gsm_fcs_add(gsm->fcs, gsm->received_fcs);
- }
if (gsm->fcs != GOOD_FCS) {
gsm->bad_fcs++;
if (debug & 4)
@@ -1836,11 +1866,6 @@ static void gsm_queue(struct gsm_mux *gsm)
else {
gsm_response(gsm, address, UA|PF);
gsm_dlci_open(dlci);
- /* Save dlci open address */
- if (address) {
- addr_open[addr_cnt] = address;
- addr_cnt++;
- }
}
break;
case DISC|PF:
@@ -1851,35 +1876,9 @@ static void gsm_queue(struct gsm_mux *gsm)
return;
}
/* Real close complete */
- if (!address) {
- if (addr_cnt > 0) {
- for (i = 0; i < addr_cnt; i++) {
- address = addr_open[i];
- dlci = gsm->dlci[address];
- gsm_dlci_close(dlci);
- addr_open[i] = 0;
- }
- }
- dlci = gsm->dlci[0];
- gsm_dlci_close(dlci);
- addr_cnt = 0;
- gsm_response(gsm, 0, UA|PF);
- } else {
- gsm_response(gsm, address, UA|PF);
- gsm_dlci_close(dlci);
- /* clear dlci address */
- for (j = 0; j < addr_cnt; j++) {
- address_tmp = addr_open[j];
- if (address_tmp == address) {
- for (k = j; k < addr_cnt; k++)
- addr_open[k] = addr_open[k+1];
- addr_cnt--;
- break;
- }
- }
- }
+ gsm_response(gsm, address, UA|PF);
+ gsm_dlci_close(dlci);
break;
- case UA:
case UA|PF:
if (cr == 0 || dlci == NULL)
break;
@@ -1993,19 +1992,25 @@ static void gsm0_receive(struct gsm_mux *gsm, unsigned char c)
break;
case GSM_DATA: /* Data */
gsm->buf[gsm->count++] = c;
- if (gsm->count == gsm->len)
+ if (gsm->count == gsm->len) {
+ /* Calculate final FCS for UI frames over all data */
+ if ((gsm->control & ~PF) != UIH) {
+ gsm->fcs = gsm_fcs_add_block(gsm->fcs, gsm->buf,
+ gsm->count);
+ }
gsm->state = GSM_FCS;
+ }
break;
case GSM_FCS: /* FCS follows the packet */
- gsm->received_fcs = c;
- gsm_queue(gsm);
+ gsm->fcs = gsm_fcs_add(gsm->fcs, c);
gsm->state = GSM_SSOF;
break;
case GSM_SSOF:
- if (c == GSM0_SOF) {
- gsm->state = GSM_SEARCH;
- break;
- }
+ gsm->state = GSM_SEARCH;
+ if (c == GSM0_SOF)
+ gsm_queue(gsm);
+ else
+ gsm->bad_size++;
break;
default:
pr_debug("%s: unhandled state: %d\n", __func__, gsm->state);
@@ -2023,12 +2028,35 @@ static void gsm0_receive(struct gsm_mux *gsm, unsigned char c)
static void gsm1_receive(struct gsm_mux *gsm, unsigned char c)
{
+ /* handle XON/XOFF */
+ if ((c & ISO_IEC_646_MASK) == XON) {
+ gsm->constipated = true;
+ return;
+ } else if ((c & ISO_IEC_646_MASK) == XOFF) {
+ gsm->constipated = false;
+ /* Kick the link in case it is idling */
+ gsm_data_kick(gsm, NULL);
+ return;
+ }
if (c == GSM1_SOF) {
- /* EOF is only valid in frame if we have got to the data state
- and received at least one byte (the FCS) */
- if (gsm->state == GSM_DATA && gsm->count) {
- /* Extract the FCS */
+ /* EOF is only valid in frame if we have got to the data state */
+ if (gsm->state == GSM_DATA) {
+ if (gsm->count < 1) {
+ /* Missing FSC */
+ gsm->malformed++;
+ gsm->state = GSM_START;
+ return;
+ }
+ /* Remove the FCS from data */
gsm->count--;
+ if ((gsm->control & ~PF) != UIH) {
+ /* Calculate final FCS for UI frames over all
+ * data but FCS
+ */
+ gsm->fcs = gsm_fcs_add_block(gsm->fcs, gsm->buf,
+ gsm->count);
+ }
+ /* Add the FCS itself to test against GOOD_FCS */
gsm->fcs = gsm_fcs_add(gsm->fcs, gsm->buf[gsm->count]);
gsm->len = gsm->count;
gsm_queue(gsm);
@@ -2037,7 +2065,8 @@ static void gsm1_receive(struct gsm_mux *gsm, unsigned char c)
}
/* Any partial frame was a runt so go back to start */
if (gsm->state != GSM_START) {
- gsm->malformed++;
+ if (gsm->state != GSM_SEARCH)
+ gsm->malformed++;
gsm->state = GSM_START;
}
/* A SOF in GSM_START means we are still reading idling or
@@ -2106,74 +2135,43 @@ static void gsm_error(struct gsm_mux *gsm)
gsm->io_error++;
}
-static int gsm_disconnect(struct gsm_mux *gsm)
-{
- struct gsm_dlci *dlci = gsm->dlci[0];
- struct gsm_control *gc;
-
- if (!dlci)
- return 0;
-
- /* In theory disconnecting DLCI 0 is sufficient but for some
- modems this is apparently not the case. */
- gc = gsm_control_send(gsm, CMD_CLD, NULL, 0);
- if (gc)
- gsm_control_wait(gsm, gc);
-
- del_timer_sync(&gsm->t2_timer);
- /* Now we are sure T2 has stopped */
-
- gsm_dlci_begin_close(dlci);
- wait_event_interruptible(gsm->event,
- dlci->state == DLCI_CLOSED);
-
- if (signal_pending(current))
- return -EINTR;
-
- return 0;
-}
-
/**
* gsm_cleanup_mux - generic GSM protocol cleanup
* @gsm: our mux
+ * @disc: disconnect link?
*
* Clean up the bits of the mux which are the same for all framing
* protocols. Remove the mux from the mux table, stop all the timers
* and then shut down each device hanging up the channels as we go.
*/
-static void gsm_cleanup_mux(struct gsm_mux *gsm)
+static void gsm_cleanup_mux(struct gsm_mux *gsm, bool disc)
{
int i;
struct gsm_dlci *dlci = gsm->dlci[0];
struct gsm_msg *txq, *ntxq;
gsm->dead = true;
+ mutex_lock(&gsm->mutex);
- spin_lock(&gsm_mux_lock);
- for (i = 0; i < MAX_MUX; i++) {
- if (gsm_mux[i] == gsm) {
- gsm_mux[i] = NULL;
- break;
+ if (dlci) {
+ if (disc && dlci->state != DLCI_CLOSED) {
+ gsm_dlci_begin_close(dlci);
+ wait_event(gsm->event, dlci->state == DLCI_CLOSED);
}
+ dlci->dead = true;
}
- spin_unlock(&gsm_mux_lock);
- /* open failed before registering => nothing to do */
- if (i == MAX_MUX)
- return;
+ /* Finish outstanding timers, making sure they are done */
del_timer_sync(&gsm->t2_timer);
- /* Now we are sure T2 has stopped */
- if (dlci)
- dlci->dead = true;
- /* Free up any link layer users */
- mutex_lock(&gsm->mutex);
- for (i = 0; i < NUM_DLCI; i++)
+ /* Free up any link layer users and finally the control channel */
+ for (i = NUM_DLCI - 1; i >= 0; i--)
if (gsm->dlci[i])
gsm_dlci_release(gsm->dlci[i]);
mutex_unlock(&gsm->mutex);
/* Now wipe the queues */
+ tty_ldisc_flush(gsm->tty);
list_for_each_entry_safe(txq, ntxq, &gsm->tx_list, list)
kfree(txq);
INIT_LIST_HEAD(&gsm->tx_list);
@@ -2191,7 +2189,6 @@ static void gsm_cleanup_mux(struct gsm_mux *gsm)
static int gsm_activate_mux(struct gsm_mux *gsm)
{
struct gsm_dlci *dlci;
- int i = 0;
timer_setup(&gsm->t2_timer, gsm_control_retransmit, 0);
init_waitqueue_head(&gsm->event);
@@ -2203,18 +2200,6 @@ static int gsm_activate_mux(struct gsm_mux *gsm)
else
gsm->receive = gsm1_receive;
- spin_lock(&gsm_mux_lock);
- for (i = 0; i < MAX_MUX; i++) {
- if (gsm_mux[i] == NULL) {
- gsm->num = i;
- gsm_mux[i] = gsm;
- break;
- }
- }
- spin_unlock(&gsm_mux_lock);
- if (i == MAX_MUX)
- return -EBUSY;
-
dlci = gsm_dlci_alloc(gsm, 0);
if (dlci == NULL)
return -ENOMEM;
@@ -2230,6 +2215,15 @@ static int gsm_activate_mux(struct gsm_mux *gsm)
*/
static void gsm_free_mux(struct gsm_mux *gsm)
{
+ int i;
+
+ for (i = 0; i < MAX_MUX; i++) {
+ if (gsm == gsm_mux[i]) {
+ gsm_mux[i] = NULL;
+ break;
+ }
+ }
+ mutex_destroy(&gsm->mutex);
kfree(gsm->txframe);
kfree(gsm->buf);
kfree(gsm);
@@ -2249,12 +2243,20 @@ static void gsm_free_muxr(struct kref *ref)
static inline void mux_get(struct gsm_mux *gsm)
{
+ unsigned long flags;
+
+ spin_lock_irqsave(&gsm_mux_lock, flags);
kref_get(&gsm->ref);
+ spin_unlock_irqrestore(&gsm_mux_lock, flags);
}
static inline void mux_put(struct gsm_mux *gsm)
{
+ unsigned long flags;
+
+ spin_lock_irqsave(&gsm_mux_lock, flags);
kref_put(&gsm->ref, gsm_free_muxr);
+ spin_unlock_irqrestore(&gsm_mux_lock, flags);
}
static inline unsigned int mux_num_to_base(struct gsm_mux *gsm)
@@ -2275,6 +2277,7 @@ static inline unsigned int mux_line_to_num(unsigned int line)
static struct gsm_mux *gsm_alloc_mux(void)
{
+ int i;
struct gsm_mux *gsm = kzalloc(sizeof(struct gsm_mux), GFP_KERNEL);
if (gsm == NULL)
return NULL;
@@ -2283,7 +2286,7 @@ static struct gsm_mux *gsm_alloc_mux(void)
kfree(gsm);
return NULL;
}
- gsm->txframe = kmalloc(2 * MAX_MRU + 2, GFP_KERNEL);
+ gsm->txframe = kmalloc(2 * (MAX_MTU + PROT_OVERHEAD - 1), GFP_KERNEL);
if (gsm->txframe == NULL) {
kfree(gsm->buf);
kfree(gsm);
@@ -2304,6 +2307,26 @@ static struct gsm_mux *gsm_alloc_mux(void)
gsm->mtu = 64;
gsm->dead = true; /* Avoid early tty opens */
+ /* Store the instance to the mux array or abort if no space is
+ * available.
+ */
+ spin_lock(&gsm_mux_lock);
+ for (i = 0; i < MAX_MUX; i++) {
+ if (!gsm_mux[i]) {
+ gsm_mux[i] = gsm;
+ gsm->num = i;
+ break;
+ }
+ }
+ spin_unlock(&gsm_mux_lock);
+ if (i == MAX_MUX) {
+ mutex_destroy(&gsm->mutex);
+ kfree(gsm->txframe);
+ kfree(gsm->buf);
+ kfree(gsm);
+ return NULL;
+ }
+
return gsm;
}
@@ -2330,6 +2353,7 @@ static void gsm_copy_config_values(struct gsm_mux *gsm,
static int gsm_config(struct gsm_mux *gsm, struct gsm_config *c)
{
+ int ret = 0;
int need_close = 0;
int need_restart = 0;
@@ -2339,7 +2363,7 @@ static int gsm_config(struct gsm_mux *gsm, struct gsm_config *c)
/* Check the MRU/MTU range looks sane */
if (c->mru > MAX_MRU || c->mtu > MAX_MTU || c->mru < 8 || c->mtu < 8)
return -EINVAL;
- if (c->n2 < 3)
+ if (c->n2 > 255)
return -EINVAL;
if (c->encapsulation > 1) /* Basic, advanced, no I */
return -EINVAL;
@@ -2370,19 +2394,11 @@ static int gsm_config(struct gsm_mux *gsm, struct gsm_config *c)
/*
* Close down what is needed, restart and initiate the new
- * configuration
+ * configuration. On the first time there is no DLCI[0]
+ * and closing or cleaning up is not necessary.
*/
-
- if (gsm->initiator && (need_close || need_restart)) {
- int ret;
-
- ret = gsm_disconnect(gsm);
-
- if (ret)
- return ret;
- }
- if (need_restart)
- gsm_cleanup_mux(gsm);
+ if (need_close || need_restart)
+ gsm_cleanup_mux(gsm, true);
gsm->initiator = c->initiator;
gsm->mru = c->mru;
@@ -2405,10 +2421,13 @@ static int gsm_config(struct gsm_mux *gsm, struct gsm_config *c)
* FIXME: We need to separate activation/deactivation from adding
* and removing from the mux array
*/
- if (need_restart)
- gsm_activate_mux(gsm);
- if (gsm->initiator && need_close)
- gsm_dlci_begin_open(gsm->dlci[0]);
+ if (gsm->dead) {
+ ret = gsm_activate_mux(gsm);
+ if (ret)
+ return ret;
+ if (gsm->initiator)
+ gsm_dlci_begin_open(gsm->dlci[0]);
+ }
return 0;
}
@@ -2450,25 +2469,26 @@ static int gsmld_attach_gsm(struct tty_struct *tty, struct gsm_mux *gsm)
int ret, i;
gsm->tty = tty_kref_get(tty);
+ /* Turn off tty XON/XOFF handling to handle it explicitly. */
+ gsm->old_c_iflag = tty->termios.c_iflag;
+ tty->termios.c_iflag &= (IXON | IXOFF);
ret = gsm_activate_mux(gsm);
if (ret != 0)
tty_kref_put(gsm->tty);
else {
/* Don't register device 0 - this is the control channel and not
a usable tty interface */
- if (gsm->initiator) {
- base = mux_num_to_base(gsm); /* Base for this MUX */
- for (i = 1; i < NUM_DLCI; i++) {
- struct device *dev;
+ base = mux_num_to_base(gsm); /* Base for this MUX */
+ for (i = 1; i < NUM_DLCI; i++) {
+ struct device *dev;
- dev = tty_register_device(gsm_tty_driver,
+ dev = tty_register_device(gsm_tty_driver,
base + i, NULL);
- if (IS_ERR(dev)) {
- for (i--; i >= 1; i--)
- tty_unregister_device(gsm_tty_driver,
- base + i);
- return PTR_ERR(dev);
- }
+ if (IS_ERR(dev)) {
+ for (i--; i >= 1; i--)
+ tty_unregister_device(gsm_tty_driver,
+ base + i);
+ return PTR_ERR(dev);
}
}
}
@@ -2490,11 +2510,10 @@ static void gsmld_detach_gsm(struct tty_struct *tty, struct gsm_mux *gsm)
int i;
WARN_ON(tty != gsm->tty);
- if (gsm->initiator) {
- for (i = 1; i < NUM_DLCI; i++)
- tty_unregister_device(gsm_tty_driver, base + i);
- }
- gsm_cleanup_mux(gsm);
+ for (i = 1; i < NUM_DLCI; i++)
+ tty_unregister_device(gsm_tty_driver, base + i);
+ /* Restore tty XON/XOFF handling. */
+ gsm->tty->termios.c_iflag = gsm->old_c_iflag;
tty_kref_put(gsm->tty);
gsm->tty = NULL;
}
@@ -2559,6 +2578,12 @@ static void gsmld_close(struct tty_struct *tty)
{
struct gsm_mux *gsm = tty->disc_data;
+ /* The ldisc locks and closes the port before calling our close. This
+ * means we have no way to do a proper disconnect. We will not bother
+ * to do one.
+ */
+ gsm_cleanup_mux(gsm, false);
+
gsmld_detach_gsm(tty, gsm);
gsmld_flush_buffer(tty);
@@ -2597,7 +2622,7 @@ static int gsmld_open(struct tty_struct *tty)
ret = gsmld_attach_gsm(tty, gsm);
if (ret != 0) {
- gsm_cleanup_mux(gsm);
+ gsm_cleanup_mux(gsm, false);
mux_put(gsm);
}
return ret;
@@ -2952,28 +2977,78 @@ static struct tty_ldisc_ops tty_ldisc_packet = {
* Virtual tty side
*/
-#define TX_SIZE 512
+/**
+ * gsm_modem_upd_via_data - send modem bits via convergence layer
+ * @dlci: channel
+ * @brk: break signal
+ *
+ * Send an empty frame to signal mobile state changes and to transmit the
+ * break signal for adaption 2.
+ */
+
+static void gsm_modem_upd_via_data(struct gsm_dlci *dlci, u8 brk)
+{
+ struct gsm_mux *gsm = dlci->gsm;
+ unsigned long flags;
+
+ if (dlci->state != DLCI_OPEN || dlci->adaption != 2)
+ return;
+
+ spin_lock_irqsave(&gsm->tx_lock, flags);
+ gsm_dlci_modem_output(gsm, dlci, brk);
+ spin_unlock_irqrestore(&gsm->tx_lock, flags);
+}
+
+/**
+ * gsm_modem_upd_via_msc - send modem bits via control frame
+ * @dlci: channel
+ * @brk: break signal
+ */
-static int gsmtty_modem_update(struct gsm_dlci *dlci, u8 brk)
+static int gsm_modem_upd_via_msc(struct gsm_dlci *dlci, u8 brk)
{
- u8 modembits[5];
+ u8 modembits[3];
struct gsm_control *ctrl;
int len = 2;
- if (brk)
- len++;
+ if (dlci->gsm->encoding != 0)
+ return 0;
- modembits[0] = len << 1 | EA; /* Data bytes */
- modembits[1] = dlci->addr << 2 | 3; /* DLCI, EA, 1 */
- modembits[2] = gsm_encode_modem(dlci) << 1 | EA;
- if (brk)
- modembits[3] = brk << 4 | 2 | EA; /* Valid, EA */
- ctrl = gsm_control_send(dlci->gsm, CMD_MSC, modembits, len + 1);
+ modembits[0] = (dlci->addr << 2) | 2 | EA; /* DLCI, Valid, EA */
+ if (!brk) {
+ modembits[1] = (gsm_encode_modem(dlci) << 1) | EA;
+ } else {
+ modembits[1] = gsm_encode_modem(dlci) << 1;
+ modembits[2] = (brk << 4) | 2 | EA; /* Length, Break, EA */
+ len++;
+ }
+ ctrl = gsm_control_send(dlci->gsm, CMD_MSC, modembits, len);
if (ctrl == NULL)
return -ENOMEM;
return gsm_control_wait(dlci->gsm, ctrl);
}
+/**
+ * gsm_modem_update - send modem status line state
+ * @dlci: channel
+ * @brk: break signal
+ */
+
+static int gsm_modem_update(struct gsm_dlci *dlci, u8 brk)
+{
+ if (dlci->adaption == 2) {
+ /* Send convergence layer type 2 empty data frame. */
+ gsm_modem_upd_via_data(dlci, brk);
+ return 0;
+ } else if (dlci->gsm->encoding == 0) {
+ /* Send as MSC control message. */
+ return gsm_modem_upd_via_msc(dlci, brk);
+ }
+
+ /* Modem status lines are not supported. */
+ return -EPROTONOSUPPORT;
+}
+
static int gsm_carrier_raised(struct tty_port *port)
{
struct gsm_dlci *dlci = container_of(port, struct gsm_dlci, port);
@@ -3006,7 +3081,7 @@ static void gsm_dtr_rts(struct tty_port *port, int onoff)
modem_tx &= ~(TIOCM_DTR | TIOCM_RTS);
if (modem_tx != dlci->modem_tx) {
dlci->modem_tx = modem_tx;
- gsmtty_modem_update(dlci, 0);
+ gsm_modem_update(dlci, 0);
}
}
@@ -3141,7 +3216,7 @@ static unsigned int gsmtty_write_room(struct tty_struct *tty)
struct gsm_dlci *dlci = tty->driver_data;
if (dlci->state == DLCI_CLOSED)
return 0;
- return TX_SIZE - kfifo_len(&dlci->fifo);
+ return kfifo_avail(&dlci->fifo);
}
static unsigned int gsmtty_chars_in_buffer(struct tty_struct *tty)
@@ -3155,13 +3230,17 @@ static unsigned int gsmtty_chars_in_buffer(struct tty_struct *tty)
static void gsmtty_flush_buffer(struct tty_struct *tty)
{
struct gsm_dlci *dlci = tty->driver_data;
+ unsigned long flags;
+
if (dlci->state == DLCI_CLOSED)
return;
/* Caution needed: If we implement reliable transport classes
then the data being transmitted can't simply be junked once
it has first hit the stack. Until then we can just blow it
away */
+ spin_lock_irqsave(&dlci->lock, flags);
kfifo_reset(&dlci->fifo);
+ spin_unlock_irqrestore(&dlci->lock, flags);
/* Need to unhook this DLCI from the transmit queue logic */
}
@@ -3193,7 +3272,7 @@ static int gsmtty_tiocmset(struct tty_struct *tty,
if (modem_tx != dlci->modem_tx) {
dlci->modem_tx = modem_tx;
- return gsmtty_modem_update(dlci, 0);
+ return gsm_modem_update(dlci, 0);
}
return 0;
}
@@ -3254,7 +3333,7 @@ static void gsmtty_throttle(struct tty_struct *tty)
dlci->modem_tx &= ~TIOCM_RTS;
dlci->throttled = true;
/* Send an MSC with RTS cleared */
- gsmtty_modem_update(dlci, 0);
+ gsm_modem_update(dlci, 0);
}
static void gsmtty_unthrottle(struct tty_struct *tty)
@@ -3266,7 +3345,7 @@ static void gsmtty_unthrottle(struct tty_struct *tty)
dlci->modem_tx |= TIOCM_RTS;
dlci->throttled = false;
/* Send an MSC with RTS set */
- gsmtty_modem_update(dlci, 0);
+ gsm_modem_update(dlci, 0);
}
static int gsmtty_break_ctl(struct tty_struct *tty, int state)
@@ -3284,7 +3363,7 @@ static int gsmtty_break_ctl(struct tty_struct *tty, int state)
if (encode > 0x0F)
encode = 0x0F; /* Best effort */
}
- return gsmtty_modem_update(dlci, encode);
+ return gsm_modem_update(dlci, encode);
}
static void gsmtty_cleanup(struct tty_struct *tty)
diff --git a/drivers/tty/serial/8250/8250_mtk.c b/drivers/tty/serial/8250/8250_mtk.c
index f4a0caa56f84..21053db93ff1 100644
--- a/drivers/tty/serial/8250/8250_mtk.c
+++ b/drivers/tty/serial/8250/8250_mtk.c
@@ -37,6 +37,7 @@
#define MTK_UART_IER_RTSI 0x40 /* Enable RTS Modem status interrupt */
#define MTK_UART_IER_CTSI 0x80 /* Enable CTS Modem status interrupt */
+#define MTK_UART_EFR 38 /* I/O: Extended Features Register */
#define MTK_UART_EFR_EN 0x10 /* Enable enhancement feature */
#define MTK_UART_EFR_RTS 0x40 /* Enable hardware rx flow control */
#define MTK_UART_EFR_CTS 0x80 /* Enable hardware tx flow control */
@@ -53,6 +54,12 @@
#define MTK_UART_TX_TRIGGER 1
#define MTK_UART_RX_TRIGGER MTK_UART_RX_SIZE
+#define MTK_UART_FEATURE_SEL 39 /* Feature Selection register */
+#define MTK_UART_FEAT_NEWRMAP BIT(0) /* Use new register map */
+
+#define MTK_UART_XON1 40 /* I/O: Xon character 1 */
+#define MTK_UART_XOFF1 42 /* I/O: Xoff character 1 */
+
#ifdef CONFIG_SERIAL_8250_DMA
enum dma_rx_status {
DMA_RX_START = 0,
@@ -169,7 +176,7 @@ static void mtk8250_dma_enable(struct uart_8250_port *up)
MTK_UART_DMA_EN_RX | MTK_UART_DMA_EN_TX);
serial_out(up, UART_LCR, UART_LCR_CONF_MODE_B);
- serial_out(up, UART_EFR, UART_EFR_ECB);
+ serial_out(up, MTK_UART_EFR, UART_EFR_ECB);
serial_out(up, UART_LCR, lcr);
if (dmaengine_slave_config(dma->rxchan, &dma->rxconf) != 0)
@@ -232,7 +239,7 @@ static void mtk8250_set_flow_ctrl(struct uart_8250_port *up, int mode)
int lcr = serial_in(up, UART_LCR);
serial_out(up, UART_LCR, UART_LCR_CONF_MODE_B);
- serial_out(up, UART_EFR, UART_EFR_ECB);
+ serial_out(up, MTK_UART_EFR, UART_EFR_ECB);
serial_out(up, UART_LCR, lcr);
lcr = serial_in(up, UART_LCR);
@@ -241,7 +248,7 @@ static void mtk8250_set_flow_ctrl(struct uart_8250_port *up, int mode)
serial_out(up, MTK_UART_ESCAPE_DAT, MTK_UART_ESCAPE_CHAR);
serial_out(up, MTK_UART_ESCAPE_EN, 0x00);
serial_out(up, UART_LCR, UART_LCR_CONF_MODE_B);
- serial_out(up, UART_EFR, serial_in(up, UART_EFR) &
+ serial_out(up, MTK_UART_EFR, serial_in(up, MTK_UART_EFR) &
(~(MTK_UART_EFR_HW_FC | MTK_UART_EFR_SW_FC_MASK)));
serial_out(up, UART_LCR, lcr);
mtk8250_disable_intrs(up, MTK_UART_IER_XOFFI |
@@ -255,8 +262,8 @@ static void mtk8250_set_flow_ctrl(struct uart_8250_port *up, int mode)
serial_out(up, UART_LCR, UART_LCR_CONF_MODE_B);
/*enable hw flow control*/
- serial_out(up, UART_EFR, MTK_UART_EFR_HW_FC |
- (serial_in(up, UART_EFR) &
+ serial_out(up, MTK_UART_EFR, MTK_UART_EFR_HW_FC |
+ (serial_in(up, MTK_UART_EFR) &
(~(MTK_UART_EFR_HW_FC | MTK_UART_EFR_SW_FC_MASK))));
serial_out(up, UART_LCR, lcr);
@@ -270,12 +277,12 @@ static void mtk8250_set_flow_ctrl(struct uart_8250_port *up, int mode)
serial_out(up, UART_LCR, UART_LCR_CONF_MODE_B);
/*enable sw flow control */
- serial_out(up, UART_EFR, MTK_UART_EFR_XON1_XOFF1 |
- (serial_in(up, UART_EFR) &
+ serial_out(up, MTK_UART_EFR, MTK_UART_EFR_XON1_XOFF1 |
+ (serial_in(up, MTK_UART_EFR) &
(~(MTK_UART_EFR_HW_FC | MTK_UART_EFR_SW_FC_MASK))));
- serial_out(up, UART_XON1, START_CHAR(port->state->port.tty));
- serial_out(up, UART_XOFF1, STOP_CHAR(port->state->port.tty));
+ serial_out(up, MTK_UART_XON1, START_CHAR(port->state->port.tty));
+ serial_out(up, MTK_UART_XOFF1, STOP_CHAR(port->state->port.tty));
serial_out(up, UART_LCR, lcr);
mtk8250_disable_intrs(up, MTK_UART_IER_CTSI|MTK_UART_IER_RTSI);
mtk8250_enable_intrs(up, MTK_UART_IER_XOFFI);
@@ -568,6 +575,10 @@ static int mtk8250_probe(struct platform_device *pdev)
uart.dma = data->dma;
#endif
+ /* Set AP UART new register map */
+ writel(MTK_UART_FEAT_NEWRMAP, uart.port.membase +
+ (MTK_UART_FEATURE_SEL << uart.port.regshift));
+
/* Disable Rate Fix function */
writel(0x0, uart.port.membase +
(MTK_UART_RATE_FIX << uart.port.regshift));
diff --git a/drivers/tty/serial/8250/8250_pci.c b/drivers/tty/serial/8250/8250_pci.c
index e17e97ea86fa..a293e9f107d0 100644
--- a/drivers/tty/serial/8250/8250_pci.c
+++ b/drivers/tty/serial/8250/8250_pci.c
@@ -2667,7 +2667,7 @@ enum pci_board_num_t {
pbn_panacom2,
pbn_panacom4,
pbn_plx_romulus,
- pbn_endrun_2_4000000,
+ pbn_endrun_2_3906250,
pbn_oxsemi,
pbn_oxsemi_1_3906250,
pbn_oxsemi_2_3906250,
@@ -3195,10 +3195,10 @@ static struct pciserial_board pci_boards[] = {
* signal now many ports are available
* 2 port 952 Uart support
*/
- [pbn_endrun_2_4000000] = {
+ [pbn_endrun_2_3906250] = {
.flags = FL_BASE0,
.num_ports = 2,
- .base_baud = 4000000,
+ .base_baud = 3906250,
.uart_offset = 0x200,
.first_offset = 0x1000,
},
@@ -4115,7 +4115,7 @@ static const struct pci_device_id serial_pci_tbl[] = {
*/
{ PCI_VENDOR_ID_ENDRUN, PCI_DEVICE_ID_ENDRUN_1588,
PCI_ANY_ID, PCI_ANY_ID, 0, 0,
- pbn_endrun_2_4000000 },
+ pbn_endrun_2_3906250 },
/*
* Quatech cards. These actually have configurable clocks but for
* now we just use the default.
diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c
index 318af6f13605..1fbd5bf264be 100644
--- a/drivers/tty/serial/8250/8250_port.c
+++ b/drivers/tty/serial/8250/8250_port.c
@@ -1675,11 +1675,11 @@ static void serial8250_start_tx(struct uart_port *port)
struct uart_8250_port *up = up_to_u8250p(port);
struct uart_8250_em485 *em485 = up->em485;
- serial8250_rpm_get_tx(up);
-
if (!port->x_char && uart_circ_empty(&port->state->xmit))
return;
+ serial8250_rpm_get_tx(up);
+
if (em485 &&
em485->active_timer == &em485->start_tx_timer)
return;
@@ -3329,7 +3329,7 @@ static void serial8250_console_restore(struct uart_8250_port *up)
serial8250_set_divisor(port, baud, quot, frac);
serial_port_out(port, UART_LCR, up->lcr);
- serial8250_out_MCR(up, UART_MCR_DTR | UART_MCR_RTS);
+ serial8250_out_MCR(up, up->mcr | UART_MCR_DTR | UART_MCR_RTS);
}
/*
diff --git a/drivers/tty/serial/amba-pl011.c b/drivers/tty/serial/amba-pl011.c
index 51ecb050ae40..4d11a3e547f9 100644
--- a/drivers/tty/serial/amba-pl011.c
+++ b/drivers/tty/serial/amba-pl011.c
@@ -1255,13 +1255,18 @@ static inline bool pl011_dma_rx_running(struct uart_amba_port *uap)
static void pl011_rs485_tx_stop(struct uart_amba_port *uap)
{
+ /*
+ * To be on the safe side only time out after twice as many iterations
+ * as fifo size.
+ */
+ const int MAX_TX_DRAIN_ITERS = uap->port.fifosize * 2;
struct uart_port *port = &uap->port;
int i = 0;
u32 cr;
/* Wait until hardware tx queue is empty */
while (!pl011_tx_empty(port)) {
- if (i == port->fifosize) {
+ if (i > MAX_TX_DRAIN_ITERS) {
dev_warn(port->dev,
"timeout while draining hardware tx queue\n");
break;
@@ -2052,7 +2057,7 @@ pl011_set_termios(struct uart_port *port, struct ktermios *termios,
* with the given baud rate. We use this as the poll interval when we
* wait for the tx queue to empty.
*/
- uap->rs485_tx_drain_interval = (bits * 1000 * 1000) / baud;
+ uap->rs485_tx_drain_interval = DIV_ROUND_UP(bits * 1000 * 1000, baud);
pl011_setup_status_masks(port, termios);
diff --git a/drivers/tty/serial/digicolor-usart.c b/drivers/tty/serial/digicolor-usart.c
index 6d70fea76bb3..e37a917b9dbb 100644
--- a/drivers/tty/serial/digicolor-usart.c
+++ b/drivers/tty/serial/digicolor-usart.c
@@ -471,11 +471,10 @@ static int digicolor_uart_probe(struct platform_device *pdev)
if (IS_ERR(uart_clk))
return PTR_ERR(uart_clk);
- res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
- dp->port.mapbase = res->start;
- dp->port.membase = devm_ioremap_resource(&pdev->dev, res);
+ dp->port.membase = devm_platform_get_and_ioremap_resource(pdev, 0, &res);
if (IS_ERR(dp->port.membase))
return PTR_ERR(dp->port.membase);
+ dp->port.mapbase = res->start;
irq = platform_get_irq(pdev, 0);
if (irq < 0)
diff --git a/drivers/tty/serial/fsl_lpuart.c b/drivers/tty/serial/fsl_lpuart.c
index 87789872f400..be12fee94db5 100644
--- a/drivers/tty/serial/fsl_lpuart.c
+++ b/drivers/tty/serial/fsl_lpuart.c
@@ -2664,6 +2664,7 @@ static int lpuart_probe(struct platform_device *pdev)
struct device_node *np = pdev->dev.of_node;
struct lpuart_port *sport;
struct resource *res;
+ irq_handler_t handler;
int ret;
sport = devm_kzalloc(&pdev->dev, sizeof(*sport), GFP_KERNEL);
@@ -2741,17 +2742,11 @@ static int lpuart_probe(struct platform_device *pdev)
if (lpuart_is_32(sport)) {
lpuart_reg.cons = LPUART32_CONSOLE;
- ret = devm_request_irq(&pdev->dev, sport->port.irq, lpuart32_int, 0,
- DRIVER_NAME, sport);
+ handler = lpuart32_int;
} else {
lpuart_reg.cons = LPUART_CONSOLE;
- ret = devm_request_irq(&pdev->dev, sport->port.irq, lpuart_int, 0,
- DRIVER_NAME, sport);
+ handler = lpuart_int;
}
-
- if (ret)
- goto failed_irq_request;
-
ret = uart_add_one_port(&lpuart_reg, &sport->port);
if (ret)
goto failed_attach_port;
@@ -2773,13 +2768,18 @@ static int lpuart_probe(struct platform_device *pdev)
sport->port.rs485_config(&sport->port, &sport->port.rs485);
+ ret = devm_request_irq(&pdev->dev, sport->port.irq, handler, 0,
+ DRIVER_NAME, sport);
+ if (ret)
+ goto failed_irq_request;
+
return 0;
+failed_irq_request:
failed_get_rs485:
failed_reset:
uart_remove_one_port(&lpuart_reg, &sport->port);
failed_attach_port:
-failed_irq_request:
lpuart_disable_clks(sport);
failed_clock_enable:
failed_out_of_range:
diff --git a/drivers/tty/serial/imx.c b/drivers/tty/serial/imx.c
index fd38e6ed4fda..a2100be8d554 100644
--- a/drivers/tty/serial/imx.c
+++ b/drivers/tty/serial/imx.c
@@ -1448,7 +1448,7 @@ static int imx_uart_startup(struct uart_port *port)
imx_uart_writel(sport, ucr1, UCR1);
ucr4 = imx_uart_readl(sport, UCR4) & ~(UCR4_OREN | UCR4_INVR);
- if (!sport->dma_is_enabled)
+ if (!dma_is_inited)
ucr4 |= UCR4_OREN;
if (sport->inverted_rx)
ucr4 |= UCR4_INVR;
diff --git a/drivers/tty/serial/sc16is7xx.c b/drivers/tty/serial/sc16is7xx.c
index e857fb61efbf..5fb201c1b563 100644
--- a/drivers/tty/serial/sc16is7xx.c
+++ b/drivers/tty/serial/sc16is7xx.c
@@ -1238,12 +1238,10 @@ static void sc16is7xx_shutdown(struct uart_port *port)
/* Disable all interrupts */
sc16is7xx_port_write(port, SC16IS7XX_IER_REG, 0);
- /* Disable TX/RX, clear auto RS485 and RTS invert */
+ /* Disable TX/RX */
sc16is7xx_port_update(port, SC16IS7XX_EFCR_REG,
SC16IS7XX_EFCR_RXDISABLE_BIT |
- SC16IS7XX_EFCR_TXDISABLE_BIT |
- SC16IS7XX_EFCR_AUTO_RS485_BIT |
- SC16IS7XX_EFCR_RTS_INVERT_BIT,
+ SC16IS7XX_EFCR_TXDISABLE_BIT,
SC16IS7XX_EFCR_RXDISABLE_BIT |
SC16IS7XX_EFCR_TXDISABLE_BIT);
diff --git a/drivers/usb/cdns3/cdns3-gadget.c b/drivers/usb/cdns3/cdns3-gadget.c
index f9af7ebe003d..d6d515d598dc 100644
--- a/drivers/usb/cdns3/cdns3-gadget.c
+++ b/drivers/usb/cdns3/cdns3-gadget.c
@@ -2684,6 +2684,7 @@ int __cdns3_gadget_ep_clear_halt(struct cdns3_endpoint *priv_ep)
struct usb_request *request;
struct cdns3_request *priv_req;
struct cdns3_trb *trb = NULL;
+ struct cdns3_trb trb_tmp;
int ret;
int val;
@@ -2693,8 +2694,10 @@ int __cdns3_gadget_ep_clear_halt(struct cdns3_endpoint *priv_ep)
if (request) {
priv_req = to_cdns3_request(request);
trb = priv_req->trb;
- if (trb)
+ if (trb) {
+ trb_tmp = *trb;
trb->control = trb->control ^ cpu_to_le32(TRB_CYCLE);
+ }
}
writel(EP_CMD_CSTALL | EP_CMD_EPRST, &priv_dev->regs->ep_cmd);
@@ -2709,7 +2712,7 @@ int __cdns3_gadget_ep_clear_halt(struct cdns3_endpoint *priv_ep)
if (request) {
if (trb)
- trb->control = trb->control ^ cpu_to_le32(TRB_CYCLE);
+ *trb = trb_tmp;
cdns3_rearm_transfer(priv_ep, 1);
}
diff --git a/drivers/usb/class/cdc-wdm.c b/drivers/usb/class/cdc-wdm.c
index 7f2c83f299d3..eebe782380fb 100644
--- a/drivers/usb/class/cdc-wdm.c
+++ b/drivers/usb/class/cdc-wdm.c
@@ -774,6 +774,7 @@ static int wdm_release(struct inode *inode, struct file *file)
poison_urbs(desc);
spin_lock_irq(&desc->iuspin);
desc->resp_count = 0;
+ clear_bit(WDM_RESPONDING, &desc->flags);
spin_unlock_irq(&desc->iuspin);
desc->manage_power(desc->intf, 0);
unpoison_urbs(desc);
diff --git a/drivers/usb/core/devio.c b/drivers/usb/core/devio.c
index 6abb7294e919..b5b85bf80329 100644
--- a/drivers/usb/core/devio.c
+++ b/drivers/usb/core/devio.c
@@ -1209,12 +1209,16 @@ static int do_proc_control(struct usb_dev_state *ps,
usb_unlock_device(dev);
i = usbfs_start_wait_urb(urb, tmo, &actlen);
+
+ /* Linger a bit, prior to the next control message. */
+ if (dev->quirks & USB_QUIRK_DELAY_CTRL_MSG)
+ msleep(200);
usb_lock_device(dev);
snoop_urb(dev, NULL, pipe, actlen, i, COMPLETE, tbuf, actlen);
if (!i && actlen) {
if (copy_to_user(ctrl->data, tbuf, actlen)) {
ret = -EFAULT;
- goto recv_fault;
+ goto done;
}
}
} else {
@@ -1231,6 +1235,10 @@ static int do_proc_control(struct usb_dev_state *ps,
usb_unlock_device(dev);
i = usbfs_start_wait_urb(urb, tmo, &actlen);
+
+ /* Linger a bit, prior to the next control message. */
+ if (dev->quirks & USB_QUIRK_DELAY_CTRL_MSG)
+ msleep(200);
usb_lock_device(dev);
snoop_urb(dev, NULL, pipe, actlen, i, COMPLETE, NULL, 0);
}
@@ -1242,10 +1250,6 @@ static int do_proc_control(struct usb_dev_state *ps,
}
ret = (i < 0 ? i : actlen);
- recv_fault:
- /* Linger a bit, prior to the next control message. */
- if (dev->quirks & USB_QUIRK_DELAY_CTRL_MSG)
- msleep(200);
done:
kfree(dr);
usb_free_urb(urb);
diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c
index d3c14b5ed4a1..97b44a68668a 100644
--- a/drivers/usb/core/quirks.c
+++ b/drivers/usb/core/quirks.c
@@ -404,6 +404,9 @@ static const struct usb_device_id usb_quirk_list[] = {
{ USB_DEVICE(0x0b05, 0x17e0), .driver_info =
USB_QUIRK_IGNORE_REMOTE_WAKEUP },
+ /* Realtek Semiconductor Corp. Mass Storage Device (Multicard Reader)*/
+ { USB_DEVICE(0x0bda, 0x0151), .driver_info = USB_QUIRK_CONFIG_INTF_STRINGS },
+
/* Realtek hub in Dell WD19 (Type-C) */
{ USB_DEVICE(0x0bda, 0x0487), .driver_info = USB_QUIRK_NO_LPM },
@@ -507,6 +510,9 @@ static const struct usb_device_id usb_quirk_list[] = {
/* DJI CineSSD */
{ USB_DEVICE(0x2ca3, 0x0031), .driver_info = USB_QUIRK_NO_LPM },
+ /* VCOM device */
+ { USB_DEVICE(0x4296, 0x7570), .driver_info = USB_QUIRK_CONFIG_INTF_STRINGS },
+
/* INTEL VALUE SSD */
{ USB_DEVICE(0x8086, 0xf1a5), .driver_info = USB_QUIRK_RESET_RESUME },
diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c
index 1170b800acdc..d28cd1a6709b 100644
--- a/drivers/usb/dwc3/core.c
+++ b/drivers/usb/dwc3/core.c
@@ -274,7 +274,8 @@ int dwc3_core_soft_reset(struct dwc3 *dwc)
reg = dwc3_readl(dwc->regs, DWC3_DCTL);
reg |= DWC3_DCTL_CSFTRST;
- dwc3_writel(dwc->regs, DWC3_DCTL, reg);
+ reg &= ~DWC3_DCTL_RUN_STOP;
+ dwc3_gadget_dctl_write_safe(dwc, reg);
/*
* For DWC_usb31 controller 1.90a and later, the DCTL.CSFRST bit
@@ -1377,10 +1378,10 @@ static void dwc3_get_properties(struct dwc3 *dwc)
u8 lpm_nyet_threshold;
u8 tx_de_emphasis;
u8 hird_threshold;
- u8 rx_thr_num_pkt_prd;
- u8 rx_max_burst_prd;
- u8 tx_thr_num_pkt_prd;
- u8 tx_max_burst_prd;
+ u8 rx_thr_num_pkt_prd = 0;
+ u8 rx_max_burst_prd = 0;
+ u8 tx_thr_num_pkt_prd = 0;
+ u8 tx_max_burst_prd = 0;
u8 tx_fifo_resize_max_num;
const char *usb_psy_name;
int ret;
@@ -1690,21 +1691,44 @@ static int dwc3_probe(struct platform_device *pdev)
/*
* Clocks are optional, but new DT platforms should support all
* clocks as required by the DT-binding.
+ * Some devices have different clock names in legacy device trees,
+ * check for them to retain backwards compatibility.
*/
dwc->bus_clk = devm_clk_get_optional(dev, "bus_early");
if (IS_ERR(dwc->bus_clk))
return dev_err_probe(dev, PTR_ERR(dwc->bus_clk),
"could not get bus clock\n");
+ if (dwc->bus_clk == NULL) {
+ dwc->bus_clk = devm_clk_get_optional(dev, "bus_clk");
+ if (IS_ERR(dwc->bus_clk))
+ return dev_err_probe(dev, PTR_ERR(dwc->bus_clk),
+ "could not get bus clock\n");
+ }
+
dwc->ref_clk = devm_clk_get_optional(dev, "ref");
if (IS_ERR(dwc->ref_clk))
return dev_err_probe(dev, PTR_ERR(dwc->ref_clk),
"could not get ref clock\n");
+ if (dwc->ref_clk == NULL) {
+ dwc->ref_clk = devm_clk_get_optional(dev, "ref_clk");
+ if (IS_ERR(dwc->ref_clk))
+ return dev_err_probe(dev, PTR_ERR(dwc->ref_clk),
+ "could not get ref clock\n");
+ }
+
dwc->susp_clk = devm_clk_get_optional(dev, "suspend");
if (IS_ERR(dwc->susp_clk))
return dev_err_probe(dev, PTR_ERR(dwc->susp_clk),
"could not get suspend clock\n");
+
+ if (dwc->susp_clk == NULL) {
+ dwc->susp_clk = devm_clk_get_optional(dev, "suspend_clk");
+ if (IS_ERR(dwc->susp_clk))
+ return dev_err_probe(dev, PTR_ERR(dwc->susp_clk),
+ "could not get suspend clock\n");
+ }
}
ret = reset_control_deassert(dwc->reset);
diff --git a/drivers/usb/dwc3/drd.c b/drivers/usb/dwc3/drd.c
index b60b5f7b6dff..8cad9e7d3368 100644
--- a/drivers/usb/dwc3/drd.c
+++ b/drivers/usb/dwc3/drd.c
@@ -584,16 +584,15 @@ int dwc3_drd_init(struct dwc3 *dwc)
{
int ret, irq;
+ if (ROLE_SWITCH &&
+ device_property_read_bool(dwc->dev, "usb-role-switch"))
+ return dwc3_setup_role_switch(dwc);
+
dwc->edev = dwc3_get_extcon(dwc);
if (IS_ERR(dwc->edev))
return PTR_ERR(dwc->edev);
- if (ROLE_SWITCH &&
- device_property_read_bool(dwc->dev, "usb-role-switch")) {
- ret = dwc3_setup_role_switch(dwc);
- if (ret < 0)
- return ret;
- } else if (dwc->edev) {
+ if (dwc->edev) {
dwc->edev_nb.notifier_call = dwc3_drd_notifier;
ret = extcon_register_notifier(dwc->edev, EXTCON_USB_HOST,
&dwc->edev_nb);
diff --git a/drivers/usb/dwc3/dwc3-pci.c b/drivers/usb/dwc3/dwc3-pci.c
index 33f657d83246..2e19e0e4ea53 100644
--- a/drivers/usb/dwc3/dwc3-pci.c
+++ b/drivers/usb/dwc3/dwc3-pci.c
@@ -45,6 +45,8 @@
#define PCI_DEVICE_ID_INTEL_ADLM 0x54ee
#define PCI_DEVICE_ID_INTEL_ADLS 0x7ae1
#define PCI_DEVICE_ID_INTEL_RPLS 0x7a61
+#define PCI_DEVICE_ID_INTEL_MTLP 0x7ec1
+#define PCI_DEVICE_ID_INTEL_MTL 0x7e7e
#define PCI_DEVICE_ID_INTEL_TGL 0x9a15
#define PCI_DEVICE_ID_AMD_MR 0x163a
@@ -456,6 +458,12 @@ static const struct pci_device_id dwc3_pci_id_table[] = {
{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_RPLS),
(kernel_ulong_t) &dwc3_pci_intel_swnode, },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_MTLP),
+ (kernel_ulong_t) &dwc3_pci_intel_swnode, },
+
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_MTL),
+ (kernel_ulong_t) &dwc3_pci_intel_swnode, },
+
{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_TGL),
(kernel_ulong_t) &dwc3_pci_intel_swnode, },
diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
index ab725d2262d6..0b9c2493844a 100644
--- a/drivers/usb/dwc3/gadget.c
+++ b/drivers/usb/dwc3/gadget.c
@@ -3274,6 +3274,7 @@ static int dwc3_gadget_ep_cleanup_completed_request(struct dwc3_ep *dep,
const struct dwc3_event_depevt *event,
struct dwc3_request *req, int status)
{
+ int request_status;
int ret;
if (req->request.num_mapped_sgs)
@@ -3294,7 +3295,35 @@ static int dwc3_gadget_ep_cleanup_completed_request(struct dwc3_ep *dep,
req->needs_extra_trb = false;
}
- dwc3_gadget_giveback(dep, req, status);
+ /*
+ * The event status only reflects the status of the TRB with IOC set.
+ * For the requests that don't set interrupt on completion, the driver
+ * needs to check and return the status of the completed TRBs associated
+ * with the request. Use the status of the last TRB of the request.
+ */
+ if (req->request.no_interrupt) {
+ struct dwc3_trb *trb;
+
+ trb = dwc3_ep_prev_trb(dep, dep->trb_dequeue);
+ switch (DWC3_TRB_SIZE_TRBSTS(trb->size)) {
+ case DWC3_TRBSTS_MISSED_ISOC:
+ /* Isoc endpoint only */
+ request_status = -EXDEV;
+ break;
+ case DWC3_TRB_STS_XFER_IN_PROG:
+ /* Applicable when End Transfer with ForceRM=0 */
+ case DWC3_TRBSTS_SETUP_PENDING:
+ /* Control endpoint only */
+ case DWC3_TRBSTS_OK:
+ default:
+ request_status = 0;
+ break;
+ }
+ } else {
+ request_status = status;
+ }
+
+ dwc3_gadget_giveback(dep, req, request_status);
out:
return ret;
diff --git a/drivers/usb/gadget/configfs.c b/drivers/usb/gadget/configfs.c
index 1fb837d9271e..84b73cb03f87 100644
--- a/drivers/usb/gadget/configfs.c
+++ b/drivers/usb/gadget/configfs.c
@@ -1438,6 +1438,8 @@ static void configfs_composite_unbind(struct usb_gadget *gadget)
usb_ep_autoconfig_reset(cdev->gadget);
spin_lock_irqsave(&gi->spinlock, flags);
cdev->gadget = NULL;
+ cdev->deactivations = 0;
+ gadget->deactivated = false;
set_gadget_data(gadget, NULL);
spin_unlock_irqrestore(&gi->spinlock, flags);
}
diff --git a/drivers/usb/gadget/function/f_uvc.c b/drivers/usb/gadget/function/f_uvc.c
index 71bb5e477dba..d37965867b23 100644
--- a/drivers/usb/gadget/function/f_uvc.c
+++ b/drivers/usb/gadget/function/f_uvc.c
@@ -890,13 +890,37 @@ static void uvc_function_unbind(struct usb_configuration *c,
{
struct usb_composite_dev *cdev = c->cdev;
struct uvc_device *uvc = to_uvc(f);
+ long wait_ret = 1;
uvcg_info(f, "%s()\n", __func__);
+ /* If we know we're connected via v4l2, then there should be a cleanup
+ * of the device from userspace either via UVC_EVENT_DISCONNECT or
+ * though the video device removal uevent. Allow some time for the
+ * application to close out before things get deleted.
+ */
+ if (uvc->func_connected) {
+ uvcg_dbg(f, "waiting for clean disconnect\n");
+ wait_ret = wait_event_interruptible_timeout(uvc->func_connected_queue,
+ uvc->func_connected == false, msecs_to_jiffies(500));
+ uvcg_dbg(f, "done waiting with ret: %ld\n", wait_ret);
+ }
+
device_remove_file(&uvc->vdev.dev, &dev_attr_function_name);
video_unregister_device(&uvc->vdev);
v4l2_device_unregister(&uvc->v4l2_dev);
+ if (uvc->func_connected) {
+ /* Wait for the release to occur to ensure there are no longer any
+ * pending operations that may cause panics when resources are cleaned
+ * up.
+ */
+ uvcg_warn(f, "%s no clean disconnect, wait for release\n", __func__);
+ wait_ret = wait_event_interruptible_timeout(uvc->func_connected_queue,
+ uvc->func_connected == false, msecs_to_jiffies(1000));
+ uvcg_dbg(f, "done waiting for release with ret: %ld\n", wait_ret);
+ }
+
usb_ep_free_request(cdev->gadget->ep0, uvc->control_req);
kfree(uvc->control_buf);
@@ -915,6 +939,7 @@ static struct usb_function *uvc_alloc(struct usb_function_instance *fi)
mutex_init(&uvc->video.mutex);
uvc->state = UVC_STATE_DISCONNECTED;
+ init_waitqueue_head(&uvc->func_connected_queue);
opts = fi_to_f_uvc_opts(fi);
mutex_lock(&opts->lock);
diff --git a/drivers/usb/gadget/function/uvc.h b/drivers/usb/gadget/function/uvc.h
index c3607a32b986..886103a1fe9b 100644
--- a/drivers/usb/gadget/function/uvc.h
+++ b/drivers/usb/gadget/function/uvc.h
@@ -14,6 +14,7 @@
#include <linux/spinlock.h>
#include <linux/usb/composite.h>
#include <linux/videodev2.h>
+#include <linux/wait.h>
#include <media/v4l2-device.h>
#include <media/v4l2-dev.h>
@@ -129,6 +130,7 @@ struct uvc_device {
struct usb_function func;
struct uvc_video video;
bool func_connected;
+ wait_queue_head_t func_connected_queue;
/* Descriptors */
struct {
diff --git a/drivers/usb/gadget/function/uvc_queue.c b/drivers/usb/gadget/function/uvc_queue.c
index d852ac9e47e7..2cda982f3765 100644
--- a/drivers/usb/gadget/function/uvc_queue.c
+++ b/drivers/usb/gadget/function/uvc_queue.c
@@ -264,6 +264,8 @@ void uvcg_queue_cancel(struct uvc_video_queue *queue, int disconnect)
buf->state = UVC_BUF_STATE_ERROR;
vb2_buffer_done(&buf->buf.vb2_buf, VB2_BUF_STATE_ERROR);
}
+ queue->buf_used = 0;
+
/* This must be protected by the irqlock spinlock to avoid race
* conditions between uvc_queue_buffer and the disconnection event that
* could result in an interruptible wait in uvc_dequeue_buffer. Do not
diff --git a/drivers/usb/gadget/function/uvc_v4l2.c b/drivers/usb/gadget/function/uvc_v4l2.c
index a2c78690c5c2..fd8f73bb726d 100644
--- a/drivers/usb/gadget/function/uvc_v4l2.c
+++ b/drivers/usb/gadget/function/uvc_v4l2.c
@@ -253,10 +253,11 @@ uvc_v4l2_subscribe_event(struct v4l2_fh *fh,
static void uvc_v4l2_disable(struct uvc_device *uvc)
{
- uvc->func_connected = false;
uvc_function_disconnect(uvc);
uvcg_video_enable(&uvc->video, 0);
uvcg_free_buffers(&uvc->video.queue);
+ uvc->func_connected = false;
+ wake_up_interruptible(&uvc->func_connected_queue);
}
static int
diff --git a/drivers/usb/gadget/legacy/raw_gadget.c b/drivers/usb/gadget/legacy/raw_gadget.c
index 8d40a1f2ec57..e9440f7bf019 100644
--- a/drivers/usb/gadget/legacy/raw_gadget.c
+++ b/drivers/usb/gadget/legacy/raw_gadget.c
@@ -145,6 +145,7 @@ enum dev_state {
STATE_DEV_INVALID = 0,
STATE_DEV_OPENED,
STATE_DEV_INITIALIZED,
+ STATE_DEV_REGISTERING,
STATE_DEV_RUNNING,
STATE_DEV_CLOSED,
STATE_DEV_FAILED
@@ -508,6 +509,7 @@ static int raw_ioctl_run(struct raw_dev *dev, unsigned long value)
ret = -EINVAL;
goto out_unlock;
}
+ dev->state = STATE_DEV_REGISTERING;
spin_unlock_irqrestore(&dev->lock, flags);
ret = usb_gadget_probe_driver(&dev->driver);
diff --git a/drivers/usb/host/ehci-hcd.c b/drivers/usb/host/ehci-hcd.c
index 3d82e0b853be..684164fa9716 100644
--- a/drivers/usb/host/ehci-hcd.c
+++ b/drivers/usb/host/ehci-hcd.c
@@ -1103,6 +1103,26 @@ static void ehci_remove_device(struct usb_hcd *hcd, struct usb_device *udev)
#ifdef CONFIG_PM
+/* Clear wakeup signal locked in zhaoxin platform when device plug in. */
+static void ehci_zx_wakeup_clear(struct ehci_hcd *ehci)
+{
+ u32 __iomem *reg = &ehci->regs->port_status[4];
+ u32 t1 = ehci_readl(ehci, reg);
+
+ t1 &= (u32)~0xf0000;
+ t1 |= PORT_TEST_FORCE;
+ ehci_writel(ehci, t1, reg);
+ t1 = ehci_readl(ehci, reg);
+ msleep(1);
+ t1 &= (u32)~0xf0000;
+ ehci_writel(ehci, t1, reg);
+ ehci_readl(ehci, reg);
+ msleep(1);
+ t1 = ehci_readl(ehci, reg);
+ ehci_writel(ehci, t1 | PORT_CSC, reg);
+ ehci_readl(ehci, reg);
+}
+
/* suspend/resume, section 4.3 */
/* These routines handle the generic parts of controller suspend/resume */
@@ -1154,6 +1174,9 @@ int ehci_resume(struct usb_hcd *hcd, bool force_reset)
if (ehci->shutdown)
return 0; /* Controller is dead */
+ if (ehci->zx_wakeup_clear_needed)
+ ehci_zx_wakeup_clear(ehci);
+
/*
* If CF is still set and reset isn't forced
* then we maintained suspend power.
diff --git a/drivers/usb/host/ehci-pci.c b/drivers/usb/host/ehci-pci.c
index 638f03b89739..9937c5a7efc2 100644
--- a/drivers/usb/host/ehci-pci.c
+++ b/drivers/usb/host/ehci-pci.c
@@ -231,6 +231,10 @@ static int ehci_pci_setup(struct usb_hcd *hcd)
ehci->is_aspeed = 1;
}
break;
+ case PCI_VENDOR_ID_ZHAOXIN:
+ if (pdev->device == 0x3104 && (pdev->revision & 0xf0) == 0x90)
+ ehci->zx_wakeup_clear_needed = 1;
+ break;
}
/* optional debug port, normally in the first BAR */
diff --git a/drivers/usb/host/ehci.h b/drivers/usb/host/ehci.h
index fdd073cc053b..ad3f13a3eaf1 100644
--- a/drivers/usb/host/ehci.h
+++ b/drivers/usb/host/ehci.h
@@ -220,6 +220,7 @@ struct ehci_hcd { /* one per controller */
unsigned imx28_write_fix:1; /* For Freescale i.MX28 */
unsigned spurious_oc:1;
unsigned is_aspeed:1;
+ unsigned zx_wakeup_clear_needed:1;
/* required for usb32 quirk */
#define OHCI_CTRL_HCFS (3 << 6)
diff --git a/drivers/usb/host/xhci-hub.c b/drivers/usb/host/xhci-hub.c
index 1e7dc130c39a..f65f1ba2b592 100644
--- a/drivers/usb/host/xhci-hub.c
+++ b/drivers/usb/host/xhci-hub.c
@@ -1434,7 +1434,7 @@ int xhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue,
}
spin_unlock_irqrestore(&xhci->lock, flags);
if (!wait_for_completion_timeout(&bus_state->u3exit_done[wIndex],
- msecs_to_jiffies(100)))
+ msecs_to_jiffies(500)))
xhci_dbg(xhci, "missing U0 port change event for port %d-%d\n",
hcd->self.busnum, wIndex + 1);
spin_lock_irqsave(&xhci->lock, flags);
diff --git a/drivers/usb/host/xhci-mtk-sch.c b/drivers/usb/host/xhci-mtk-sch.c
index f3139ce7b0a9..06a6b19acaae 100644
--- a/drivers/usb/host/xhci-mtk-sch.c
+++ b/drivers/usb/host/xhci-mtk-sch.c
@@ -19,11 +19,6 @@
#define HS_BW_BOUNDARY 6144
/* usb2 spec section11.18.1: at most 188 FS bytes per microframe */
#define FS_PAYLOAD_MAX 188
-/*
- * max number of microframes for split transfer,
- * for fs isoc in : 1 ss + 1 idle + 7 cs
- */
-#define TT_MICROFRAMES_MAX 9
#define DBG_BUF_EN 64
@@ -242,28 +237,17 @@ static void drop_tt(struct usb_device *udev)
static struct mu3h_sch_ep_info *
create_sch_ep(struct xhci_hcd_mtk *mtk, struct usb_device *udev,
- struct usb_host_endpoint *ep, struct xhci_ep_ctx *ep_ctx)
+ struct usb_host_endpoint *ep)
{
struct mu3h_sch_ep_info *sch_ep;
struct mu3h_sch_bw_info *bw_info;
struct mu3h_sch_tt *tt = NULL;
- u32 len_bw_budget_table;
bw_info = get_bw_info(mtk, udev, ep);
if (!bw_info)
return ERR_PTR(-ENODEV);
- if (is_fs_or_ls(udev->speed))
- len_bw_budget_table = TT_MICROFRAMES_MAX;
- else if ((udev->speed >= USB_SPEED_SUPER)
- && usb_endpoint_xfer_isoc(&ep->desc))
- len_bw_budget_table = get_esit(ep_ctx);
- else
- len_bw_budget_table = 1;
-
- sch_ep = kzalloc(struct_size(sch_ep, bw_budget_table,
- len_bw_budget_table),
- GFP_KERNEL);
+ sch_ep = kzalloc(sizeof(*sch_ep), GFP_KERNEL);
if (!sch_ep)
return ERR_PTR(-ENOMEM);
@@ -295,8 +279,6 @@ static void setup_sch_info(struct xhci_ep_ctx *ep_ctx,
u32 mult;
u32 esit_pkts;
u32 max_esit_payload;
- u32 *bwb_table = sch_ep->bw_budget_table;
- int i;
ep_type = CTX_TO_EP_TYPE(le32_to_cpu(ep_ctx->ep_info2));
maxpkt = MAX_PACKET_DECODED(le32_to_cpu(ep_ctx->ep_info2));
@@ -332,7 +314,6 @@ static void setup_sch_info(struct xhci_ep_ctx *ep_ctx,
*/
sch_ep->pkts = max_burst + 1;
sch_ep->bw_cost_per_microframe = maxpkt * sch_ep->pkts;
- bwb_table[0] = sch_ep->bw_cost_per_microframe;
} else if (sch_ep->speed >= USB_SPEED_SUPER) {
/* usb3_r1 spec section4.4.7 & 4.4.8 */
sch_ep->cs_count = 0;
@@ -349,7 +330,6 @@ static void setup_sch_info(struct xhci_ep_ctx *ep_ctx,
if (ep_type == INT_IN_EP || ep_type == INT_OUT_EP) {
sch_ep->pkts = esit_pkts;
sch_ep->num_budget_microframes = 1;
- bwb_table[0] = maxpkt * sch_ep->pkts;
}
if (ep_type == ISOC_IN_EP || ep_type == ISOC_OUT_EP) {
@@ -366,15 +346,8 @@ static void setup_sch_info(struct xhci_ep_ctx *ep_ctx,
DIV_ROUND_UP(esit_pkts, sch_ep->pkts);
sch_ep->repeat = !!(sch_ep->num_budget_microframes > 1);
- sch_ep->bw_cost_per_microframe = maxpkt * sch_ep->pkts;
-
- for (i = 0; i < sch_ep->num_budget_microframes - 1; i++)
- bwb_table[i] = sch_ep->bw_cost_per_microframe;
-
- /* last one <= bw_cost_per_microframe */
- bwb_table[i] = maxpkt * esit_pkts
- - i * sch_ep->bw_cost_per_microframe;
}
+ sch_ep->bw_cost_per_microframe = maxpkt * sch_ep->pkts;
} else if (is_fs_or_ls(sch_ep->speed)) {
sch_ep->pkts = 1; /* at most one packet for each microframe */
@@ -384,28 +357,7 @@ static void setup_sch_info(struct xhci_ep_ctx *ep_ctx,
*/
sch_ep->cs_count = DIV_ROUND_UP(maxpkt, FS_PAYLOAD_MAX);
sch_ep->num_budget_microframes = sch_ep->cs_count;
- sch_ep->bw_cost_per_microframe =
- (maxpkt < FS_PAYLOAD_MAX) ? maxpkt : FS_PAYLOAD_MAX;
-
- /* init budget table */
- if (ep_type == ISOC_OUT_EP) {
- for (i = 0; i < sch_ep->num_budget_microframes; i++)
- bwb_table[i] = sch_ep->bw_cost_per_microframe;
- } else if (ep_type == INT_OUT_EP) {
- /* only first one consumes bandwidth, others as zero */
- bwb_table[0] = sch_ep->bw_cost_per_microframe;
- } else { /* INT_IN_EP or ISOC_IN_EP */
- bwb_table[0] = 0; /* start split */
- bwb_table[1] = 0; /* idle */
- /*
- * due to cs_count will be updated according to cs
- * position, assign all remainder budget array
- * elements as @bw_cost_per_microframe, but only first
- * @num_budget_microframes elements will be used later
- */
- for (i = 2; i < TT_MICROFRAMES_MAX; i++)
- bwb_table[i] = sch_ep->bw_cost_per_microframe;
- }
+ sch_ep->bw_cost_per_microframe = min_t(u32, maxpkt, FS_PAYLOAD_MAX);
}
}
@@ -422,7 +374,7 @@ static u32 get_max_bw(struct mu3h_sch_bw_info *sch_bw,
for (j = 0; j < sch_ep->num_budget_microframes; j++) {
k = XHCI_MTK_BW_INDEX(base + j);
- bw = sch_bw->bus_bw[k] + sch_ep->bw_budget_table[j];
+ bw = sch_bw->bus_bw[k] + sch_ep->bw_cost_per_microframe;
if (bw > max_bw)
max_bw = bw;
}
@@ -433,18 +385,16 @@ static u32 get_max_bw(struct mu3h_sch_bw_info *sch_bw,
static void update_bus_bw(struct mu3h_sch_bw_info *sch_bw,
struct mu3h_sch_ep_info *sch_ep, bool used)
{
+ int bw_updated;
u32 base;
- int i, j, k;
+ int i, j;
+
+ bw_updated = sch_ep->bw_cost_per_microframe * (used ? 1 : -1);
for (i = 0; i < sch_ep->num_esit; i++) {
base = sch_ep->offset + i * sch_ep->esit;
- for (j = 0; j < sch_ep->num_budget_microframes; j++) {
- k = XHCI_MTK_BW_INDEX(base + j);
- if (used)
- sch_bw->bus_bw[k] += sch_ep->bw_budget_table[j];
- else
- sch_bw->bus_bw[k] -= sch_ep->bw_budget_table[j];
- }
+ for (j = 0; j < sch_ep->num_budget_microframes; j++)
+ sch_bw->bus_bw[XHCI_MTK_BW_INDEX(base + j)] += bw_updated;
}
}
@@ -464,7 +414,7 @@ static int check_fs_bus_bw(struct mu3h_sch_ep_info *sch_ep, int offset)
*/
for (j = 0; j < sch_ep->num_budget_microframes; j++) {
k = XHCI_MTK_BW_INDEX(base + j);
- tmp = tt->fs_bus_bw[k] + sch_ep->bw_budget_table[j];
+ tmp = tt->fs_bus_bw[k] + sch_ep->bw_cost_per_microframe;
if (tmp > FS_PAYLOAD_MAX)
return -ESCH_BW_OVERFLOW;
}
@@ -538,19 +488,17 @@ static int check_sch_tt(struct mu3h_sch_ep_info *sch_ep, u32 offset)
static void update_sch_tt(struct mu3h_sch_ep_info *sch_ep, bool used)
{
struct mu3h_sch_tt *tt = sch_ep->sch_tt;
+ int bw_updated;
u32 base;
- int i, j, k;
+ int i, j;
+
+ bw_updated = sch_ep->bw_cost_per_microframe * (used ? 1 : -1);
for (i = 0; i < sch_ep->num_esit; i++) {
base = sch_ep->offset + i * sch_ep->esit;
- for (j = 0; j < sch_ep->num_budget_microframes; j++) {
- k = XHCI_MTK_BW_INDEX(base + j);
- if (used)
- tt->fs_bus_bw[k] += sch_ep->bw_budget_table[j];
- else
- tt->fs_bus_bw[k] -= sch_ep->bw_budget_table[j];
- }
+ for (j = 0; j < sch_ep->num_budget_microframes; j++)
+ tt->fs_bus_bw[XHCI_MTK_BW_INDEX(base + j)] += bw_updated;
}
if (used)
@@ -710,7 +658,7 @@ static int add_ep_quirk(struct usb_hcd *hcd, struct usb_device *udev,
xhci_dbg(xhci, "%s %s\n", __func__, decode_ep(ep, udev->speed));
- sch_ep = create_sch_ep(mtk, udev, ep, ep_ctx);
+ sch_ep = create_sch_ep(mtk, udev, ep);
if (IS_ERR_OR_NULL(sch_ep))
return -ENOMEM;
diff --git a/drivers/usb/host/xhci-mtk.h b/drivers/usb/host/xhci-mtk.h
index ffd4b493b4ba..1174a510dd38 100644
--- a/drivers/usb/host/xhci-mtk.h
+++ b/drivers/usb/host/xhci-mtk.h
@@ -83,7 +83,6 @@ struct mu3h_sch_bw_info {
* times; 1: distribute the (bMaxBurst+1)*(Mult+1) packets
* according to @pkts and @repeat. normal mode is used by
* default
- * @bw_budget_table: table to record bandwidth budget per microframe
*/
struct mu3h_sch_ep_info {
u32 esit;
@@ -109,7 +108,6 @@ struct mu3h_sch_ep_info {
u32 pkts;
u32 cs_count;
u32 burst_mode;
- u32 bw_budget_table[];
};
#define MU3C_U3_PORT_MAX 4
diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c
index 5c351970cdf1..d7e0e6ebf080 100644
--- a/drivers/usb/host/xhci-pci.c
+++ b/drivers/usb/host/xhci-pci.c
@@ -59,6 +59,7 @@
#define PCI_DEVICE_ID_INTEL_TIGER_LAKE_XHCI 0x9a13
#define PCI_DEVICE_ID_INTEL_MAPLE_RIDGE_XHCI 0x1138
#define PCI_DEVICE_ID_INTEL_ALDER_LAKE_XHCI 0x461e
+#define PCI_DEVICE_ID_INTEL_ALDER_LAKE_PCH_XHCI 0x51ed
#define PCI_DEVICE_ID_AMD_RENOIR_XHCI 0x1639
#define PCI_DEVICE_ID_AMD_PROMONTORYA_4 0x43b9
@@ -266,7 +267,8 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci)
pdev->device == PCI_DEVICE_ID_INTEL_ICE_LAKE_XHCI ||
pdev->device == PCI_DEVICE_ID_INTEL_TIGER_LAKE_XHCI ||
pdev->device == PCI_DEVICE_ID_INTEL_MAPLE_RIDGE_XHCI ||
- pdev->device == PCI_DEVICE_ID_INTEL_ALDER_LAKE_XHCI))
+ pdev->device == PCI_DEVICE_ID_INTEL_ALDER_LAKE_XHCI ||
+ pdev->device == PCI_DEVICE_ID_INTEL_ALDER_LAKE_PCH_XHCI))
xhci->quirks |= XHCI_DEFAULT_PM_RUNTIME_ALLOW;
if (pdev->vendor == PCI_VENDOR_ID_ETRON &&
diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
index d0b6806275e0..f9707997969d 100644
--- a/drivers/usb/host/xhci-ring.c
+++ b/drivers/usb/host/xhci-ring.c
@@ -3141,6 +3141,7 @@ irqreturn_t xhci_irq(struct usb_hcd *hcd)
if (event_loop++ < TRBS_PER_SEGMENT / 2)
continue;
xhci_update_erst_dequeue(xhci, event_ring_deq);
+ event_ring_deq = xhci->event_ring->dequeue;
/* ring is half-full, force isoc trbs to interrupt more often */
if (xhci->isoc_bei_interval > AVOID_BEI_INTERVAL_MIN)
diff --git a/drivers/usb/host/xhci-tegra.c b/drivers/usb/host/xhci-tegra.c
index c8af2cd2216d..996958a6565c 100644
--- a/drivers/usb/host/xhci-tegra.c
+++ b/drivers/usb/host/xhci-tegra.c
@@ -1034,13 +1034,13 @@ static int tegra_xusb_unpowergate_partitions(struct tegra_xusb *tegra)
int rc;
if (tegra->use_genpd) {
- rc = pm_runtime_get_sync(tegra->genpd_dev_ss);
+ rc = pm_runtime_resume_and_get(tegra->genpd_dev_ss);
if (rc < 0) {
dev_err(dev, "failed to enable XUSB SS partition\n");
return rc;
}
- rc = pm_runtime_get_sync(tegra->genpd_dev_host);
+ rc = pm_runtime_resume_and_get(tegra->genpd_dev_host);
if (rc < 0) {
dev_err(dev, "failed to enable XUSB Host partition\n");
pm_runtime_put_sync(tegra->genpd_dev_ss);
diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c
index 642610c78f58..25b87e99b4dd 100644
--- a/drivers/usb/host/xhci.c
+++ b/drivers/usb/host/xhci.c
@@ -781,6 +781,17 @@ void xhci_shutdown(struct usb_hcd *hcd)
if (xhci->quirks & XHCI_SPURIOUS_REBOOT)
usb_disable_xhci_ports(to_pci_dev(hcd->self.sysdev));
+ /* Don't poll the roothubs after shutdown. */
+ xhci_dbg(xhci, "%s: stopping usb%d port polling.\n",
+ __func__, hcd->self.busnum);
+ clear_bit(HCD_FLAG_POLL_RH, &hcd->flags);
+ del_timer_sync(&hcd->rh_timer);
+
+ if (xhci->shared_hcd) {
+ clear_bit(HCD_FLAG_POLL_RH, &xhci->shared_hcd->flags);
+ del_timer_sync(&xhci->shared_hcd->rh_timer);
+ }
+
spin_lock_irq(&xhci->lock);
xhci_halt(xhci);
/* Workaround for spurious wakeups at shutdown with HSW */
diff --git a/drivers/usb/misc/qcom_eud.c b/drivers/usb/misc/qcom_eud.c
index f929bffdc5d1..b7f13df00764 100644
--- a/drivers/usb/misc/qcom_eud.c
+++ b/drivers/usb/misc/qcom_eud.c
@@ -186,16 +186,16 @@ static int eud_probe(struct platform_device *pdev)
chip->dev = &pdev->dev;
- ret = devm_add_action_or_reset(chip->dev, eud_role_switch_release, chip);
- if (ret)
- return dev_err_probe(chip->dev, ret,
- "failed to add role switch release action\n");
-
chip->role_sw = usb_role_switch_get(&pdev->dev);
if (IS_ERR(chip->role_sw))
return dev_err_probe(chip->dev, PTR_ERR(chip->role_sw),
"failed to get role switch\n");
+ ret = devm_add_action_or_reset(chip->dev, eud_role_switch_release, chip);
+ if (ret)
+ return dev_err_probe(chip->dev, ret,
+ "failed to add role switch release action\n");
+
chip->base = devm_platform_ioremap_resource(pdev, 0);
if (IS_ERR(chip->base))
return PTR_ERR(chip->base);
diff --git a/drivers/usb/misc/uss720.c b/drivers/usb/misc/uss720.c
index 748139d26263..0be8efcda15d 100644
--- a/drivers/usb/misc/uss720.c
+++ b/drivers/usb/misc/uss720.c
@@ -71,6 +71,7 @@ static void destroy_priv(struct kref *kref)
dev_dbg(&priv->usbdev->dev, "destroying priv datastructure\n");
usb_put_dev(priv->usbdev);
+ priv->usbdev = NULL;
kfree(priv);
}
@@ -736,7 +737,6 @@ static int uss720_probe(struct usb_interface *intf,
parport_announce_port(pp);
usb_set_intfdata(intf, pp);
- usb_put_dev(usbdev);
return 0;
probe_abort:
@@ -754,7 +754,6 @@ static void uss720_disconnect(struct usb_interface *intf)
usb_set_intfdata(intf, NULL);
if (pp) {
priv = pp->private_data;
- priv->usbdev = NULL;
priv->pp = NULL;
dev_dbg(&intf->dev, "parport_remove_port\n");
parport_remove_port(pp);
diff --git a/drivers/usb/mtu3/mtu3_dr.c b/drivers/usb/mtu3/mtu3_dr.c
index a6b04831b20b..9b8aded3d95e 100644
--- a/drivers/usb/mtu3/mtu3_dr.c
+++ b/drivers/usb/mtu3/mtu3_dr.c
@@ -21,10 +21,8 @@ static inline struct ssusb_mtk *otg_sx_to_ssusb(struct otg_switch_mtk *otg_sx)
static void toggle_opstate(struct ssusb_mtk *ssusb)
{
- if (!ssusb->otg_switch.is_u3_drd) {
- mtu3_setbits(ssusb->mac_base, U3D_DEVICE_CONTROL, DC_SESSION);
- mtu3_setbits(ssusb->mac_base, U3D_POWER_MANAGEMENT, SOFT_CONN);
- }
+ mtu3_setbits(ssusb->mac_base, U3D_DEVICE_CONTROL, DC_SESSION);
+ mtu3_setbits(ssusb->mac_base, U3D_POWER_MANAGEMENT, SOFT_CONN);
}
/* only port0 supports dual-role mode */
diff --git a/drivers/usb/phy/phy-generic.c b/drivers/usb/phy/phy-generic.c
index 661a229c105d..34b9f8140187 100644
--- a/drivers/usb/phy/phy-generic.c
+++ b/drivers/usb/phy/phy-generic.c
@@ -268,6 +268,13 @@ int usb_phy_gen_create_phy(struct device *dev, struct usb_phy_generic *nop)
return -EPROBE_DEFER;
}
+ nop->vbus_draw = devm_regulator_get_exclusive(dev, "vbus");
+ if (PTR_ERR(nop->vbus_draw) == -ENODEV)
+ nop->vbus_draw = NULL;
+ if (IS_ERR(nop->vbus_draw))
+ return dev_err_probe(dev, PTR_ERR(nop->vbus_draw),
+ "could not get vbus regulator\n");
+
nop->dev = dev;
nop->phy.dev = nop->dev;
nop->phy.label = "nop-xceiv";
diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c
index a27f7efcec6a..c374620a486f 100644
--- a/drivers/usb/serial/cp210x.c
+++ b/drivers/usb/serial/cp210x.c
@@ -194,6 +194,8 @@ static const struct usb_device_id id_table[] = {
{ USB_DEVICE(0x16DC, 0x0015) }, /* W-IE-NE-R Plein & Baus GmbH CML Control, Monitoring and Data Logger */
{ USB_DEVICE(0x17A8, 0x0001) }, /* Kamstrup Optical Eye/3-wire */
{ USB_DEVICE(0x17A8, 0x0005) }, /* Kamstrup M-Bus Master MultiPort 250D */
+ { USB_DEVICE(0x17A8, 0x0101) }, /* Kamstrup 868 MHz wM-Bus C-Mode Meter Reader (Int Ant) */
+ { USB_DEVICE(0x17A8, 0x0102) }, /* Kamstrup 868 MHz wM-Bus C-Mode Meter Reader (Ext Ant) */
{ USB_DEVICE(0x17F4, 0xAAAA) }, /* Wavesense Jazz blood glucose meter */
{ USB_DEVICE(0x1843, 0x0200) }, /* Vaisala USB Instrument Cable */
{ USB_DEVICE(0x18EF, 0xE00F) }, /* ELV USB-I2C-Interface */
diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c
index e7755d9cfc61..152ad882657d 100644
--- a/drivers/usb/serial/option.c
+++ b/drivers/usb/serial/option.c
@@ -432,6 +432,8 @@ static void option_instat_callback(struct urb *urb);
#define CINTERION_PRODUCT_CLS8 0x00b0
#define CINTERION_PRODUCT_MV31_MBIM 0x00b3
#define CINTERION_PRODUCT_MV31_RMNET 0x00b7
+#define CINTERION_PRODUCT_MV32_WA 0x00f1
+#define CINTERION_PRODUCT_MV32_WB 0x00f2
/* Olivetti products */
#define OLIVETTI_VENDOR_ID 0x0b3c
@@ -1217,6 +1219,10 @@ static const struct usb_device_id option_ids[] = {
.driver_info = NCTRL(0) | RSVD(1) },
{ USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1056, 0xff), /* Telit FD980 */
.driver_info = NCTRL(2) | RSVD(3) },
+ { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1057, 0xff), /* Telit FN980 */
+ .driver_info = NCTRL(0) | RSVD(1) | RSVD(2) },
+ { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1058, 0xff), /* Telit FN980 (PCIe) */
+ .driver_info = NCTRL(0) | RSVD(1) },
{ USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1060, 0xff), /* Telit LN920 (rmnet) */
.driver_info = NCTRL(0) | RSVD(1) | RSVD(2) },
{ USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1061, 0xff), /* Telit LN920 (MBIM) */
@@ -1233,6 +1239,8 @@ static const struct usb_device_id option_ids[] = {
.driver_info = NCTRL(2) | RSVD(3) },
{ USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1073, 0xff), /* Telit FN990 (ECM) */
.driver_info = NCTRL(0) | RSVD(1) },
+ { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1075, 0xff), /* Telit FN990 (PCIe) */
+ .driver_info = RSVD(0) },
{ USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_ME910),
.driver_info = NCTRL(0) | RSVD(1) | RSVD(3) },
{ USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_ME910_DUAL_MODEM),
@@ -1969,6 +1977,10 @@ static const struct usb_device_id option_ids[] = {
.driver_info = RSVD(3)},
{ USB_DEVICE_INTERFACE_CLASS(CINTERION_VENDOR_ID, CINTERION_PRODUCT_MV31_RMNET, 0xff),
.driver_info = RSVD(0)},
+ { USB_DEVICE_INTERFACE_CLASS(CINTERION_VENDOR_ID, CINTERION_PRODUCT_MV32_WA, 0xff),
+ .driver_info = RSVD(3)},
+ { USB_DEVICE_INTERFACE_CLASS(CINTERION_VENDOR_ID, CINTERION_PRODUCT_MV32_WB, 0xff),
+ .driver_info = RSVD(3)},
{ USB_DEVICE(OLIVETTI_VENDOR_ID, OLIVETTI_PRODUCT_OLICARD100),
.driver_info = RSVD(4) },
{ USB_DEVICE(OLIVETTI_VENDOR_ID, OLIVETTI_PRODUCT_OLICARD120),
@@ -2111,10 +2123,14 @@ static const struct usb_device_id option_ids[] = {
.driver_info = RSVD(3) },
{ USB_DEVICE(0x1508, 0x1001), /* Fibocom NL668 (IOT version) */
.driver_info = RSVD(4) | RSVD(5) | RSVD(6) },
+ { USB_DEVICE(0x1782, 0x4d10) }, /* Fibocom L610 (AT mode) */
+ { USB_DEVICE_INTERFACE_CLASS(0x1782, 0x4d11, 0xff) }, /* Fibocom L610 (ECM/RNDIS mode) */
{ USB_DEVICE(0x2cb7, 0x0104), /* Fibocom NL678 series */
.driver_info = RSVD(4) | RSVD(5) },
{ USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x0105, 0xff), /* Fibocom NL678 series */
.driver_info = RSVD(6) },
+ { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x0106, 0xff) }, /* Fibocom MA510 (ECM mode w/ diag intf.) */
+ { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x010a, 0xff) }, /* Fibocom MA510 (ECM mode) */
{ USB_DEVICE_AND_INTERFACE_INFO(0x2cb7, 0x010b, 0xff, 0xff, 0x30) }, /* Fibocom FG150 Diag */
{ USB_DEVICE_AND_INTERFACE_INFO(0x2cb7, 0x010b, 0xff, 0, 0) }, /* Fibocom FG150 AT */
{ USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x01a0, 0xff) }, /* Fibocom NL668-AM/NL652-EU (laptop MBIM) */
diff --git a/drivers/usb/serial/pl2303.c b/drivers/usb/serial/pl2303.c
index 88b284d61681..1d878d05a658 100644
--- a/drivers/usb/serial/pl2303.c
+++ b/drivers/usb/serial/pl2303.c
@@ -106,6 +106,7 @@ static const struct usb_device_id id_table[] = {
{ USB_DEVICE(HP_VENDOR_ID, HP_LCM220_PRODUCT_ID) },
{ USB_DEVICE(HP_VENDOR_ID, HP_LCM960_PRODUCT_ID) },
{ USB_DEVICE(HP_VENDOR_ID, HP_LM920_PRODUCT_ID) },
+ { USB_DEVICE(HP_VENDOR_ID, HP_LM930_PRODUCT_ID) },
{ USB_DEVICE(HP_VENDOR_ID, HP_LM940_PRODUCT_ID) },
{ USB_DEVICE(HP_VENDOR_ID, HP_TD620_PRODUCT_ID) },
{ USB_DEVICE(CRESSI_VENDOR_ID, CRESSI_EDY_PRODUCT_ID) },
diff --git a/drivers/usb/serial/pl2303.h b/drivers/usb/serial/pl2303.h
index c5406452b774..732f9b13ad5d 100644
--- a/drivers/usb/serial/pl2303.h
+++ b/drivers/usb/serial/pl2303.h
@@ -135,6 +135,7 @@
#define HP_TD620_PRODUCT_ID 0x0956
#define HP_LD960_PRODUCT_ID 0x0b39
#define HP_LD381_PRODUCT_ID 0x0f7f
+#define HP_LM930_PRODUCT_ID 0x0f9b
#define HP_LCM220_PRODUCT_ID 0x3139
#define HP_LCM960_PRODUCT_ID 0x3239
#define HP_LD220_PRODUCT_ID 0x3524
diff --git a/drivers/usb/serial/qcserial.c b/drivers/usb/serial/qcserial.c
index c18bf8164bc2..586ef5551e76 100644
--- a/drivers/usb/serial/qcserial.c
+++ b/drivers/usb/serial/qcserial.c
@@ -166,6 +166,8 @@ static const struct usb_device_id id_table[] = {
{DEVICE_SWI(0x1199, 0x9090)}, /* Sierra Wireless EM7565 QDL */
{DEVICE_SWI(0x1199, 0x9091)}, /* Sierra Wireless EM7565 */
{DEVICE_SWI(0x1199, 0x90d2)}, /* Sierra Wireless EM9191 QDL */
+ {DEVICE_SWI(0x1199, 0xc080)}, /* Sierra Wireless EM7590 QDL */
+ {DEVICE_SWI(0x1199, 0xc081)}, /* Sierra Wireless EM7590 */
{DEVICE_SWI(0x413c, 0x81a2)}, /* Dell Wireless 5806 Gobi(TM) 4G LTE Mobile Broadband Card */
{DEVICE_SWI(0x413c, 0x81a3)}, /* Dell Wireless 5570 HSPA+ (42Mbps) Mobile Broadband Card */
{DEVICE_SWI(0x413c, 0x81a4)}, /* Dell Wireless 5570e HSPA+ (42Mbps) Mobile Broadband Card */
diff --git a/drivers/usb/serial/whiteheat.c b/drivers/usb/serial/whiteheat.c
index da65d14c9ed5..06aad0d727dd 100644
--- a/drivers/usb/serial/whiteheat.c
+++ b/drivers/usb/serial/whiteheat.c
@@ -584,9 +584,8 @@ static int firm_send_command(struct usb_serial_port *port, __u8 command,
switch (command) {
case WHITEHEAT_GET_DTR_RTS:
info = usb_get_serial_port_data(port);
- memcpy(&info->mcr, command_info->result_buffer,
- sizeof(struct whiteheat_dr_info));
- break;
+ info->mcr = command_info->result_buffer[0];
+ break;
}
}
exit:
diff --git a/drivers/usb/typec/Kconfig b/drivers/usb/typec/Kconfig
index 8f921213b17d..ba24847fb245 100644
--- a/drivers/usb/typec/Kconfig
+++ b/drivers/usb/typec/Kconfig
@@ -56,6 +56,7 @@ config TYPEC_RT1719
tristate "Richtek RT1719 Sink Only Type-C controller driver"
depends on USB_ROLE_SWITCH || !USB_ROLE_SWITCH
depends on I2C
+ depends on POWER_SUPPLY
select REGMAP_I2C
help
Say Y or M here if your system has Richtek RT1719 sink only
diff --git a/drivers/usb/typec/tcpm/tcpci.c b/drivers/usb/typec/tcpm/tcpci.c
index e07d26a3cd8e..f33e08eb7670 100644
--- a/drivers/usb/typec/tcpm/tcpci.c
+++ b/drivers/usb/typec/tcpm/tcpci.c
@@ -877,7 +877,7 @@ static int tcpci_remove(struct i2c_client *client)
/* Disable chip interrupts before unregistering port */
err = tcpci_write16(chip->tcpci, TCPC_ALERT_MASK, 0);
if (err < 0)
- return err;
+ dev_warn(&client->dev, "Failed to disable irqs (%pe)\n", ERR_PTR(err));
tcpci_unregister_port(chip->tcpci);
diff --git a/drivers/usb/typec/tcpm/tcpci_mt6360.c b/drivers/usb/typec/tcpm/tcpci_mt6360.c
index f1bd9e09bc87..8a952eaf9016 100644
--- a/drivers/usb/typec/tcpm/tcpci_mt6360.c
+++ b/drivers/usb/typec/tcpm/tcpci_mt6360.c
@@ -15,6 +15,9 @@
#include "tcpci.h"
+#define MT6360_REG_PHYCTRL1 0x80
+#define MT6360_REG_PHYCTRL3 0x82
+#define MT6360_REG_PHYCTRL7 0x86
#define MT6360_REG_VCONNCTRL1 0x8C
#define MT6360_REG_MODECTRL2 0x8F
#define MT6360_REG_SWRESET 0xA0
@@ -22,6 +25,8 @@
#define MT6360_REG_DRPCTRL1 0xA2
#define MT6360_REG_DRPCTRL2 0xA3
#define MT6360_REG_I2CTORST 0xBF
+#define MT6360_REG_PHYCTRL11 0xCA
+#define MT6360_REG_RXCTRL1 0xCE
#define MT6360_REG_RXCTRL2 0xCF
#define MT6360_REG_CTDCTRL2 0xEC
@@ -106,6 +111,27 @@ static int mt6360_tcpc_init(struct tcpci *tcpci, struct tcpci_data *tdata)
if (ret)
return ret;
+ /* BMC PHY */
+ ret = mt6360_tcpc_write16(regmap, MT6360_REG_PHYCTRL1, 0x3A70);
+ if (ret)
+ return ret;
+
+ ret = regmap_write(regmap, MT6360_REG_PHYCTRL3, 0x82);
+ if (ret)
+ return ret;
+
+ ret = regmap_write(regmap, MT6360_REG_PHYCTRL7, 0x36);
+ if (ret)
+ return ret;
+
+ ret = mt6360_tcpc_write16(regmap, MT6360_REG_PHYCTRL11, 0x3C60);
+ if (ret)
+ return ret;
+
+ ret = regmap_write(regmap, MT6360_REG_RXCTRL1, 0xE8);
+ if (ret)
+ return ret;
+
/* Set shipping mode off, AUTOIDLE on */
return regmap_write(regmap, MT6360_REG_MODECTRL2, 0x7A);
}
diff --git a/drivers/usb/typec/ucsi/ucsi.c b/drivers/usb/typec/ucsi/ucsi.c
index f0c2fa19f3e0..a6045aef0d04 100644
--- a/drivers/usb/typec/ucsi/ucsi.c
+++ b/drivers/usb/typec/ucsi/ucsi.c
@@ -949,6 +949,8 @@ static int ucsi_dr_swap(struct typec_port *port, enum typec_data_role role)
role == TYPEC_HOST))
goto out_unlock;
+ reinit_completion(&con->complete);
+
command = UCSI_SET_UOR | UCSI_CONNECTOR_NUMBER(con->num);
command |= UCSI_SET_UOR_ROLE(role);
command |= UCSI_SET_UOR_ACCEPT_ROLE_SWAPS;
@@ -956,14 +958,18 @@ static int ucsi_dr_swap(struct typec_port *port, enum typec_data_role role)
if (ret < 0)
goto out_unlock;
+ mutex_unlock(&con->lock);
+
if (!wait_for_completion_timeout(&con->complete,
- msecs_to_jiffies(UCSI_SWAP_TIMEOUT_MS)))
- ret = -ETIMEDOUT;
+ msecs_to_jiffies(UCSI_SWAP_TIMEOUT_MS)))
+ return -ETIMEDOUT;
+
+ return 0;
out_unlock:
mutex_unlock(&con->lock);
- return ret < 0 ? ret : 0;
+ return ret;
}
static int ucsi_pr_swap(struct typec_port *port, enum typec_role role)
@@ -985,6 +991,8 @@ static int ucsi_pr_swap(struct typec_port *port, enum typec_role role)
if (cur_role == role)
goto out_unlock;
+ reinit_completion(&con->complete);
+
command = UCSI_SET_PDR | UCSI_CONNECTOR_NUMBER(con->num);
command |= UCSI_SET_PDR_ROLE(role);
command |= UCSI_SET_PDR_ACCEPT_ROLE_SWAPS;
@@ -992,11 +1000,13 @@ static int ucsi_pr_swap(struct typec_port *port, enum typec_role role)
if (ret < 0)
goto out_unlock;
+ mutex_unlock(&con->lock);
+
if (!wait_for_completion_timeout(&con->complete,
- msecs_to_jiffies(UCSI_SWAP_TIMEOUT_MS))) {
- ret = -ETIMEDOUT;
- goto out_unlock;
- }
+ msecs_to_jiffies(UCSI_SWAP_TIMEOUT_MS)))
+ return -ETIMEDOUT;
+
+ mutex_lock(&con->lock);
/* Something has gone wrong while swapping the role */
if (UCSI_CONSTAT_PWR_OPMODE(con->status.flags) !=
diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c
index 79001301b383..e0de44000d92 100644
--- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
+++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
@@ -161,6 +161,7 @@ struct mlx5_vdpa_net {
struct mlx5_flow_handle *rx_rule_mcast;
bool setup;
u32 cur_num_vqs;
+ u32 rqt_size;
struct notifier_block nb;
struct vdpa_callback config_cb;
struct mlx5_vdpa_wq_ent cvq_ent;
@@ -204,17 +205,12 @@ static __virtio16 cpu_to_mlx5vdpa16(struct mlx5_vdpa_dev *mvdev, u16 val)
return __cpu_to_virtio16(mlx5_vdpa_is_little_endian(mvdev), val);
}
-static inline u32 mlx5_vdpa_max_qps(int max_vqs)
-{
- return max_vqs / 2;
-}
-
static u16 ctrl_vq_idx(struct mlx5_vdpa_dev *mvdev)
{
if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_MQ)))
return 2;
- return 2 * mlx5_vdpa_max_qps(mvdev->max_vqs);
+ return mvdev->max_vqs;
}
static bool is_ctrl_vq_idx(struct mlx5_vdpa_dev *mvdev, u16 idx)
@@ -1236,25 +1232,13 @@ static void teardown_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *
static int create_rqt(struct mlx5_vdpa_net *ndev)
{
__be32 *list;
- int max_rqt;
void *rqtc;
int inlen;
void *in;
int i, j;
int err;
- int num;
-
- if (!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_MQ)))
- num = 1;
- else
- num = ndev->cur_num_vqs / 2;
- max_rqt = min_t(int, roundup_pow_of_two(num),
- 1 << MLX5_CAP_GEN(ndev->mvdev.mdev, log_max_rqt_size));
- if (max_rqt < 1)
- return -EOPNOTSUPP;
-
- inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + max_rqt * MLX5_ST_SZ_BYTES(rq_num);
+ inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + ndev->rqt_size * MLX5_ST_SZ_BYTES(rq_num);
in = kzalloc(inlen, GFP_KERNEL);
if (!in)
return -ENOMEM;
@@ -1263,12 +1247,12 @@ static int create_rqt(struct mlx5_vdpa_net *ndev)
rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context);
MLX5_SET(rqtc, rqtc, list_q_type, MLX5_RQTC_LIST_Q_TYPE_VIRTIO_NET_Q);
- MLX5_SET(rqtc, rqtc, rqt_max_size, max_rqt);
+ MLX5_SET(rqtc, rqtc, rqt_max_size, ndev->rqt_size);
list = MLX5_ADDR_OF(rqtc, rqtc, rq_num[0]);
- for (i = 0, j = 0; i < max_rqt; i++, j += 2)
- list[i] = cpu_to_be32(ndev->vqs[j % (2 * num)].virtq_id);
+ for (i = 0, j = 0; i < ndev->rqt_size; i++, j += 2)
+ list[i] = cpu_to_be32(ndev->vqs[j % ndev->cur_num_vqs].virtq_id);
- MLX5_SET(rqtc, rqtc, rqt_actual_size, max_rqt);
+ MLX5_SET(rqtc, rqtc, rqt_actual_size, ndev->rqt_size);
err = mlx5_vdpa_create_rqt(&ndev->mvdev, in, inlen, &ndev->res.rqtn);
kfree(in);
if (err)
@@ -1282,19 +1266,13 @@ static int create_rqt(struct mlx5_vdpa_net *ndev)
static int modify_rqt(struct mlx5_vdpa_net *ndev, int num)
{
__be32 *list;
- int max_rqt;
void *rqtc;
int inlen;
void *in;
int i, j;
int err;
- max_rqt = min_t(int, roundup_pow_of_two(ndev->cur_num_vqs / 2),
- 1 << MLX5_CAP_GEN(ndev->mvdev.mdev, log_max_rqt_size));
- if (max_rqt < 1)
- return -EOPNOTSUPP;
-
- inlen = MLX5_ST_SZ_BYTES(modify_rqt_in) + max_rqt * MLX5_ST_SZ_BYTES(rq_num);
+ inlen = MLX5_ST_SZ_BYTES(modify_rqt_in) + ndev->rqt_size * MLX5_ST_SZ_BYTES(rq_num);
in = kzalloc(inlen, GFP_KERNEL);
if (!in)
return -ENOMEM;
@@ -1305,10 +1283,10 @@ static int modify_rqt(struct mlx5_vdpa_net *ndev, int num)
MLX5_SET(rqtc, rqtc, list_q_type, MLX5_RQTC_LIST_Q_TYPE_VIRTIO_NET_Q);
list = MLX5_ADDR_OF(rqtc, rqtc, rq_num[0]);
- for (i = 0, j = 0; i < max_rqt; i++, j += 2)
+ for (i = 0, j = 0; i < ndev->rqt_size; i++, j += 2)
list[i] = cpu_to_be32(ndev->vqs[j % num].virtq_id);
- MLX5_SET(rqtc, rqtc, rqt_actual_size, max_rqt);
+ MLX5_SET(rqtc, rqtc, rqt_actual_size, ndev->rqt_size);
err = mlx5_vdpa_modify_rqt(&ndev->mvdev, in, inlen, ndev->res.rqtn);
kfree(in);
if (err)
@@ -1625,7 +1603,7 @@ static virtio_net_ctrl_ack handle_ctrl_mq(struct mlx5_vdpa_dev *mvdev, u8 cmd)
newqps = mlx5vdpa16_to_cpu(mvdev, mq.virtqueue_pairs);
if (newqps < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
- newqps > mlx5_vdpa_max_qps(mvdev->max_vqs))
+ newqps > ndev->rqt_size)
break;
if (ndev->cur_num_vqs == 2 * newqps) {
@@ -1989,7 +1967,7 @@ static int setup_virtqueues(struct mlx5_vdpa_dev *mvdev)
int err;
int i;
- for (i = 0; i < 2 * mlx5_vdpa_max_qps(mvdev->max_vqs); i++) {
+ for (i = 0; i < mvdev->max_vqs; i++) {
err = setup_vq(ndev, &ndev->vqs[i]);
if (err)
goto err_vq;
@@ -2060,9 +2038,11 @@ static int mlx5_vdpa_set_driver_features(struct vdpa_device *vdev, u64 features)
ndev->mvdev.actual_features = features & ndev->mvdev.mlx_features;
if (ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_MQ))
- ndev->cur_num_vqs = 2 * mlx5vdpa16_to_cpu(mvdev, ndev->config.max_virtqueue_pairs);
+ ndev->rqt_size = mlx5vdpa16_to_cpu(mvdev, ndev->config.max_virtqueue_pairs);
else
- ndev->cur_num_vqs = 2;
+ ndev->rqt_size = 1;
+
+ ndev->cur_num_vqs = 2 * ndev->rqt_size;
update_cvq_info(mvdev);
return err;
@@ -2529,7 +2509,7 @@ static void init_mvqs(struct mlx5_vdpa_net *ndev)
struct mlx5_vdpa_virtqueue *mvq;
int i;
- for (i = 0; i < 2 * mlx5_vdpa_max_qps(ndev->mvdev.max_vqs); ++i) {
+ for (i = 0; i < ndev->mvdev.max_vqs; ++i) {
mvq = &ndev->vqs[i];
memset(mvq, 0, offsetof(struct mlx5_vdpa_virtqueue, ri));
mvq->index = i;
@@ -2671,7 +2651,8 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name,
return -EOPNOTSUPP;
}
- max_vqs = MLX5_CAP_DEV_VDPA_EMULATION(mdev, max_num_virtio_queues);
+ max_vqs = min_t(int, MLX5_CAP_DEV_VDPA_EMULATION(mdev, max_num_virtio_queues),
+ 1 << MLX5_CAP_GEN(mdev, log_max_rqt_size));
if (max_vqs < 2) {
dev_warn(mdev->device,
"%d virtqueues are supported. At least 2 are required\n",
@@ -2742,7 +2723,7 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name,
ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_NET_F_MAC);
}
- config->max_virtqueue_pairs = cpu_to_mlx5vdpa16(mvdev, mlx5_vdpa_max_qps(max_vqs));
+ config->max_virtqueue_pairs = cpu_to_mlx5vdpa16(mvdev, max_vqs / 2);
mvdev->vdev.dma_dev = &mdev->pdev->dev;
err = mlx5_vdpa_alloc_resources(&ndev->mvdev);
if (err)
@@ -2769,7 +2750,7 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name,
ndev->nb.notifier_call = event_handler;
mlx5_notifier_register(mdev, &ndev->nb);
mvdev->vdev.mdev = &mgtdev->mgtdev;
- err = _vdpa_register_device(&mvdev->vdev, 2 * mlx5_vdpa_max_qps(max_vqs) + 1);
+ err = _vdpa_register_device(&mvdev->vdev, max_vqs + 1);
if (err)
goto err_reg;
diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index 792ab5f23647..297b5db47454 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -1450,13 +1450,9 @@ err:
return ERR_PTR(r);
}
-static struct ptr_ring *get_tap_ptr_ring(int fd)
+static struct ptr_ring *get_tap_ptr_ring(struct file *file)
{
struct ptr_ring *ring;
- struct file *file = fget(fd);
-
- if (!file)
- return NULL;
ring = tun_get_tx_ring(file);
if (!IS_ERR(ring))
goto out;
@@ -1465,7 +1461,6 @@ static struct ptr_ring *get_tap_ptr_ring(int fd)
goto out;
ring = NULL;
out:
- fput(file);
return ring;
}
@@ -1552,8 +1547,12 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd)
r = vhost_net_enable_vq(n, vq);
if (r)
goto err_used;
- if (index == VHOST_NET_VQ_RX)
- nvq->rx_ring = get_tap_ptr_ring(fd);
+ if (index == VHOST_NET_VQ_RX) {
+ if (sock)
+ nvq->rx_ring = get_tap_ptr_ring(sock->file);
+ else
+ nvq->rx_ring = NULL;
+ }
oldubufs = nvq->ubufs;
nvq->ubufs = ubufs;
diff --git a/drivers/video/fbdev/arkfb.c b/drivers/video/fbdev/arkfb.c
index edf169d0816e..eb3e47c58c5f 100644
--- a/drivers/video/fbdev/arkfb.c
+++ b/drivers/video/fbdev/arkfb.c
@@ -566,6 +566,9 @@ static int arkfb_check_var(struct fb_var_screeninfo *var, struct fb_info *info)
{
int rv, mem, step;
+ if (!var->pixclock)
+ return -EINVAL;
+
/* Find appropriate format */
rv = svga_match_format (arkfb_formats, var, NULL);
if (rv < 0)
diff --git a/drivers/video/fbdev/aty/aty128fb.c b/drivers/video/fbdev/aty/aty128fb.c
index 6ff16d3132e5..b26c81233b6b 100644
--- a/drivers/video/fbdev/aty/aty128fb.c
+++ b/drivers/video/fbdev/aty/aty128fb.c
@@ -68,7 +68,6 @@
#ifdef CONFIG_PPC_PMAC
#include <asm/machdep.h>
#include <asm/pmac_feature.h>
-#include <asm/prom.h>
#include "../macmodes.h"
#endif
diff --git a/drivers/video/fbdev/aty/atyfb_base.c b/drivers/video/fbdev/aty/atyfb_base.c
index 1aef3d6ebd88..a3e6faed7745 100644
--- a/drivers/video/fbdev/aty/atyfb_base.c
+++ b/drivers/video/fbdev/aty/atyfb_base.c
@@ -79,7 +79,6 @@
#ifdef __powerpc__
#include <asm/machdep.h>
-#include <asm/prom.h>
#include "../macmodes.h"
#endif
#ifdef __sparc__
diff --git a/drivers/video/fbdev/aty/radeon_pm.c b/drivers/video/fbdev/aty/radeon_pm.c
index b5fbd5329652..97a5972f5b1f 100644
--- a/drivers/video/fbdev/aty/radeon_pm.c
+++ b/drivers/video/fbdev/aty/radeon_pm.c
@@ -22,7 +22,6 @@
#ifdef CONFIG_PPC_PMAC
#include <asm/machdep.h>
-#include <asm/prom.h>
#include <asm/pmac_feature.h>
#endif
diff --git a/drivers/video/fbdev/aty/radeonfb.h b/drivers/video/fbdev/aty/radeonfb.h
index 93f403cbb415..91d81b576231 100644
--- a/drivers/video/fbdev/aty/radeonfb.h
+++ b/drivers/video/fbdev/aty/radeonfb.h
@@ -21,7 +21,7 @@
#include <asm/io.h>
-#if defined(CONFIG_PPC) || defined(CONFIG_SPARC)
+#ifdef CONFIG_SPARC
#include <asm/prom.h>
#endif
diff --git a/drivers/video/fbdev/clps711x-fb.c b/drivers/video/fbdev/clps711x-fb.c
index c5d15c6db287..771ce1f76951 100644
--- a/drivers/video/fbdev/clps711x-fb.c
+++ b/drivers/video/fbdev/clps711x-fb.c
@@ -268,8 +268,7 @@ static int clps711x_fb_probe(struct platform_device *pdev)
goto out_fb_release;
}
- cfb->syscon =
- syscon_regmap_lookup_by_compatible("cirrus,ep7209-syscon1");
+ cfb->syscon = syscon_regmap_lookup_by_phandle(np, "syscon");
if (IS_ERR(cfb->syscon)) {
ret = PTR_ERR(cfb->syscon);
goto out_fb_release;
diff --git a/drivers/video/fbdev/controlfb.c b/drivers/video/fbdev/controlfb.c
index bd59e7b11ed5..aba46118b208 100644
--- a/drivers/video/fbdev/controlfb.c
+++ b/drivers/video/fbdev/controlfb.c
@@ -47,9 +47,6 @@
#include <linux/nvram.h>
#include <linux/adb.h>
#include <linux/cuda.h>
-#ifdef CONFIG_PPC_PMAC
-#include <asm/prom.h>
-#endif
#ifdef CONFIG_BOOTX_TEXT
#include <asm/btext.h>
#endif
diff --git a/drivers/video/fbdev/core/fbsysfs.c b/drivers/video/fbdev/core/fbsysfs.c
index 26892940c213..82e31a2d845e 100644
--- a/drivers/video/fbdev/core/fbsysfs.c
+++ b/drivers/video/fbdev/core/fbsysfs.c
@@ -80,6 +80,10 @@ void framebuffer_release(struct fb_info *info)
{
if (!info)
return;
+
+ if (WARN_ON(refcount_read(&info->count)))
+ return;
+
kfree(info->apertures);
kfree(info);
}
diff --git a/drivers/video/fbdev/efifb.c b/drivers/video/fbdev/efifb.c
index ea42ba6445b2..b3d5f884c544 100644
--- a/drivers/video/fbdev/efifb.c
+++ b/drivers/video/fbdev/efifb.c
@@ -243,6 +243,10 @@ error:
static inline void efifb_show_boot_graphics(struct fb_info *info) {}
#endif
+/*
+ * fb_ops.fb_destroy is called by the last put_fb_info() call at the end
+ * of unregister_framebuffer() or fb_release(). Do any cleanup here.
+ */
static void efifb_destroy(struct fb_info *info)
{
if (efifb_pci_dev)
@@ -254,10 +258,13 @@ static void efifb_destroy(struct fb_info *info)
else
memunmap(info->screen_base);
}
+
if (request_mem_succeeded)
release_mem_region(info->apertures->ranges[0].base,
info->apertures->ranges[0].size);
fb_dealloc_cmap(&info->cmap);
+
+ framebuffer_release(info);
}
static const struct fb_ops efifb_ops = {
@@ -620,9 +627,9 @@ static int efifb_remove(struct platform_device *pdev)
{
struct fb_info *info = platform_get_drvdata(pdev);
+ /* efifb_destroy takes care of info cleanup */
unregister_framebuffer(info);
sysfs_remove_groups(&pdev->dev.kobj, efifb_groups);
- framebuffer_release(info);
return 0;
}
diff --git a/drivers/video/fbdev/i740fb.c b/drivers/video/fbdev/i740fb.c
index 52cce0db8bd3..09dd85553d4f 100644
--- a/drivers/video/fbdev/i740fb.c
+++ b/drivers/video/fbdev/i740fb.c
@@ -657,6 +657,9 @@ static int i740fb_decode_var(const struct fb_var_screeninfo *var,
static int i740fb_check_var(struct fb_var_screeninfo *var, struct fb_info *info)
{
+ if (!var->pixclock)
+ return -EINVAL;
+
switch (var->bits_per_pixel) {
case 8:
var->red.offset = var->green.offset = var->blue.offset = 0;
@@ -740,7 +743,7 @@ static int i740fb_set_par(struct fb_info *info)
if (i)
return i;
- memset(info->screen_base, 0, info->screen_size);
+ memset_io(info->screen_base, 0, info->screen_size);
vga_protect(par);
diff --git a/drivers/video/fbdev/imxfb.c b/drivers/video/fbdev/imxfb.c
index 68288756ffff..a2f644c97f28 100644
--- a/drivers/video/fbdev/imxfb.c
+++ b/drivers/video/fbdev/imxfb.c
@@ -925,10 +925,12 @@ static int imxfb_probe(struct platform_device *pdev)
sizeof(struct imx_fb_videomode), GFP_KERNEL);
if (!fbi->mode) {
ret = -ENOMEM;
+ of_node_put(display_np);
goto failed_of_parse;
}
ret = imxfb_of_read_mode(&pdev->dev, display_np, fbi->mode);
+ of_node_put(display_np);
if (ret)
goto failed_of_parse;
}
diff --git a/drivers/video/fbdev/kyro/fbdev.c b/drivers/video/fbdev/kyro/fbdev.c
index 25801e8e3f74..d57772f96ad2 100644
--- a/drivers/video/fbdev/kyro/fbdev.c
+++ b/drivers/video/fbdev/kyro/fbdev.c
@@ -494,6 +494,8 @@ static int kyrofb_set_par(struct fb_info *info)
info->var.hsync_len +
info->var.left_margin)) / 1000;
+ if (!lineclock)
+ return -EINVAL;
/* time for a frame in ns (precision in 32bpp) */
frameclock = lineclock * (info->var.yres +
diff --git a/drivers/video/fbdev/matrox/matroxfb_base.h b/drivers/video/fbdev/matrox/matroxfb_base.h
index 759dee996af1..958be6805f87 100644
--- a/drivers/video/fbdev/matrox/matroxfb_base.h
+++ b/drivers/video/fbdev/matrox/matroxfb_base.h
@@ -47,7 +47,6 @@
#include <asm/unaligned.h>
#if defined(CONFIG_PPC_PMAC)
-#include <asm/prom.h>
#include "../macmodes.h"
#endif
diff --git a/drivers/video/fbdev/mb862xx/mb862xxfbdrv.c b/drivers/video/fbdev/mb862xx/mb862xxfbdrv.c
index 63721337a377..a7508f5be343 100644
--- a/drivers/video/fbdev/mb862xx/mb862xxfbdrv.c
+++ b/drivers/video/fbdev/mb862xx/mb862xxfbdrv.c
@@ -18,6 +18,8 @@
#include <linux/interrupt.h>
#include <linux/pci.h>
#if defined(CONFIG_OF)
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
#include <linux/of_platform.h>
#endif
#include "mb862xxfb.h"
diff --git a/drivers/video/fbdev/mmp/core.c b/drivers/video/fbdev/mmp/core.c
index 154127256a2c..03707461eced 100644
--- a/drivers/video/fbdev/mmp/core.c
+++ b/drivers/video/fbdev/mmp/core.c
@@ -127,19 +127,18 @@ EXPORT_SYMBOL_GPL(mmp_unregister_panel);
*/
struct mmp_path *mmp_get_path(const char *name)
{
- struct mmp_path *path;
- int found = 0;
+ struct mmp_path *path = NULL, *iter;
mutex_lock(&disp_lock);
- list_for_each_entry(path, &path_list, node) {
- if (!strcmp(name, path->name)) {
- found = 1;
+ list_for_each_entry(iter, &path_list, node) {
+ if (!strcmp(name, iter->name)) {
+ path = iter;
break;
}
}
mutex_unlock(&disp_lock);
- return found ? path : NULL;
+ return path;
}
EXPORT_SYMBOL_GPL(mmp_get_path);
diff --git a/drivers/video/fbdev/neofb.c b/drivers/video/fbdev/neofb.c
index 966df2a07360..28d32cbf496b 100644
--- a/drivers/video/fbdev/neofb.c
+++ b/drivers/video/fbdev/neofb.c
@@ -585,7 +585,7 @@ neofb_check_var(struct fb_var_screeninfo *var, struct fb_info *info)
DBG("neofb_check_var");
- if (var->pixclock && PICOS2KHZ(var->pixclock) > par->maxClock)
+ if (!var->pixclock || PICOS2KHZ(var->pixclock) > par->maxClock)
return -EINVAL;
/* Is the mode larger than the LCD panel? */
diff --git a/drivers/video/fbdev/omap/hwa742.c b/drivers/video/fbdev/omap/hwa742.c
index b191bef22d98..9d9fe5c3a7a1 100644
--- a/drivers/video/fbdev/omap/hwa742.c
+++ b/drivers/video/fbdev/omap/hwa742.c
@@ -964,7 +964,7 @@ static int hwa742_init(struct omapfb_device *fbdev, int ext_mode,
if ((r = calc_extif_timings(ext_clk, &extif_mem_div)) < 0)
goto err3;
hwa742.extif->set_timings(&hwa742.reg_timings);
- clk_enable(hwa742.sys_ck);
+ clk_prepare_enable(hwa742.sys_ck);
calc_hwa742_clk_rates(ext_clk, &sys_clk, &pix_clk);
if ((r = calc_extif_timings(sys_clk, &extif_mem_div)) < 0)
@@ -1023,7 +1023,7 @@ static int hwa742_init(struct omapfb_device *fbdev, int ext_mode,
return 0;
err4:
- clk_disable(hwa742.sys_ck);
+ clk_disable_unprepare(hwa742.sys_ck);
err3:
hwa742.extif->cleanup();
err2:
@@ -1037,7 +1037,7 @@ static void hwa742_cleanup(void)
hwa742_set_update_mode(OMAPFB_UPDATE_DISABLED);
hwa742.extif->cleanup();
hwa742.int_ctrl->cleanup();
- clk_disable(hwa742.sys_ck);
+ clk_disable_unprepare(hwa742.sys_ck);
}
struct lcd_ctrl hwa742_ctrl = {
diff --git a/drivers/video/fbdev/omap/lcdc.c b/drivers/video/fbdev/omap/lcdc.c
index 7317c9aad677..97d20dc0d1d0 100644
--- a/drivers/video/fbdev/omap/lcdc.c
+++ b/drivers/video/fbdev/omap/lcdc.c
@@ -711,7 +711,7 @@ static int omap_lcdc_init(struct omapfb_device *fbdev, int ext_mode,
dev_err(fbdev->dev, "failed to adjust LCD rate\n");
goto fail1;
}
- clk_enable(lcdc.lcd_ck);
+ clk_prepare_enable(lcdc.lcd_ck);
r = request_irq(OMAP_LCDC_IRQ, lcdc_irq_handler, 0, MODULE_NAME, fbdev);
if (r) {
@@ -746,7 +746,7 @@ fail4:
fail3:
free_irq(OMAP_LCDC_IRQ, lcdc.fbdev);
fail2:
- clk_disable(lcdc.lcd_ck);
+ clk_disable_unprepare(lcdc.lcd_ck);
fail1:
clk_put(lcdc.lcd_ck);
fail0:
@@ -760,7 +760,7 @@ static void omap_lcdc_cleanup(void)
free_fbmem();
omap_free_lcd_dma();
free_irq(OMAP_LCDC_IRQ, lcdc.fbdev);
- clk_disable(lcdc.lcd_ck);
+ clk_disable_unprepare(lcdc.lcd_ck);
clk_put(lcdc.lcd_ck);
}
diff --git a/drivers/video/fbdev/omap/sossi.c b/drivers/video/fbdev/omap/sossi.c
index 80ac67f27f0d..b9cb8b386627 100644
--- a/drivers/video/fbdev/omap/sossi.c
+++ b/drivers/video/fbdev/omap/sossi.c
@@ -598,7 +598,7 @@ static int sossi_init(struct omapfb_device *fbdev)
l &= ~CONF_SOSSI_RESET_R;
omap_writel(l, MOD_CONF_CTRL_1);
- clk_enable(sossi.fck);
+ clk_prepare_enable(sossi.fck);
l = omap_readl(ARM_IDLECT2);
l &= ~(1 << 8); /* DMACK_REQ */
omap_writel(l, ARM_IDLECT2);
@@ -649,7 +649,7 @@ static int sossi_init(struct omapfb_device *fbdev)
return 0;
err:
- clk_disable(sossi.fck);
+ clk_disable_unprepare(sossi.fck);
clk_put(sossi.fck);
return r;
}
@@ -657,6 +657,7 @@ err:
static void sossi_cleanup(void)
{
omap_lcdc_free_dma_callback();
+ clk_unprepare(sossi.fck);
clk_put(sossi.fck);
iounmap(sossi.base);
}
diff --git a/drivers/video/fbdev/platinumfb.c b/drivers/video/fbdev/platinumfb.c
index ce413a9df06e..5b9e26ea6449 100644
--- a/drivers/video/fbdev/platinumfb.c
+++ b/drivers/video/fbdev/platinumfb.c
@@ -30,9 +30,9 @@
#include <linux/fb.h>
#include <linux/init.h>
#include <linux/nvram.h>
+#include <linux/of_address.h>
#include <linux/of_device.h>
#include <linux/of_platform.h>
-#include <asm/prom.h>
#include "macmodes.h"
#include "platinumfb.h"
diff --git a/drivers/video/fbdev/pm2fb.c b/drivers/video/fbdev/pm2fb.c
index c68725eebee3..d3be2c64f1c0 100644
--- a/drivers/video/fbdev/pm2fb.c
+++ b/drivers/video/fbdev/pm2fb.c
@@ -1504,9 +1504,7 @@ static const struct fb_ops pm2fb_ops = {
/**
- * Device initialisation
- *
- * Initialise and allocate resource for PCI device.
+ * pm2fb_probe - Initialise and allocate resource for PCI device.
*
* @pdev: PCI device.
* @id: PCI device ID.
@@ -1711,9 +1709,7 @@ static int pm2fb_probe(struct pci_dev *pdev, const struct pci_device_id *id)
}
/**
- * Device removal.
- *
- * Release all device resources.
+ * pm2fb_remove - Release all device resources.
*
* @pdev: PCI device to clean up.
*/
diff --git a/drivers/video/fbdev/pxafb.c b/drivers/video/fbdev/pxafb.c
index f1551e00eb12..8ad91c251fe6 100644
--- a/drivers/video/fbdev/pxafb.c
+++ b/drivers/video/fbdev/pxafb.c
@@ -2256,10 +2256,10 @@ static int pxafb_probe(struct platform_device *dev)
goto failed;
for (i = 0; i < inf->num_modes; i++)
inf->modes[i] = pdata->modes[i];
+ } else {
+ inf = of_pxafb_of_mach_info(&dev->dev);
}
- if (!pdata)
- inf = of_pxafb_of_mach_info(&dev->dev);
if (IS_ERR_OR_NULL(inf))
goto failed;
diff --git a/drivers/video/fbdev/s3fb.c b/drivers/video/fbdev/s3fb.c
index 5c74253e7b2c..b93c8eb02336 100644
--- a/drivers/video/fbdev/s3fb.c
+++ b/drivers/video/fbdev/s3fb.c
@@ -549,6 +549,9 @@ static int s3fb_check_var(struct fb_var_screeninfo *var, struct fb_info *info)
int rv, mem, step;
u16 m, n, r;
+ if (!var->pixclock)
+ return -EINVAL;
+
/* Find appropriate format */
rv = svga_match_format (s3fb_formats, var, NULL);
diff --git a/drivers/video/fbdev/sh_mobile_lcdcfb.c b/drivers/video/fbdev/sh_mobile_lcdcfb.c
index aa4ebe3192ec..9a4417430b4e 100644
--- a/drivers/video/fbdev/sh_mobile_lcdcfb.c
+++ b/drivers/video/fbdev/sh_mobile_lcdcfb.c
@@ -531,9 +531,6 @@ static void sh_mobile_lcdc_display_off(struct sh_mobile_lcdc_chan *ch)
ch->tx_dev->ops->display_off(ch->tx_dev);
}
-static int sh_mobile_lcdc_check_var(struct fb_var_screeninfo *var,
- struct fb_info *info);
-
/* -----------------------------------------------------------------------------
* Format helpers
*/
diff --git a/drivers/video/fbdev/simplefb.c b/drivers/video/fbdev/simplefb.c
index 94fc9c6d0411..2c198561c338 100644
--- a/drivers/video/fbdev/simplefb.c
+++ b/drivers/video/fbdev/simplefb.c
@@ -84,6 +84,10 @@ struct simplefb_par {
static void simplefb_clocks_destroy(struct simplefb_par *par);
static void simplefb_regulators_destroy(struct simplefb_par *par);
+/*
+ * fb_ops.fb_destroy is called by the last put_fb_info() call at the end
+ * of unregister_framebuffer() or fb_release(). Do any cleanup here.
+ */
static void simplefb_destroy(struct fb_info *info)
{
struct simplefb_par *par = info->par;
@@ -94,6 +98,8 @@ static void simplefb_destroy(struct fb_info *info)
if (info->screen_base)
iounmap(info->screen_base);
+ framebuffer_release(info);
+
if (mem)
release_mem_region(mem->start, resource_size(mem));
}
@@ -545,8 +551,8 @@ static int simplefb_remove(struct platform_device *pdev)
{
struct fb_info *info = platform_get_drvdata(pdev);
+ /* simplefb_destroy takes care of info cleanup */
unregister_framebuffer(info);
- framebuffer_release(info);
return 0;
}
diff --git a/drivers/video/fbdev/sis/sis_main.c b/drivers/video/fbdev/sis/sis_main.c
index 742f62986b80..f28fd69d5eb7 100644
--- a/drivers/video/fbdev/sis/sis_main.c
+++ b/drivers/video/fbdev/sis/sis_main.c
@@ -4463,7 +4463,7 @@ static void sisfb_post_sis300(struct pci_dev *pdev)
SiS_SetReg(SISCR, 0x37, 0x02);
SiS_SetReg(SISPART2, 0x00, 0x1c);
v4 = 0x00; v5 = 0x00; v6 = 0x10;
- if(ivideo->SiS_Pr.UseROM) {
+ if (ivideo->SiS_Pr.UseROM && bios) {
v4 = bios[0xf5];
v5 = bios[0xf6];
v6 = bios[0xf7];
diff --git a/drivers/video/fbdev/tridentfb.c b/drivers/video/fbdev/tridentfb.c
index 4d20cb557ff0..319131bd72cf 100644
--- a/drivers/video/fbdev/tridentfb.c
+++ b/drivers/video/fbdev/tridentfb.c
@@ -996,6 +996,9 @@ static int tridentfb_check_var(struct fb_var_screeninfo *var,
int ramdac = 230000; /* 230MHz for most 3D chips */
debug("enter\n");
+ if (!var->pixclock)
+ return -EINVAL;
+
/* check color depth */
if (bpp == 24)
bpp = var->bits_per_pixel = 32;
diff --git a/drivers/video/fbdev/udlfb.c b/drivers/video/fbdev/udlfb.c
index b6ec0b8e2b72..d280733f283b 100644
--- a/drivers/video/fbdev/udlfb.c
+++ b/drivers/video/fbdev/udlfb.c
@@ -1650,8 +1650,9 @@ static int dlfb_usb_probe(struct usb_interface *intf,
const struct device_attribute *attr;
struct dlfb_data *dlfb;
struct fb_info *info;
- int retval = -ENOMEM;
+ int retval;
struct usb_device *usbdev = interface_to_usbdev(intf);
+ struct usb_endpoint_descriptor *out;
/* usb initialization */
dlfb = kzalloc(sizeof(*dlfb), GFP_KERNEL);
@@ -1665,6 +1666,12 @@ static int dlfb_usb_probe(struct usb_interface *intf,
dlfb->udev = usb_get_dev(usbdev);
usb_set_intfdata(intf, dlfb);
+ retval = usb_find_common_endpoints(intf->cur_altsetting, NULL, &out, NULL, NULL);
+ if (retval) {
+ dev_err(&intf->dev, "Device should have at lease 1 bulk endpoint!\n");
+ goto error;
+ }
+
dev_dbg(&intf->dev, "console enable=%d\n", console);
dev_dbg(&intf->dev, "fb_defio enable=%d\n", fb_defio);
dev_dbg(&intf->dev, "shadow enable=%d\n", shadow);
@@ -1674,6 +1681,7 @@ static int dlfb_usb_probe(struct usb_interface *intf,
if (!dlfb_parse_vendor_descriptor(dlfb, intf)) {
dev_err(&intf->dev,
"firmware not recognized, incompatible device?\n");
+ retval = -ENODEV;
goto error;
}
@@ -1687,8 +1695,10 @@ static int dlfb_usb_probe(struct usb_interface *intf,
/* allocates framebuffer driver structure, not framebuffer memory */
info = framebuffer_alloc(0, &dlfb->udev->dev);
- if (!info)
+ if (!info) {
+ retval = -ENOMEM;
goto error;
+ }
dlfb->info = info;
info->par = dlfb;
diff --git a/drivers/video/fbdev/valkyriefb.c b/drivers/video/fbdev/valkyriefb.c
index 8425afe37d7c..a6c9d4f26669 100644
--- a/drivers/video/fbdev/valkyriefb.c
+++ b/drivers/video/fbdev/valkyriefb.c
@@ -54,10 +54,9 @@
#include <linux/nvram.h>
#include <linux/adb.h>
#include <linux/cuda.h>
+#include <linux/of_address.h>
#ifdef CONFIG_MAC
#include <asm/macintosh.h>
-#else
-#include <asm/prom.h>
#endif
#include "macmodes.h"
diff --git a/drivers/video/fbdev/vesafb.c b/drivers/video/fbdev/vesafb.c
index df6de5a9dd4c..e25e8de5ff67 100644
--- a/drivers/video/fbdev/vesafb.c
+++ b/drivers/video/fbdev/vesafb.c
@@ -179,6 +179,10 @@ static int vesafb_setcolreg(unsigned regno, unsigned red, unsigned green,
return err;
}
+/*
+ * fb_ops.fb_destroy is called by the last put_fb_info() call at the end
+ * of unregister_framebuffer() or fb_release(). Do any cleanup here.
+ */
static void vesafb_destroy(struct fb_info *info)
{
struct vesafb_par *par = info->par;
@@ -188,6 +192,8 @@ static void vesafb_destroy(struct fb_info *info)
if (info->screen_base)
iounmap(info->screen_base);
release_mem_region(info->apertures->ranges[0].base, info->apertures->ranges[0].size);
+
+ framebuffer_release(info);
}
static struct fb_ops vesafb_ops = {
@@ -484,10 +490,10 @@ static int vesafb_remove(struct platform_device *pdev)
{
struct fb_info *info = platform_get_drvdata(pdev);
+ /* vesafb_destroy takes care of info cleanup */
unregister_framebuffer(info);
if (((struct vesafb_par *)(info->par))->region)
release_region(0x3c0, 32);
- framebuffer_release(info);
return 0;
}
diff --git a/drivers/video/fbdev/vt8623fb.c b/drivers/video/fbdev/vt8623fb.c
index 7a959e5ba90b..a92a8c670cf0 100644
--- a/drivers/video/fbdev/vt8623fb.c
+++ b/drivers/video/fbdev/vt8623fb.c
@@ -321,6 +321,9 @@ static int vt8623fb_check_var(struct fb_var_screeninfo *var, struct fb_info *inf
{
int rv, mem, step;
+ if (!var->pixclock)
+ return -EINVAL;
+
/* Find appropriate format */
rv = svga_match_format (vt8623fb_formats, var, NULL);
if (rv < 0)
diff --git a/drivers/video/of_display_timing.c b/drivers/video/of_display_timing.c
index f93b6abbe258..bebd371c6b93 100644
--- a/drivers/video/of_display_timing.c
+++ b/drivers/video/of_display_timing.c
@@ -199,7 +199,7 @@ struct display_timings *of_get_display_timings(const struct device_node *np)
struct display_timing *dt;
int r;
- dt = kzalloc(sizeof(*dt), GFP_KERNEL);
+ dt = kmalloc(sizeof(*dt), GFP_KERNEL);
if (!dt) {
pr_err("%pOF: could not allocate display_timing struct\n",
np);
diff --git a/drivers/virt/Kconfig b/drivers/virt/Kconfig
index 121b9293c737..c877da072d4d 100644
--- a/drivers/virt/Kconfig
+++ b/drivers/virt/Kconfig
@@ -47,4 +47,7 @@ source "drivers/virt/vboxguest/Kconfig"
source "drivers/virt/nitro_enclaves/Kconfig"
source "drivers/virt/acrn/Kconfig"
+
+source "drivers/virt/coco/efi_secret/Kconfig"
+
endif
diff --git a/drivers/virt/Makefile b/drivers/virt/Makefile
index 108d0ffcc9aa..067b5427f40f 100644
--- a/drivers/virt/Makefile
+++ b/drivers/virt/Makefile
@@ -9,3 +9,4 @@ obj-y += vboxguest/
obj-$(CONFIG_NITRO_ENCLAVES) += nitro_enclaves/
obj-$(CONFIG_ACRN_HSM) += acrn/
+obj-$(CONFIG_EFI_SECRET) += coco/efi_secret/
diff --git a/drivers/virt/coco/efi_secret/Kconfig b/drivers/virt/coco/efi_secret/Kconfig
new file mode 100644
index 000000000000..4404d198f3b2
--- /dev/null
+++ b/drivers/virt/coco/efi_secret/Kconfig
@@ -0,0 +1,16 @@
+# SPDX-License-Identifier: GPL-2.0-only
+config EFI_SECRET
+ tristate "EFI secret area securityfs support"
+ depends on EFI && X86_64
+ select EFI_COCO_SECRET
+ select SECURITYFS
+ help
+ This is a driver for accessing the EFI secret area via securityfs.
+ The EFI secret area is a memory area designated by the firmware for
+ confidential computing secret injection (for example for AMD SEV
+ guests). The driver exposes the secrets as files in
+ <securityfs>/secrets/coco. Files can be read and deleted (deleting
+ a file wipes the secret from memory).
+
+ To compile this driver as a module, choose M here.
+ The module will be called efi_secret.
diff --git a/drivers/virt/coco/efi_secret/Makefile b/drivers/virt/coco/efi_secret/Makefile
new file mode 100644
index 000000000000..c7047ce804f7
--- /dev/null
+++ b/drivers/virt/coco/efi_secret/Makefile
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
+obj-$(CONFIG_EFI_SECRET) += efi_secret.o
diff --git a/drivers/virt/coco/efi_secret/efi_secret.c b/drivers/virt/coco/efi_secret/efi_secret.c
new file mode 100644
index 000000000000..e700a5ef7043
--- /dev/null
+++ b/drivers/virt/coco/efi_secret/efi_secret.c
@@ -0,0 +1,349 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * efi_secret module
+ *
+ * Copyright (C) 2022 IBM Corporation
+ * Author: Dov Murik <dovmurik@linux.ibm.com>
+ */
+
+/**
+ * DOC: efi_secret: Allow reading EFI confidential computing (coco) secret area
+ * via securityfs interface.
+ *
+ * When the module is loaded (and securityfs is mounted, typically under
+ * /sys/kernel/security), a "secrets/coco" directory is created in securityfs.
+ * In it, a file is created for each secret entry. The name of each such file
+ * is the GUID of the secret entry, and its content is the secret data.
+ */
+
+#include <linux/platform_device.h>
+#include <linux/seq_file.h>
+#include <linux/fs.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/io.h>
+#include <linux/security.h>
+#include <linux/efi.h>
+#include <linux/cacheflush.h>
+
+#define EFI_SECRET_NUM_FILES 64
+
+struct efi_secret {
+ struct dentry *secrets_dir;
+ struct dentry *fs_dir;
+ struct dentry *fs_files[EFI_SECRET_NUM_FILES];
+ void __iomem *secret_data;
+ u64 secret_data_len;
+};
+
+/*
+ * Structure of the EFI secret area
+ *
+ * Offset Length
+ * (bytes) (bytes) Usage
+ * ------- ------- -----
+ * 0 16 Secret table header GUID (must be 1e74f542-71dd-4d66-963e-ef4287ff173b)
+ * 16 4 Length of bytes of the entire secret area
+ *
+ * 20 16 First secret entry's GUID
+ * 36 4 First secret entry's length in bytes (= 16 + 4 + x)
+ * 40 x First secret entry's data
+ *
+ * 40+x 16 Second secret entry's GUID
+ * 56+x 4 Second secret entry's length in bytes (= 16 + 4 + y)
+ * 60+x y Second secret entry's data
+ *
+ * (... and so on for additional entries)
+ *
+ * The GUID of each secret entry designates the usage of the secret data.
+ */
+
+/**
+ * struct secret_header - Header of entire secret area; this should be followed
+ * by instances of struct secret_entry.
+ * @guid: Must be EFI_SECRET_TABLE_HEADER_GUID
+ * @len: Length in bytes of entire secret area, including header
+ */
+struct secret_header {
+ efi_guid_t guid;
+ u32 len;
+} __attribute((packed));
+
+/**
+ * struct secret_entry - Holds one secret entry
+ * @guid: Secret-specific GUID (or NULL_GUID if this secret entry was deleted)
+ * @len: Length of secret entry, including its guid and len fields
+ * @data: The secret data (full of zeros if this secret entry was deleted)
+ */
+struct secret_entry {
+ efi_guid_t guid;
+ u32 len;
+ u8 data[];
+} __attribute((packed));
+
+static size_t secret_entry_data_len(struct secret_entry *e)
+{
+ return e->len - sizeof(*e);
+}
+
+static struct efi_secret the_efi_secret;
+
+static inline struct efi_secret *efi_secret_get(void)
+{
+ return &the_efi_secret;
+}
+
+static int efi_secret_bin_file_show(struct seq_file *file, void *data)
+{
+ struct secret_entry *e = file->private;
+
+ if (e)
+ seq_write(file, e->data, secret_entry_data_len(e));
+
+ return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(efi_secret_bin_file);
+
+/*
+ * Overwrite memory content with zeroes, and ensure that dirty cache lines are
+ * actually written back to memory, to clear out the secret.
+ */
+static void wipe_memory(void *addr, size_t size)
+{
+ memzero_explicit(addr, size);
+#ifdef CONFIG_X86
+ clflush_cache_range(addr, size);
+#endif
+}
+
+static int efi_secret_unlink(struct inode *dir, struct dentry *dentry)
+{
+ struct efi_secret *s = efi_secret_get();
+ struct inode *inode = d_inode(dentry);
+ struct secret_entry *e = (struct secret_entry *)inode->i_private;
+ int i;
+
+ if (e) {
+ /* Zero out the secret data */
+ wipe_memory(e->data, secret_entry_data_len(e));
+ e->guid = NULL_GUID;
+ }
+
+ inode->i_private = NULL;
+
+ for (i = 0; i < EFI_SECRET_NUM_FILES; i++)
+ if (s->fs_files[i] == dentry)
+ s->fs_files[i] = NULL;
+
+ /*
+ * securityfs_remove tries to lock the directory's inode, but we reach
+ * the unlink callback when it's already locked
+ */
+ inode_unlock(dir);
+ securityfs_remove(dentry);
+ inode_lock(dir);
+
+ return 0;
+}
+
+static const struct inode_operations efi_secret_dir_inode_operations = {
+ .lookup = simple_lookup,
+ .unlink = efi_secret_unlink,
+};
+
+static int efi_secret_map_area(struct platform_device *dev)
+{
+ int ret;
+ struct efi_secret *s = efi_secret_get();
+ struct linux_efi_coco_secret_area *secret_area;
+
+ if (efi.coco_secret == EFI_INVALID_TABLE_ADDR) {
+ dev_err(&dev->dev, "Secret area address is not available\n");
+ return -EINVAL;
+ }
+
+ secret_area = memremap(efi.coco_secret, sizeof(*secret_area), MEMREMAP_WB);
+ if (secret_area == NULL) {
+ dev_err(&dev->dev, "Could not map secret area EFI config entry\n");
+ return -ENOMEM;
+ }
+ if (!secret_area->base_pa || secret_area->size < sizeof(struct secret_header)) {
+ dev_err(&dev->dev,
+ "Invalid secret area memory location (base_pa=0x%llx size=0x%llx)\n",
+ secret_area->base_pa, secret_area->size);
+ ret = -EINVAL;
+ goto unmap;
+ }
+
+ s->secret_data = ioremap_encrypted(secret_area->base_pa, secret_area->size);
+ if (s->secret_data == NULL) {
+ dev_err(&dev->dev, "Could not map secret area\n");
+ ret = -ENOMEM;
+ goto unmap;
+ }
+
+ s->secret_data_len = secret_area->size;
+ ret = 0;
+
+unmap:
+ memunmap(secret_area);
+ return ret;
+}
+
+static void efi_secret_securityfs_teardown(struct platform_device *dev)
+{
+ struct efi_secret *s = efi_secret_get();
+ int i;
+
+ for (i = (EFI_SECRET_NUM_FILES - 1); i >= 0; i--) {
+ securityfs_remove(s->fs_files[i]);
+ s->fs_files[i] = NULL;
+ }
+
+ securityfs_remove(s->fs_dir);
+ s->fs_dir = NULL;
+
+ securityfs_remove(s->secrets_dir);
+ s->secrets_dir = NULL;
+
+ dev_dbg(&dev->dev, "Removed securityfs entries\n");
+}
+
+static int efi_secret_securityfs_setup(struct platform_device *dev)
+{
+ struct efi_secret *s = efi_secret_get();
+ int ret = 0, i = 0, bytes_left;
+ unsigned char *ptr;
+ struct secret_header *h;
+ struct secret_entry *e;
+ struct dentry *dent;
+ char guid_str[EFI_VARIABLE_GUID_LEN + 1];
+
+ ptr = (void __force *)s->secret_data;
+ h = (struct secret_header *)ptr;
+ if (efi_guidcmp(h->guid, EFI_SECRET_TABLE_HEADER_GUID)) {
+ /*
+ * This is not an error: it just means that EFI defines secret
+ * area but it was not populated by the Guest Owner.
+ */
+ dev_dbg(&dev->dev, "EFI secret area does not start with correct GUID\n");
+ return -ENODEV;
+ }
+ if (h->len < sizeof(*h)) {
+ dev_err(&dev->dev, "EFI secret area reported length is too small\n");
+ return -EINVAL;
+ }
+ if (h->len > s->secret_data_len) {
+ dev_err(&dev->dev, "EFI secret area reported length is too big\n");
+ return -EINVAL;
+ }
+
+ s->secrets_dir = NULL;
+ s->fs_dir = NULL;
+ memset(s->fs_files, 0, sizeof(s->fs_files));
+
+ dent = securityfs_create_dir("secrets", NULL);
+ if (IS_ERR(dent)) {
+ dev_err(&dev->dev, "Error creating secrets securityfs directory entry err=%ld\n",
+ PTR_ERR(dent));
+ return PTR_ERR(dent);
+ }
+ s->secrets_dir = dent;
+
+ dent = securityfs_create_dir("coco", s->secrets_dir);
+ if (IS_ERR(dent)) {
+ dev_err(&dev->dev, "Error creating coco securityfs directory entry err=%ld\n",
+ PTR_ERR(dent));
+ return PTR_ERR(dent);
+ }
+ d_inode(dent)->i_op = &efi_secret_dir_inode_operations;
+ s->fs_dir = dent;
+
+ bytes_left = h->len - sizeof(*h);
+ ptr += sizeof(*h);
+ while (bytes_left >= (int)sizeof(*e) && i < EFI_SECRET_NUM_FILES) {
+ e = (struct secret_entry *)ptr;
+ if (e->len < sizeof(*e) || e->len > (unsigned int)bytes_left) {
+ dev_err(&dev->dev, "EFI secret area is corrupted\n");
+ ret = -EINVAL;
+ goto err_cleanup;
+ }
+
+ /* Skip deleted entries (which will have NULL_GUID) */
+ if (efi_guidcmp(e->guid, NULL_GUID)) {
+ efi_guid_to_str(&e->guid, guid_str);
+
+ dent = securityfs_create_file(guid_str, 0440, s->fs_dir, (void *)e,
+ &efi_secret_bin_file_fops);
+ if (IS_ERR(dent)) {
+ dev_err(&dev->dev, "Error creating efi_secret securityfs entry\n");
+ ret = PTR_ERR(dent);
+ goto err_cleanup;
+ }
+
+ s->fs_files[i++] = dent;
+ }
+ ptr += e->len;
+ bytes_left -= e->len;
+ }
+
+ dev_info(&dev->dev, "Created %d entries in securityfs secrets/coco\n", i);
+ return 0;
+
+err_cleanup:
+ efi_secret_securityfs_teardown(dev);
+ return ret;
+}
+
+static void efi_secret_unmap_area(void)
+{
+ struct efi_secret *s = efi_secret_get();
+
+ if (s->secret_data) {
+ iounmap(s->secret_data);
+ s->secret_data = NULL;
+ s->secret_data_len = 0;
+ }
+}
+
+static int efi_secret_probe(struct platform_device *dev)
+{
+ int ret;
+
+ ret = efi_secret_map_area(dev);
+ if (ret)
+ return ret;
+
+ ret = efi_secret_securityfs_setup(dev);
+ if (ret)
+ goto err_unmap;
+
+ return ret;
+
+err_unmap:
+ efi_secret_unmap_area();
+ return ret;
+}
+
+static int efi_secret_remove(struct platform_device *dev)
+{
+ efi_secret_securityfs_teardown(dev);
+ efi_secret_unmap_area();
+ return 0;
+}
+
+static struct platform_driver efi_secret_driver = {
+ .probe = efi_secret_probe,
+ .remove = efi_secret_remove,
+ .driver = {
+ .name = "efi_secret",
+ },
+};
+
+module_platform_driver(efi_secret_driver);
+
+MODULE_DESCRIPTION("Confidential computing EFI secret area access");
+MODULE_AUTHOR("IBM");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("platform:efi_secret");
diff --git a/drivers/xen/gntalloc.c b/drivers/xen/gntalloc.c
index 4849f94372a4..55acb32842a3 100644
--- a/drivers/xen/gntalloc.c
+++ b/drivers/xen/gntalloc.c
@@ -178,9 +178,9 @@ static void __del_gref(struct gntalloc_gref *gref)
unsigned long addr;
if (gref->notify.flags & UNMAP_NOTIFY_CLEAR_BYTE) {
- uint8_t *tmp = kmap(gref->page);
+ uint8_t *tmp = kmap_local_page(gref->page);
tmp[gref->notify.pgoff] = 0;
- kunmap(gref->page);
+ kunmap_local(tmp);
}
if (gref->notify.flags & UNMAP_NOTIFY_SEND_EVENT) {
notify_remote_via_evtchn(gref->notify.event);
diff --git a/fs/afs/inode.c b/fs/afs/inode.c
index 2fe402483ad5..30b066299d39 100644
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -740,10 +740,22 @@ int afs_getattr(struct user_namespace *mnt_userns, const struct path *path,
{
struct inode *inode = d_inode(path->dentry);
struct afs_vnode *vnode = AFS_FS_I(inode);
- int seq = 0;
+ struct key *key;
+ int ret, seq = 0;
_enter("{ ino=%lu v=%u }", inode->i_ino, inode->i_generation);
+ if (!(query_flags & AT_STATX_DONT_SYNC) &&
+ !test_bit(AFS_VNODE_CB_PROMISED, &vnode->flags)) {
+ key = afs_request_key(vnode->volume->cell);
+ if (IS_ERR(key))
+ return PTR_ERR(key);
+ ret = afs_validate(vnode, key);
+ key_put(key);
+ if (ret < 0)
+ return ret;
+ }
+
do {
read_seqbegin_or_lock(&vnode->cb_lock, &seq);
generic_fillattr(&init_user_ns, inode, stat);
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index 47e72d72f7d0..32131a5d321b 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -384,6 +384,17 @@ static inline bool btrfs_inode_in_log(struct btrfs_inode *inode, u64 generation)
return ret;
}
+/*
+ * Check if the inode has flags compatible with compression
+ */
+static inline bool btrfs_inode_can_compress(const struct btrfs_inode *inode)
+{
+ if (inode->flags & BTRFS_INODE_NODATACOW ||
+ inode->flags & BTRFS_INODE_NODATASUM)
+ return false;
+ return true;
+}
+
struct btrfs_dio_private {
struct inode *inode;
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index b7631b88426e..077c95e9baa5 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1060,6 +1060,7 @@ struct btrfs_fs_info {
*/
spinlock_t relocation_bg_lock;
u64 data_reloc_bg;
+ struct mutex zoned_data_reloc_io_lock;
u64 nr_global_roots;
diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c
index 71fd99b48283..f26202621989 100644
--- a/fs/btrfs/dev-replace.c
+++ b/fs/btrfs/dev-replace.c
@@ -734,7 +734,12 @@ static int btrfs_dev_replace_start(struct btrfs_fs_info *fs_info,
btrfs_wait_ordered_roots(fs_info, U64_MAX, 0, (u64)-1);
- /* Commit dev_replace state and reserve 1 item for it. */
+ /*
+ * Commit dev_replace state and reserve 1 item for it.
+ * This is crucial to ensure we won't miss copying extents for new block
+ * groups that are allocated after we started the device replace, and
+ * must be done after setting up the device replace state.
+ */
trans = btrfs_start_transaction(root, 1);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 126f244cdf88..84795d831282 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -3157,6 +3157,7 @@ void btrfs_init_fs_info(struct btrfs_fs_info *fs_info)
mutex_init(&fs_info->reloc_mutex);
mutex_init(&fs_info->delalloc_root_mutex);
mutex_init(&fs_info->zoned_meta_io_lock);
+ mutex_init(&fs_info->zoned_data_reloc_io_lock);
seqlock_init(&fs_info->profiles_lock);
INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots);
@@ -3657,6 +3658,17 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
if (sectorsize < PAGE_SIZE) {
struct btrfs_subpage_info *subpage_info;
+ /*
+ * V1 space cache has some hardcoded PAGE_SIZE usage, and is
+ * going to be deprecated.
+ *
+ * Force to use v2 cache for subpage case.
+ */
+ btrfs_clear_opt(fs_info->mount_opt, SPACE_CACHE);
+ btrfs_set_and_info(fs_info, FREE_SPACE_TREE,
+ "forcing free space tree for sector size %u with page size %lu",
+ sectorsize, PAGE_SIZE);
+
btrfs_warn(fs_info,
"read-write for sector size %u with page size %lu is experimental",
sectorsize, PAGE_SIZE);
@@ -4226,6 +4238,7 @@ static int wait_dev_supers(struct btrfs_device *device, int max_mirrors)
*/
static void btrfs_end_empty_barrier(struct bio *bio)
{
+ bio_uninit(bio);
complete(bio->bi_private);
}
@@ -4235,7 +4248,7 @@ static void btrfs_end_empty_barrier(struct bio *bio)
*/
static void write_dev_flush(struct btrfs_device *device)
{
- struct bio *bio = device->flush_bio;
+ struct bio *bio = &device->flush_bio;
#ifndef CONFIG_BTRFS_FS_CHECK_INTEGRITY
/*
@@ -4248,12 +4261,12 @@ static void write_dev_flush(struct btrfs_device *device)
* of simplicity, since this is a debug tool and not meant for use in
* non-debug builds.
*/
- struct request_queue *q = bdev_get_queue(device->bdev);
- if (!test_bit(QUEUE_FLAG_WC, &q->queue_flags))
+ if (!bdev_write_cache(device->bdev))
return;
#endif
- bio_reset(bio, device->bdev, REQ_OP_WRITE | REQ_SYNC | REQ_PREFLUSH);
+ bio_init(bio, device->bdev, NULL, 0,
+ REQ_OP_WRITE | REQ_SYNC | REQ_PREFLUSH);
bio->bi_end_io = btrfs_end_empty_barrier;
init_completion(&device->flush_wait);
bio->bi_private = &device->flush_wait;
@@ -4267,7 +4280,7 @@ static void write_dev_flush(struct btrfs_device *device)
*/
static blk_status_t wait_dev_flush(struct btrfs_device *device)
{
- struct bio *bio = device->flush_bio;
+ struct bio *bio = &device->flush_bio;
if (!test_bit(BTRFS_DEV_STATE_FLUSH_SENT, &device->dev_state))
return BLK_STS_OK;
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 6aa92f84f465..6260784e74b5 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -1239,7 +1239,7 @@ static int btrfs_issue_discard(struct block_device *bdev, u64 start, u64 len,
if (size) {
ret = blkdev_issue_discard(bdev, start >> 9, size >> 9,
- GFP_NOFS, 0);
+ GFP_NOFS);
if (!ret)
*discarded_bytes += size;
else if (ret != -EOPNOTSUPP)
@@ -1256,7 +1256,7 @@ static int btrfs_issue_discard(struct block_device *bdev, u64 start, u64 len,
if (bytes_left) {
ret = blkdev_issue_discard(bdev, start >> 9, bytes_left >> 9,
- GFP_NOFS, 0);
+ GFP_NOFS);
if (!ret)
*discarded_bytes += bytes_left;
}
@@ -1291,7 +1291,7 @@ static int do_discard_extent(struct btrfs_io_stripe *stripe, u64 *bytes)
ret = btrfs_reset_device_zone(dev_replace->tgtdev, phys, len,
&discarded);
discarded += src_disc;
- } else if (blk_queue_discard(bdev_get_queue(stripe->dev->bdev))) {
+ } else if (bdev_max_discard_sectors(stripe->dev->bdev)) {
ret = btrfs_issue_discard(dev->bdev, phys, len, &discarded);
} else {
ret = 0;
@@ -5987,7 +5987,7 @@ static int btrfs_trim_free_extents(struct btrfs_device *device, u64 *trimmed)
*trimmed = 0;
/* Discard not supported = nothing to do. */
- if (!blk_queue_discard(bdev_get_queue(device->bdev)))
+ if (!bdev_max_discard_sectors(device->bdev))
return 0;
/* Not writable = nothing to do. */
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 724e8fe06aa0..33c19f51d79b 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2658,6 +2658,7 @@ int btrfs_repair_one_sector(struct inode *inode,
repair_bio = btrfs_bio_alloc(1);
repair_bbio = btrfs_bio(repair_bio);
+ repair_bbio->file_offset = start;
repair_bio->bi_opf = REQ_OP_READ;
repair_bio->bi_end_io = failed_bio->bi_end_io;
repair_bio->bi_iter.bi_sector = failrec->logical >> 9;
@@ -3333,24 +3334,37 @@ static int alloc_new_bio(struct btrfs_inode *inode,
ret = calc_bio_boundaries(bio_ctrl, inode, file_offset);
if (ret < 0)
goto error;
- if (wbc) {
- struct block_device *bdev;
- bdev = fs_info->fs_devices->latest_dev->bdev;
- bio_set_dev(bio, bdev);
- wbc_init_bio(wbc, bio);
- }
- if (bio_op(bio) == REQ_OP_ZONE_APPEND) {
- struct btrfs_device *device;
+ if (wbc) {
+ /*
+ * For Zone append we need the correct block_device that we are
+ * going to write to set in the bio to be able to respect the
+ * hardware limitation. Look it up here:
+ */
+ if (bio_op(bio) == REQ_OP_ZONE_APPEND) {
+ struct btrfs_device *dev;
+
+ dev = btrfs_zoned_get_device(fs_info, disk_bytenr,
+ fs_info->sectorsize);
+ if (IS_ERR(dev)) {
+ ret = PTR_ERR(dev);
+ goto error;
+ }
- device = btrfs_zoned_get_device(fs_info, disk_bytenr,
- fs_info->sectorsize);
- if (IS_ERR(device)) {
- ret = PTR_ERR(device);
- goto error;
+ bio_set_dev(bio, dev->bdev);
+ } else {
+ /*
+ * Otherwise pick the last added device to support
+ * cgroup writeback. For multi-device file systems this
+ * means blk-cgroup policies have to always be set on the
+ * last added/replaced device. This is a bit odd but has
+ * been like that for a long time.
+ */
+ bio_set_dev(bio, fs_info->fs_devices->latest_dev->bdev);
}
-
- btrfs_bio(bio)->device = device;
+ wbc_init_bio(wbc, bio);
+ } else {
+ ASSERT(bio_op(bio) != REQ_OP_ZONE_APPEND);
}
return 0;
error:
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 5082b9c70f8c..95c499b8424e 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -481,17 +481,6 @@ static noinline int add_async_extent(struct async_chunk *cow,
}
/*
- * Check if the inode has flags compatible with compression
- */
-static inline bool inode_can_compress(struct btrfs_inode *inode)
-{
- if (inode->flags & BTRFS_INODE_NODATACOW ||
- inode->flags & BTRFS_INODE_NODATASUM)
- return false;
- return true;
-}
-
-/*
* Check if the inode needs to be submitted to compression, based on mount
* options, defragmentation, properties or heuristics.
*/
@@ -500,7 +489,7 @@ static inline int inode_need_compress(struct btrfs_inode *inode, u64 start,
{
struct btrfs_fs_info *fs_info = inode->root->fs_info;
- if (!inode_can_compress(inode)) {
+ if (!btrfs_inode_can_compress(inode)) {
WARN(IS_ENABLED(CONFIG_BTRFS_DEBUG),
KERN_ERR "BTRFS: unexpected compression for ino %llu\n",
btrfs_ino(inode));
@@ -2019,7 +2008,7 @@ int btrfs_run_delalloc_range(struct btrfs_inode *inode, struct page *locked_page
ASSERT(!zoned || btrfs_is_data_reloc_root(inode->root));
ret = run_delalloc_nocow(inode, locked_page, start, end,
page_started, nr_written);
- } else if (!inode_can_compress(inode) ||
+ } else if (!btrfs_inode_can_compress(inode) ||
!inode_need_compress(inode, start, end)) {
if (zoned)
ret = run_delalloc_zoned(inode, locked_page, start, end,
@@ -7810,8 +7799,6 @@ static blk_status_t btrfs_check_read_dio_bio(struct btrfs_dio_private *dip,
const bool csum = !(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM);
struct bio_vec bvec;
struct bvec_iter iter;
- const u64 orig_file_offset = dip->file_offset;
- u64 start = orig_file_offset;
u32 bio_offset = 0;
blk_status_t err = BLK_STS_OK;
@@ -7821,6 +7808,8 @@ static blk_status_t btrfs_check_read_dio_bio(struct btrfs_dio_private *dip,
nr_sectors = BTRFS_BYTES_TO_BLKS(fs_info, bvec.bv_len);
pgoff = bvec.bv_offset;
for (i = 0; i < nr_sectors; i++) {
+ u64 start = bbio->file_offset + bio_offset;
+
ASSERT(pgoff < PAGE_SIZE);
if (uptodate &&
(!csum || !check_data_csum(inode, bbio,
@@ -7833,17 +7822,13 @@ static blk_status_t btrfs_check_read_dio_bio(struct btrfs_dio_private *dip,
} else {
int ret;
- ASSERT((start - orig_file_offset) < UINT_MAX);
- ret = btrfs_repair_one_sector(inode,
- &bbio->bio,
- start - orig_file_offset,
- bvec.bv_page, pgoff,
+ ret = btrfs_repair_one_sector(inode, &bbio->bio,
+ bio_offset, bvec.bv_page, pgoff,
start, bbio->mirror_num,
submit_dio_repair_bio);
if (ret)
err = errno_to_blk_status(ret);
}
- start += sectorsize;
ASSERT(bio_offset + sectorsize > bio_offset);
bio_offset += sectorsize;
pgoff += sectorsize;
@@ -7870,6 +7855,7 @@ static blk_status_t btrfs_submit_bio_start_direct_io(struct inode *inode,
static void btrfs_end_dio_bio(struct bio *bio)
{
struct btrfs_dio_private *dip = bio->bi_private;
+ struct btrfs_bio *bbio = btrfs_bio(bio);
blk_status_t err = bio->bi_status;
if (err)
@@ -7880,12 +7866,12 @@ static void btrfs_end_dio_bio(struct bio *bio)
bio->bi_iter.bi_size, err);
if (bio_op(bio) == REQ_OP_READ)
- err = btrfs_check_read_dio_bio(dip, btrfs_bio(bio), !err);
+ err = btrfs_check_read_dio_bio(dip, bbio, !err);
if (err)
dip->dio_bio->bi_status = err;
- btrfs_record_physical_zoned(dip->inode, dip->file_offset, bio);
+ btrfs_record_physical_zoned(dip->inode, bbio->file_offset, bio);
bio_put(bio);
btrfs_dio_private_put(dip);
@@ -8046,6 +8032,7 @@ static void btrfs_submit_direct(const struct iomap_iter *iter,
bio = btrfs_bio_clone_partial(dio_bio, clone_offset, clone_len);
bio->bi_private = dip;
bio->bi_end_io = btrfs_end_dio_bio;
+ btrfs_bio(bio)->file_offset = file_offset;
if (bio_op(bio) == REQ_OP_ZONE_APPEND) {
status = extract_ordered_extent(BTRFS_I(inode), bio,
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index be6c24577dbe..4b28aaea2702 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -468,7 +468,6 @@ static noinline int btrfs_ioctl_fitrim(struct btrfs_fs_info *fs_info,
void __user *arg)
{
struct btrfs_device *device;
- struct request_queue *q;
struct fstrim_range range;
u64 minlen = ULLONG_MAX;
u64 num_devices = 0;
@@ -498,14 +497,11 @@ static noinline int btrfs_ioctl_fitrim(struct btrfs_fs_info *fs_info,
rcu_read_lock();
list_for_each_entry_rcu(device, &fs_info->fs_devices->devices,
dev_list) {
- if (!device->bdev)
+ if (!device->bdev || !bdev_max_discard_sectors(device->bdev))
continue;
- q = bdev_get_queue(device->bdev);
- if (blk_queue_discard(q)) {
- num_devices++;
- minlen = min_t(u64, q->limits.discard_granularity,
- minlen);
- }
+ num_devices++;
+ minlen = min_t(u64, bdev_discard_granularity(device->bdev),
+ minlen);
}
rcu_read_unlock();
diff --git a/fs/btrfs/props.c b/fs/btrfs/props.c
index 1a6d2d5b4b33..1b31481f9e72 100644
--- a/fs/btrfs/props.c
+++ b/fs/btrfs/props.c
@@ -17,9 +17,11 @@ static DEFINE_HASHTABLE(prop_handlers_ht, BTRFS_PROP_HANDLERS_HT_BITS);
struct prop_handler {
struct hlist_node node;
const char *xattr_name;
- int (*validate)(const char *value, size_t len);
+ int (*validate)(const struct btrfs_inode *inode, const char *value,
+ size_t len);
int (*apply)(struct inode *inode, const char *value, size_t len);
const char *(*extract)(struct inode *inode);
+ bool (*ignore)(const struct btrfs_inode *inode);
int inheritable;
};
@@ -55,7 +57,8 @@ find_prop_handler(const char *name,
return NULL;
}
-int btrfs_validate_prop(const char *name, const char *value, size_t value_len)
+int btrfs_validate_prop(const struct btrfs_inode *inode, const char *name,
+ const char *value, size_t value_len)
{
const struct prop_handler *handler;
@@ -69,7 +72,29 @@ int btrfs_validate_prop(const char *name, const char *value, size_t value_len)
if (value_len == 0)
return 0;
- return handler->validate(value, value_len);
+ return handler->validate(inode, value, value_len);
+}
+
+/*
+ * Check if a property should be ignored (not set) for an inode.
+ *
+ * @inode: The target inode.
+ * @name: The property's name.
+ *
+ * The caller must be sure the given property name is valid, for example by
+ * having previously called btrfs_validate_prop().
+ *
+ * Returns: true if the property should be ignored for the given inode
+ * false if the property must not be ignored for the given inode
+ */
+bool btrfs_ignore_prop(const struct btrfs_inode *inode, const char *name)
+{
+ const struct prop_handler *handler;
+
+ handler = find_prop_handler(name, NULL);
+ ASSERT(handler != NULL);
+
+ return handler->ignore(inode);
}
int btrfs_set_prop(struct btrfs_trans_handle *trans, struct inode *inode,
@@ -252,8 +277,12 @@ int btrfs_load_inode_props(struct inode *inode, struct btrfs_path *path)
return ret;
}
-static int prop_compression_validate(const char *value, size_t len)
+static int prop_compression_validate(const struct btrfs_inode *inode,
+ const char *value, size_t len)
{
+ if (!btrfs_inode_can_compress(inode))
+ return -EINVAL;
+
if (!value)
return 0;
@@ -310,6 +339,22 @@ static int prop_compression_apply(struct inode *inode, const char *value,
return 0;
}
+static bool prop_compression_ignore(const struct btrfs_inode *inode)
+{
+ /*
+ * Compression only has effect for regular files, and for directories
+ * we set it just to propagate it to new files created inside them.
+ * Everything else (symlinks, devices, sockets, fifos) is pointless as
+ * it will do nothing, so don't waste metadata space on a compression
+ * xattr for anything that is neither a file nor a directory.
+ */
+ if (!S_ISREG(inode->vfs_inode.i_mode) &&
+ !S_ISDIR(inode->vfs_inode.i_mode))
+ return true;
+
+ return false;
+}
+
static const char *prop_compression_extract(struct inode *inode)
{
switch (BTRFS_I(inode)->prop_compress) {
@@ -330,6 +375,7 @@ static struct prop_handler prop_handlers[] = {
.validate = prop_compression_validate,
.apply = prop_compression_apply,
.extract = prop_compression_extract,
+ .ignore = prop_compression_ignore,
.inheritable = 1
},
};
@@ -356,6 +402,9 @@ static int inherit_props(struct btrfs_trans_handle *trans,
if (!h->inheritable)
continue;
+ if (h->ignore(BTRFS_I(inode)))
+ continue;
+
value = h->extract(parent);
if (!value)
continue;
@@ -364,7 +413,7 @@ static int inherit_props(struct btrfs_trans_handle *trans,
* This is not strictly necessary as the property should be
* valid, but in case it isn't, don't propagate it further.
*/
- ret = h->validate(value, strlen(value));
+ ret = h->validate(BTRFS_I(inode), value, strlen(value));
if (ret)
continue;
diff --git a/fs/btrfs/props.h b/fs/btrfs/props.h
index 40b2c65b518c..59bea741cfcf 100644
--- a/fs/btrfs/props.h
+++ b/fs/btrfs/props.h
@@ -13,7 +13,9 @@ void __init btrfs_props_init(void);
int btrfs_set_prop(struct btrfs_trans_handle *trans, struct inode *inode,
const char *name, const char *value, size_t value_len,
int flags);
-int btrfs_validate_prop(const char *name, const char *value, size_t value_len);
+int btrfs_validate_prop(const struct btrfs_inode *inode, const char *name,
+ const char *value, size_t value_len);
+bool btrfs_ignore_prop(const struct btrfs_inode *inode, const char *name);
int btrfs_load_inode_props(struct inode *inode, struct btrfs_path *path);
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 11089568b287..8cd713d37ad2 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -3699,6 +3699,31 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
if (!cache)
goto skip;
+ ASSERT(cache->start <= chunk_offset);
+ /*
+ * We are using the commit root to search for device extents, so
+ * that means we could have found a device extent item from a
+ * block group that was deleted in the current transaction. The
+ * logical start offset of the deleted block group, stored at
+ * @chunk_offset, might be part of the logical address range of
+ * a new block group (which uses different physical extents).
+ * In this case btrfs_lookup_block_group() has returned the new
+ * block group, and its start address is less than @chunk_offset.
+ *
+ * We skip such new block groups, because it's pointless to
+ * process them, as we won't find their extents because we search
+ * for them using the commit root of the extent tree. For a device
+ * replace it's also fine to skip it, we won't miss copying them
+ * to the target device because we have the write duplication
+ * setup through the regular write path (by btrfs_map_block()),
+ * and we have committed a transaction when we started the device
+ * replace, right after setting up the device replace state.
+ */
+ if (cache->start < chunk_offset) {
+ btrfs_put_block_group(cache);
+ goto skip;
+ }
+
if (sctx->is_dev_replace && btrfs_is_zoned(fs_info)) {
spin_lock(&cache->lock);
if (!cache->to_copy) {
@@ -3822,7 +3847,6 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
dev_replace->item_needs_writeback = 1;
up_write(&dev_replace->rwsem);
- ASSERT(cache->start == chunk_offset);
ret = scrub_chunk(sctx, cache, scrub_dev, found_key.offset,
dev_extent_len);
diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
index 17389a42a3ab..ba78ca5aabbb 100644
--- a/fs/btrfs/sysfs.c
+++ b/fs/btrfs/sysfs.c
@@ -922,6 +922,9 @@ static ssize_t btrfs_exclusive_operation_show(struct kobject *kobj,
case BTRFS_EXCLOP_BALANCE:
str = "balance\n";
break;
+ case BTRFS_EXCLOP_BALANCE_PAUSED:
+ str = "balance paused\n";
+ break;
case BTRFS_EXCLOP_DEV_ADD:
str = "device add\n";
break;
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 571dae8ad65e..e65633686378 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -3188,6 +3188,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
ret = btrfs_alloc_log_tree_node(trans, log_root_tree);
if (ret) {
mutex_unlock(&fs_info->tree_root->log_mutex);
+ blk_finish_plug(&plug);
goto out;
}
}
@@ -3720,11 +3721,29 @@ static noinline int insert_dir_log_key(struct btrfs_trans_handle *trans,
key.offset = first_offset;
key.type = BTRFS_DIR_LOG_INDEX_KEY;
ret = btrfs_insert_empty_item(trans, log, path, &key, sizeof(*item));
- if (ret)
+ /*
+ * -EEXIST is fine and can happen sporadically when we are logging a
+ * directory and have concurrent insertions in the subvolume's tree for
+ * items from other inodes and that result in pushing off some dir items
+ * from one leaf to another in order to accommodate for the new items.
+ * This results in logging the same dir index range key.
+ */
+ if (ret && ret != -EEXIST)
return ret;
item = btrfs_item_ptr(path->nodes[0], path->slots[0],
struct btrfs_dir_log_item);
+ if (ret == -EEXIST) {
+ const u64 curr_end = btrfs_dir_log_end(path->nodes[0], item);
+
+ /*
+ * btrfs_del_dir_entries_in_log() might have been called during
+ * an unlink between the initial insertion of this key and the
+ * current update, or we might be logging a single entry deletion
+ * during a rename, so set the new last_offset to the max value.
+ */
+ last_offset = max(last_offset, curr_end);
+ }
btrfs_set_dir_log_end(path->nodes[0], item, last_offset);
btrfs_mark_buffer_dirty(path->nodes[0]);
btrfs_release_path(path);
@@ -3848,13 +3867,6 @@ static int process_dir_items_leaf(struct btrfs_trans_handle *trans,
ret = insert_dir_log_key(trans, log, dst_path,
ino, *last_old_dentry_offset + 1,
key.offset - 1);
- /*
- * -EEXIST should never happen because when we
- * log a directory in full mode (LOG_INODE_ALL)
- * we drop all BTRFS_DIR_LOG_INDEX_KEY keys from
- * the log tree.
- */
- ASSERT(ret != -EEXIST);
if (ret < 0)
return ret;
}
@@ -5804,6 +5816,18 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
}
/*
+ * For symlinks, we must always log their content, which is stored in an
+ * inline extent, otherwise we could end up with an empty symlink after
+ * log replay, which is invalid on linux (symlink(2) returns -ENOENT if
+ * one attempts to create an empty symlink).
+ * We don't need to worry about flushing delalloc, because when we create
+ * the inline extent when the symlink is created (we never have delalloc
+ * for symlinks).
+ */
+ if (S_ISLNK(inode->vfs_inode.i_mode))
+ inode_only = LOG_INODE_ALL;
+
+ /*
* Before logging the inode item, cache the value returned by
* inode_logged(), because after that we have the need to figure out if
* the inode was previously logged in this transaction.
@@ -6181,7 +6205,7 @@ again:
}
ctx->log_new_dentries = false;
- if (type == BTRFS_FT_DIR || type == BTRFS_FT_SYMLINK)
+ if (type == BTRFS_FT_DIR)
log_mode = LOG_INODE_ALL;
ret = btrfs_log_inode(trans, BTRFS_I(di_inode),
log_mode, ctx);
@@ -7018,12 +7042,12 @@ void btrfs_log_new_name(struct btrfs_trans_handle *trans,
/*
* Other concurrent task might be logging the old directory,
* as it can be triggered when logging other inode that had or
- * still has a dentry in the old directory. So take the old
- * directory's log_mutex to prevent getting an -EEXIST when
- * logging a key to record the deletion, or having that other
- * task logging the old directory get an -EEXIST if it attempts
- * to log the same key after we just did it. In both cases that
- * would result in falling back to a transaction commit.
+ * still has a dentry in the old directory. We lock the old
+ * directory's log_mutex to ensure the deletion of the old
+ * name is persisted, because during directory logging we
+ * delete all BTRFS_DIR_LOG_INDEX_KEY keys and the deletion of
+ * the old name's dir index item is in the delayed items, so
+ * it could be missed by an in progress directory logging.
*/
mutex_lock(&old_dir->log_mutex);
ret = del_logged_dentry(trans, log, path, btrfs_ino(old_dir),
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index a8cc736731fd..b6b00338037c 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -405,7 +405,6 @@ void btrfs_free_device(struct btrfs_device *device)
WARN_ON(!list_empty(&device->post_commit_list));
rcu_string_free(device->name);
extent_io_tree_release(&device->alloc_state);
- bio_put(device->flush_bio);
btrfs_destroy_dev_zone_info(device);
kfree(device);
}
@@ -643,7 +642,7 @@ static int btrfs_open_one_device(struct btrfs_fs_devices *fs_devices,
set_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state);
}
- if (!blk_queue_nonrot(bdev_get_queue(bdev)))
+ if (!bdev_nonrot(bdev))
fs_devices->rotating = true;
device->bdev = bdev;
@@ -2706,7 +2705,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
atomic64_add(device->total_bytes, &fs_info->free_chunk_space);
- if (!blk_queue_nonrot(bdev_get_queue(bdev)))
+ if (!bdev_nonrot(bdev))
fs_devices->rotating = true;
orig_super_total_bytes = btrfs_super_total_bytes(fs_info->super_copy);
@@ -6949,16 +6948,6 @@ struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info,
if (!dev)
return ERR_PTR(-ENOMEM);
- /*
- * Preallocate a bio that's always going to be used for flushing device
- * barriers and matches the device lifespan
- */
- dev->flush_bio = bio_kmalloc(GFP_KERNEL, 0);
- if (!dev->flush_bio) {
- kfree(dev);
- return ERR_PTR(-ENOMEM);
- }
-
INIT_LIST_HEAD(&dev->dev_list);
INIT_LIST_HEAD(&dev->dev_alloc_list);
INIT_LIST_HEAD(&dev->post_commit_list);
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index bd297f23d19e..b11c563d2025 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -121,8 +121,8 @@ struct btrfs_device {
/* bytes used on the current transaction */
u64 commit_bytes_used;
- /* for sending down flush barriers */
- struct bio *flush_bio;
+ /* Bio used for flushing device barriers */
+ struct bio flush_bio;
struct completion flush_wait;
/* per-device scrub information */
@@ -328,6 +328,9 @@ struct btrfs_fs_devices {
struct btrfs_bio {
unsigned int mirror_num;
+ /* for direct I/O */
+ u64 file_offset;
+
/* @device is for stripe IO submission. */
struct btrfs_device *device;
u8 *csum;
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c
index 99abf41b89b9..85691dc2232f 100644
--- a/fs/btrfs/xattr.c
+++ b/fs/btrfs/xattr.c
@@ -262,7 +262,8 @@ int btrfs_setxattr_trans(struct inode *inode, const char *name,
inode_inc_iversion(inode);
inode->i_ctime = current_time(inode);
ret = btrfs_update_inode(trans, root, BTRFS_I(inode));
- BUG_ON(ret);
+ if (ret)
+ btrfs_abort_transaction(trans, ret);
out:
if (start_trans)
btrfs_end_transaction(trans);
@@ -403,10 +404,13 @@ static int btrfs_xattr_handler_set_prop(const struct xattr_handler *handler,
struct btrfs_root *root = BTRFS_I(inode)->root;
name = xattr_full_name(handler, name);
- ret = btrfs_validate_prop(name, value, size);
+ ret = btrfs_validate_prop(BTRFS_I(inode), name, value, size);
if (ret)
return ret;
+ if (btrfs_ignore_prop(BTRFS_I(inode), name))
+ return 0;
+
trans = btrfs_start_transaction(root, 2);
if (IS_ERR(trans))
return PTR_ERR(trans);
@@ -416,7 +420,8 @@ static int btrfs_xattr_handler_set_prop(const struct xattr_handler *handler,
inode_inc_iversion(inode);
inode->i_ctime = current_time(inode);
ret = btrfs_update_inode(trans, root, BTRFS_I(inode));
- BUG_ON(ret);
+ if (ret)
+ btrfs_abort_transaction(trans, ret);
}
btrfs_end_transaction(trans);
diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
index 1b1b310c3c51..29b54fd9c128 100644
--- a/fs/btrfs/zoned.c
+++ b/fs/btrfs/zoned.c
@@ -350,7 +350,6 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device, bool populate_cache)
struct btrfs_fs_info *fs_info = device->fs_info;
struct btrfs_zoned_device_info *zone_info = NULL;
struct block_device *bdev = device->bdev;
- struct request_queue *queue = bdev_get_queue(bdev);
unsigned int max_active_zones;
unsigned int nactive;
sector_t nr_sectors;
@@ -410,7 +409,7 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device, bool populate_cache)
if (!IS_ALIGNED(nr_sectors, zone_sectors))
zone_info->nr_zones++;
- max_active_zones = queue_max_active_zones(queue);
+ max_active_zones = bdev_max_active_zones(bdev);
if (max_active_zones && max_active_zones < BTRFS_MIN_ACTIVE_ZONES) {
btrfs_err_in_rcu(fs_info,
"zoned: %s: max active zones %u is too small, need at least %u active zones",
@@ -1835,6 +1834,12 @@ bool btrfs_zone_activate(struct btrfs_block_group *block_group)
goto out_unlock;
}
+ /* No space left */
+ if (block_group->alloc_offset == block_group->zone_capacity) {
+ ret = false;
+ goto out_unlock;
+ }
+
for (i = 0; i < map->num_stripes; i++) {
device = map->stripes[i].dev;
physical = map->stripes[i].physical;
@@ -1842,35 +1847,23 @@ bool btrfs_zone_activate(struct btrfs_block_group *block_group)
if (device->zone_info->max_active_zones == 0)
continue;
- /* No space left */
- if (block_group->alloc_offset == block_group->zone_capacity) {
- ret = false;
- goto out_unlock;
- }
-
if (!btrfs_dev_set_active_zone(device, physical)) {
/* Cannot activate the zone */
ret = false;
goto out_unlock;
}
-
- /* Successfully activated all the zones */
- if (i == map->num_stripes - 1)
- block_group->zone_is_active = 1;
-
-
}
+
+ /* Successfully activated all the zones */
+ block_group->zone_is_active = 1;
spin_unlock(&block_group->lock);
- if (block_group->zone_is_active) {
- /* For the active block group list */
- btrfs_get_block_group(block_group);
+ /* For the active block group list */
+ btrfs_get_block_group(block_group);
- spin_lock(&fs_info->zone_active_bgs_lock);
- list_add_tail(&block_group->active_bg_list,
- &fs_info->zone_active_bgs);
- spin_unlock(&fs_info->zone_active_bgs_lock);
- }
+ spin_lock(&fs_info->zone_active_bgs_lock);
+ list_add_tail(&block_group->active_bg_list, &fs_info->zone_active_bgs);
+ spin_unlock(&fs_info->zone_active_bgs_lock);
return true;
diff --git a/fs/btrfs/zoned.h b/fs/btrfs/zoned.h
index cbf016a7bb5d..6dee76248cb4 100644
--- a/fs/btrfs/zoned.h
+++ b/fs/btrfs/zoned.h
@@ -359,7 +359,7 @@ static inline void btrfs_zoned_data_reloc_lock(struct btrfs_inode *inode)
struct btrfs_root *root = inode->root;
if (btrfs_is_data_reloc_root(root) && btrfs_is_zoned(root->fs_info))
- btrfs_inode_lock(&inode->vfs_inode, 0);
+ mutex_lock(&root->fs_info->zoned_data_reloc_io_lock);
}
static inline void btrfs_zoned_data_reloc_unlock(struct btrfs_inode *inode)
@@ -367,7 +367,7 @@ static inline void btrfs_zoned_data_reloc_unlock(struct btrfs_inode *inode)
struct btrfs_root *root = inode->root;
if (btrfs_is_data_reloc_root(root) && btrfs_is_zoned(root->fs_info))
- btrfs_inode_unlock(&inode->vfs_inode, 0);
+ mutex_unlock(&root->fs_info->zoned_data_reloc_io_lock);
}
#endif
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index aa25bffd4823..b6edcf89a429 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -85,7 +85,7 @@ static bool ceph_dirty_folio(struct address_space *mapping, struct folio *folio)
if (folio_test_dirty(folio)) {
dout("%p dirty_folio %p idx %lu -- already dirty\n",
mapping->host, folio, folio->index);
- BUG_ON(!folio_get_private(folio));
+ VM_BUG_ON_FOLIO(!folio_test_private(folio), folio);
return false;
}
@@ -122,7 +122,7 @@ static bool ceph_dirty_folio(struct address_space *mapping, struct folio *folio)
* Reference snap context in folio->private. Also set
* PagePrivate so that we get invalidate_folio callback.
*/
- BUG_ON(folio_get_private(folio));
+ VM_BUG_ON_FOLIO(folio_test_private(folio), folio);
folio_attach_private(folio, snapc);
return ceph_fscache_dirty_folio(mapping, folio);
@@ -150,7 +150,7 @@ static void ceph_invalidate_folio(struct folio *folio, size_t offset,
}
WARN_ON(!folio_test_locked(folio));
- if (folio_get_private(folio)) {
+ if (folio_test_private(folio)) {
dout("%p invalidate_folio idx %lu full dirty page\n",
inode, folio->index);
@@ -729,8 +729,11 @@ static void writepages_finish(struct ceph_osd_request *req)
/* clean all pages */
for (i = 0; i < req->r_num_ops; i++) {
- if (req->r_ops[i].op != CEPH_OSD_OP_WRITE)
+ if (req->r_ops[i].op != CEPH_OSD_OP_WRITE) {
+ pr_warn("%s incorrect op %d req %p index %d tid %llu\n",
+ __func__, req->r_ops[i].op, req, i, req->r_tid);
break;
+ }
osd_data = osd_req_op_extent_osd_data(req, i);
BUG_ON(osd_data->type != CEPH_OSD_DATA_TYPE_PAGES);
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index f1ad6884d4da..5c14ef04e474 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -2274,6 +2274,8 @@ retry:
list_for_each_entry(req, &ci->i_unsafe_dirops,
r_unsafe_dir_item) {
s = req->r_session;
+ if (!s)
+ continue;
if (unlikely(s->s_mds >= max_sessions)) {
spin_unlock(&ci->i_unsafe_lock);
for (i = 0; i < max_sessions; i++) {
@@ -2294,6 +2296,8 @@ retry:
list_for_each_entry(req, &ci->i_unsafe_iops,
r_unsafe_target_item) {
s = req->r_session;
+ if (!s)
+ continue;
if (unlikely(s->s_mds >= max_sessions)) {
spin_unlock(&ci->i_unsafe_lock);
for (i = 0; i < max_sessions; i++) {
@@ -3870,6 +3874,7 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex,
dout("handle_cap_export inode %p ci %p mds%d mseq %d target %d\n",
inode, ci, mds, mseq, target);
retry:
+ down_read(&mdsc->snap_rwsem);
spin_lock(&ci->i_ceph_lock);
cap = __get_cap_for_mds(ci, mds);
if (!cap || cap->cap_id != le64_to_cpu(ex->cap_id))
@@ -3933,6 +3938,7 @@ retry:
}
spin_unlock(&ci->i_ceph_lock);
+ up_read(&mdsc->snap_rwsem);
mutex_unlock(&session->s_mutex);
/* open target session */
@@ -3958,6 +3964,7 @@ retry:
out_unlock:
spin_unlock(&ci->i_ceph_lock);
+ up_read(&mdsc->snap_rwsem);
mutex_unlock(&session->s_mutex);
if (tsession) {
mutex_unlock(&tsession->s_mutex);
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 6c9e837aa1d3..8c8226c0feac 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -629,9 +629,15 @@ static int ceph_finish_async_create(struct inode *dir, struct dentry *dentry,
iinfo.change_attr = 1;
ceph_encode_timespec64(&iinfo.btime, &now);
- iinfo.xattr_len = ARRAY_SIZE(xattr_buf);
- iinfo.xattr_data = xattr_buf;
- memset(iinfo.xattr_data, 0, iinfo.xattr_len);
+ if (req->r_pagelist) {
+ iinfo.xattr_len = req->r_pagelist->length;
+ iinfo.xattr_data = req->r_pagelist->mapped_tail;
+ } else {
+ /* fake it */
+ iinfo.xattr_len = ARRAY_SIZE(xattr_buf);
+ iinfo.xattr_data = xattr_buf;
+ memset(iinfo.xattr_data, 0, iinfo.xattr_len);
+ }
in.ino = cpu_to_le64(vino.ino);
in.snapid = cpu_to_le64(CEPH_NOSNAP);
@@ -743,6 +749,10 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
err = ceph_security_init_secctx(dentry, mode, &as_ctx);
if (err < 0)
goto out_ctx;
+ /* Async create can't handle more than a page of xattrs */
+ if (as_ctx.pagelist &&
+ !list_is_singular(&as_ctx.pagelist->head))
+ try_async = false;
} else if (!d_in_lookup(dentry)) {
/* If it's not being looked up, it's negative */
return -ENOENT;
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index fa38c013126d..00c3de177dd6 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -4434,8 +4434,6 @@ static void maybe_recover_session(struct ceph_mds_client *mdsc)
bool check_session_state(struct ceph_mds_session *s)
{
- struct ceph_fs_client *fsc = s->s_mdsc->fsc;
-
switch (s->s_state) {
case CEPH_MDS_SESSION_OPEN:
if (s->s_ttl && time_after(jiffies, s->s_ttl)) {
@@ -4444,10 +4442,6 @@ bool check_session_state(struct ceph_mds_session *s)
}
break;
case CEPH_MDS_SESSION_CLOSING:
- /* Should never reach this when not force unmounting */
- WARN_ON_ONCE(s->s_ttl &&
- READ_ONCE(fsc->mount_state) != CEPH_MOUNT_SHUTDOWN);
- fallthrough;
case CEPH_MDS_SESSION_NEW:
case CEPH_MDS_SESSION_RESTARTING:
case CEPH_MDS_SESSION_CLOSED:
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 902e8c6c0f9c..42e14f408856 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -534,12 +534,19 @@ int cifs_reconnect(struct TCP_Server_Info *server, bool mark_smb_session)
{
/* If tcp session is not an dfs connection, then reconnect to last target server */
spin_lock(&cifs_tcp_ses_lock);
- if (!server->is_dfs_conn || !server->origin_fullpath || !server->leaf_fullpath) {
+ if (!server->is_dfs_conn) {
spin_unlock(&cifs_tcp_ses_lock);
return __cifs_reconnect(server, mark_smb_session);
}
spin_unlock(&cifs_tcp_ses_lock);
+ mutex_lock(&server->refpath_lock);
+ if (!server->origin_fullpath || !server->leaf_fullpath) {
+ mutex_unlock(&server->refpath_lock);
+ return __cifs_reconnect(server, mark_smb_session);
+ }
+ mutex_unlock(&server->refpath_lock);
+
return reconnect_dfs_server(server);
}
#else
@@ -3675,9 +3682,11 @@ static void setup_server_referral_paths(struct mount_ctx *mnt_ctx)
{
struct TCP_Server_Info *server = mnt_ctx->server;
+ mutex_lock(&server->refpath_lock);
server->origin_fullpath = mnt_ctx->origin_fullpath;
server->leaf_fullpath = mnt_ctx->leaf_fullpath;
server->current_fullpath = mnt_ctx->leaf_fullpath;
+ mutex_unlock(&server->refpath_lock);
mnt_ctx->origin_fullpath = mnt_ctx->leaf_fullpath = NULL;
}
diff --git a/fs/cifs/dfs_cache.c b/fs/cifs/dfs_cache.c
index 30e040da4f09..956f8e5cf3e7 100644
--- a/fs/cifs/dfs_cache.c
+++ b/fs/cifs/dfs_cache.c
@@ -1422,12 +1422,14 @@ static int refresh_tcon(struct cifs_ses **sessions, struct cifs_tcon *tcon, bool
struct TCP_Server_Info *server = tcon->ses->server;
mutex_lock(&server->refpath_lock);
- if (strcasecmp(server->leaf_fullpath, server->origin_fullpath))
- __refresh_tcon(server->leaf_fullpath + 1, sessions, tcon, force_refresh);
+ if (server->origin_fullpath) {
+ if (server->leaf_fullpath && strcasecmp(server->leaf_fullpath,
+ server->origin_fullpath))
+ __refresh_tcon(server->leaf_fullpath + 1, sessions, tcon, force_refresh);
+ __refresh_tcon(server->origin_fullpath + 1, sessions, tcon, force_refresh);
+ }
mutex_unlock(&server->refpath_lock);
- __refresh_tcon(server->origin_fullpath + 1, sessions, tcon, force_refresh);
-
return 0;
}
@@ -1530,11 +1532,14 @@ static void refresh_mounts(struct cifs_ses **sessions)
list_del_init(&tcon->ulist);
mutex_lock(&server->refpath_lock);
- if (strcasecmp(server->leaf_fullpath, server->origin_fullpath))
- __refresh_tcon(server->leaf_fullpath + 1, sessions, tcon, false);
+ if (server->origin_fullpath) {
+ if (server->leaf_fullpath && strcasecmp(server->leaf_fullpath,
+ server->origin_fullpath))
+ __refresh_tcon(server->leaf_fullpath + 1, sessions, tcon, false);
+ __refresh_tcon(server->origin_fullpath + 1, sessions, tcon, false);
+ }
mutex_unlock(&server->refpath_lock);
- __refresh_tcon(server->origin_fullpath + 1, sessions, tcon, false);
cifs_put_tcon(tcon);
}
}
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index a67df8eaf702..d6aaeff4a30a 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -1858,9 +1858,17 @@ smb2_copychunk_range(const unsigned int xid,
int chunks_copied = 0;
bool chunk_sizes_updated = false;
ssize_t bytes_written, total_bytes_written = 0;
+ struct inode *inode;
pcchunk = kmalloc(sizeof(struct copychunk_ioctl), GFP_KERNEL);
+ /*
+ * We need to flush all unwritten data before we can send the
+ * copychunk ioctl to the server.
+ */
+ inode = d_inode(trgtfile->dentry);
+ filemap_write_and_wait(inode->i_mapping);
+
if (pcchunk == NULL)
return -ENOMEM;
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index d9d1c353bafc..c667e6ddfe2f 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -464,13 +464,12 @@ smb_send_rqst(struct TCP_Server_Info *server, int num_rqst,
return -EIO;
}
- tr_hdr = kmalloc(sizeof(*tr_hdr), GFP_NOFS);
+ tr_hdr = kzalloc(sizeof(*tr_hdr), GFP_NOFS);
if (!tr_hdr)
return -ENOMEM;
memset(&cur_rqst[0], 0, sizeof(cur_rqst));
memset(&iov, 0, sizeof(iov));
- memset(tr_hdr, 0, sizeof(*tr_hdr));
iov.iov_base = tr_hdr;
iov.iov_len = sizeof(*tr_hdr);
diff --git a/fs/direct-io.c b/fs/direct-io.c
index aef06e607b40..840752006f60 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -1115,11 +1115,10 @@ static inline int drop_refcount(struct dio *dio)
* individual fields and will generate much worse code. This is important
* for the whole file.
*/
-static inline ssize_t
-do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
- struct block_device *bdev, struct iov_iter *iter,
- get_block_t get_block, dio_iodone_t end_io,
- dio_submit_t submit_io, int flags)
+ssize_t __blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
+ struct block_device *bdev, struct iov_iter *iter,
+ get_block_t get_block, dio_iodone_t end_io,
+ dio_submit_t submit_io, int flags)
{
unsigned i_blkbits = READ_ONCE(inode->i_blkbits);
unsigned blkbits = i_blkbits;
@@ -1334,29 +1333,6 @@ fail_dio:
kmem_cache_free(dio_cache, dio);
return retval;
}
-
-ssize_t __blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
- struct block_device *bdev, struct iov_iter *iter,
- get_block_t get_block,
- dio_iodone_t end_io, dio_submit_t submit_io,
- int flags)
-{
- /*
- * The block device state is needed in the end to finally
- * submit everything. Since it's likely to be cache cold
- * prefetch it here as first thing to hide some of the
- * latency.
- *
- * Attempt to prefetch the pieces we likely need later.
- */
- prefetch(&bdev->bd_disk->part_tbl);
- prefetch(bdev->bd_disk->queue);
- prefetch((char *)bdev->bd_disk->queue + SMP_CACHE_BYTES);
-
- return do_blockdev_direct_IO(iocb, inode, bdev, iter, get_block,
- end_io, submit_io, flags);
-}
-
EXPORT_SYMBOL(__blockdev_direct_IO);
static __init int dio_init(void)
diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c
index 0ed880f42525..e6dea6dfca16 100644
--- a/fs/erofs/zdata.c
+++ b/fs/erofs/zdata.c
@@ -1066,12 +1066,9 @@ static void z_erofs_decompress_kickoff(struct z_erofs_decompressqueue *io,
/* wake up the caller thread for sync decompression */
if (sync) {
- unsigned long flags;
-
- spin_lock_irqsave(&io->u.wait.lock, flags);
if (!atomic_add_return(bios, &io->pending_bios))
- wake_up_locked(&io->u.wait);
- spin_unlock_irqrestore(&io->u.wait.lock, flags);
+ complete(&io->u.done);
+
return;
}
@@ -1217,7 +1214,7 @@ jobqueue_init(struct super_block *sb,
} else {
fg_out:
q = fgq;
- init_waitqueue_head(&fgq->u.wait);
+ init_completion(&fgq->u.done);
atomic_set(&fgq->pending_bios, 0);
}
q->sb = sb;
@@ -1419,8 +1416,7 @@ static void z_erofs_runqueue(struct super_block *sb,
return;
/* wait until all bios are completed */
- io_wait_event(io[JQ_SUBMIT].u.wait,
- !atomic_read(&io[JQ_SUBMIT].pending_bios));
+ wait_for_completion_io(&io[JQ_SUBMIT].u.done);
/* handle synchronous decompress queue in the caller context */
z_erofs_decompress_queue(&io[JQ_SUBMIT], pagepool);
diff --git a/fs/erofs/zdata.h b/fs/erofs/zdata.h
index e043216b545f..800b11c53f57 100644
--- a/fs/erofs/zdata.h
+++ b/fs/erofs/zdata.h
@@ -97,7 +97,7 @@ struct z_erofs_decompressqueue {
z_erofs_next_pcluster_t head;
union {
- wait_queue_head_t wait;
+ struct completion done;
struct work_struct work;
} u;
};
diff --git a/fs/exfat/file.c b/fs/exfat/file.c
index 2f5130059236..20d4e47f57ab 100644
--- a/fs/exfat/file.c
+++ b/fs/exfat/file.c
@@ -351,21 +351,20 @@ out:
static int exfat_ioctl_fitrim(struct inode *inode, unsigned long arg)
{
- struct request_queue *q = bdev_get_queue(inode->i_sb->s_bdev);
struct fstrim_range range;
int ret = 0;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
- if (!blk_queue_discard(q))
+ if (!bdev_max_discard_sectors(inode->i_sb->s_bdev))
return -EOPNOTSUPP;
if (copy_from_user(&range, (struct fstrim_range __user *)arg, sizeof(range)))
return -EFAULT;
range.minlen = max_t(unsigned int, range.minlen,
- q->limits.discard_granularity);
+ bdev_discard_granularity(inode->i_sb->s_bdev));
ret = exfat_trim_fs(inode, &range);
if (ret < 0)
diff --git a/fs/exfat/super.c b/fs/exfat/super.c
index 8ca21e7917d1..be0788ecaf20 100644
--- a/fs/exfat/super.c
+++ b/fs/exfat/super.c
@@ -627,13 +627,9 @@ static int exfat_fill_super(struct super_block *sb, struct fs_context *fc)
if (opts->allow_utime == (unsigned short)-1)
opts->allow_utime = ~opts->fs_dmask & 0022;
- if (opts->discard) {
- struct request_queue *q = bdev_get_queue(sb->s_bdev);
-
- if (!blk_queue_discard(q)) {
- exfat_warn(sb, "mounting with \"discard\" option, but the device does not support discard");
- opts->discard = 0;
- }
+ if (opts->discard && !bdev_max_discard_sectors(sb->s_bdev)) {
+ exfat_warn(sb, "mounting with \"discard\" option, but the device does not support discard");
+ opts->discard = 0;
}
sb->s_flags |= SB_NODIRATIME;
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 3f87cca49f0c..a743b1e3b89e 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -2273,6 +2273,10 @@ static inline int ext4_forced_shutdown(struct ext4_sb_info *sbi)
* Structure of a directory entry
*/
#define EXT4_NAME_LEN 255
+/*
+ * Base length of the ext4 directory entry excluding the name length
+ */
+#define EXT4_BASE_DIR_LEN (sizeof(struct ext4_dir_entry_2) - EXT4_NAME_LEN)
struct ext4_dir_entry {
__le32 inode; /* Inode number */
@@ -3032,7 +3036,7 @@ extern int ext4_inode_attach_jinode(struct inode *inode);
extern int ext4_can_truncate(struct inode *inode);
extern int ext4_truncate(struct inode *);
extern int ext4_break_layouts(struct inode *);
-extern int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length);
+extern int ext4_punch_hole(struct file *file, loff_t offset, loff_t length);
extern void ext4_set_inode_flags(struct inode *, bool init);
extern int ext4_alloc_da_blocks(struct inode *inode);
extern void ext4_set_aops(struct inode *inode);
@@ -3064,6 +3068,7 @@ int ext4_fileattr_set(struct user_namespace *mnt_userns,
struct dentry *dentry, struct fileattr *fa);
int ext4_fileattr_get(struct dentry *dentry, struct fileattr *fa);
extern void ext4_reset_inode_seed(struct inode *inode);
+int ext4_update_overhead(struct super_block *sb);
/* migrate.c */
extern int ext4_ext_migrate(struct inode *);
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 0d98cf402282..e473fde6b64b 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -4500,9 +4500,9 @@ retry:
return ret > 0 ? ret2 : ret;
}
-static int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len);
+static int ext4_collapse_range(struct file *file, loff_t offset, loff_t len);
-static int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len);
+static int ext4_insert_range(struct file *file, loff_t offset, loff_t len);
static long ext4_zero_range(struct file *file, loff_t offset,
loff_t len, int mode)
@@ -4574,6 +4574,10 @@ static long ext4_zero_range(struct file *file, loff_t offset,
/* Wait all existing dio workers, newcomers will block on i_rwsem */
inode_dio_wait(inode);
+ ret = file_modified(file);
+ if (ret)
+ goto out_mutex;
+
/* Preallocate the range including the unaligned edges */
if (partial_begin || partial_end) {
ret = ext4_alloc_file_blocks(file,
@@ -4690,7 +4694,7 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
return -EOPNOTSUPP;
if (mode & FALLOC_FL_PUNCH_HOLE) {
- ret = ext4_punch_hole(inode, offset, len);
+ ret = ext4_punch_hole(file, offset, len);
goto exit;
}
@@ -4699,12 +4703,12 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
goto exit;
if (mode & FALLOC_FL_COLLAPSE_RANGE) {
- ret = ext4_collapse_range(inode, offset, len);
+ ret = ext4_collapse_range(file, offset, len);
goto exit;
}
if (mode & FALLOC_FL_INSERT_RANGE) {
- ret = ext4_insert_range(inode, offset, len);
+ ret = ext4_insert_range(file, offset, len);
goto exit;
}
@@ -4740,6 +4744,10 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
/* Wait all existing dio workers, newcomers will block on i_rwsem */
inode_dio_wait(inode);
+ ret = file_modified(file);
+ if (ret)
+ goto out;
+
ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size, flags);
if (ret)
goto out;
@@ -5241,8 +5249,9 @@ out:
* This implements the fallocate's collapse range functionality for ext4
* Returns: 0 and non-zero on error.
*/
-static int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
+static int ext4_collapse_range(struct file *file, loff_t offset, loff_t len)
{
+ struct inode *inode = file_inode(file);
struct super_block *sb = inode->i_sb;
struct address_space *mapping = inode->i_mapping;
ext4_lblk_t punch_start, punch_stop;
@@ -5294,6 +5303,10 @@ static int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
/* Wait for existing dio to complete */
inode_dio_wait(inode);
+ ret = file_modified(file);
+ if (ret)
+ goto out_mutex;
+
/*
* Prevent page faults from reinstantiating pages we have released from
* page cache.
@@ -5387,8 +5400,9 @@ out_mutex:
* by len bytes.
* Returns 0 on success, error otherwise.
*/
-static int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len)
+static int ext4_insert_range(struct file *file, loff_t offset, loff_t len)
{
+ struct inode *inode = file_inode(file);
struct super_block *sb = inode->i_sb;
struct address_space *mapping = inode->i_mapping;
handle_t *handle;
@@ -5445,6 +5459,10 @@ static int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len)
/* Wait for existing dio to complete */
inode_dio_wait(inode);
+ ret = file_modified(file);
+ if (ret)
+ goto out_mutex;
+
/*
* Prevent page faults from reinstantiating pages we have released from
* page cache.
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 13740f2d0e61..646ece9b3455 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -3953,12 +3953,14 @@ int ext4_break_layouts(struct inode *inode)
* Returns: 0 on success or negative on failure
*/
-int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
+int ext4_punch_hole(struct file *file, loff_t offset, loff_t length)
{
+ struct inode *inode = file_inode(file);
struct super_block *sb = inode->i_sb;
ext4_lblk_t first_block, stop_block;
struct address_space *mapping = inode->i_mapping;
- loff_t first_block_offset, last_block_offset;
+ loff_t first_block_offset, last_block_offset, max_length;
+ struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
handle_t *handle;
unsigned int credits;
int ret = 0, ret2 = 0;
@@ -4001,6 +4003,14 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
offset;
}
+ /*
+ * For punch hole the length + offset needs to be within one block
+ * before last range. Adjust the length if it goes beyond that limit.
+ */
+ max_length = sbi->s_bitmap_maxbytes - inode->i_sb->s_blocksize;
+ if (offset + length > max_length)
+ length = max_length - offset;
+
if (offset & (sb->s_blocksize - 1) ||
(offset + length) & (sb->s_blocksize - 1)) {
/*
@@ -4016,6 +4026,10 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
/* Wait all existing dio workers, newcomers will block on i_rwsem */
inode_dio_wait(inode);
+ ret = file_modified(file);
+ if (ret)
+ goto out_mutex;
+
/*
* Prevent page faults from reinstantiating pages we have released from
* page cache.
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index 992229ca2d83..4d1d2326eee9 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -1044,7 +1044,6 @@ static int ext4_ioctl_checkpoint(struct file *filp, unsigned long arg)
__u32 flags = 0;
unsigned int flush_flags = 0;
struct super_block *sb = file_inode(filp)->i_sb;
- struct request_queue *q;
if (copy_from_user(&flags, (__u32 __user *)arg,
sizeof(__u32)))
@@ -1065,10 +1064,8 @@ static int ext4_ioctl_checkpoint(struct file *filp, unsigned long arg)
if (flags & ~EXT4_IOC_CHECKPOINT_FLAG_VALID)
return -EINVAL;
- q = bdev_get_queue(EXT4_SB(sb)->s_journal->j_dev);
- if (!q)
- return -ENXIO;
- if ((flags & JBD2_JOURNAL_FLUSH_DISCARD) && !blk_queue_discard(q))
+ if ((flags & JBD2_JOURNAL_FLUSH_DISCARD) &&
+ !bdev_max_discard_sectors(EXT4_SB(sb)->s_journal->j_dev))
return -EOPNOTSUPP;
if (flags & EXT4_IOC_CHECKPOINT_FLAG_DRY_RUN)
@@ -1393,14 +1390,13 @@ resizefs_out:
case FITRIM:
{
- struct request_queue *q = bdev_get_queue(sb->s_bdev);
struct fstrim_range range;
int ret = 0;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
- if (!blk_queue_discard(q))
+ if (!bdev_max_discard_sectors(sb->s_bdev))
return -EOPNOTSUPP;
/*
@@ -1652,3 +1648,19 @@ long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
return ext4_ioctl(file, cmd, (unsigned long) compat_ptr(arg));
}
#endif
+
+static void set_overhead(struct ext4_super_block *es, const void *arg)
+{
+ es->s_overhead_clusters = cpu_to_le32(*((unsigned long *) arg));
+}
+
+int ext4_update_overhead(struct super_block *sb)
+{
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+
+ if (sb_rdonly(sb) || sbi->s_overhead == 0 ||
+ sbi->s_overhead == le32_to_cpu(sbi->s_es->s_overhead_clusters))
+ return 0;
+
+ return ext4_update_superblocks_fn(sb, set_overhead, &sbi->s_overhead);
+}
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 252c168454c7..ea653d19f9ec 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -3498,7 +3498,7 @@ int ext4_mb_init(struct super_block *sb)
spin_lock_init(&lg->lg_prealloc_lock);
}
- if (blk_queue_nonrot(bdev_get_queue(sb->s_bdev)))
+ if (bdev_nonrot(sb->s_bdev))
sbi->s_mb_max_linear_groups = 0;
else
sbi->s_mb_max_linear_groups = MB_DEFAULT_LINEAR_LIMIT;
@@ -3629,7 +3629,7 @@ static inline int ext4_issue_discard(struct super_block *sb,
return __blkdev_issue_discard(sb->s_bdev,
(sector_t)discard_block << (sb->s_blocksize_bits - 9),
(sector_t)count << (sb->s_blocksize_bits - 9),
- GFP_NOFS, 0, biop);
+ GFP_NOFS, biop);
} else
return sb_issue_discard(sb, discard_block, count, GFP_NOFS, 0);
}
@@ -6455,7 +6455,7 @@ ext4_trim_all_free(struct super_block *sb, ext4_group_t group,
*/
int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
{
- struct request_queue *q = bdev_get_queue(sb->s_bdev);
+ unsigned int discard_granularity = bdev_discard_granularity(sb->s_bdev);
struct ext4_group_info *grp;
ext4_group_t group, first_group, last_group;
ext4_grpblk_t cnt = 0, first_cluster, last_cluster;
@@ -6475,9 +6475,9 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
range->len < sb->s_blocksize)
return -EINVAL;
/* No point to try to trim less than discard granularity */
- if (range->minlen < q->limits.discard_granularity) {
+ if (range->minlen < discard_granularity) {
minlen = EXT4_NUM_B2C(EXT4_SB(sb),
- q->limits.discard_granularity >> sb->s_blocksize_bits);
+ discard_granularity >> sb->s_blocksize_bits);
if (minlen > EXT4_CLUSTERS_PER_GROUP(sb))
goto out;
}
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index e37da8d5cd0c..767b4bfe39c3 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -1466,10 +1466,10 @@ int ext4_search_dir(struct buffer_head *bh, char *search_buf, int buf_size,
de = (struct ext4_dir_entry_2 *)search_buf;
dlimit = search_buf + buf_size;
- while ((char *) de < dlimit) {
+ while ((char *) de < dlimit - EXT4_BASE_DIR_LEN) {
/* this code is executed quadratically often */
/* do minimal checking `by hand' */
- if ((char *) de + de->name_len <= dlimit &&
+ if (de->name + de->name_len <= dlimit &&
ext4_match(dir, fname, de)) {
/* found a match - just to be sure, do
* a full check */
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index 495ce59fb4ad..14695e2b5042 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -134,8 +134,10 @@ static void ext4_finish_bio(struct bio *bio)
continue;
}
clear_buffer_async_write(bh);
- if (bio->bi_status)
+ if (bio->bi_status) {
+ set_buffer_write_io_error(bh);
buffer_io_error(bh);
+ }
} while ((bh = bh->b_this_page) != head);
spin_unlock_irqrestore(&head->b_uptodate_lock, flags);
if (!under_io) {
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 81749eaddf4c..6900da973ce2 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1199,20 +1199,25 @@ static void ext4_put_super(struct super_block *sb)
int aborted = 0;
int i, err;
- ext4_unregister_li_request(sb);
- ext4_quota_off_umount(sb);
-
- flush_work(&sbi->s_error_work);
- destroy_workqueue(sbi->rsv_conversion_wq);
- ext4_release_orphan_info(sb);
-
/*
* Unregister sysfs before destroying jbd2 journal.
* Since we could still access attr_journal_task attribute via sysfs
* path which could have sbi->s_journal->j_task as NULL
+ * Unregister sysfs before flush sbi->s_error_work.
+ * Since user may read /proc/fs/ext4/xx/mb_groups during umount, If
+ * read metadata verify failed then will queue error work.
+ * flush_stashed_error_work will call start_this_handle may trigger
+ * BUG_ON.
*/
ext4_unregister_sysfs(sb);
+ ext4_unregister_li_request(sb);
+ ext4_quota_off_umount(sb);
+
+ flush_work(&sbi->s_error_work);
+ destroy_workqueue(sbi->rsv_conversion_wq);
+ ext4_release_orphan_info(sb);
+
if (sbi->s_journal) {
aborted = is_journal_aborted(sbi->s_journal);
err = jbd2_journal_destroy(sbi->s_journal);
@@ -4172,9 +4177,11 @@ static int count_overhead(struct super_block *sb, ext4_group_t grp,
ext4_fsblk_t first_block, last_block, b;
ext4_group_t i, ngroups = ext4_get_groups_count(sb);
int s, j, count = 0;
+ int has_super = ext4_bg_has_super(sb, grp);
if (!ext4_has_feature_bigalloc(sb))
- return (ext4_bg_has_super(sb, grp) + ext4_bg_num_gdb(sb, grp) +
+ return (has_super + ext4_bg_num_gdb(sb, grp) +
+ (has_super ? le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) : 0) +
sbi->s_itb_per_group + 2);
first_block = le32_to_cpu(sbi->s_es->s_first_data_block) +
@@ -5282,9 +5289,18 @@ no_journal:
* Get the # of file system overhead blocks from the
* superblock if present.
*/
- if (es->s_overhead_clusters)
- sbi->s_overhead = le32_to_cpu(es->s_overhead_clusters);
- else {
+ sbi->s_overhead = le32_to_cpu(es->s_overhead_clusters);
+ /* ignore the precalculated value if it is ridiculous */
+ if (sbi->s_overhead > ext4_blocks_count(es))
+ sbi->s_overhead = 0;
+ /*
+ * If the bigalloc feature is not enabled recalculating the
+ * overhead doesn't take long, so we might as well just redo
+ * it to make sure we are using the correct value.
+ */
+ if (!ext4_has_feature_bigalloc(sb))
+ sbi->s_overhead = 0;
+ if (sbi->s_overhead == 0) {
err = ext4_calculate_overhead(sb);
if (err)
goto failed_mount_wq;
@@ -5458,13 +5474,9 @@ no_journal:
goto failed_mount9;
}
- if (test_opt(sb, DISCARD)) {
- struct request_queue *q = bdev_get_queue(sb->s_bdev);
- if (!blk_queue_discard(q))
- ext4_msg(sb, KERN_WARNING,
- "mounting with \"discard\" option, but "
- "the device does not support discard");
- }
+ if (test_opt(sb, DISCARD) && !bdev_max_discard_sectors(sb->s_bdev))
+ ext4_msg(sb, KERN_WARNING,
+ "mounting with \"discard\" option, but the device does not support discard");
if (es->s_error_count)
mod_timer(&sbi->s_err_report, jiffies + 300*HZ); /* 5 minutes */
@@ -5602,6 +5614,8 @@ static int ext4_fill_super(struct super_block *sb, struct fs_context *fc)
ext4_msg(sb, KERN_INFO, "mounted filesystem with%s. "
"Quota mode: %s.", descr, ext4_quota_mode(sb));
+ /* Update the s_overhead_clusters if necessary */
+ ext4_update_overhead(sb);
return 0;
free_sbi:
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index f5366feea82d..909085a78f9c 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -98,9 +98,9 @@ repeat:
}
if (unlikely(!PageUptodate(page))) {
- if (page->index == sbi->metapage_eio_ofs &&
- sbi->metapage_eio_cnt++ == MAX_RETRY_META_PAGE_EIO) {
- set_ckpt_flags(sbi, CP_ERROR_FLAG);
+ if (page->index == sbi->metapage_eio_ofs) {
+ if (sbi->metapage_eio_cnt++ == MAX_RETRY_META_PAGE_EIO)
+ set_ckpt_flags(sbi, CP_ERROR_FLAG);
} else {
sbi->metapage_eio_ofs = page->index;
sbi->metapage_eio_cnt = 0;
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 8e0c2e773c8d..9a1a526f2092 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -388,11 +388,23 @@ int f2fs_target_device_index(struct f2fs_sb_info *sbi, block_t blkaddr)
return 0;
}
-static void __attach_io_flag(struct f2fs_io_info *fio, unsigned int io_flag)
+static unsigned int f2fs_io_flags(struct f2fs_io_info *fio)
{
unsigned int temp_mask = (1 << NR_TEMP_TYPE) - 1;
- unsigned int fua_flag = io_flag & temp_mask;
- unsigned int meta_flag = (io_flag >> NR_TEMP_TYPE) & temp_mask;
+ unsigned int fua_flag, meta_flag, io_flag;
+ unsigned int op_flags = 0;
+
+ if (fio->op != REQ_OP_WRITE)
+ return 0;
+ if (fio->type == DATA)
+ io_flag = fio->sbi->data_io_flag;
+ else if (fio->type == NODE)
+ io_flag = fio->sbi->node_io_flag;
+ else
+ return 0;
+
+ fua_flag = io_flag & temp_mask;
+ meta_flag = (io_flag >> NR_TEMP_TYPE) & temp_mask;
/*
* data/node io flag bits per temp:
@@ -401,9 +413,10 @@ static void __attach_io_flag(struct f2fs_io_info *fio, unsigned int io_flag)
* Cold | Warm | Hot | Cold | Warm | Hot |
*/
if ((1 << fio->temp) & meta_flag)
- fio->op_flags |= REQ_META;
+ op_flags |= REQ_META;
if ((1 << fio->temp) & fua_flag)
- fio->op_flags |= REQ_FUA;
+ op_flags |= REQ_FUA;
+ return op_flags;
}
static struct bio *__bio_alloc(struct f2fs_io_info *fio, int npages)
@@ -413,14 +426,10 @@ static struct bio *__bio_alloc(struct f2fs_io_info *fio, int npages)
sector_t sector;
struct bio *bio;
- if (fio->type == DATA)
- __attach_io_flag(fio, sbi->data_io_flag);
- else if (fio->type == NODE)
- __attach_io_flag(fio, sbi->node_io_flag);
-
bdev = f2fs_target_device(sbi, fio->new_blkaddr, &sector);
- bio = bio_alloc_bioset(bdev, npages, fio->op | fio->op_flags, GFP_NOIO,
- &f2fs_bioset);
+ bio = bio_alloc_bioset(bdev, npages,
+ fio->op | fio->op_flags | f2fs_io_flags(fio),
+ GFP_NOIO, &f2fs_bioset);
bio->bi_iter.bi_sector = sector;
if (is_read_io(fio->op)) {
bio->bi_end_io = f2fs_read_end_io;
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index cd1e65bcf0b0..2b2b3c87e45e 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -154,7 +154,6 @@ struct f2fs_mount_info {
int s_jquota_fmt; /* Format of quota to use */
#endif
/* For which write hints are passed down to block layer */
- int whint_mode;
int alloc_mode; /* segment allocation policy */
int fsync_mode; /* fsync policy */
int fs_mode; /* fs mode: LFS or ADAPTIVE */
@@ -1334,12 +1333,6 @@ enum {
};
enum {
- WHINT_MODE_OFF, /* not pass down write hints */
- WHINT_MODE_USER, /* try to pass down hints given by users */
- WHINT_MODE_FS, /* pass down hints with F2FS policy */
-};
-
-enum {
ALLOC_MODE_DEFAULT, /* stay default */
ALLOC_MODE_REUSE, /* reuse segments as much as possible */
};
@@ -3657,8 +3650,6 @@ void f2fs_destroy_segment_manager(struct f2fs_sb_info *sbi);
int __init f2fs_create_segment_manager_caches(void);
void f2fs_destroy_segment_manager_caches(void);
int f2fs_rw_hint_to_seg_type(enum rw_hint hint);
-enum rw_hint f2fs_io_type_to_rw_hint(struct f2fs_sb_info *sbi,
- enum page_type type, enum temp_type temp);
unsigned int f2fs_usable_segs_in_sec(struct f2fs_sb_info *sbi,
unsigned int segno);
unsigned int f2fs_usable_blks_in_seg(struct f2fs_sb_info *sbi,
@@ -4381,8 +4372,7 @@ static inline bool f2fs_hw_should_discard(struct f2fs_sb_info *sbi)
static inline bool f2fs_bdev_support_discard(struct block_device *bdev)
{
- return blk_queue_discard(bdev_get_queue(bdev)) ||
- bdev_is_zoned(bdev);
+ return bdev_max_discard_sectors(bdev) || bdev_is_zoned(bdev);
}
static inline bool f2fs_hw_support_discard(struct f2fs_sb_info *sbi)
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 5b89af0f27f0..35b6c720c2bc 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -2285,7 +2285,6 @@ static int f2fs_ioc_fitrim(struct file *filp, unsigned long arg)
{
struct inode *inode = file_inode(filp);
struct super_block *sb = inode->i_sb;
- struct request_queue *q = bdev_get_queue(sb->s_bdev);
struct fstrim_range range;
int ret;
@@ -2304,7 +2303,7 @@ static int f2fs_ioc_fitrim(struct file *filp, unsigned long arg)
return ret;
range.minlen = max((unsigned int)range.minlen,
- q->limits.discard_granularity);
+ bdev_discard_granularity(sb->s_bdev));
ret = f2fs_trim_fs(F2FS_SB(sb), &range);
mnt_drop_write_file(filp);
if (ret < 0)
@@ -3686,18 +3685,18 @@ out:
static int f2fs_secure_erase(struct block_device *bdev, struct inode *inode,
pgoff_t off, block_t block, block_t len, u32 flags)
{
- struct request_queue *q = bdev_get_queue(bdev);
sector_t sector = SECTOR_FROM_BLOCK(block);
sector_t nr_sects = SECTOR_FROM_BLOCK(len);
int ret = 0;
- if (!q)
- return -ENXIO;
-
- if (flags & F2FS_TRIM_FILE_DISCARD)
- ret = blkdev_issue_discard(bdev, sector, nr_sects, GFP_NOFS,
- blk_queue_secure_erase(q) ?
- BLKDEV_DISCARD_SECURE : 0);
+ if (flags & F2FS_TRIM_FILE_DISCARD) {
+ if (bdev_max_secure_erase_sectors(bdev))
+ ret = blkdev_issue_secure_erase(bdev, sector, nr_sects,
+ GFP_NOFS);
+ else
+ ret = blkdev_issue_discard(bdev, sector, nr_sects,
+ GFP_NOFS);
+ }
if (!ret && (flags & F2FS_TRIM_FILE_ZEROOUT)) {
if (IS_ENCRYPTED(inode))
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index 71f232dcf3c2..83639238a1fe 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -550,7 +550,8 @@ make_now:
}
f2fs_set_inode_flags(inode);
- if (file_should_truncate(inode)) {
+ if (file_should_truncate(inode) &&
+ !is_sbi_flag_set(sbi, SBI_POR_DOING)) {
ret = f2fs_truncate(inode);
if (ret)
goto bad_inode;
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 22dfeb991529..7225ce09f3ab 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -1196,9 +1196,8 @@ static int __submit_discard_cmd(struct f2fs_sb_info *sbi,
unsigned int *issued)
{
struct block_device *bdev = dc->bdev;
- struct request_queue *q = bdev_get_queue(bdev);
unsigned int max_discard_blocks =
- SECTOR_TO_BLOCK(q->limits.max_discard_sectors);
+ SECTOR_TO_BLOCK(bdev_max_discard_sectors(bdev));
struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
struct list_head *wait_list = (dpolicy->type == DPOLICY_FSTRIM) ?
&(dcc->fstrim_list) : &(dcc->wait_list);
@@ -1245,7 +1244,7 @@ static int __submit_discard_cmd(struct f2fs_sb_info *sbi,
err = __blkdev_issue_discard(bdev,
SECTOR_FROM_BLOCK(start),
SECTOR_FROM_BLOCK(len),
- GFP_NOFS, 0, &bio);
+ GFP_NOFS, &bio);
submit:
if (err) {
spin_lock_irqsave(&dc->lock, flags);
@@ -1375,9 +1374,8 @@ static void __update_discard_tree_range(struct f2fs_sb_info *sbi,
struct discard_cmd *dc;
struct discard_info di = {0};
struct rb_node **insert_p = NULL, *insert_parent = NULL;
- struct request_queue *q = bdev_get_queue(bdev);
unsigned int max_discard_blocks =
- SECTOR_TO_BLOCK(q->limits.max_discard_sectors);
+ SECTOR_TO_BLOCK(bdev_max_discard_sectors(bdev));
block_t end = lstart + len;
dc = (struct discard_cmd *)f2fs_lookup_rb_tree_ret(&dcc->root,
@@ -3243,101 +3241,6 @@ int f2fs_rw_hint_to_seg_type(enum rw_hint hint)
}
}
-/* This returns write hints for each segment type. This hints will be
- * passed down to block layer. There are mapping tables which depend on
- * the mount option 'whint_mode'.
- *
- * 1) whint_mode=off. F2FS only passes down WRITE_LIFE_NOT_SET.
- *
- * 2) whint_mode=user-based. F2FS tries to pass down hints given by users.
- *
- * User F2FS Block
- * ---- ---- -----
- * META WRITE_LIFE_NOT_SET
- * HOT_NODE "
- * WARM_NODE "
- * COLD_NODE "
- * ioctl(COLD) COLD_DATA WRITE_LIFE_EXTREME
- * extension list " "
- *
- * -- buffered io
- * WRITE_LIFE_EXTREME COLD_DATA WRITE_LIFE_EXTREME
- * WRITE_LIFE_SHORT HOT_DATA WRITE_LIFE_SHORT
- * WRITE_LIFE_NOT_SET WARM_DATA WRITE_LIFE_NOT_SET
- * WRITE_LIFE_NONE " "
- * WRITE_LIFE_MEDIUM " "
- * WRITE_LIFE_LONG " "
- *
- * -- direct io
- * WRITE_LIFE_EXTREME COLD_DATA WRITE_LIFE_EXTREME
- * WRITE_LIFE_SHORT HOT_DATA WRITE_LIFE_SHORT
- * WRITE_LIFE_NOT_SET WARM_DATA WRITE_LIFE_NOT_SET
- * WRITE_LIFE_NONE " WRITE_LIFE_NONE
- * WRITE_LIFE_MEDIUM " WRITE_LIFE_MEDIUM
- * WRITE_LIFE_LONG " WRITE_LIFE_LONG
- *
- * 3) whint_mode=fs-based. F2FS passes down hints with its policy.
- *
- * User F2FS Block
- * ---- ---- -----
- * META WRITE_LIFE_MEDIUM;
- * HOT_NODE WRITE_LIFE_NOT_SET
- * WARM_NODE "
- * COLD_NODE WRITE_LIFE_NONE
- * ioctl(COLD) COLD_DATA WRITE_LIFE_EXTREME
- * extension list " "
- *
- * -- buffered io
- * WRITE_LIFE_EXTREME COLD_DATA WRITE_LIFE_EXTREME
- * WRITE_LIFE_SHORT HOT_DATA WRITE_LIFE_SHORT
- * WRITE_LIFE_NOT_SET WARM_DATA WRITE_LIFE_LONG
- * WRITE_LIFE_NONE " "
- * WRITE_LIFE_MEDIUM " "
- * WRITE_LIFE_LONG " "
- *
- * -- direct io
- * WRITE_LIFE_EXTREME COLD_DATA WRITE_LIFE_EXTREME
- * WRITE_LIFE_SHORT HOT_DATA WRITE_LIFE_SHORT
- * WRITE_LIFE_NOT_SET WARM_DATA WRITE_LIFE_NOT_SET
- * WRITE_LIFE_NONE " WRITE_LIFE_NONE
- * WRITE_LIFE_MEDIUM " WRITE_LIFE_MEDIUM
- * WRITE_LIFE_LONG " WRITE_LIFE_LONG
- */
-
-enum rw_hint f2fs_io_type_to_rw_hint(struct f2fs_sb_info *sbi,
- enum page_type type, enum temp_type temp)
-{
- if (F2FS_OPTION(sbi).whint_mode == WHINT_MODE_USER) {
- if (type == DATA) {
- if (temp == WARM)
- return WRITE_LIFE_NOT_SET;
- else if (temp == HOT)
- return WRITE_LIFE_SHORT;
- else if (temp == COLD)
- return WRITE_LIFE_EXTREME;
- } else {
- return WRITE_LIFE_NOT_SET;
- }
- } else if (F2FS_OPTION(sbi).whint_mode == WHINT_MODE_FS) {
- if (type == DATA) {
- if (temp == WARM)
- return WRITE_LIFE_LONG;
- else if (temp == HOT)
- return WRITE_LIFE_SHORT;
- else if (temp == COLD)
- return WRITE_LIFE_EXTREME;
- } else if (type == NODE) {
- if (temp == WARM || temp == HOT)
- return WRITE_LIFE_NOT_SET;
- else if (temp == COLD)
- return WRITE_LIFE_NONE;
- } else if (type == META) {
- return WRITE_LIFE_MEDIUM;
- }
- }
- return WRITE_LIFE_NOT_SET;
-}
-
static int __get_segment_type_2(struct f2fs_io_info *fio)
{
if (fio->type == DATA)
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index ea939db18f88..4368f90571bd 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -138,7 +138,6 @@ enum {
Opt_jqfmt_vfsold,
Opt_jqfmt_vfsv0,
Opt_jqfmt_vfsv1,
- Opt_whint,
Opt_alloc,
Opt_fsync,
Opt_test_dummy_encryption,
@@ -214,7 +213,6 @@ static match_table_t f2fs_tokens = {
{Opt_jqfmt_vfsold, "jqfmt=vfsold"},
{Opt_jqfmt_vfsv0, "jqfmt=vfsv0"},
{Opt_jqfmt_vfsv1, "jqfmt=vfsv1"},
- {Opt_whint, "whint_mode=%s"},
{Opt_alloc, "alloc_mode=%s"},
{Opt_fsync, "fsync_mode=%s"},
{Opt_test_dummy_encryption, "test_dummy_encryption=%s"},
@@ -975,22 +973,6 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount)
f2fs_info(sbi, "quota operations not supported");
break;
#endif
- case Opt_whint:
- name = match_strdup(&args[0]);
- if (!name)
- return -ENOMEM;
- if (!strcmp(name, "user-based")) {
- F2FS_OPTION(sbi).whint_mode = WHINT_MODE_USER;
- } else if (!strcmp(name, "off")) {
- F2FS_OPTION(sbi).whint_mode = WHINT_MODE_OFF;
- } else if (!strcmp(name, "fs-based")) {
- F2FS_OPTION(sbi).whint_mode = WHINT_MODE_FS;
- } else {
- kfree(name);
- return -EINVAL;
- }
- kfree(name);
- break;
case Opt_alloc:
name = match_strdup(&args[0]);
if (!name)
@@ -1328,12 +1310,6 @@ default_check:
return -EINVAL;
}
- /* Not pass down write hints if the number of active logs is lesser
- * than NR_CURSEG_PERSIST_TYPE.
- */
- if (F2FS_OPTION(sbi).active_logs != NR_CURSEG_PERSIST_TYPE)
- F2FS_OPTION(sbi).whint_mode = WHINT_MODE_OFF;
-
if (f2fs_sb_has_readonly(sbi) && !f2fs_readonly(sbi->sb)) {
f2fs_err(sbi, "Allow to mount readonly mode only");
return -EROFS;
@@ -1978,10 +1954,6 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
seq_puts(seq, ",prjquota");
#endif
f2fs_show_quota_options(seq, sbi->sb);
- if (F2FS_OPTION(sbi).whint_mode == WHINT_MODE_USER)
- seq_printf(seq, ",whint_mode=%s", "user-based");
- else if (F2FS_OPTION(sbi).whint_mode == WHINT_MODE_FS)
- seq_printf(seq, ",whint_mode=%s", "fs-based");
fscrypt_show_test_dummy_encryption(seq, ',', sbi->sb);
@@ -2033,7 +2005,6 @@ static void default_options(struct f2fs_sb_info *sbi)
F2FS_OPTION(sbi).active_logs = NR_CURSEG_PERSIST_TYPE;
F2FS_OPTION(sbi).inline_xattr_size = DEFAULT_INLINE_XATTR_ADDRS;
- F2FS_OPTION(sbi).whint_mode = WHINT_MODE_OFF;
F2FS_OPTION(sbi).alloc_mode = ALLOC_MODE_DEFAULT;
F2FS_OPTION(sbi).fsync_mode = FSYNC_MODE_POSIX;
F2FS_OPTION(sbi).s_resuid = make_kuid(&init_user_ns, F2FS_DEF_RESUID);
@@ -2314,8 +2285,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
need_stop_gc = true;
}
- if (*flags & SB_RDONLY ||
- F2FS_OPTION(sbi).whint_mode != org_mount_opt.whint_mode) {
+ if (*flags & SB_RDONLY) {
sync_inodes_sb(sb);
set_sbi_flag(sbi, SBI_IS_DIRTY);
diff --git a/fs/fat/file.c b/fs/fat/file.c
index a5a309fcc7fa..bf91f977debe 100644
--- a/fs/fat/file.c
+++ b/fs/fat/file.c
@@ -127,13 +127,12 @@ static int fat_ioctl_fitrim(struct inode *inode, unsigned long arg)
struct super_block *sb = inode->i_sb;
struct fstrim_range __user *user_range;
struct fstrim_range range;
- struct request_queue *q = bdev_get_queue(sb->s_bdev);
int err;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
- if (!blk_queue_discard(q))
+ if (!bdev_max_discard_sectors(sb->s_bdev))
return -EOPNOTSUPP;
user_range = (struct fstrim_range __user *)arg;
@@ -141,7 +140,7 @@ static int fat_ioctl_fitrim(struct inode *inode, unsigned long arg)
return -EFAULT;
range.minlen = max_t(unsigned int, range.minlen,
- q->limits.discard_granularity);
+ bdev_discard_granularity(sb->s_bdev));
err = fat_trim_fs(inode, &range);
if (err < 0)
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index bf6051bdf1d1..3d1afb95a925 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -1872,13 +1872,9 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat,
goto out_fail;
}
- if (sbi->options.discard) {
- struct request_queue *q = bdev_get_queue(sb->s_bdev);
- if (!blk_queue_discard(q))
- fat_msg(sb, KERN_WARNING,
- "mounting with \"discard\" option, but "
- "the device does not support discard");
- }
+ if (sbi->options.discard && !bdev_max_discard_sectors(sb->s_bdev))
+ fat_msg(sb, KERN_WARNING,
+ "mounting with \"discard\" option, but the device does not support discard");
fat_set_state(sb, 1, 0);
return 0;
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 591fe9cf1659..a1074a26e784 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -1712,6 +1712,10 @@ static int writeback_single_inode(struct inode *inode,
*/
if (!(inode->i_state & I_DIRTY_ALL))
inode_cgwb_move_to_attached(inode, wb);
+ else if (!(inode->i_state & I_SYNC_QUEUED) &&
+ (inode->i_state & I_DIRTY))
+ redirty_tail_locked(inode, wb);
+
spin_unlock(&wb->list_lock);
inode_sync_complete(inode);
out:
@@ -1775,11 +1779,12 @@ static long writeback_sb_inodes(struct super_block *sb,
};
unsigned long start_time = jiffies;
long write_chunk;
- long wrote = 0; /* count both pages and inodes */
+ long total_wrote = 0; /* count both pages and inodes */
while (!list_empty(&wb->b_io)) {
struct inode *inode = wb_inode(wb->b_io.prev);
struct bdi_writeback *tmp_wb;
+ long wrote;
if (inode->i_sb != sb) {
if (work->sb) {
@@ -1855,7 +1860,9 @@ static long writeback_sb_inodes(struct super_block *sb,
wbc_detach_inode(&wbc);
work->nr_pages -= write_chunk - wbc.nr_to_write;
- wrote += write_chunk - wbc.nr_to_write;
+ wrote = write_chunk - wbc.nr_to_write - wbc.pages_skipped;
+ wrote = wrote < 0 ? 0 : wrote;
+ total_wrote += wrote;
if (need_resched()) {
/*
@@ -1877,7 +1884,7 @@ static long writeback_sb_inodes(struct super_block *sb,
tmp_wb = inode_to_wb_and_lock_list(inode);
spin_lock(&inode->i_lock);
if (!(inode->i_state & I_DIRTY_ALL))
- wrote++;
+ total_wrote++;
requeue_inode(inode, tmp_wb, &wbc);
inode_sync_complete(inode);
spin_unlock(&inode->i_lock);
@@ -1891,14 +1898,14 @@ static long writeback_sb_inodes(struct super_block *sb,
* bail out to wb_writeback() often enough to check
* background threshold and other termination conditions.
*/
- if (wrote) {
+ if (total_wrote) {
if (time_is_before_jiffies(start_time + HZ / 10UL))
break;
if (work->nr_pages <= 0)
break;
}
}
- return wrote;
+ return total_wrote;
}
static long __writeback_inodes_wb(struct bdi_writeback *wb,
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 39080b2d6cf8..b6697333bb2b 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -1153,13 +1153,12 @@ static int gfs2_iomap_end(struct inode *inode, loff_t pos, loff_t length,
if (length != written && (iomap->flags & IOMAP_F_NEW)) {
/* Deallocate blocks that were just allocated. */
- loff_t blockmask = i_blocksize(inode) - 1;
- loff_t end = (pos + length) & ~blockmask;
+ loff_t hstart = round_up(pos + written, i_blocksize(inode));
+ loff_t hend = iomap->offset + iomap->length;
- pos = (pos + written + blockmask) & ~blockmask;
- if (pos < end) {
- truncate_pagecache_range(inode, pos, end - 1);
- punch_hole(ip, pos, end - pos);
+ if (hstart < hend) {
+ truncate_pagecache_range(inode, hstart, hend - 1);
+ punch_hole(ip, hstart, hend - hstart);
}
}
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 22b41acfbbc3..2556ae1f92ea 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -770,30 +770,27 @@ static int gfs2_fsync(struct file *file, loff_t start, loff_t end,
return ret ? ret : ret1;
}
-static inline bool should_fault_in_pages(ssize_t ret, struct iov_iter *i,
+static inline bool should_fault_in_pages(struct iov_iter *i,
+ struct kiocb *iocb,
size_t *prev_count,
size_t *window_size)
{
size_t count = iov_iter_count(i);
size_t size, offs;
- if (likely(!count))
- return false;
- if (ret <= 0 && ret != -EFAULT)
+ if (!count)
return false;
if (!iter_is_iovec(i))
return false;
size = PAGE_SIZE;
- offs = offset_in_page(i->iov[0].iov_base + i->iov_offset);
+ offs = offset_in_page(iocb->ki_pos);
if (*prev_count != count || !*window_size) {
size_t nr_dirtied;
- size = ALIGN(offs + count, PAGE_SIZE);
- size = min_t(size_t, size, SZ_1M);
nr_dirtied = max(current->nr_dirtied_pause -
current->nr_dirtied, 8);
- size = min(size, nr_dirtied << PAGE_SHIFT);
+ size = min_t(size_t, SZ_1M, nr_dirtied << PAGE_SHIFT);
}
*prev_count = count;
@@ -807,7 +804,7 @@ static ssize_t gfs2_file_direct_read(struct kiocb *iocb, struct iov_iter *to,
struct file *file = iocb->ki_filp;
struct gfs2_inode *ip = GFS2_I(file->f_mapping->host);
size_t prev_count = 0, window_size = 0;
- size_t written = 0;
+ size_t read = 0;
ssize_t ret;
/*
@@ -835,35 +832,31 @@ retry:
ret = gfs2_glock_nq(gh);
if (ret)
goto out_uninit;
-retry_under_glock:
pagefault_disable();
to->nofault = true;
ret = iomap_dio_rw(iocb, to, &gfs2_iomap_ops, NULL,
- IOMAP_DIO_PARTIAL, written);
+ IOMAP_DIO_PARTIAL, read);
to->nofault = false;
pagefault_enable();
+ if (ret <= 0 && ret != -EFAULT)
+ goto out_unlock;
if (ret > 0)
- written = ret;
-
- if (should_fault_in_pages(ret, to, &prev_count, &window_size)) {
- size_t leftover;
+ read = ret;
- gfs2_holder_allow_demote(gh);
- leftover = fault_in_iov_iter_writeable(to, window_size);
- gfs2_holder_disallow_demote(gh);
- if (leftover != window_size) {
- if (gfs2_holder_queued(gh))
- goto retry_under_glock;
+ if (should_fault_in_pages(to, iocb, &prev_count, &window_size)) {
+ gfs2_glock_dq(gh);
+ window_size -= fault_in_iov_iter_writeable(to, window_size);
+ if (window_size)
goto retry;
- }
}
+out_unlock:
if (gfs2_holder_queued(gh))
gfs2_glock_dq(gh);
out_uninit:
gfs2_holder_uninit(gh);
if (ret < 0)
return ret;
- return written;
+ return read;
}
static ssize_t gfs2_file_direct_write(struct kiocb *iocb, struct iov_iter *from,
@@ -873,7 +866,7 @@ static ssize_t gfs2_file_direct_write(struct kiocb *iocb, struct iov_iter *from,
struct inode *inode = file->f_mapping->host;
struct gfs2_inode *ip = GFS2_I(inode);
size_t prev_count = 0, window_size = 0;
- size_t read = 0;
+ size_t written = 0;
ssize_t ret;
/*
@@ -899,41 +892,37 @@ retry:
ret = gfs2_glock_nq(gh);
if (ret)
goto out_uninit;
-retry_under_glock:
/* Silently fall back to buffered I/O when writing beyond EOF */
if (iocb->ki_pos + iov_iter_count(from) > i_size_read(&ip->i_inode))
- goto out;
+ goto out_unlock;
from->nofault = true;
ret = iomap_dio_rw(iocb, from, &gfs2_iomap_ops, NULL,
- IOMAP_DIO_PARTIAL, read);
+ IOMAP_DIO_PARTIAL, written);
from->nofault = false;
-
- if (ret == -ENOTBLK)
- ret = 0;
+ if (ret <= 0) {
+ if (ret == -ENOTBLK)
+ ret = 0;
+ if (ret != -EFAULT)
+ goto out_unlock;
+ }
if (ret > 0)
- read = ret;
-
- if (should_fault_in_pages(ret, from, &prev_count, &window_size)) {
- size_t leftover;
+ written = ret;
- gfs2_holder_allow_demote(gh);
- leftover = fault_in_iov_iter_readable(from, window_size);
- gfs2_holder_disallow_demote(gh);
- if (leftover != window_size) {
- if (gfs2_holder_queued(gh))
- goto retry_under_glock;
+ if (should_fault_in_pages(from, iocb, &prev_count, &window_size)) {
+ gfs2_glock_dq(gh);
+ window_size -= fault_in_iov_iter_readable(from, window_size);
+ if (window_size)
goto retry;
- }
}
-out:
+out_unlock:
if (gfs2_holder_queued(gh))
gfs2_glock_dq(gh);
out_uninit:
gfs2_holder_uninit(gh);
if (ret < 0)
return ret;
- return read;
+ return written;
}
static ssize_t gfs2_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
@@ -941,7 +930,7 @@ static ssize_t gfs2_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
struct gfs2_inode *ip;
struct gfs2_holder gh;
size_t prev_count = 0, window_size = 0;
- size_t written = 0;
+ size_t read = 0;
ssize_t ret;
/*
@@ -962,7 +951,7 @@ static ssize_t gfs2_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
if (ret >= 0) {
if (!iov_iter_count(to))
return ret;
- written = ret;
+ read = ret;
} else if (ret != -EFAULT) {
if (ret != -EAGAIN)
return ret;
@@ -975,32 +964,26 @@ retry:
ret = gfs2_glock_nq(&gh);
if (ret)
goto out_uninit;
-retry_under_glock:
pagefault_disable();
ret = generic_file_read_iter(iocb, to);
pagefault_enable();
+ if (ret <= 0 && ret != -EFAULT)
+ goto out_unlock;
if (ret > 0)
- written += ret;
+ read += ret;
- if (should_fault_in_pages(ret, to, &prev_count, &window_size)) {
- size_t leftover;
-
- gfs2_holder_allow_demote(&gh);
- leftover = fault_in_iov_iter_writeable(to, window_size);
- gfs2_holder_disallow_demote(&gh);
- if (leftover != window_size) {
- if (gfs2_holder_queued(&gh))
- goto retry_under_glock;
- if (written)
- goto out_uninit;
+ if (should_fault_in_pages(to, iocb, &prev_count, &window_size)) {
+ gfs2_glock_dq(&gh);
+ window_size -= fault_in_iov_iter_writeable(to, window_size);
+ if (window_size)
goto retry;
- }
}
+out_unlock:
if (gfs2_holder_queued(&gh))
gfs2_glock_dq(&gh);
out_uninit:
gfs2_holder_uninit(&gh);
- return written ? written : ret;
+ return read ? read : ret;
}
static ssize_t gfs2_file_buffered_write(struct kiocb *iocb,
@@ -1014,7 +997,7 @@ static ssize_t gfs2_file_buffered_write(struct kiocb *iocb,
struct gfs2_holder *statfs_gh = NULL;
size_t prev_count = 0, window_size = 0;
size_t orig_count = iov_iter_count(from);
- size_t read = 0;
+ size_t written = 0;
ssize_t ret;
/*
@@ -1032,10 +1015,18 @@ static ssize_t gfs2_file_buffered_write(struct kiocb *iocb,
gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, gh);
retry:
+ if (should_fault_in_pages(from, iocb, &prev_count, &window_size)) {
+ window_size -= fault_in_iov_iter_readable(from, window_size);
+ if (!window_size) {
+ ret = -EFAULT;
+ goto out_uninit;
+ }
+ from->count = min(from->count, window_size);
+ }
ret = gfs2_glock_nq(gh);
if (ret)
goto out_uninit;
-retry_under_glock:
+
if (inode == sdp->sd_rindex) {
struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);
@@ -1052,27 +1043,19 @@ retry_under_glock:
current->backing_dev_info = NULL;
if (ret > 0) {
iocb->ki_pos += ret;
- read += ret;
+ written += ret;
}
if (inode == sdp->sd_rindex)
gfs2_glock_dq_uninit(statfs_gh);
- from->count = orig_count - read;
- if (should_fault_in_pages(ret, from, &prev_count, &window_size)) {
- size_t leftover;
-
- gfs2_holder_allow_demote(gh);
- leftover = fault_in_iov_iter_readable(from, window_size);
- gfs2_holder_disallow_demote(gh);
- if (leftover != window_size) {
- from->count = min(from->count, window_size - leftover);
- if (gfs2_holder_queued(gh))
- goto retry_under_glock;
- if (read && !(iocb->ki_flags & IOCB_DIRECT))
- goto out_uninit;
- goto retry;
- }
+ if (ret <= 0 && ret != -EFAULT)
+ goto out_unlock;
+
+ from->count = orig_count - written;
+ if (should_fault_in_pages(from, iocb, &prev_count, &window_size)) {
+ gfs2_glock_dq(gh);
+ goto retry;
}
out_unlock:
if (gfs2_holder_queued(gh))
@@ -1081,8 +1064,8 @@ out_uninit:
gfs2_holder_uninit(gh);
if (statfs_gh)
kfree(statfs_gh);
- from->count = orig_count - read;
- return read ? read : ret;
+ from->count = orig_count - written;
+ return written ? written : ret;
}
/**
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 801ad9f4f2be..6d26bb525484 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -1386,7 +1386,7 @@ int gfs2_fitrim(struct file *filp, void __user *argp)
{
struct inode *inode = file_inode(filp);
struct gfs2_sbd *sdp = GFS2_SB(inode);
- struct request_queue *q = bdev_get_queue(sdp->sd_vfs->s_bdev);
+ struct block_device *bdev = sdp->sd_vfs->s_bdev;
struct buffer_head *bh;
struct gfs2_rgrpd *rgd;
struct gfs2_rgrpd *rgd_end;
@@ -1405,7 +1405,7 @@ int gfs2_fitrim(struct file *filp, void __user *argp)
if (!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags))
return -EROFS;
- if (!blk_queue_discard(q))
+ if (!bdev_max_discard_sectors(bdev))
return -EOPNOTSUPP;
if (copy_from_user(&r, argp, sizeof(r)))
@@ -1418,8 +1418,7 @@ int gfs2_fitrim(struct file *filp, void __user *argp)
start = r.start >> bs_shift;
end = start + (r.len >> bs_shift);
minlen = max_t(u64, r.minlen, sdp->sd_sb.sb_bsize);
- minlen = max_t(u64, minlen,
- q->limits.discard_granularity) >> bs_shift;
+ minlen = max_t(u64, minlen, bdev_discard_granularity(bdev)) >> bs_shift;
if (end <= start || minlen > sdp->sd_max_rg_data)
return -EINVAL;
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 99c7477cee5c..dd3a088db11d 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -206,7 +206,7 @@ hugetlb_get_unmapped_area_bottomup(struct file *file, unsigned long addr,
info.flags = 0;
info.length = len;
info.low_limit = current->mm->mmap_base;
- info.high_limit = TASK_SIZE;
+ info.high_limit = arch_get_mmap_end(addr);
info.align_mask = PAGE_MASK & ~huge_page_mask(h);
info.align_offset = 0;
return vm_unmapped_area(&info);
@@ -222,7 +222,7 @@ hugetlb_get_unmapped_area_topdown(struct file *file, unsigned long addr,
info.flags = VM_UNMAPPED_AREA_TOPDOWN;
info.length = len;
info.low_limit = max(PAGE_SIZE, mmap_min_addr);
- info.high_limit = current->mm->mmap_base;
+ info.high_limit = arch_get_mmap_base(addr, current->mm->mmap_base);
info.align_mask = PAGE_MASK & ~huge_page_mask(h);
info.align_offset = 0;
addr = vm_unmapped_area(&info);
@@ -237,7 +237,7 @@ hugetlb_get_unmapped_area_topdown(struct file *file, unsigned long addr,
VM_BUG_ON(addr != -ENOMEM);
info.flags = 0;
info.low_limit = current->mm->mmap_base;
- info.high_limit = TASK_SIZE;
+ info.high_limit = arch_get_mmap_end(addr);
addr = vm_unmapped_area(&info);
}
@@ -251,6 +251,7 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
struct hstate *h = hstate_file(file);
+ const unsigned long mmap_end = arch_get_mmap_end(addr);
if (len & ~huge_page_mask(h))
return -EINVAL;
@@ -266,7 +267,7 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
if (addr) {
addr = ALIGN(addr, huge_page_size(h));
vma = find_vma(mm, addr);
- if (TASK_SIZE - len >= addr &&
+ if (mmap_end - len >= addr &&
(!vma || addr + len <= vm_start_gap(vma)))
return addr;
}
diff --git a/fs/internal.h b/fs/internal.h
index 08503dc68d2b..9a6c233ee7f1 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -191,3 +191,32 @@ long splice_file_to_pipe(struct file *in,
struct pipe_inode_info *opipe,
loff_t *offset,
size_t len, unsigned int flags);
+
+/*
+ * fs/xattr.c:
+ */
+struct xattr_name {
+ char name[XATTR_NAME_MAX + 1];
+};
+
+struct xattr_ctx {
+ /* Value of attribute */
+ union {
+ const void __user *cvalue;
+ void __user *value;
+ };
+ void *kvalue;
+ size_t size;
+ /* Attribute name */
+ struct xattr_name *kname;
+ unsigned int flags;
+};
+
+
+ssize_t do_getxattr(struct user_namespace *mnt_userns,
+ struct dentry *d,
+ struct xattr_ctx *ctx);
+
+int setxattr_copy(const char __user *name, struct xattr_ctx *ctx);
+int do_setxattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+ struct xattr_ctx *ctx);
diff --git a/fs/io-wq.c b/fs/io-wq.c
index 32aeb2c581c5..824623bcf1a5 100644
--- a/fs/io-wq.c
+++ b/fs/io-wq.c
@@ -871,7 +871,7 @@ static bool io_wq_for_each_worker(struct io_wqe *wqe,
static bool io_wq_worker_wake(struct io_worker *worker, void *data)
{
- set_notify_signal(worker->task);
+ __set_notify_signal(worker->task);
wake_up_process(worker->task);
return false;
}
@@ -991,7 +991,7 @@ static bool __io_wq_worker_cancel(struct io_worker *worker,
{
if (work && match->fn(work, match->data)) {
work->flags |= IO_WQ_WORK_CANCEL;
- set_notify_signal(worker->task);
+ __set_notify_signal(worker->task);
return true;
}
diff --git a/fs/io-wq.h b/fs/io-wq.h
index dbecd27656c7..ba6eee76d028 100644
--- a/fs/io-wq.h
+++ b/fs/io-wq.h
@@ -155,6 +155,7 @@ struct io_wq_work_node *wq_stack_extract(struct io_wq_work_node *stack)
struct io_wq_work {
struct io_wq_work_node list;
unsigned flags;
+ int cancel_seq;
};
static inline struct io_wq_work *wq_next_work(struct io_wq_work *work)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 4479013854d2..9f1c682d7caf 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -80,6 +80,7 @@
#include <linux/io_uring.h>
#include <linux/audit.h>
#include <linux/security.h>
+#include <linux/xattr.h>
#define CREATE_TRACE_POINTS
#include <trace/events/io_uring.h>
@@ -94,7 +95,7 @@
#define IORING_SQPOLL_CAP_ENTRIES_VALUE 8
/* only define max */
-#define IORING_MAX_FIXED_FILES (1U << 15)
+#define IORING_MAX_FIXED_FILES (1U << 20)
#define IORING_MAX_RESTRICTIONS (IORING_RESTRICTION_LAST + \
IORING_REGISTER_LAST + IORING_OP_LAST)
@@ -113,6 +114,11 @@
#define IO_REQ_CLEAN_FLAGS (REQ_F_BUFFER_SELECTED | REQ_F_NEED_CLEANUP | \
REQ_F_POLLED | REQ_F_CREDS | REQ_F_ASYNC_DATA)
+#define IO_REQ_CLEAN_SLOW_FLAGS (REQ_F_REFCOUNT | REQ_F_LINK | REQ_F_HARDLINK |\
+ IO_REQ_CLEAN_FLAGS)
+
+#define IO_APOLL_MULTI_POLLED (REQ_F_APOLL_MULTISHOT | REQ_F_POLLED)
+
#define IO_TCTX_REFS_CACHE_NR (1U << 10)
struct io_uring {
@@ -166,7 +172,7 @@ struct io_rings {
* The application needs a full memory barrier before checking
* for IORING_SQ_NEED_WAKEUP after updating the sq tail.
*/
- u32 sq_flags;
+ atomic_t sq_flags;
/*
* Runtime CQ flags
*
@@ -198,13 +204,6 @@ struct io_rings {
struct io_uring_cqe cqes[] ____cacheline_aligned_in_smp;
};
-enum io_uring_cmd_flags {
- IO_URING_F_COMPLETE_DEFER = 1,
- IO_URING_F_UNLOCKED = 2,
- /* int's last bit, sign checks are usually faster than a bit test */
- IO_URING_F_NONBLOCK = INT_MIN,
-};
-
struct io_mapped_ubuf {
u64 ubuf;
u64 ubuf_end;
@@ -216,10 +215,27 @@ struct io_mapped_ubuf {
struct io_ring_ctx;
struct io_overflow_cqe {
- struct io_uring_cqe cqe;
struct list_head list;
+ struct io_uring_cqe cqe;
};
+/*
+ * FFS_SCM is only available on 64-bit archs, for 32-bit we just define it as 0
+ * and define IO_URING_SCM_ALL. For this case, we use SCM for all files as we
+ * can't safely always dereference the file when the task has exited and ring
+ * cleanup is done. If a file is tracked and part of SCM, then unix gc on
+ * process exit may reap it before __io_sqe_files_unregister() is run.
+ */
+#define FFS_NOWAIT 0x1UL
+#define FFS_ISREG 0x2UL
+#if defined(CONFIG_64BIT)
+#define FFS_SCM 0x4UL
+#else
+#define IO_URING_SCM_ALL
+#define FFS_SCM 0x0UL
+#endif
+#define FFS_MASK ~(FFS_NOWAIT|FFS_ISREG|FFS_SCM)
+
struct io_fixed_file {
/* file * with additional FFS_* flags */
unsigned long file_ptr;
@@ -237,6 +253,8 @@ struct io_rsrc_put {
struct io_file_table {
struct io_fixed_file *files;
+ unsigned long *bitmap;
+ unsigned int alloc_hint;
};
struct io_rsrc_node {
@@ -261,10 +279,26 @@ struct io_rsrc_data {
bool quiesce;
};
+#define IO_BUFFER_LIST_BUF_PER_PAGE (PAGE_SIZE / sizeof(struct io_uring_buf))
struct io_buffer_list {
- struct list_head list;
- struct list_head buf_list;
+ /*
+ * If ->buf_nr_pages is set, then buf_pages/buf_ring are used. If not,
+ * then these are classic provided buffers and ->buf_list is used.
+ */
+ union {
+ struct list_head buf_list;
+ struct {
+ struct page **buf_pages;
+ struct io_uring_buf_ring *buf_ring;
+ };
+ };
__u16 bgid;
+
+ /* below is for ring provided buffers */
+ __u16 buf_nr_pages;
+ __u16 nr_entries;
+ __u32 head;
+ __u32 mask;
};
struct io_buffer {
@@ -337,7 +371,7 @@ struct io_ev_fd {
struct rcu_head rcu;
};
-#define IO_BUFFERS_HASH_BITS 5
+#define BGID_ARRAY 64
struct io_ring_ctx {
/* const or read-mostly hot data */
@@ -346,6 +380,7 @@ struct io_ring_ctx {
struct io_rings *rings;
unsigned int flags;
+ enum task_work_notify_mode notify_method;
unsigned int compat: 1;
unsigned int drain_next: 1;
unsigned int restricted: 1;
@@ -353,6 +388,7 @@ struct io_ring_ctx {
unsigned int drain_active: 1;
unsigned int drain_disabled: 1;
unsigned int has_evfd: 1;
+ unsigned int syscall_iopoll: 1;
} ____cacheline_aligned_in_smp;
/* submission data */
@@ -382,17 +418,21 @@ struct io_ring_ctx {
*/
struct io_rsrc_node *rsrc_node;
int rsrc_cached_refs;
+ atomic_t cancel_seq;
struct io_file_table file_table;
unsigned nr_user_files;
unsigned nr_user_bufs;
struct io_mapped_ubuf **user_bufs;
struct io_submit_state submit_state;
+
+ struct io_buffer_list *io_bl;
+ struct xarray io_bl_xa;
+ struct list_head io_buffers_cache;
+
struct list_head timeout_list;
struct list_head ltimeout_list;
struct list_head cq_overflow_list;
- struct list_head *io_buffers;
- struct list_head io_buffers_cache;
struct list_head apoll_cache;
struct xarray personalities;
u32 pers_next;
@@ -409,9 +449,16 @@ struct io_ring_ctx {
struct wait_queue_head sqo_sq_wait;
struct list_head sqd_list;
- unsigned long check_cq_overflow;
+ unsigned long check_cq;
struct {
+ /*
+ * We cache a range of free CQEs we can use, once exhausted it
+ * should go through a slower range setup, see __io_get_cqe()
+ */
+ struct io_uring_cqe *cqe_cached;
+ struct io_uring_cqe *cqe_sentinel;
+
unsigned cached_cq_tail;
unsigned cq_entries;
struct io_ev_fd __rcu *io_ev_fd;
@@ -497,7 +544,7 @@ struct io_uring_task {
spinlock_t task_lock;
struct io_wq_work_list task_list;
- struct io_wq_work_list prior_task_list;
+ struct io_wq_work_list prio_task_list;
struct callback_head task_work;
struct file **registered_rings;
bool task_running;
@@ -546,6 +593,16 @@ struct io_accept {
unsigned long nofile;
};
+struct io_socket {
+ struct file *file;
+ int domain;
+ int type;
+ int protocol;
+ int flags;
+ u32 file_slot;
+ unsigned long nofile;
+};
+
struct io_sync {
struct file *file;
loff_t len;
@@ -557,6 +614,8 @@ struct io_sync {
struct io_cancel {
struct file *file;
u64 addr;
+ u32 flags;
+ s32 fd;
};
struct io_timeout {
@@ -585,7 +644,7 @@ struct io_rw {
struct kiocb kiocb;
u64 addr;
u32 len;
- u32 flags;
+ rwf_t flags;
};
struct io_connect {
@@ -602,9 +661,9 @@ struct io_sr_msg {
void __user *buf;
};
int msg_flags;
- int bgid;
size_t len;
size_t done_io;
+ unsigned int flags;
};
struct io_open {
@@ -722,6 +781,12 @@ struct io_msg {
u32 len;
};
+struct io_nop {
+ struct file *file;
+ u64 extra1;
+ u64 extra2;
+};
+
struct io_async_connect {
struct sockaddr_storage address;
};
@@ -748,6 +813,12 @@ struct io_async_rw {
struct wait_page_queue wpq;
};
+struct io_xattr {
+ struct file *file;
+ struct xattr_ctx ctx;
+ struct filename *filename;
+};
+
enum {
REQ_F_FIXED_FILE_BIT = IOSQE_FIXED_FILE_BIT,
REQ_F_IO_DRAIN_BIT = IOSQE_IO_DRAIN_BIT,
@@ -766,6 +837,7 @@ enum {
REQ_F_NEED_CLEANUP_BIT,
REQ_F_POLLED_BIT,
REQ_F_BUFFER_SELECTED_BIT,
+ REQ_F_BUFFER_RING_BIT,
REQ_F_COMPLETE_INLINE_BIT,
REQ_F_REISSUE_BIT,
REQ_F_CREDS_BIT,
@@ -776,6 +848,7 @@ enum {
REQ_F_SINGLE_POLL_BIT,
REQ_F_DOUBLE_POLL_BIT,
REQ_F_PARTIAL_IO_BIT,
+ REQ_F_APOLL_MULTISHOT_BIT,
/* keep async read/write and isreg together and in order */
REQ_F_SUPPORT_NOWAIT_BIT,
REQ_F_ISREG_BIT,
@@ -816,6 +889,8 @@ enum {
REQ_F_POLLED = BIT(REQ_F_POLLED_BIT),
/* buffer already selected */
REQ_F_BUFFER_SELECTED = BIT(REQ_F_BUFFER_SELECTED_BIT),
+ /* buffer selected from ring, needs commit */
+ REQ_F_BUFFER_RING = BIT(REQ_F_BUFFER_RING_BIT),
/* completion is deferred through io_comp_state */
REQ_F_COMPLETE_INLINE = BIT(REQ_F_COMPLETE_INLINE_BIT),
/* caller should reissue async */
@@ -840,6 +915,8 @@ enum {
REQ_F_DOUBLE_POLL = BIT(REQ_F_DOUBLE_POLL_BIT),
/* request has already done partial IO */
REQ_F_PARTIAL_IO = BIT(REQ_F_PARTIAL_IO_BIT),
+ /* fast poll multishot mode */
+ REQ_F_APOLL_MULTISHOT = BIT(REQ_F_APOLL_MULTISHOT_BIT),
};
struct async_poll {
@@ -862,6 +939,21 @@ enum {
IORING_RSRC_BUFFER = 1,
};
+struct io_cqe {
+ __u64 user_data;
+ __s32 res;
+ /* fd initially, then cflags for completion */
+ union {
+ __u32 flags;
+ int fd;
+ };
+};
+
+enum {
+ IO_CHECK_CQ_OVERFLOW_BIT,
+ IO_CHECK_CQ_DROPPED_BIT,
+};
+
/*
* NOTE! Each of the iocb union members has the file pointer
* as the first entry in their struct definition. So you can
@@ -897,46 +989,65 @@ struct io_kiocb {
struct io_symlink symlink;
struct io_hardlink hardlink;
struct io_msg msg;
+ struct io_xattr xattr;
+ struct io_socket sock;
+ struct io_nop nop;
+ struct io_uring_cmd uring_cmd;
};
u8 opcode;
/* polled IO has completed */
u8 iopoll_completed;
+ /*
+ * Can be either a fixed buffer index, or used with provided buffers.
+ * For the latter, before issue it points to the buffer group ID,
+ * and after selection it points to the buffer ID itself.
+ */
u16 buf_index;
unsigned int flags;
- u64 user_data;
- u32 result;
- /* fd initially, then cflags for completion */
- union {
- u32 cflags;
- int fd;
- };
+ struct io_cqe cqe;
struct io_ring_ctx *ctx;
struct task_struct *task;
- struct percpu_ref *fixed_rsrc_refs;
- /* store used ubuf, so we can prevent reloading */
- struct io_mapped_ubuf *imu;
+ struct io_rsrc_node *rsrc_node;
+
+ union {
+ /* store used ubuf, so we can prevent reloading */
+ struct io_mapped_ubuf *imu;
+
+ /* stores selected buf, valid IFF REQ_F_BUFFER_SELECTED is set */
+ struct io_buffer *kbuf;
+
+ /*
+ * stores buffer ID for ring provided buffers, valid IFF
+ * REQ_F_BUFFER_RING is set.
+ */
+ struct io_buffer_list *buf_list;
+ };
union {
/* used by request caches, completion batching and iopoll */
struct io_wq_work_node comp_list;
/* cache ->apoll->events */
- int apoll_events;
+ __poll_t apoll_events;
};
atomic_t refs;
atomic_t poll_refs;
struct io_task_work io_task_work;
/* for polled requests, i.e. IORING_OP_POLL_ADD and async armed poll */
- struct hlist_node hash_node;
+ union {
+ struct hlist_node hash_node;
+ struct {
+ u64 extra1;
+ u64 extra2;
+ };
+ };
/* internal polling, see IORING_FEAT_FAST_POLL */
struct async_poll *apoll;
/* opcode allocated if it needs to store data for async defer */
void *async_data;
- /* stores selected buf, valid IFF REQ_F_BUFFER_SELECTED is set */
- struct io_buffer *kbuf;
/* linked requests, IFF REQ_F_HARDLINK or REQ_F_LINK are set */
struct io_kiocb *link;
/* custom credentials, valid IFF REQ_F_CREDS is set */
@@ -956,6 +1067,24 @@ struct io_defer_entry {
u32 seq;
};
+struct io_cancel_data {
+ struct io_ring_ctx *ctx;
+ union {
+ u64 data;
+ struct file *file;
+ };
+ u32 flags;
+ int seq;
+};
+
+/*
+ * The URING_CMD payload starts at 'cmd' in the first sqe, and continues into
+ * the following sqe if SQE128 is used.
+ */
+#define uring_cmd_pdu_size(is_sqe128) \
+ ((1 + !!(is_sqe128)) * sizeof(struct io_uring_sqe) - \
+ offsetof(struct io_uring_sqe, cmd))
+
struct io_op_def {
/* needs req->file assigned */
unsigned needs_file : 1;
@@ -977,12 +1106,20 @@ struct io_op_def {
unsigned not_supported : 1;
/* skip auditing */
unsigned audit_skip : 1;
+ /* supports ioprio */
+ unsigned ioprio : 1;
+ /* supports iopoll */
+ unsigned iopoll : 1;
/* size of async data needed, if any */
unsigned short async_size;
};
static const struct io_op_def io_op_defs[] = {
- [IORING_OP_NOP] = {},
+ [IORING_OP_NOP] = {
+ .audit_skip = 1,
+ .iopoll = 1,
+ .buffer_select = 1,
+ },
[IORING_OP_READV] = {
.needs_file = 1,
.unbound_nonreg_file = 1,
@@ -991,6 +1128,8 @@ static const struct io_op_def io_op_defs[] = {
.needs_async_setup = 1,
.plug = 1,
.audit_skip = 1,
+ .ioprio = 1,
+ .iopoll = 1,
.async_size = sizeof(struct io_async_rw),
},
[IORING_OP_WRITEV] = {
@@ -1001,6 +1140,8 @@ static const struct io_op_def io_op_defs[] = {
.needs_async_setup = 1,
.plug = 1,
.audit_skip = 1,
+ .ioprio = 1,
+ .iopoll = 1,
.async_size = sizeof(struct io_async_rw),
},
[IORING_OP_FSYNC] = {
@@ -1013,6 +1154,8 @@ static const struct io_op_def io_op_defs[] = {
.pollin = 1,
.plug = 1,
.audit_skip = 1,
+ .ioprio = 1,
+ .iopoll = 1,
.async_size = sizeof(struct io_async_rw),
},
[IORING_OP_WRITE_FIXED] = {
@@ -1022,6 +1165,8 @@ static const struct io_op_def io_op_defs[] = {
.pollout = 1,
.plug = 1,
.audit_skip = 1,
+ .ioprio = 1,
+ .iopoll = 1,
.async_size = sizeof(struct io_async_rw),
},
[IORING_OP_POLL_ADD] = {
@@ -1064,6 +1209,7 @@ static const struct io_op_def io_op_defs[] = {
.unbound_nonreg_file = 1,
.pollin = 1,
.poll_exclusive = 1,
+ .ioprio = 1, /* used for flags */
},
[IORING_OP_ASYNC_CANCEL] = {
.audit_skip = 1,
@@ -1086,6 +1232,7 @@ static const struct io_op_def io_op_defs[] = {
[IORING_OP_CLOSE] = {},
[IORING_OP_FILES_UPDATE] = {
.audit_skip = 1,
+ .iopoll = 1,
},
[IORING_OP_STATX] = {
.audit_skip = 1,
@@ -1097,6 +1244,8 @@ static const struct io_op_def io_op_defs[] = {
.buffer_select = 1,
.plug = 1,
.audit_skip = 1,
+ .ioprio = 1,
+ .iopoll = 1,
.async_size = sizeof(struct io_async_rw),
},
[IORING_OP_WRITE] = {
@@ -1106,6 +1255,8 @@ static const struct io_op_def io_op_defs[] = {
.pollout = 1,
.plug = 1,
.audit_skip = 1,
+ .ioprio = 1,
+ .iopoll = 1,
.async_size = sizeof(struct io_async_rw),
},
[IORING_OP_FADVISE] = {
@@ -1140,9 +1291,11 @@ static const struct io_op_def io_op_defs[] = {
},
[IORING_OP_PROVIDE_BUFFERS] = {
.audit_skip = 1,
+ .iopoll = 1,
},
[IORING_OP_REMOVE_BUFFERS] = {
.audit_skip = 1,
+ .iopoll = 1,
},
[IORING_OP_TEE] = {
.needs_file = 1,
@@ -1160,11 +1313,30 @@ static const struct io_op_def io_op_defs[] = {
[IORING_OP_LINKAT] = {},
[IORING_OP_MSG_RING] = {
.needs_file = 1,
+ .iopoll = 1,
+ },
+ [IORING_OP_FSETXATTR] = {
+ .needs_file = 1
+ },
+ [IORING_OP_SETXATTR] = {},
+ [IORING_OP_FGETXATTR] = {
+ .needs_file = 1
+ },
+ [IORING_OP_GETXATTR] = {},
+ [IORING_OP_SOCKET] = {
+ .audit_skip = 1,
+ },
+ [IORING_OP_URING_CMD] = {
+ .needs_file = 1,
+ .plug = 1,
+ .needs_async_setup = 1,
+ .async_size = uring_cmd_pdu_size(1),
},
};
/* requests with any of those set should undergo io_disarm_next() */
#define IO_DISARM_MASK (REQ_F_ARM_LTIMEOUT | REQ_F_LINK_TIMEOUT | REQ_F_FAIL)
+#define IO_REQ_LINK_FLAGS (REQ_F_LINK | REQ_F_HARDLINK)
static bool io_disarm_next(struct io_kiocb *req);
static void io_uring_del_tctx_node(unsigned long index);
@@ -1173,10 +1345,7 @@ static void io_uring_try_cancel_requests(struct io_ring_ctx *ctx,
bool cancel_all);
static void io_uring_cancel_generic(bool cancel_all, struct io_sq_data *sqd);
-static void io_fill_cqe_req(struct io_kiocb *req, s32 res, u32 cflags);
-
-static void io_put_req(struct io_kiocb *req);
-static void io_put_req_deferred(struct io_kiocb *req);
+static void __io_req_complete_post(struct io_kiocb *req, s32 res, u32 cflags);
static void io_dismantle_req(struct io_kiocb *req);
static void io_queue_linked_timeout(struct io_kiocb *req);
static int __io_register_rsrc_update(struct io_ring_ctx *ctx, unsigned type,
@@ -1185,10 +1354,10 @@ static int __io_register_rsrc_update(struct io_ring_ctx *ctx, unsigned type,
static void io_clean_op(struct io_kiocb *req);
static inline struct file *io_file_get_fixed(struct io_kiocb *req, int fd,
unsigned issue_flags);
-static inline struct file *io_file_get_normal(struct io_kiocb *req, int fd);
+static struct file *io_file_get_normal(struct io_kiocb *req, int fd);
static void io_drop_inflight_file(struct io_kiocb *req);
static bool io_assign_file(struct io_kiocb *req, unsigned int issue_flags);
-static void __io_queue_sqe(struct io_kiocb *req);
+static void io_queue_sqe(struct io_kiocb *req);
static void io_rsrc_put_work(struct work_struct *work);
static void io_req_task_queue(struct io_kiocb *req);
@@ -1201,11 +1370,115 @@ static int io_close_fixed(struct io_kiocb *req, unsigned int issue_flags);
static enum hrtimer_restart io_link_timeout_fn(struct hrtimer *timer);
static void io_eventfd_signal(struct io_ring_ctx *ctx);
+static void io_req_tw_post_queue(struct io_kiocb *req, s32 res, u32 cflags);
static struct kmem_cache *req_cachep;
static const struct file_operations io_uring_fops;
+const char *io_uring_get_opcode(u8 opcode)
+{
+ switch ((enum io_uring_op)opcode) {
+ case IORING_OP_NOP:
+ return "NOP";
+ case IORING_OP_READV:
+ return "READV";
+ case IORING_OP_WRITEV:
+ return "WRITEV";
+ case IORING_OP_FSYNC:
+ return "FSYNC";
+ case IORING_OP_READ_FIXED:
+ return "READ_FIXED";
+ case IORING_OP_WRITE_FIXED:
+ return "WRITE_FIXED";
+ case IORING_OP_POLL_ADD:
+ return "POLL_ADD";
+ case IORING_OP_POLL_REMOVE:
+ return "POLL_REMOVE";
+ case IORING_OP_SYNC_FILE_RANGE:
+ return "SYNC_FILE_RANGE";
+ case IORING_OP_SENDMSG:
+ return "SENDMSG";
+ case IORING_OP_RECVMSG:
+ return "RECVMSG";
+ case IORING_OP_TIMEOUT:
+ return "TIMEOUT";
+ case IORING_OP_TIMEOUT_REMOVE:
+ return "TIMEOUT_REMOVE";
+ case IORING_OP_ACCEPT:
+ return "ACCEPT";
+ case IORING_OP_ASYNC_CANCEL:
+ return "ASYNC_CANCEL";
+ case IORING_OP_LINK_TIMEOUT:
+ return "LINK_TIMEOUT";
+ case IORING_OP_CONNECT:
+ return "CONNECT";
+ case IORING_OP_FALLOCATE:
+ return "FALLOCATE";
+ case IORING_OP_OPENAT:
+ return "OPENAT";
+ case IORING_OP_CLOSE:
+ return "CLOSE";
+ case IORING_OP_FILES_UPDATE:
+ return "FILES_UPDATE";
+ case IORING_OP_STATX:
+ return "STATX";
+ case IORING_OP_READ:
+ return "READ";
+ case IORING_OP_WRITE:
+ return "WRITE";
+ case IORING_OP_FADVISE:
+ return "FADVISE";
+ case IORING_OP_MADVISE:
+ return "MADVISE";
+ case IORING_OP_SEND:
+ return "SEND";
+ case IORING_OP_RECV:
+ return "RECV";
+ case IORING_OP_OPENAT2:
+ return "OPENAT2";
+ case IORING_OP_EPOLL_CTL:
+ return "EPOLL_CTL";
+ case IORING_OP_SPLICE:
+ return "SPLICE";
+ case IORING_OP_PROVIDE_BUFFERS:
+ return "PROVIDE_BUFFERS";
+ case IORING_OP_REMOVE_BUFFERS:
+ return "REMOVE_BUFFERS";
+ case IORING_OP_TEE:
+ return "TEE";
+ case IORING_OP_SHUTDOWN:
+ return "SHUTDOWN";
+ case IORING_OP_RENAMEAT:
+ return "RENAMEAT";
+ case IORING_OP_UNLINKAT:
+ return "UNLINKAT";
+ case IORING_OP_MKDIRAT:
+ return "MKDIRAT";
+ case IORING_OP_SYMLINKAT:
+ return "SYMLINKAT";
+ case IORING_OP_LINKAT:
+ return "LINKAT";
+ case IORING_OP_MSG_RING:
+ return "MSG_RING";
+ case IORING_OP_FSETXATTR:
+ return "FSETXATTR";
+ case IORING_OP_SETXATTR:
+ return "SETXATTR";
+ case IORING_OP_FGETXATTR:
+ return "FGETXATTR";
+ case IORING_OP_GETXATTR:
+ return "GETXATTR";
+ case IORING_OP_SOCKET:
+ return "SOCKET";
+ case IORING_OP_URING_CMD:
+ return "URING_CMD";
+ case IORING_OP_LAST:
+ return "INVALID";
+ }
+ return "INVALID";
+}
+
struct sock *io_uring_get_socket(struct file *file)
{
#if defined(CONFIG_UNIX)
@@ -1219,6 +1492,42 @@ struct sock *io_uring_get_socket(struct file *file)
}
EXPORT_SYMBOL(io_uring_get_socket);
+#if defined(CONFIG_UNIX)
+static inline bool io_file_need_scm(struct file *filp)
+{
+#if defined(IO_URING_SCM_ALL)
+ return true;
+#else
+ return !!unix_get_socket(filp);
+#endif
+}
+#else
+static inline bool io_file_need_scm(struct file *filp)
+{
+ return false;
+}
+#endif
+
+static void io_ring_submit_unlock(struct io_ring_ctx *ctx, unsigned issue_flags)
+{
+ lockdep_assert_held(&ctx->uring_lock);
+ if (issue_flags & IO_URING_F_UNLOCKED)
+ mutex_unlock(&ctx->uring_lock);
+}
+
+static void io_ring_submit_lock(struct io_ring_ctx *ctx, unsigned issue_flags)
+{
+ /*
+ * "Normal" inline submissions always hold the uring_lock, since we
+ * grab it from the system call. Same is true for the SQPOLL offload.
+ * The only exception is when we've detached the request and issue it
+ * from an async worker thread, grab the lock for that case.
+ */
+ if (issue_flags & IO_URING_F_UNLOCKED)
+ mutex_lock(&ctx->uring_lock);
+ lockdep_assert_held(&ctx->uring_lock);
+}
+
static inline void io_tw_lock(struct io_ring_ctx *ctx, bool *locked)
{
if (!*locked) {
@@ -1280,31 +1589,36 @@ static inline void io_req_set_refcount(struct io_kiocb *req)
#define IO_RSRC_REF_BATCH 100
+static void io_rsrc_put_node(struct io_rsrc_node *node, int nr)
+{
+ percpu_ref_put_many(&node->refs, nr);
+}
+
static inline void io_req_put_rsrc_locked(struct io_kiocb *req,
struct io_ring_ctx *ctx)
__must_hold(&ctx->uring_lock)
{
- struct percpu_ref *ref = req->fixed_rsrc_refs;
+ struct io_rsrc_node *node = req->rsrc_node;
- if (ref) {
- if (ref == &ctx->rsrc_node->refs)
+ if (node) {
+ if (node == ctx->rsrc_node)
ctx->rsrc_cached_refs++;
else
- percpu_ref_put(ref);
+ io_rsrc_put_node(node, 1);
}
}
-static inline void io_req_put_rsrc(struct io_kiocb *req, struct io_ring_ctx *ctx)
+static inline void io_req_put_rsrc(struct io_kiocb *req)
{
- if (req->fixed_rsrc_refs)
- percpu_ref_put(req->fixed_rsrc_refs);
+ if (req->rsrc_node)
+ io_rsrc_put_node(req->rsrc_node, 1);
}
static __cold void io_rsrc_refs_drop(struct io_ring_ctx *ctx)
__must_hold(&ctx->uring_lock)
{
if (ctx->rsrc_cached_refs) {
- percpu_ref_put_many(&ctx->rsrc_node->refs, ctx->rsrc_cached_refs);
+ io_rsrc_put_node(ctx->rsrc_node, ctx->rsrc_cached_refs);
ctx->rsrc_cached_refs = 0;
}
}
@@ -1320,8 +1634,8 @@ static inline void io_req_set_rsrc_node(struct io_kiocb *req,
struct io_ring_ctx *ctx,
unsigned int issue_flags)
{
- if (!req->fixed_rsrc_refs) {
- req->fixed_rsrc_refs = &ctx->rsrc_node->refs;
+ if (!req->rsrc_node) {
+ req->rsrc_node = ctx->rsrc_node;
if (!(issue_flags & IO_URING_F_UNLOCKED)) {
lockdep_assert_held(&ctx->uring_lock);
@@ -1329,28 +1643,30 @@ static inline void io_req_set_rsrc_node(struct io_kiocb *req,
if (unlikely(ctx->rsrc_cached_refs < 0))
io_rsrc_refs_refill(ctx);
} else {
- percpu_ref_get(req->fixed_rsrc_refs);
+ percpu_ref_get(&req->rsrc_node->refs);
}
}
}
static unsigned int __io_put_kbuf(struct io_kiocb *req, struct list_head *list)
{
- struct io_buffer *kbuf = req->kbuf;
- unsigned int cflags;
+ if (req->flags & REQ_F_BUFFER_RING) {
+ if (req->buf_list)
+ req->buf_list->head++;
+ req->flags &= ~REQ_F_BUFFER_RING;
+ } else {
+ list_add(&req->kbuf->list, list);
+ req->flags &= ~REQ_F_BUFFER_SELECTED;
+ }
- cflags = IORING_CQE_F_BUFFER | (kbuf->bid << IORING_CQE_BUFFER_SHIFT);
- req->flags &= ~REQ_F_BUFFER_SELECTED;
- list_add(&kbuf->list, list);
- req->kbuf = NULL;
- return cflags;
+ return IORING_CQE_F_BUFFER | (req->buf_index << IORING_CQE_BUFFER_SHIFT);
}
static inline unsigned int io_put_kbuf_comp(struct io_kiocb *req)
{
lockdep_assert_held(&req->ctx->completion_lock);
- if (likely(!(req->flags & REQ_F_BUFFER_SELECTED)))
+ if (!(req->flags & (REQ_F_BUFFER_SELECTED|REQ_F_BUFFER_RING)))
return 0;
return __io_put_kbuf(req, &req->ctx->io_buffers_comp);
}
@@ -1360,7 +1676,7 @@ static inline unsigned int io_put_kbuf(struct io_kiocb *req,
{
unsigned int cflags;
- if (likely(!(req->flags & REQ_F_BUFFER_SELECTED)))
+ if (!(req->flags & (REQ_F_BUFFER_SELECTED|REQ_F_BUFFER_RING)))
return 0;
/*
@@ -1375,7 +1691,10 @@ static inline unsigned int io_put_kbuf(struct io_kiocb *req,
* We migrate buffers from the comp_list to the issue cache list
* when we need one.
*/
- if (issue_flags & IO_URING_F_UNLOCKED) {
+ if (req->flags & REQ_F_BUFFER_RING) {
+ /* no buffers to recycle for this case */
+ cflags = __io_put_kbuf(req, NULL);
+ } else if (issue_flags & IO_URING_F_UNLOCKED) {
struct io_ring_ctx *ctx = req->ctx;
spin_lock(&ctx->completion_lock);
@@ -1393,15 +1712,10 @@ static inline unsigned int io_put_kbuf(struct io_kiocb *req,
static struct io_buffer_list *io_buffer_get_list(struct io_ring_ctx *ctx,
unsigned int bgid)
{
- struct list_head *hash_list;
- struct io_buffer_list *bl;
+ if (ctx->io_bl && bgid < BGID_ARRAY)
+ return &ctx->io_bl[bgid];
- hash_list = &ctx->io_buffers[hash_32(bgid, IO_BUFFERS_HASH_BITS)];
- list_for_each_entry(bl, hash_list, list)
- if (bl->bgid == bgid || bgid == -1U)
- return bl;
-
- return NULL;
+ return xa_load(&ctx->io_bl_xa, bgid);
}
static void io_kbuf_recycle(struct io_kiocb *req, unsigned issue_flags)
@@ -1410,25 +1724,33 @@ static void io_kbuf_recycle(struct io_kiocb *req, unsigned issue_flags)
struct io_buffer_list *bl;
struct io_buffer *buf;
- if (likely(!(req->flags & REQ_F_BUFFER_SELECTED)))
+ if (!(req->flags & (REQ_F_BUFFER_SELECTED|REQ_F_BUFFER_RING)))
return;
/* don't recycle if we already did IO to this buffer */
if (req->flags & REQ_F_PARTIAL_IO)
return;
+ /*
+ * We don't need to recycle for REQ_F_BUFFER_RING, we can just clear
+ * the flag and hence ensure that bl->head doesn't get incremented.
+ * If the tail has already been incremented, hang on to it.
+ */
+ if (req->flags & REQ_F_BUFFER_RING) {
+ if (req->buf_list) {
+ req->buf_index = req->buf_list->bgid;
+ req->flags &= ~REQ_F_BUFFER_RING;
+ }
+ return;
+ }
- if (issue_flags & IO_URING_F_UNLOCKED)
- mutex_lock(&ctx->uring_lock);
-
- lockdep_assert_held(&ctx->uring_lock);
+ io_ring_submit_lock(ctx, issue_flags);
buf = req->kbuf;
bl = io_buffer_get_list(ctx, buf->bgid);
list_add(&buf->list, &bl->buf_list);
req->flags &= ~REQ_F_BUFFER_SELECTED;
- req->kbuf = NULL;
+ req->buf_index = buf->bgid;
- if (issue_flags & IO_URING_F_UNLOCKED)
- mutex_unlock(&ctx->uring_lock);
+ io_ring_submit_unlock(ctx, issue_flags);
}
static bool io_match_task(struct io_kiocb *head, struct task_struct *task,
@@ -1469,7 +1791,12 @@ static inline void req_set_fail(struct io_kiocb *req)
static inline void req_fail_link_node(struct io_kiocb *req, int res)
{
req_set_fail(req);
- req->result = res;
+ req->cqe.res = res;
+}
+
+static inline void io_req_add_to_cache(struct io_kiocb *req, struct io_ring_ctx *ctx)
+{
+ wq_stack_add_head(&req->comp_list, &ctx->submit_state.free_list);
}
static __cold void io_ring_ctx_ref_free(struct percpu_ref *ref)
@@ -1506,12 +1833,14 @@ static __cold void io_fallback_req_func(struct work_struct *work)
static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
{
struct io_ring_ctx *ctx;
- int i, hash_bits;
+ int hash_bits;
ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
if (!ctx)
return NULL;
+ xa_init(&ctx->io_bl_xa);
+
/*
* Use 5 bits less than the max cq entries, that should give us around
* 32 entries per hash list if totally full and uniformly spread.
@@ -1533,13 +1862,6 @@ static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
/* set invalid range, so io_import_fixed() fails meeting it */
ctx->dummy_ubuf->ubuf = -1UL;
- ctx->io_buffers = kcalloc(1U << IO_BUFFERS_HASH_BITS,
- sizeof(struct list_head), GFP_KERNEL);
- if (!ctx->io_buffers)
- goto err;
- for (i = 0; i < (1U << IO_BUFFERS_HASH_BITS); i++)
- INIT_LIST_HEAD(&ctx->io_buffers[i]);
-
if (percpu_ref_init(&ctx->refs, io_ring_ctx_ref_free,
PERCPU_REF_ALLOW_REINIT, GFP_KERNEL))
goto err;
@@ -1575,7 +1897,8 @@ static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
err:
kfree(ctx->dummy_ubuf);
kfree(ctx->cancel_hash);
- kfree(ctx->io_buffers);
+ kfree(ctx->io_bl);
+ xa_destroy(&ctx->io_bl_xa);
kfree(ctx);
return NULL;
}
@@ -1599,10 +1922,6 @@ static bool req_need_defer(struct io_kiocb *req, u32 seq)
return false;
}
-#define FFS_NOWAIT 0x1UL
-#define FFS_ISREG 0x2UL
-#define FFS_MASK ~(FFS_NOWAIT|FFS_ISREG)
-
static inline bool io_req_ffs_set(struct io_kiocb *req)
{
return req->flags & REQ_F_FIXED_FILE;
@@ -1629,6 +1948,17 @@ static inline struct io_kiocb *io_prep_linked_timeout(struct io_kiocb *req)
return __io_prep_linked_timeout(req);
}
+static noinline void __io_arm_ltimeout(struct io_kiocb *req)
+{
+ io_queue_linked_timeout(__io_prep_linked_timeout(req));
+}
+
+static inline void io_arm_ltimeout(struct io_kiocb *req)
+{
+ if (unlikely(req->flags & REQ_F_ARM_LTIMEOUT))
+ __io_arm_ltimeout(req);
+}
+
static void io_prep_async_work(struct io_kiocb *req)
{
const struct io_op_def *def = &io_op_defs[req->opcode];
@@ -1641,6 +1971,7 @@ static void io_prep_async_work(struct io_kiocb *req)
req->work.list.next = NULL;
req->work.flags = 0;
+ req->work.cancel_seq = atomic_read(&ctx->cancel_seq);
if (req->flags & REQ_F_FORCE_ASYNC)
req->work.flags |= IO_WQ_WORK_CONCURRENT;
@@ -1672,17 +2003,15 @@ static void io_prep_async_link(struct io_kiocb *req)
static inline void io_req_add_compl_list(struct io_kiocb *req)
{
- struct io_ring_ctx *ctx = req->ctx;
- struct io_submit_state *state = &ctx->submit_state;
+ struct io_submit_state *state = &req->ctx->submit_state;
if (!(req->flags & REQ_F_CQE_SKIP))
- ctx->submit_state.flush_cqes = true;
+ state->flush_cqes = true;
wq_list_add_tail(&req->comp_list, &state->compl_reqs);
}
-static void io_queue_async_work(struct io_kiocb *req, bool *dont_use)
+static void io_queue_iowq(struct io_kiocb *req, bool *dont_use)
{
- struct io_ring_ctx *ctx = req->ctx;
struct io_kiocb *link = io_prep_linked_timeout(req);
struct io_uring_task *tctx = req->task->io_uring;
@@ -1702,8 +2031,9 @@ static void io_queue_async_work(struct io_kiocb *req, bool *dont_use)
if (WARN_ON_ONCE(!same_thread_group(req->task, current)))
req->work.flags |= IO_WQ_WORK_CANCEL;
- trace_io_uring_queue_async_work(ctx, req, req->user_data, req->opcode, req->flags,
- &req->work, io_wq_is_hashed(&req->work));
+ trace_io_uring_queue_async_work(req->ctx, req, req->cqe.user_data,
+ req->opcode, req->flags, &req->work,
+ io_wq_is_hashed(&req->work));
io_wq_enqueue(tctx->io_wq, &req->work);
if (link)
io_queue_linked_timeout(link);
@@ -1721,8 +2051,7 @@ static void io_kill_timeout(struct io_kiocb *req, int status)
atomic_set(&req->ctx->cq_timeouts,
atomic_read(&req->ctx->cq_timeouts) + 1);
list_del_init(&req->timeout.list);
- io_fill_cqe_req(req, status, 0);
- io_put_req_deferred(req);
+ io_req_tw_post_queue(req, status, 0);
}
}
@@ -1804,21 +2133,53 @@ static inline unsigned int __io_cqring_events(struct io_ring_ctx *ctx)
return ctx->cached_cq_tail - READ_ONCE(ctx->rings->cq.head);
}
-static inline struct io_uring_cqe *io_get_cqe(struct io_ring_ctx *ctx)
+/*
+ * writes to the cq entry need to come after reading head; the
+ * control dependency is enough as we're using WRITE_ONCE to
+ * fill the cq entry
+ */
+static noinline struct io_uring_cqe *__io_get_cqe(struct io_ring_ctx *ctx)
{
struct io_rings *rings = ctx->rings;
- unsigned tail, mask = ctx->cq_entries - 1;
-
- /*
- * writes to the cq entry need to come after reading head; the
- * control dependency is enough as we're using WRITE_ONCE to
- * fill the cq entry
- */
- if (__io_cqring_events(ctx) == ctx->cq_entries)
+ unsigned int off = ctx->cached_cq_tail & (ctx->cq_entries - 1);
+ unsigned int shift = 0;
+ unsigned int free, queued, len;
+
+ if (ctx->flags & IORING_SETUP_CQE32)
+ shift = 1;
+
+ /* userspace may cheat modifying the tail, be safe and do min */
+ queued = min(__io_cqring_events(ctx), ctx->cq_entries);
+ free = ctx->cq_entries - queued;
+ /* we need a contiguous range, limit based on the current array offset */
+ len = min(free, ctx->cq_entries - off);
+ if (!len)
return NULL;
- tail = ctx->cached_cq_tail++;
- return &rings->cqes[tail & mask];
+ ctx->cached_cq_tail++;
+ ctx->cqe_cached = &rings->cqes[off];
+ ctx->cqe_sentinel = ctx->cqe_cached + len;
+ ctx->cqe_cached++;
+ return &rings->cqes[off << shift];
+}
+
+static inline struct io_uring_cqe *io_get_cqe(struct io_ring_ctx *ctx)
+{
+ if (likely(ctx->cqe_cached < ctx->cqe_sentinel)) {
+ struct io_uring_cqe *cqe = ctx->cqe_cached;
+
+ if (ctx->flags & IORING_SETUP_CQE32) {
+ unsigned int off = ctx->cqe_cached - ctx->rings->cqes;
+
+ cqe += off;
+ }
+
+ ctx->cached_cq_tail++;
+ ctx->cqe_cached++;
+ return cqe;
+ }
+
+ return __io_get_cqe(ctx);
}
static void io_eventfd_signal(struct io_ring_ctx *ctx)
@@ -1889,10 +2250,14 @@ static void io_cqring_ev_posted_iopoll(struct io_ring_ctx *ctx)
static bool __io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force)
{
bool all_flushed, posted;
+ size_t cqe_size = sizeof(struct io_uring_cqe);
if (!force && __io_cqring_events(ctx) == ctx->cq_entries)
return false;
+ if (ctx->flags & IORING_SETUP_CQE32)
+ cqe_size <<= 1;
+
posted = false;
spin_lock(&ctx->completion_lock);
while (!list_empty(&ctx->cq_overflow_list)) {
@@ -1904,7 +2269,7 @@ static bool __io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force)
ocqe = list_first_entry(&ctx->cq_overflow_list,
struct io_overflow_cqe, list);
if (cqe)
- memcpy(cqe, &ocqe->cqe, sizeof(*cqe));
+ memcpy(cqe, &ocqe->cqe, cqe_size);
else
io_account_cq_overflow(ctx);
@@ -1915,13 +2280,11 @@ static bool __io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force)
all_flushed = list_empty(&ctx->cq_overflow_list);
if (all_flushed) {
- clear_bit(0, &ctx->check_cq_overflow);
- WRITE_ONCE(ctx->rings->sq_flags,
- ctx->rings->sq_flags & ~IORING_SQ_CQ_OVERFLOW);
+ clear_bit(IO_CHECK_CQ_OVERFLOW_BIT, &ctx->check_cq);
+ atomic_andnot(IORING_SQ_CQ_OVERFLOW, &ctx->rings->sq_flags);
}
- if (posted)
- io_commit_cqring(ctx);
+ io_commit_cqring(ctx);
spin_unlock(&ctx->completion_lock);
if (posted)
io_cqring_ev_posted(ctx);
@@ -1932,7 +2295,7 @@ static bool io_cqring_overflow_flush(struct io_ring_ctx *ctx)
{
bool ret = true;
- if (test_bit(0, &ctx->check_cq_overflow)) {
+ if (test_bit(IO_CHECK_CQ_OVERFLOW_BIT, &ctx->check_cq)) {
/* iopoll syncs against uring_lock, not completion_lock */
if (ctx->flags & IORING_SETUP_IOPOLL)
mutex_lock(&ctx->uring_lock);
@@ -1944,19 +2307,23 @@ static bool io_cqring_overflow_flush(struct io_ring_ctx *ctx)
return ret;
}
-/* must to be called somewhat shortly after putting a request */
-static inline void io_put_task(struct task_struct *task, int nr)
+static void __io_put_task(struct task_struct *task, int nr)
{
struct io_uring_task *tctx = task->io_uring;
- if (likely(task == current)) {
- tctx->cached_refs += nr;
- } else {
- percpu_counter_sub(&tctx->inflight, nr);
- if (unlikely(atomic_read(&tctx->in_idle)))
- wake_up(&tctx->wait);
- put_task_struct_many(task, nr);
- }
+ percpu_counter_sub(&tctx->inflight, nr);
+ if (unlikely(atomic_read(&tctx->in_idle)))
+ wake_up(&tctx->wait);
+ put_task_struct_many(task, nr);
+}
+
+/* must to be called somewhat shortly after putting a request */
+static inline void io_put_task(struct task_struct *task, int nr)
+{
+ if (likely(task == current))
+ task->io_uring->cached_refs += nr;
+ else
+ __io_put_task(task, nr);
}
static void io_task_refs_refill(struct io_uring_task *tctx)
@@ -1990,11 +2357,18 @@ static __cold void io_uring_drop_tctx_refs(struct task_struct *task)
}
static bool io_cqring_event_overflow(struct io_ring_ctx *ctx, u64 user_data,
- s32 res, u32 cflags)
+ s32 res, u32 cflags, u64 extra1,
+ u64 extra2)
{
struct io_overflow_cqe *ocqe;
+ size_t ocq_size = sizeof(struct io_overflow_cqe);
+ bool is_cqe32 = (ctx->flags & IORING_SETUP_CQE32);
+
+ if (is_cqe32)
+ ocq_size += sizeof(struct io_uring_cqe);
- ocqe = kmalloc(sizeof(*ocqe), GFP_ATOMIC | __GFP_ACCOUNT);
+ ocqe = kmalloc(ocq_size, GFP_ATOMIC | __GFP_ACCOUNT);
+ trace_io_uring_cqe_overflow(ctx, user_data, res, cflags, ocqe);
if (!ocqe) {
/*
* If we're in ring overflow flush mode, or in task cancel mode,
@@ -2002,17 +2376,21 @@ static bool io_cqring_event_overflow(struct io_ring_ctx *ctx, u64 user_data,
* on the floor.
*/
io_account_cq_overflow(ctx);
+ set_bit(IO_CHECK_CQ_DROPPED_BIT, &ctx->check_cq);
return false;
}
if (list_empty(&ctx->cq_overflow_list)) {
- set_bit(0, &ctx->check_cq_overflow);
- WRITE_ONCE(ctx->rings->sq_flags,
- ctx->rings->sq_flags | IORING_SQ_CQ_OVERFLOW);
+ set_bit(IO_CHECK_CQ_OVERFLOW_BIT, &ctx->check_cq);
+ atomic_or(IORING_SQ_CQ_OVERFLOW, &ctx->rings->sq_flags);
}
ocqe->cqe.user_data = user_data;
ocqe->cqe.res = res;
ocqe->cqe.flags = cflags;
+ if (is_cqe32) {
+ ocqe->cqe.big_cqe[0] = extra1;
+ ocqe->cqe.big_cqe[1] = extra2;
+ }
list_add_tail(&ocqe->list, &ctx->cq_overflow_list);
return true;
}
@@ -2034,42 +2412,114 @@ static inline bool __io_fill_cqe(struct io_ring_ctx *ctx, u64 user_data,
WRITE_ONCE(cqe->flags, cflags);
return true;
}
- return io_cqring_event_overflow(ctx, user_data, res, cflags);
+ return io_cqring_event_overflow(ctx, user_data, res, cflags, 0, 0);
+}
+
+static inline bool __io_fill_cqe_req_filled(struct io_ring_ctx *ctx,
+ struct io_kiocb *req)
+{
+ struct io_uring_cqe *cqe;
+
+ trace_io_uring_complete(req->ctx, req, req->cqe.user_data,
+ req->cqe.res, req->cqe.flags, 0, 0);
+
+ /*
+ * If we can't get a cq entry, userspace overflowed the
+ * submission (by quite a lot). Increment the overflow count in
+ * the ring.
+ */
+ cqe = io_get_cqe(ctx);
+ if (likely(cqe)) {
+ memcpy(cqe, &req->cqe, sizeof(*cqe));
+ return true;
+ }
+ return io_cqring_event_overflow(ctx, req->cqe.user_data,
+ req->cqe.res, req->cqe.flags, 0, 0);
+}
+
+static inline bool __io_fill_cqe32_req_filled(struct io_ring_ctx *ctx,
+ struct io_kiocb *req)
+{
+ struct io_uring_cqe *cqe;
+ u64 extra1 = req->extra1;
+ u64 extra2 = req->extra2;
+
+ trace_io_uring_complete(req->ctx, req, req->cqe.user_data,
+ req->cqe.res, req->cqe.flags, extra1, extra2);
+
+ /*
+ * If we can't get a cq entry, userspace overflowed the
+ * submission (by quite a lot). Increment the overflow count in
+ * the ring.
+ */
+ cqe = io_get_cqe(ctx);
+ if (likely(cqe)) {
+ memcpy(cqe, &req->cqe, sizeof(struct io_uring_cqe));
+ cqe->big_cqe[0] = extra1;
+ cqe->big_cqe[1] = extra2;
+ return true;
+ }
+
+ return io_cqring_event_overflow(ctx, req->cqe.user_data, req->cqe.res,
+ req->cqe.flags, extra1, extra2);
}
static inline bool __io_fill_cqe_req(struct io_kiocb *req, s32 res, u32 cflags)
{
- trace_io_uring_complete(req->ctx, req, req->user_data, res, cflags);
- return __io_fill_cqe(req->ctx, req->user_data, res, cflags);
+ trace_io_uring_complete(req->ctx, req, req->cqe.user_data, res, cflags, 0, 0);
+ return __io_fill_cqe(req->ctx, req->cqe.user_data, res, cflags);
}
-static noinline void io_fill_cqe_req(struct io_kiocb *req, s32 res, u32 cflags)
+static inline void __io_fill_cqe32_req(struct io_kiocb *req, s32 res, u32 cflags,
+ u64 extra1, u64 extra2)
{
- if (!(req->flags & REQ_F_CQE_SKIP))
- __io_fill_cqe_req(req, res, cflags);
+ struct io_ring_ctx *ctx = req->ctx;
+ struct io_uring_cqe *cqe;
+
+ if (WARN_ON_ONCE(!(ctx->flags & IORING_SETUP_CQE32)))
+ return;
+ if (req->flags & REQ_F_CQE_SKIP)
+ return;
+
+ trace_io_uring_complete(ctx, req, req->cqe.user_data, res, cflags,
+ extra1, extra2);
+
+ /*
+ * If we can't get a cq entry, userspace overflowed the
+ * submission (by quite a lot). Increment the overflow count in
+ * the ring.
+ */
+ cqe = io_get_cqe(ctx);
+ if (likely(cqe)) {
+ WRITE_ONCE(cqe->user_data, req->cqe.user_data);
+ WRITE_ONCE(cqe->res, res);
+ WRITE_ONCE(cqe->flags, cflags);
+ WRITE_ONCE(cqe->big_cqe[0], extra1);
+ WRITE_ONCE(cqe->big_cqe[1], extra2);
+ return;
+ }
+
+ io_cqring_event_overflow(ctx, req->cqe.user_data, res, cflags, extra1, extra2);
}
static noinline bool io_fill_cqe_aux(struct io_ring_ctx *ctx, u64 user_data,
s32 res, u32 cflags)
{
ctx->cq_extra++;
- trace_io_uring_complete(ctx, NULL, user_data, res, cflags);
+ trace_io_uring_complete(ctx, NULL, user_data, res, cflags, 0, 0);
return __io_fill_cqe(ctx, user_data, res, cflags);
}
-static void __io_req_complete_post(struct io_kiocb *req, s32 res,
- u32 cflags)
+static void __io_req_complete_put(struct io_kiocb *req)
{
- struct io_ring_ctx *ctx = req->ctx;
-
- if (!(req->flags & REQ_F_CQE_SKIP))
- __io_fill_cqe_req(req, res, cflags);
/*
* If we're the last reference to this request, add to our locked
* free_list cache.
*/
if (req_ref_put_and_test(req)) {
- if (req->flags & (REQ_F_LINK | REQ_F_HARDLINK)) {
+ struct io_ring_ctx *ctx = req->ctx;
+
+ if (req->flags & IO_REQ_LINK_FLAGS) {
if (req->flags & IO_DISARM_MASK)
io_disarm_next(req);
if (req->link) {
@@ -2077,7 +2527,7 @@ static void __io_req_complete_post(struct io_kiocb *req, s32 res,
req->link = NULL;
}
}
- io_req_put_rsrc(req, ctx);
+ io_req_put_rsrc(req);
/*
* Selected buffer deallocation in io_clean_op() assumes that
* we don't hold ->completion_lock. Clean them here to avoid
@@ -2091,8 +2541,23 @@ static void __io_req_complete_post(struct io_kiocb *req, s32 res,
}
}
-static void io_req_complete_post(struct io_kiocb *req, s32 res,
- u32 cflags)
+static void __io_req_complete_post(struct io_kiocb *req, s32 res,
+ u32 cflags)
+{
+ if (!(req->flags & REQ_F_CQE_SKIP))
+ __io_fill_cqe_req(req, res, cflags);
+ __io_req_complete_put(req);
+}
+
+static void __io_req_complete_post32(struct io_kiocb *req, s32 res,
+ u32 cflags, u64 extra1, u64 extra2)
+{
+ if (!(req->flags & REQ_F_CQE_SKIP))
+ __io_fill_cqe32_req(req, res, cflags, extra1, extra2);
+ __io_req_complete_put(req);
+}
+
+static void io_req_complete_post(struct io_kiocb *req, s32 res, u32 cflags)
{
struct io_ring_ctx *ctx = req->ctx;
@@ -2103,11 +2568,23 @@ static void io_req_complete_post(struct io_kiocb *req, s32 res,
io_cqring_ev_posted(ctx);
}
+static void io_req_complete_post32(struct io_kiocb *req, s32 res,
+ u32 cflags, u64 extra1, u64 extra2)
+{
+ struct io_ring_ctx *ctx = req->ctx;
+
+ spin_lock(&ctx->completion_lock);
+ __io_req_complete_post32(req, res, cflags, extra1, extra2);
+ io_commit_cqring(ctx);
+ spin_unlock(&ctx->completion_lock);
+ io_cqring_ev_posted(ctx);
+}
+
static inline void io_req_complete_state(struct io_kiocb *req, s32 res,
u32 cflags)
{
- req->result = res;
- req->cflags = cflags;
+ req->cqe.res = res;
+ req->cqe.flags = cflags;
req->flags |= REQ_F_COMPLETE_INLINE;
}
@@ -2120,8 +2597,23 @@ static inline void __io_req_complete(struct io_kiocb *req, unsigned issue_flags,
io_req_complete_post(req, res, cflags);
}
+static inline void __io_req_complete32(struct io_kiocb *req,
+ unsigned int issue_flags, s32 res,
+ u32 cflags, u64 extra1, u64 extra2)
+{
+ if (issue_flags & IO_URING_F_COMPLETE_DEFER) {
+ io_req_complete_state(req, res, cflags);
+ req->extra1 = extra1;
+ req->extra2 = extra2;
+ } else {
+ io_req_complete_post32(req, res, cflags, extra1, extra2);
+ }
+}
+
static inline void io_req_complete(struct io_kiocb *req, s32 res)
{
+ if (res < 0)
+ req_set_fail(req);
__io_req_complete(req, 0, res, 0);
}
@@ -2131,17 +2623,6 @@ static void io_req_complete_failed(struct io_kiocb *req, s32 res)
io_req_complete_post(req, res, io_put_kbuf(req, IO_URING_F_UNLOCKED));
}
-static void io_req_complete_fail_submit(struct io_kiocb *req)
-{
- /*
- * We don't submit, fail them all, for that replace hardlinks with
- * normal links. Extra REQ_F_LINK is tolerated.
- */
- req->flags &= ~REQ_F_HARDLINK;
- req->flags |= REQ_F_LINK;
- io_req_complete_failed(req, req->result);
-}
-
/*
* Don't initialise the fields below on every allocation, but do that in
* advance and keep them valid across allocations.
@@ -2152,7 +2633,7 @@ static void io_preinit_req(struct io_kiocb *req, struct io_ring_ctx *ctx)
req->link = NULL;
req->async_data = NULL;
/* not necessary, but safer to zero */
- req->result = 0;
+ req->cqe.res = 0;
}
static void io_flush_cached_locked_reqs(struct io_ring_ctx *ctx,
@@ -2164,19 +2645,9 @@ static void io_flush_cached_locked_reqs(struct io_ring_ctx *ctx,
spin_unlock(&ctx->completion_lock);
}
-/* Returns true IFF there are requests in the cache */
-static bool io_flush_cached_reqs(struct io_ring_ctx *ctx)
+static inline bool io_req_cache_empty(struct io_ring_ctx *ctx)
{
- struct io_submit_state *state = &ctx->submit_state;
-
- /*
- * If we have more than a batch's worth of requests in our IRQ side
- * locked cache, grab the lock and move them over to our submission
- * side cache.
- */
- if (READ_ONCE(ctx->locked_free_nr) > IO_COMPL_BATCH)
- io_flush_cached_locked_reqs(ctx, state);
- return !!state->free_list.next;
+ return !ctx->submit_state.free_list.next;
}
/*
@@ -2188,14 +2659,20 @@ static bool io_flush_cached_reqs(struct io_ring_ctx *ctx)
static __cold bool __io_alloc_req_refill(struct io_ring_ctx *ctx)
__must_hold(&ctx->uring_lock)
{
- struct io_submit_state *state = &ctx->submit_state;
gfp_t gfp = GFP_KERNEL | __GFP_NOWARN;
void *reqs[IO_REQ_ALLOC_BATCH];
- struct io_kiocb *req;
int ret, i;
- if (likely(state->free_list.next || io_flush_cached_reqs(ctx)))
- return true;
+ /*
+ * If we have more than a batch's worth of requests in our IRQ side
+ * locked cache, grab the lock and move them over to our submission
+ * side cache.
+ */
+ if (data_race(ctx->locked_free_nr) > IO_COMPL_BATCH) {
+ io_flush_cached_locked_reqs(ctx, &ctx->submit_state);
+ if (!io_req_cache_empty(ctx))
+ return true;
+ }
ret = kmem_cache_alloc_bulk(req_cachep, gfp, ARRAY_SIZE(reqs), reqs);
@@ -2212,17 +2689,17 @@ static __cold bool __io_alloc_req_refill(struct io_ring_ctx *ctx)
percpu_ref_get_many(&ctx->refs, ret);
for (i = 0; i < ret; i++) {
- req = reqs[i];
+ struct io_kiocb *req = reqs[i];
io_preinit_req(req, ctx);
- wq_stack_add_head(&req->comp_list, &state->free_list);
+ io_req_add_to_cache(req, ctx);
}
return true;
}
static inline bool io_alloc_req_refill(struct io_ring_ctx *ctx)
{
- if (unlikely(!ctx->submit_state.free_list.next))
+ if (unlikely(io_req_cache_empty(ctx)))
return __io_alloc_req_refill(ctx);
return true;
}
@@ -2251,11 +2728,11 @@ static inline void io_dismantle_req(struct io_kiocb *req)
io_put_file(req->file);
}
-static __cold void __io_free_req(struct io_kiocb *req)
+static __cold void io_free_req(struct io_kiocb *req)
{
struct io_ring_ctx *ctx = req->ctx;
- io_req_put_rsrc(req, ctx);
+ io_req_put_rsrc(req);
io_dismantle_req(req);
io_put_task(req->task, 1);
@@ -2273,7 +2750,7 @@ static inline void io_remove_next_linked(struct io_kiocb *req)
nxt->link = NULL;
}
-static bool io_kill_linked_timeout(struct io_kiocb *req)
+static struct io_kiocb *io_disarm_linked_timeout(struct io_kiocb *req)
__must_hold(&req->ctx->completion_lock)
__must_hold(&req->ctx->timeout_lock)
{
@@ -2286,13 +2763,10 @@ static bool io_kill_linked_timeout(struct io_kiocb *req)
link->timeout.head = NULL;
if (hrtimer_try_to_cancel(&io->timer) != -1) {
list_del(&link->timeout.list);
- /* leave REQ_F_CQE_SKIP to io_fill_cqe_req */
- io_fill_cqe_req(link, -ECANCELED, 0);
- io_put_req_deferred(link);
- return true;
+ return link;
}
}
- return false;
+ return NULL;
}
static void io_fail_links(struct io_kiocb *req)
@@ -2306,19 +2780,19 @@ static void io_fail_links(struct io_kiocb *req)
long res = -ECANCELED;
if (link->flags & REQ_F_FAIL)
- res = link->result;
+ res = link->cqe.res;
nxt = link->link;
link->link = NULL;
- trace_io_uring_fail_link(req->ctx, req, req->user_data,
+ trace_io_uring_fail_link(req->ctx, req, req->cqe.user_data,
req->opcode, link);
- if (!ignore_cqes) {
+ if (ignore_cqes)
+ link->flags |= REQ_F_CQE_SKIP;
+ else
link->flags &= ~REQ_F_CQE_SKIP;
- io_fill_cqe_req(link, res, 0);
- }
- io_put_req_deferred(link);
+ __io_req_complete_post(link, res, 0);
link = nxt;
}
}
@@ -2326,25 +2800,27 @@ static void io_fail_links(struct io_kiocb *req)
static bool io_disarm_next(struct io_kiocb *req)
__must_hold(&req->ctx->completion_lock)
{
+ struct io_kiocb *link = NULL;
bool posted = false;
if (req->flags & REQ_F_ARM_LTIMEOUT) {
- struct io_kiocb *link = req->link;
-
+ link = req->link;
req->flags &= ~REQ_F_ARM_LTIMEOUT;
if (link && link->opcode == IORING_OP_LINK_TIMEOUT) {
io_remove_next_linked(req);
- /* leave REQ_F_CQE_SKIP to io_fill_cqe_req */
- io_fill_cqe_req(link, -ECANCELED, 0);
- io_put_req_deferred(link);
+ io_req_tw_post_queue(link, -ECANCELED, 0);
posted = true;
}
} else if (req->flags & REQ_F_LINK_TIMEOUT) {
struct io_ring_ctx *ctx = req->ctx;
spin_lock_irq(&ctx->timeout_lock);
- posted = io_kill_linked_timeout(req);
+ link = io_disarm_linked_timeout(req);
spin_unlock_irq(&ctx->timeout_lock);
+ if (link) {
+ posted = true;
+ io_req_tw_post_queue(link, -ECANCELED, 0);
+ }
}
if (unlikely((req->flags & REQ_F_FAIL) &&
!(req->flags & REQ_F_HARDLINK))) {
@@ -2361,8 +2837,7 @@ static void __io_req_find_next_prep(struct io_kiocb *req)
spin_lock(&ctx->completion_lock);
posted = io_disarm_next(req);
- if (posted)
- io_commit_cqring(ctx);
+ io_commit_cqring(ctx);
spin_unlock(&ctx->completion_lock);
if (posted)
io_cqring_ev_posted(ctx);
@@ -2372,8 +2847,6 @@ static inline struct io_kiocb *io_req_find_next(struct io_kiocb *req)
{
struct io_kiocb *nxt;
- if (likely(!(req->flags & (REQ_F_LINK|REQ_F_HARDLINK))))
- return NULL;
/*
* If LINK is set, we have dependent requests in this chain. If we
* didn't fail this request, queue the first one up, moving any other
@@ -2391,6 +2864,8 @@ static void ctx_flush_and_put(struct io_ring_ctx *ctx, bool *locked)
{
if (!ctx)
return;
+ if (ctx->flags & IORING_SETUP_TASKRUN_FLAG)
+ atomic_andnot(IORING_SQ_TASKRUN, &ctx->rings->sq_flags);
if (*locked) {
io_submit_flush_completions(ctx);
mutex_unlock(&ctx->uring_lock);
@@ -2434,7 +2909,7 @@ static void handle_prev_tw_list(struct io_wq_work_node *node,
if (likely(*uring_locked))
req->io_task_work.func(req, uring_locked);
else
- __io_req_complete_post(req, req->result,
+ __io_req_complete_post(req, req->cqe.res,
io_put_kbuf_comp(req));
node = next;
} while (node);
@@ -2475,15 +2950,11 @@ static void tctx_task_work(struct callback_head *cb)
while (1) {
struct io_wq_work_node *node1, *node2;
- if (!tctx->task_list.first &&
- !tctx->prior_task_list.first && uring_locked)
- io_submit_flush_completions(ctx);
-
spin_lock_irq(&tctx->task_lock);
- node1 = tctx->prior_task_list.first;
+ node1 = tctx->prio_task_list.first;
node2 = tctx->task_list.first;
INIT_WQ_LIST(&tctx->task_list);
- INIT_WQ_LIST(&tctx->prior_task_list);
+ INIT_WQ_LIST(&tctx->prio_task_list);
if (!node2 && !node1)
tctx->task_running = false;
spin_unlock_irq(&tctx->task_lock);
@@ -2492,10 +2963,13 @@ static void tctx_task_work(struct callback_head *cb)
if (node1)
handle_prev_tw_list(node1, &ctx, &uring_locked);
-
if (node2)
handle_tw_list(node2, &ctx, &uring_locked);
cond_resched();
+
+ if (data_race(!tctx->task_list.first) &&
+ data_race(!tctx->prio_task_list.first) && uring_locked)
+ io_submit_flush_completions(ctx);
}
ctx_flush_and_put(ctx, &uring_locked);
@@ -2505,24 +2979,19 @@ static void tctx_task_work(struct callback_head *cb)
io_uring_drop_tctx_refs(current);
}
-static void io_req_task_work_add(struct io_kiocb *req, bool priority)
+static void __io_req_task_work_add(struct io_kiocb *req,
+ struct io_uring_task *tctx,
+ struct io_wq_work_list *list)
{
- struct task_struct *tsk = req->task;
- struct io_uring_task *tctx = tsk->io_uring;
- enum task_work_notify_mode notify;
+ struct io_ring_ctx *ctx = req->ctx;
struct io_wq_work_node *node;
unsigned long flags;
bool running;
- WARN_ON_ONCE(!tctx);
-
io_drop_inflight_file(req);
spin_lock_irqsave(&tctx->task_lock, flags);
- if (priority)
- wq_list_add_tail(&req->io_task_work.node, &tctx->prior_task_list);
- else
- wq_list_add_tail(&req->io_task_work.node, &tctx->task_list);
+ wq_list_add_tail(&req->io_task_work.node, list);
running = tctx->task_running;
if (!running)
tctx->task_running = true;
@@ -2532,22 +3001,15 @@ static void io_req_task_work_add(struct io_kiocb *req, bool priority)
if (running)
return;
- /*
- * SQPOLL kernel thread doesn't need notification, just a wakeup. For
- * all other cases, use TWA_SIGNAL unconditionally to ensure we're
- * processing task_work. There's no reliable way to tell if TWA_RESUME
- * will do the job.
- */
- notify = (req->ctx->flags & IORING_SETUP_SQPOLL) ? TWA_NONE : TWA_SIGNAL;
- if (likely(!task_work_add(tsk, &tctx->task_work, notify))) {
- if (notify == TWA_NONE)
- wake_up_process(tsk);
+ if (ctx->flags & IORING_SETUP_TASKRUN_FLAG)
+ atomic_or(IORING_SQ_TASKRUN, &ctx->rings->sq_flags);
+
+ if (likely(!task_work_add(req->task, &tctx->task_work, ctx->notify_method)))
return;
- }
spin_lock_irqsave(&tctx->task_lock, flags);
tctx->task_running = false;
- node = wq_list_merge(&tctx->prior_task_list, &tctx->task_list);
+ node = wq_list_merge(&tctx->prio_task_list, &tctx->task_list);
spin_unlock_irqrestore(&tctx->task_lock, flags);
while (node) {
@@ -2559,47 +3021,73 @@ static void io_req_task_work_add(struct io_kiocb *req, bool priority)
}
}
-static void io_req_task_cancel(struct io_kiocb *req, bool *locked)
+static void io_req_task_work_add(struct io_kiocb *req)
{
- struct io_ring_ctx *ctx = req->ctx;
+ struct io_uring_task *tctx = req->task->io_uring;
+
+ __io_req_task_work_add(req, tctx, &tctx->task_list);
+}
+
+static void io_req_task_prio_work_add(struct io_kiocb *req)
+{
+ struct io_uring_task *tctx = req->task->io_uring;
+
+ if (req->ctx->flags & IORING_SETUP_SQPOLL)
+ __io_req_task_work_add(req, tctx, &tctx->prio_task_list);
+ else
+ __io_req_task_work_add(req, tctx, &tctx->task_list);
+}
+
+static void io_req_tw_post(struct io_kiocb *req, bool *locked)
+{
+ io_req_complete_post(req, req->cqe.res, req->cqe.flags);
+}
+
+static void io_req_tw_post_queue(struct io_kiocb *req, s32 res, u32 cflags)
+{
+ req->cqe.res = res;
+ req->cqe.flags = cflags;
+ req->io_task_work.func = io_req_tw_post;
+ io_req_task_work_add(req);
+}
+static void io_req_task_cancel(struct io_kiocb *req, bool *locked)
+{
/* not needed for normal modes, but SQPOLL depends on it */
- io_tw_lock(ctx, locked);
- io_req_complete_failed(req, req->result);
+ io_tw_lock(req->ctx, locked);
+ io_req_complete_failed(req, req->cqe.res);
}
static void io_req_task_submit(struct io_kiocb *req, bool *locked)
{
- struct io_ring_ctx *ctx = req->ctx;
-
- io_tw_lock(ctx, locked);
+ io_tw_lock(req->ctx, locked);
/* req->task == current here, checking PF_EXITING is safe */
if (likely(!(req->task->flags & PF_EXITING)))
- __io_queue_sqe(req);
+ io_queue_sqe(req);
else
io_req_complete_failed(req, -EFAULT);
}
static void io_req_task_queue_fail(struct io_kiocb *req, int ret)
{
- req->result = ret;
+ req->cqe.res = ret;
req->io_task_work.func = io_req_task_cancel;
- io_req_task_work_add(req, false);
+ io_req_task_work_add(req);
}
static void io_req_task_queue(struct io_kiocb *req)
{
req->io_task_work.func = io_req_task_submit;
- io_req_task_work_add(req, false);
+ io_req_task_work_add(req);
}
static void io_req_task_queue_reissue(struct io_kiocb *req)
{
- req->io_task_work.func = io_queue_async_work;
- io_req_task_work_add(req, false);
+ req->io_task_work.func = io_queue_iowq;
+ io_req_task_work_add(req);
}
-static inline void io_queue_next(struct io_kiocb *req)
+static void io_queue_next(struct io_kiocb *req)
{
struct io_kiocb *nxt = io_req_find_next(req);
@@ -2607,17 +3095,6 @@ static inline void io_queue_next(struct io_kiocb *req)
io_req_task_queue(nxt);
}
-static void io_free_req(struct io_kiocb *req)
-{
- io_queue_next(req);
- __io_free_req(req);
-}
-
-static void io_free_req_work(struct io_kiocb *req, bool *locked)
-{
- io_free_req(req);
-}
-
static void io_free_batch_list(struct io_ring_ctx *ctx,
struct io_wq_work_node *node)
__must_hold(&ctx->uring_lock)
@@ -2629,15 +3106,30 @@ static void io_free_batch_list(struct io_ring_ctx *ctx,
struct io_kiocb *req = container_of(node, struct io_kiocb,
comp_list);
- if (unlikely(req->flags & REQ_F_REFCOUNT)) {
- node = req->comp_list.next;
- if (!req_ref_put_and_test(req))
- continue;
+ if (unlikely(req->flags & IO_REQ_CLEAN_SLOW_FLAGS)) {
+ if (req->flags & REQ_F_REFCOUNT) {
+ node = req->comp_list.next;
+ if (!req_ref_put_and_test(req))
+ continue;
+ }
+ if ((req->flags & REQ_F_POLLED) && req->apoll) {
+ struct async_poll *apoll = req->apoll;
+
+ if (apoll->double_poll)
+ kfree(apoll->double_poll);
+ list_add(&apoll->poll.wait.entry,
+ &ctx->apoll_cache);
+ req->flags &= ~REQ_F_POLLED;
+ }
+ if (req->flags & IO_REQ_LINK_FLAGS)
+ io_queue_next(req);
+ if (unlikely(req->flags & IO_REQ_CLEAN_FLAGS))
+ io_clean_op(req);
}
+ if (!(req->flags & REQ_F_FIXED_FILE))
+ io_put_file(req->file);
io_req_put_rsrc_locked(req, ctx);
- io_queue_next(req);
- io_dismantle_req(req);
if (req->task != task) {
if (task)
@@ -2647,7 +3139,7 @@ static void io_free_batch_list(struct io_ring_ctx *ctx,
}
task_refs++;
node = req->comp_list.next;
- wq_stack_add_head(&req->comp_list, &ctx->submit_state.free_list);
+ io_req_add_to_cache(req, ctx);
} while (node);
if (task)
@@ -2666,16 +3158,11 @@ static void __io_submit_flush_completions(struct io_ring_ctx *ctx)
struct io_kiocb *req = container_of(node, struct io_kiocb,
comp_list);
- if (!(req->flags & REQ_F_CQE_SKIP))
- __io_fill_cqe_req(req, req->result, req->cflags);
- if ((req->flags & REQ_F_POLLED) && req->apoll) {
- struct async_poll *apoll = req->apoll;
-
- if (apoll->double_poll)
- kfree(apoll->double_poll);
- list_add(&apoll->poll.wait.entry,
- &ctx->apoll_cache);
- req->flags &= ~REQ_F_POLLED;
+ if (!(req->flags & REQ_F_CQE_SKIP)) {
+ if (!(ctx->flags & IORING_SETUP_CQE32))
+ __io_fill_cqe_req_filled(ctx, req);
+ else
+ __io_fill_cqe32_req_filled(ctx, req);
}
}
@@ -2698,23 +3185,18 @@ static inline struct io_kiocb *io_put_req_find_next(struct io_kiocb *req)
struct io_kiocb *nxt = NULL;
if (req_ref_put_and_test(req)) {
- nxt = io_req_find_next(req);
- __io_free_req(req);
+ if (unlikely(req->flags & IO_REQ_LINK_FLAGS))
+ nxt = io_req_find_next(req);
+ io_free_req(req);
}
return nxt;
}
static inline void io_put_req(struct io_kiocb *req)
{
- if (req_ref_put_and_test(req))
- io_free_req(req);
-}
-
-static inline void io_put_req_deferred(struct io_kiocb *req)
-{
if (req_ref_put_and_test(req)) {
- req->io_task_work.func = io_free_req_work;
- io_req_task_work_add(req, false);
+ io_queue_next(req);
+ io_free_req(req);
}
}
@@ -2797,11 +3279,10 @@ static int io_do_iopoll(struct io_ring_ctx *ctx, bool force_nonspin)
/* order with io_complete_rw_iopoll(), e.g. ->result updates */
if (!smp_load_acquire(&req->iopoll_completed))
break;
+ nr_events++;
if (unlikely(req->flags & REQ_F_CQE_SKIP))
continue;
-
- __io_fill_cqe_req(req, req->result, io_put_kbuf(req, 0));
- nr_events++;
+ __io_fill_cqe_req(req, req->cqe.res, io_put_kbuf(req, 0));
}
if (unlikely(!nr_events))
@@ -2847,22 +3328,26 @@ static int io_iopoll_check(struct io_ring_ctx *ctx, long min)
{
unsigned int nr_events = 0;
int ret = 0;
+ unsigned long check_cq;
/*
- * We disallow the app entering submit/complete with polling, but we
- * still need to lock the ring to prevent racing with polled issue
- * that got punted to a workqueue.
- */
- mutex_lock(&ctx->uring_lock);
- /*
* Don't enter poll loop if we already have events pending.
* If we do, we can potentially be spinning for commands that
* already triggered a CQE (eg in error).
*/
- if (test_bit(0, &ctx->check_cq_overflow))
+ check_cq = READ_ONCE(ctx->check_cq);
+ if (check_cq & BIT(IO_CHECK_CQ_OVERFLOW_BIT))
__io_cqring_overflow_flush(ctx, false);
if (io_cqring_events(ctx))
- goto out;
+ return 0;
+
+ /*
+ * Similarly do not spin if we have not informed the user of any
+ * dropped CQE.
+ */
+ if (unlikely(check_cq & BIT(IO_CHECK_CQ_DROPPED_BIT)))
+ return -EBADR;
+
do {
/*
* If a submit got punted to a workqueue, we can have the
@@ -2892,8 +3377,7 @@ static int io_iopoll_check(struct io_ring_ctx *ctx, long min)
nr_events += ret;
ret = 0;
} while (nr_events < min && !need_resched());
-out:
- mutex_unlock(&ctx->uring_lock);
+
return ret;
}
@@ -2966,21 +3450,21 @@ static bool __io_complete_rw_common(struct io_kiocb *req, long res)
} else {
fsnotify_access(req->file);
}
- if (unlikely(res != req->result)) {
+ if (unlikely(res != req->cqe.res)) {
if ((res == -EAGAIN || res == -EOPNOTSUPP) &&
io_rw_should_reissue(req)) {
req->flags |= REQ_F_REISSUE;
return true;
}
req_set_fail(req);
- req->result = res;
+ req->cqe.res = res;
}
return false;
}
static inline void io_req_task_complete(struct io_kiocb *req, bool *locked)
{
- int res = req->result;
+ int res = req->cqe.res;
if (*locked) {
io_req_complete_state(req, res, io_put_kbuf(req, 0));
@@ -2996,7 +3480,7 @@ static void __io_complete_rw(struct io_kiocb *req, long res,
{
if (__io_complete_rw_common(req, res))
return;
- __io_req_complete(req, issue_flags, req->result,
+ __io_req_complete(req, issue_flags, req->cqe.res,
io_put_kbuf(req, issue_flags));
}
@@ -3006,9 +3490,9 @@ static void io_complete_rw(struct kiocb *kiocb, long res)
if (__io_complete_rw_common(req, res))
return;
- req->result = res;
+ req->cqe.res = res;
req->io_task_work.func = io_req_task_complete;
- io_req_task_work_add(req, !!(req->ctx->flags & IORING_SETUP_SQPOLL));
+ io_req_task_prio_work_add(req);
}
static void io_complete_rw_iopoll(struct kiocb *kiocb, long res)
@@ -3017,12 +3501,12 @@ static void io_complete_rw_iopoll(struct kiocb *kiocb, long res)
if (kiocb->ki_flags & IOCB_WRITE)
kiocb_end_write(req);
- if (unlikely(res != req->result)) {
+ if (unlikely(res != req->cqe.res)) {
if (res == -EAGAIN && io_rw_should_reissue(req)) {
req->flags |= REQ_F_REISSUE;
return;
}
- req->result = res;
+ req->cqe.res = res;
}
/* order with io_iopoll_complete() checking ->iopoll_completed */
@@ -3132,6 +3616,8 @@ static unsigned int io_file_get_flags(struct file *file)
res |= FFS_ISREG;
if (__io_file_supports_nowait(file, mode))
res |= FFS_NOWAIT;
+ if (io_file_need_scm(file))
+ res |= FFS_SCM;
return res;
}
@@ -3163,6 +3649,7 @@ static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe)
req->rw.addr = READ_ONCE(sqe->addr);
req->rw.len = READ_ONCE(sqe->len);
req->rw.flags = READ_ONCE(sqe->rw_flags);
+ /* used for fixed read/write too - just read unconditionally */
req->buf_index = READ_ONCE(sqe->buf_index);
return 0;
}
@@ -3311,77 +3798,96 @@ static int io_import_fixed(struct io_kiocb *req, int rw, struct iov_iter *iter,
return __io_import_fixed(req, rw, iter, imu);
}
-static void io_ring_submit_unlock(struct io_ring_ctx *ctx, bool needs_lock)
-{
- if (needs_lock)
- mutex_unlock(&ctx->uring_lock);
-}
-
-static void io_ring_submit_lock(struct io_ring_ctx *ctx, bool needs_lock)
+static int io_buffer_add_list(struct io_ring_ctx *ctx,
+ struct io_buffer_list *bl, unsigned int bgid)
{
- /*
- * "Normal" inline submissions always hold the uring_lock, since we
- * grab it from the system call. Same is true for the SQPOLL offload.
- * The only exception is when we've detached the request and issue it
- * from an async worker thread, grab the lock for that case.
- */
- if (needs_lock)
- mutex_lock(&ctx->uring_lock);
-}
-
-static void io_buffer_add_list(struct io_ring_ctx *ctx,
- struct io_buffer_list *bl, unsigned int bgid)
-{
- struct list_head *list;
-
- list = &ctx->io_buffers[hash_32(bgid, IO_BUFFERS_HASH_BITS)];
- INIT_LIST_HEAD(&bl->buf_list);
bl->bgid = bgid;
- list_add(&bl->list, list);
+ if (bgid < BGID_ARRAY)
+ return 0;
+
+ return xa_err(xa_store(&ctx->io_bl_xa, bgid, bl, GFP_KERNEL));
}
-static struct io_buffer *io_buffer_select(struct io_kiocb *req, size_t *len,
- int bgid, unsigned int issue_flags)
+static void __user *io_provided_buffer_select(struct io_kiocb *req, size_t *len,
+ struct io_buffer_list *bl)
{
- struct io_buffer *kbuf = req->kbuf;
- bool needs_lock = issue_flags & IO_URING_F_UNLOCKED;
- struct io_ring_ctx *ctx = req->ctx;
- struct io_buffer_list *bl;
-
- if (req->flags & REQ_F_BUFFER_SELECTED)
- return kbuf;
+ if (!list_empty(&bl->buf_list)) {
+ struct io_buffer *kbuf;
- io_ring_submit_lock(ctx, needs_lock);
-
- lockdep_assert_held(&ctx->uring_lock);
-
- bl = io_buffer_get_list(ctx, bgid);
- if (bl && !list_empty(&bl->buf_list)) {
kbuf = list_first_entry(&bl->buf_list, struct io_buffer, list);
list_del(&kbuf->list);
if (*len > kbuf->len)
*len = kbuf->len;
req->flags |= REQ_F_BUFFER_SELECTED;
req->kbuf = kbuf;
+ req->buf_index = kbuf->bid;
+ return u64_to_user_ptr(kbuf->addr);
+ }
+ return NULL;
+}
+
+static void __user *io_ring_buffer_select(struct io_kiocb *req, size_t *len,
+ struct io_buffer_list *bl,
+ unsigned int issue_flags)
+{
+ struct io_uring_buf_ring *br = bl->buf_ring;
+ struct io_uring_buf *buf;
+ __u32 head = bl->head;
+
+ if (unlikely(smp_load_acquire(&br->tail) == head)) {
+ io_ring_submit_unlock(req->ctx, issue_flags);
+ return NULL;
+ }
+
+ head &= bl->mask;
+ if (head < IO_BUFFER_LIST_BUF_PER_PAGE) {
+ buf = &br->bufs[head];
} else {
- kbuf = ERR_PTR(-ENOBUFS);
+ int off = head & (IO_BUFFER_LIST_BUF_PER_PAGE - 1);
+ int index = head / IO_BUFFER_LIST_BUF_PER_PAGE - 1;
+ buf = page_address(bl->buf_pages[index]);
+ buf += off;
}
+ if (*len > buf->len)
+ *len = buf->len;
+ req->flags |= REQ_F_BUFFER_RING;
+ req->buf_list = bl;
+ req->buf_index = buf->bid;
- io_ring_submit_unlock(req->ctx, needs_lock);
- return kbuf;
+ if (issue_flags & IO_URING_F_UNLOCKED) {
+ /*
+ * If we came in unlocked, we have no choice but to consume the
+ * buffer here. This does mean it'll be pinned until the IO
+ * completes. But coming in unlocked means we're in io-wq
+ * context, hence there should be no further retry. For the
+ * locked case, the caller must ensure to call the commit when
+ * the transfer completes (or if we get -EAGAIN and must poll
+ * or retry).
+ */
+ req->buf_list = NULL;
+ bl->head++;
+ }
+ return u64_to_user_ptr(buf->addr);
}
-static void __user *io_rw_buffer_select(struct io_kiocb *req, size_t *len,
- unsigned int issue_flags)
+static void __user *io_buffer_select(struct io_kiocb *req, size_t *len,
+ unsigned int issue_flags)
{
- struct io_buffer *kbuf;
- u16 bgid;
+ struct io_ring_ctx *ctx = req->ctx;
+ struct io_buffer_list *bl;
+ void __user *ret = NULL;
+
+ io_ring_submit_lock(req->ctx, issue_flags);
- bgid = req->buf_index;
- kbuf = io_buffer_select(req, len, bgid, issue_flags);
- if (IS_ERR(kbuf))
- return kbuf;
- return u64_to_user_ptr(kbuf->addr);
+ bl = io_buffer_get_list(ctx, req->buf_index);
+ if (likely(bl)) {
+ if (bl->buf_nr_pages)
+ ret = io_ring_buffer_select(req, len, bl, issue_flags);
+ else
+ ret = io_provided_buffer_select(req, len, bl);
+ }
+ io_ring_submit_unlock(req->ctx, issue_flags);
+ return ret;
}
#ifdef CONFIG_COMPAT
@@ -3391,7 +3897,7 @@ static ssize_t io_compat_import(struct io_kiocb *req, struct iovec *iov,
struct compat_iovec __user *uiov;
compat_ssize_t clen;
void __user *buf;
- ssize_t len;
+ size_t len;
uiov = u64_to_user_ptr(req->rw.addr);
if (!access_ok(uiov, sizeof(*uiov)))
@@ -3402,11 +3908,12 @@ static ssize_t io_compat_import(struct io_kiocb *req, struct iovec *iov,
return -EINVAL;
len = clen;
- buf = io_rw_buffer_select(req, &len, issue_flags);
- if (IS_ERR(buf))
- return PTR_ERR(buf);
+ buf = io_buffer_select(req, &len, issue_flags);
+ if (!buf)
+ return -ENOBUFS;
+ req->rw.addr = (unsigned long) buf;
iov[0].iov_base = buf;
- iov[0].iov_len = (compat_size_t) len;
+ req->rw.len = iov[0].iov_len = (compat_size_t) len;
return 0;
}
#endif
@@ -3424,22 +3931,21 @@ static ssize_t __io_iov_buffer_select(struct io_kiocb *req, struct iovec *iov,
len = iov[0].iov_len;
if (len < 0)
return -EINVAL;
- buf = io_rw_buffer_select(req, &len, issue_flags);
- if (IS_ERR(buf))
- return PTR_ERR(buf);
+ buf = io_buffer_select(req, &len, issue_flags);
+ if (!buf)
+ return -ENOBUFS;
+ req->rw.addr = (unsigned long) buf;
iov[0].iov_base = buf;
- iov[0].iov_len = len;
+ req->rw.len = iov[0].iov_len = len;
return 0;
}
static ssize_t io_iov_buffer_select(struct io_kiocb *req, struct iovec *iov,
unsigned int issue_flags)
{
- if (req->flags & REQ_F_BUFFER_SELECTED) {
- struct io_buffer *kbuf = req->kbuf;
-
- iov[0].iov_base = u64_to_user_ptr(kbuf->addr);
- iov[0].iov_len = kbuf->len;
+ if (req->flags & (REQ_F_BUFFER_SELECTED|REQ_F_BUFFER_RING)) {
+ iov[0].iov_base = u64_to_user_ptr(req->rw.addr);
+ iov[0].iov_len = req->rw.len;
return 0;
}
if (req->rw.len != 1)
@@ -3453,6 +3959,13 @@ static ssize_t io_iov_buffer_select(struct io_kiocb *req, struct iovec *iov,
return __io_iov_buffer_select(req, iov, issue_flags);
}
+static inline bool io_do_buffer_select(struct io_kiocb *req)
+{
+ if (!(req->flags & REQ_F_BUFFER_SELECT))
+ return false;
+ return !(req->flags & (REQ_F_BUFFER_SELECTED|REQ_F_BUFFER_RING));
+}
+
static struct iovec *__io_import_iovec(int rw, struct io_kiocb *req,
struct io_rw_state *s,
unsigned int issue_flags)
@@ -3471,18 +3984,15 @@ static struct iovec *__io_import_iovec(int rw, struct io_kiocb *req,
return NULL;
}
- /* buffer index only valid with fixed read/write, or buffer select */
- if (unlikely(req->buf_index && !(req->flags & REQ_F_BUFFER_SELECT)))
- return ERR_PTR(-EINVAL);
-
buf = u64_to_user_ptr(req->rw.addr);
sqe_len = req->rw.len;
if (opcode == IORING_OP_READ || opcode == IORING_OP_WRITE) {
- if (req->flags & REQ_F_BUFFER_SELECT) {
- buf = io_rw_buffer_select(req, &sqe_len, issue_flags);
- if (IS_ERR(buf))
- return ERR_CAST(buf);
+ if (io_do_buffer_select(req)) {
+ buf = io_buffer_select(req, &sqe_len, issue_flags);
+ if (!buf)
+ return ERR_PTR(-ENOBUFS);
+ req->rw.addr = (unsigned long) buf;
req->rw.len = sqe_len;
}
@@ -3784,6 +4294,7 @@ static int io_rw_init_file(struct io_kiocb *req, fmode_t mode)
if (!(kiocb->ki_flags & IOCB_DIRECT) || !file->f_op->iopoll)
return -EOPNOTSUPP;
+ kiocb->private = NULL;
kiocb->ki_flags |= IOCB_HIPRI | IOCB_ALLOC_CACHE;
kiocb->ki_complete = io_complete_rw_iopoll;
req->iopoll_completed = 0;
@@ -3832,9 +4343,11 @@ static int io_read(struct io_kiocb *req, unsigned int issue_flags)
iovec = NULL;
}
ret = io_rw_init_file(req, FMODE_READ);
- if (unlikely(ret))
+ if (unlikely(ret)) {
+ kfree(iovec);
return ret;
- req->result = iov_iter_count(&s->iter);
+ }
+ req->cqe.res = iov_iter_count(&s->iter);
if (force_nonblock) {
/* If the file doesn't support async, just async punt */
@@ -3850,7 +4363,7 @@ static int io_read(struct io_kiocb *req, unsigned int issue_flags)
ppos = io_kiocb_update_pos(req);
- ret = rw_verify_area(READ, req->file, ppos, req->result);
+ ret = rw_verify_area(READ, req->file, ppos, req->cqe.res);
if (unlikely(ret)) {
kfree(iovec);
return ret;
@@ -3872,7 +4385,7 @@ static int io_read(struct io_kiocb *req, unsigned int issue_flags)
ret = 0;
} else if (ret == -EIOCBQUEUED) {
goto out_free;
- } else if (ret == req->result || ret <= 0 || !force_nonblock ||
+ } else if (ret == req->cqe.res || ret <= 0 || !force_nonblock ||
(req->flags & REQ_F_NOWAIT) || !need_read_all(req)) {
/* read all, failed, already did sync or don't want to retry */
goto done;
@@ -3958,9 +4471,11 @@ static int io_write(struct io_kiocb *req, unsigned int issue_flags)
iovec = NULL;
}
ret = io_rw_init_file(req, FMODE_WRITE);
- if (unlikely(ret))
+ if (unlikely(ret)) {
+ kfree(iovec);
return ret;
- req->result = iov_iter_count(&s->iter);
+ }
+ req->cqe.res = iov_iter_count(&s->iter);
if (force_nonblock) {
/* If the file doesn't support async, just async punt */
@@ -3980,7 +4495,7 @@ static int io_write(struct io_kiocb *req, unsigned int issue_flags)
ppos = io_kiocb_update_pos(req);
- ret = rw_verify_area(WRITE, req->file, ppos, req->result);
+ ret = rw_verify_area(WRITE, req->file, ppos, req->cqe.res);
if (unlikely(ret))
goto out_free;
@@ -4044,9 +4559,7 @@ static int io_renameat_prep(struct io_kiocb *req,
struct io_rename *ren = &req->rename;
const char __user *oldf, *newf;
- if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
- return -EINVAL;
- if (sqe->ioprio || sqe->buf_index || sqe->splice_fd_in)
+ if (sqe->buf_index || sqe->splice_fd_in)
return -EINVAL;
if (unlikely(req->flags & REQ_F_FIXED_FILE))
return -EBADF;
@@ -4083,22 +4596,257 @@ static int io_renameat(struct io_kiocb *req, unsigned int issue_flags)
ren->newpath, ren->flags);
req->flags &= ~REQ_F_NEED_CLEANUP;
- if (ret < 0)
- req_set_fail(req);
io_req_complete(req, ret);
return 0;
}
+static inline void __io_xattr_finish(struct io_kiocb *req)
+{
+ struct io_xattr *ix = &req->xattr;
+
+ if (ix->filename)
+ putname(ix->filename);
+
+ kfree(ix->ctx.kname);
+ kvfree(ix->ctx.kvalue);
+}
+
+static void io_xattr_finish(struct io_kiocb *req, int ret)
+{
+ req->flags &= ~REQ_F_NEED_CLEANUP;
+
+ __io_xattr_finish(req);
+ io_req_complete(req, ret);
+}
+
+static int __io_getxattr_prep(struct io_kiocb *req,
+ const struct io_uring_sqe *sqe)
+{
+ struct io_xattr *ix = &req->xattr;
+ const char __user *name;
+ int ret;
+
+ if (unlikely(req->flags & REQ_F_FIXED_FILE))
+ return -EBADF;
+
+ ix->filename = NULL;
+ ix->ctx.kvalue = NULL;
+ name = u64_to_user_ptr(READ_ONCE(sqe->addr));
+ ix->ctx.cvalue = u64_to_user_ptr(READ_ONCE(sqe->addr2));
+ ix->ctx.size = READ_ONCE(sqe->len);
+ ix->ctx.flags = READ_ONCE(sqe->xattr_flags);
+
+ if (ix->ctx.flags)
+ return -EINVAL;
+
+ ix->ctx.kname = kmalloc(sizeof(*ix->ctx.kname), GFP_KERNEL);
+ if (!ix->ctx.kname)
+ return -ENOMEM;
+
+ ret = strncpy_from_user(ix->ctx.kname->name, name,
+ sizeof(ix->ctx.kname->name));
+ if (!ret || ret == sizeof(ix->ctx.kname->name))
+ ret = -ERANGE;
+ if (ret < 0) {
+ kfree(ix->ctx.kname);
+ return ret;
+ }
+
+ req->flags |= REQ_F_NEED_CLEANUP;
+ return 0;
+}
+
+static int io_fgetxattr_prep(struct io_kiocb *req,
+ const struct io_uring_sqe *sqe)
+{
+ return __io_getxattr_prep(req, sqe);
+}
+
+static int io_getxattr_prep(struct io_kiocb *req,
+ const struct io_uring_sqe *sqe)
+{
+ struct io_xattr *ix = &req->xattr;
+ const char __user *path;
+ int ret;
+
+ ret = __io_getxattr_prep(req, sqe);
+ if (ret)
+ return ret;
+
+ path = u64_to_user_ptr(READ_ONCE(sqe->addr3));
+
+ ix->filename = getname_flags(path, LOOKUP_FOLLOW, NULL);
+ if (IS_ERR(ix->filename)) {
+ ret = PTR_ERR(ix->filename);
+ ix->filename = NULL;
+ }
+
+ return ret;
+}
+
+static int io_fgetxattr(struct io_kiocb *req, unsigned int issue_flags)
+{
+ struct io_xattr *ix = &req->xattr;
+ int ret;
+
+ if (issue_flags & IO_URING_F_NONBLOCK)
+ return -EAGAIN;
+
+ ret = do_getxattr(mnt_user_ns(req->file->f_path.mnt),
+ req->file->f_path.dentry,
+ &ix->ctx);
+
+ io_xattr_finish(req, ret);
+ return 0;
+}
+
+static int io_getxattr(struct io_kiocb *req, unsigned int issue_flags)
+{
+ struct io_xattr *ix = &req->xattr;
+ unsigned int lookup_flags = LOOKUP_FOLLOW;
+ struct path path;
+ int ret;
+
+ if (issue_flags & IO_URING_F_NONBLOCK)
+ return -EAGAIN;
+
+retry:
+ ret = filename_lookup(AT_FDCWD, ix->filename, lookup_flags, &path, NULL);
+ if (!ret) {
+ ret = do_getxattr(mnt_user_ns(path.mnt),
+ path.dentry,
+ &ix->ctx);
+
+ path_put(&path);
+ if (retry_estale(ret, lookup_flags)) {
+ lookup_flags |= LOOKUP_REVAL;
+ goto retry;
+ }
+ }
+
+ io_xattr_finish(req, ret);
+ return 0;
+}
+
+static int __io_setxattr_prep(struct io_kiocb *req,
+ const struct io_uring_sqe *sqe)
+{
+ struct io_xattr *ix = &req->xattr;
+ const char __user *name;
+ int ret;
+
+ if (unlikely(req->flags & REQ_F_FIXED_FILE))
+ return -EBADF;
+
+ ix->filename = NULL;
+ name = u64_to_user_ptr(READ_ONCE(sqe->addr));
+ ix->ctx.cvalue = u64_to_user_ptr(READ_ONCE(sqe->addr2));
+ ix->ctx.kvalue = NULL;
+ ix->ctx.size = READ_ONCE(sqe->len);
+ ix->ctx.flags = READ_ONCE(sqe->xattr_flags);
+
+ ix->ctx.kname = kmalloc(sizeof(*ix->ctx.kname), GFP_KERNEL);
+ if (!ix->ctx.kname)
+ return -ENOMEM;
+
+ ret = setxattr_copy(name, &ix->ctx);
+ if (ret) {
+ kfree(ix->ctx.kname);
+ return ret;
+ }
+
+ req->flags |= REQ_F_NEED_CLEANUP;
+ return 0;
+}
+
+static int io_setxattr_prep(struct io_kiocb *req,
+ const struct io_uring_sqe *sqe)
+{
+ struct io_xattr *ix = &req->xattr;
+ const char __user *path;
+ int ret;
+
+ ret = __io_setxattr_prep(req, sqe);
+ if (ret)
+ return ret;
+
+ path = u64_to_user_ptr(READ_ONCE(sqe->addr3));
+
+ ix->filename = getname_flags(path, LOOKUP_FOLLOW, NULL);
+ if (IS_ERR(ix->filename)) {
+ ret = PTR_ERR(ix->filename);
+ ix->filename = NULL;
+ }
+
+ return ret;
+}
+
+static int io_fsetxattr_prep(struct io_kiocb *req,
+ const struct io_uring_sqe *sqe)
+{
+ return __io_setxattr_prep(req, sqe);
+}
+
+static int __io_setxattr(struct io_kiocb *req, unsigned int issue_flags,
+ struct path *path)
+{
+ struct io_xattr *ix = &req->xattr;
+ int ret;
+
+ ret = mnt_want_write(path->mnt);
+ if (!ret) {
+ ret = do_setxattr(mnt_user_ns(path->mnt), path->dentry, &ix->ctx);
+ mnt_drop_write(path->mnt);
+ }
+
+ return ret;
+}
+
+static int io_fsetxattr(struct io_kiocb *req, unsigned int issue_flags)
+{
+ int ret;
+
+ if (issue_flags & IO_URING_F_NONBLOCK)
+ return -EAGAIN;
+
+ ret = __io_setxattr(req, issue_flags, &req->file->f_path);
+ io_xattr_finish(req, ret);
+
+ return 0;
+}
+
+static int io_setxattr(struct io_kiocb *req, unsigned int issue_flags)
+{
+ struct io_xattr *ix = &req->xattr;
+ unsigned int lookup_flags = LOOKUP_FOLLOW;
+ struct path path;
+ int ret;
+
+ if (issue_flags & IO_URING_F_NONBLOCK)
+ return -EAGAIN;
+
+retry:
+ ret = filename_lookup(AT_FDCWD, ix->filename, lookup_flags, &path, NULL);
+ if (!ret) {
+ ret = __io_setxattr(req, issue_flags, &path);
+ path_put(&path);
+ if (retry_estale(ret, lookup_flags)) {
+ lookup_flags |= LOOKUP_REVAL;
+ goto retry;
+ }
+ }
+
+ io_xattr_finish(req, ret);
+ return 0;
+}
+
static int io_unlinkat_prep(struct io_kiocb *req,
const struct io_uring_sqe *sqe)
{
struct io_unlink *un = &req->unlink;
const char __user *fname;
- if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
- return -EINVAL;
- if (sqe->ioprio || sqe->off || sqe->len || sqe->buf_index ||
- sqe->splice_fd_in)
+ if (sqe->off || sqe->len || sqe->buf_index || sqe->splice_fd_in)
return -EINVAL;
if (unlikely(req->flags & REQ_F_FIXED_FILE))
return -EBADF;
@@ -4132,8 +4880,6 @@ static int io_unlinkat(struct io_kiocb *req, unsigned int issue_flags)
ret = do_unlinkat(un->dfd, un->filename);
req->flags &= ~REQ_F_NEED_CLEANUP;
- if (ret < 0)
- req_set_fail(req);
io_req_complete(req, ret);
return 0;
}
@@ -4144,10 +4890,7 @@ static int io_mkdirat_prep(struct io_kiocb *req,
struct io_mkdir *mkd = &req->mkdir;
const char __user *fname;
- if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
- return -EINVAL;
- if (sqe->ioprio || sqe->off || sqe->rw_flags || sqe->buf_index ||
- sqe->splice_fd_in)
+ if (sqe->off || sqe->rw_flags || sqe->buf_index || sqe->splice_fd_in)
return -EINVAL;
if (unlikely(req->flags & REQ_F_FIXED_FILE))
return -EBADF;
@@ -4175,8 +4918,6 @@ static int io_mkdirat(struct io_kiocb *req, unsigned int issue_flags)
ret = do_mkdirat(mkd->dfd, mkd->filename, mkd->mode);
req->flags &= ~REQ_F_NEED_CLEANUP;
- if (ret < 0)
- req_set_fail(req);
io_req_complete(req, ret);
return 0;
}
@@ -4187,10 +4928,7 @@ static int io_symlinkat_prep(struct io_kiocb *req,
struct io_symlink *sl = &req->symlink;
const char __user *oldpath, *newpath;
- if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
- return -EINVAL;
- if (sqe->ioprio || sqe->len || sqe->rw_flags || sqe->buf_index ||
- sqe->splice_fd_in)
+ if (sqe->len || sqe->rw_flags || sqe->buf_index || sqe->splice_fd_in)
return -EINVAL;
if (unlikely(req->flags & REQ_F_FIXED_FILE))
return -EBADF;
@@ -4224,8 +4962,6 @@ static int io_symlinkat(struct io_kiocb *req, unsigned int issue_flags)
ret = do_symlinkat(sl->oldpath, sl->new_dfd, sl->newpath);
req->flags &= ~REQ_F_NEED_CLEANUP;
- if (ret < 0)
- req_set_fail(req);
io_req_complete(req, ret);
return 0;
}
@@ -4236,9 +4972,7 @@ static int io_linkat_prep(struct io_kiocb *req,
struct io_hardlink *lnk = &req->hardlink;
const char __user *oldf, *newf;
- if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
- return -EINVAL;
- if (sqe->ioprio || sqe->rw_flags || sqe->buf_index || sqe->splice_fd_in)
+ if (sqe->rw_flags || sqe->buf_index || sqe->splice_fd_in)
return -EINVAL;
if (unlikely(req->flags & REQ_F_FIXED_FILE))
return -EBADF;
@@ -4275,9 +5009,97 @@ static int io_linkat(struct io_kiocb *req, unsigned int issue_flags)
lnk->newpath, lnk->flags);
req->flags &= ~REQ_F_NEED_CLEANUP;
+ io_req_complete(req, ret);
+ return 0;
+}
+
+static void io_uring_cmd_work(struct io_kiocb *req, bool *locked)
+{
+ req->uring_cmd.task_work_cb(&req->uring_cmd);
+}
+
+void io_uring_cmd_complete_in_task(struct io_uring_cmd *ioucmd,
+ void (*task_work_cb)(struct io_uring_cmd *))
+{
+ struct io_kiocb *req = container_of(ioucmd, struct io_kiocb, uring_cmd);
+
+ req->uring_cmd.task_work_cb = task_work_cb;
+ req->io_task_work.func = io_uring_cmd_work;
+ io_req_task_prio_work_add(req);
+}
+EXPORT_SYMBOL_GPL(io_uring_cmd_complete_in_task);
+
+/*
+ * Called by consumers of io_uring_cmd, if they originally returned
+ * -EIOCBQUEUED upon receiving the command.
+ */
+void io_uring_cmd_done(struct io_uring_cmd *ioucmd, ssize_t ret, ssize_t res2)
+{
+ struct io_kiocb *req = container_of(ioucmd, struct io_kiocb, uring_cmd);
+
if (ret < 0)
req_set_fail(req);
- io_req_complete(req, ret);
+ if (req->ctx->flags & IORING_SETUP_CQE32)
+ __io_req_complete32(req, 0, ret, 0, res2, 0);
+ else
+ io_req_complete(req, ret);
+}
+EXPORT_SYMBOL_GPL(io_uring_cmd_done);
+
+static int io_uring_cmd_prep_async(struct io_kiocb *req)
+{
+ size_t cmd_size;
+
+ cmd_size = uring_cmd_pdu_size(req->ctx->flags & IORING_SETUP_SQE128);
+
+ memcpy(req->async_data, req->uring_cmd.cmd, cmd_size);
+ return 0;
+}
+
+static int io_uring_cmd_prep(struct io_kiocb *req,
+ const struct io_uring_sqe *sqe)
+{
+ struct io_uring_cmd *ioucmd = &req->uring_cmd;
+
+ if (sqe->rw_flags)
+ return -EINVAL;
+ ioucmd->cmd = sqe->cmd;
+ ioucmd->cmd_op = READ_ONCE(sqe->cmd_op);
+ return 0;
+}
+
+static int io_uring_cmd(struct io_kiocb *req, unsigned int issue_flags)
+{
+ struct io_uring_cmd *ioucmd = &req->uring_cmd;
+ struct io_ring_ctx *ctx = req->ctx;
+ struct file *file = req->file;
+ int ret;
+
+ if (!req->file->f_op->uring_cmd)
+ return -EOPNOTSUPP;
+
+ if (ctx->flags & IORING_SETUP_SQE128)
+ issue_flags |= IO_URING_F_SQE128;
+ if (ctx->flags & IORING_SETUP_CQE32)
+ issue_flags |= IO_URING_F_CQE32;
+ if (ctx->flags & IORING_SETUP_IOPOLL)
+ issue_flags |= IO_URING_F_IOPOLL;
+
+ if (req_has_async_data(req))
+ ioucmd->cmd = req->async_data;
+
+ ret = file->f_op->uring_cmd(ioucmd, issue_flags);
+ if (ret == -EAGAIN) {
+ if (!req_has_async_data(req)) {
+ if (io_alloc_async_data(req))
+ return -ENOMEM;
+ io_uring_cmd_prep_async(req);
+ }
+ return -EAGAIN;
+ }
+
+ if (ret != -EIOCBQUEUED)
+ io_uring_cmd_done(ioucmd, ret, 0);
return 0;
}
@@ -4285,9 +5107,7 @@ static int io_shutdown_prep(struct io_kiocb *req,
const struct io_uring_sqe *sqe)
{
#if defined(CONFIG_NET)
- if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
- return -EINVAL;
- if (unlikely(sqe->ioprio || sqe->off || sqe->addr || sqe->rw_flags ||
+ if (unlikely(sqe->off || sqe->addr || sqe->rw_flags ||
sqe->buf_index || sqe->splice_fd_in))
return -EINVAL;
@@ -4312,8 +5132,6 @@ static int io_shutdown(struct io_kiocb *req, unsigned int issue_flags)
return -ENOTSOCK;
ret = __sys_shutdown_sock(sock, req->shutdown.how);
- if (ret < 0)
- req_set_fail(req);
io_req_complete(req, ret);
return 0;
#else
@@ -4327,9 +5145,6 @@ static int __io_splice_prep(struct io_kiocb *req,
struct io_splice *sp = &req->splice;
unsigned int valid_flags = SPLICE_F_FD_IN_FIXED | SPLICE_F_ALL;
- if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
- return -EINVAL;
-
sp->len = READ_ONCE(sqe->len);
sp->flags = READ_ONCE(sqe->splice_flags);
if (unlikely(sp->flags & ~valid_flags))
@@ -4374,7 +5189,7 @@ static int io_tee(struct io_kiocb *req, unsigned int issue_flags)
done:
if (ret != sp->len)
req_set_fail(req);
- io_req_complete(req, ret);
+ __io_req_complete(req, 0, ret, 0);
return 0;
}
@@ -4419,7 +5234,20 @@ static int io_splice(struct io_kiocb *req, unsigned int issue_flags)
done:
if (ret != sp->len)
req_set_fail(req);
- io_req_complete(req, ret);
+ __io_req_complete(req, 0, ret, 0);
+ return 0;
+}
+
+static int io_nop_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+{
+ /*
+ * If the ring is setup with CQE32, relay back addr/addr
+ */
+ if (req->ctx->flags & IORING_SETUP_CQE32) {
+ req->nop.extra1 = READ_ONCE(sqe->addr);
+ req->nop.extra2 = READ_ONCE(sqe->addr2);
+ }
+
return 0;
}
@@ -4428,20 +5256,31 @@ done:
*/
static int io_nop(struct io_kiocb *req, unsigned int issue_flags)
{
- struct io_ring_ctx *ctx = req->ctx;
+ unsigned int cflags;
+ void __user *buf;
- if (unlikely(ctx->flags & IORING_SETUP_IOPOLL))
- return -EINVAL;
+ if (req->flags & REQ_F_BUFFER_SELECT) {
+ size_t len = 1;
+
+ buf = io_buffer_select(req, &len, issue_flags);
+ if (!buf)
+ return -ENOBUFS;
+ }
- __io_req_complete(req, issue_flags, 0, 0);
+ cflags = io_put_kbuf(req, issue_flags);
+ if (!(req->ctx->flags & IORING_SETUP_CQE32))
+ __io_req_complete(req, issue_flags, 0, cflags);
+ else
+ __io_req_complete32(req, issue_flags, 0, cflags,
+ req->nop.extra1, req->nop.extra2);
return 0;
}
static int io_msg_ring_prep(struct io_kiocb *req,
const struct io_uring_sqe *sqe)
{
- if (unlikely(sqe->addr || sqe->ioprio || sqe->rw_flags ||
- sqe->splice_fd_in || sqe->buf_index || sqe->personality))
+ if (unlikely(sqe->addr || sqe->rw_flags || sqe->splice_fd_in ||
+ sqe->buf_index || sqe->personality))
return -EINVAL;
req->msg.user_data = READ_ONCE(sqe->off);
@@ -4477,17 +5316,15 @@ done:
if (ret < 0)
req_set_fail(req);
__io_req_complete(req, issue_flags, ret, 0);
+ /* put file to avoid an attempt to IOPOLL the req */
+ io_put_file(req->file);
+ req->file = NULL;
return 0;
}
static int io_fsync_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
- struct io_ring_ctx *ctx = req->ctx;
-
- if (unlikely(ctx->flags & IORING_SETUP_IOPOLL))
- return -EINVAL;
- if (unlikely(sqe->addr || sqe->ioprio || sqe->buf_index ||
- sqe->splice_fd_in))
+ if (unlikely(sqe->addr || sqe->buf_index || sqe->splice_fd_in))
return -EINVAL;
req->sync.flags = READ_ONCE(sqe->fsync_flags);
@@ -4511,8 +5348,6 @@ static int io_fsync(struct io_kiocb *req, unsigned int issue_flags)
ret = vfs_fsync_range(req->file, req->sync.off,
end > 0 ? end : LLONG_MAX,
req->sync.flags & IORING_FSYNC_DATASYNC);
- if (ret < 0)
- req_set_fail(req);
io_req_complete(req, ret);
return 0;
}
@@ -4520,10 +5355,7 @@ static int io_fsync(struct io_kiocb *req, unsigned int issue_flags)
static int io_fallocate_prep(struct io_kiocb *req,
const struct io_uring_sqe *sqe)
{
- if (sqe->ioprio || sqe->buf_index || sqe->rw_flags ||
- sqe->splice_fd_in)
- return -EINVAL;
- if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
+ if (sqe->buf_index || sqe->rw_flags || sqe->splice_fd_in)
return -EINVAL;
req->sync.off = READ_ONCE(sqe->off);
@@ -4541,9 +5373,7 @@ static int io_fallocate(struct io_kiocb *req, unsigned int issue_flags)
return -EAGAIN;
ret = vfs_fallocate(req->file, req->sync.mode, req->sync.off,
req->sync.len);
- if (ret < 0)
- req_set_fail(req);
- else
+ if (ret >= 0)
fsnotify_modify(req->file);
io_req_complete(req, ret);
return 0;
@@ -4554,9 +5384,7 @@ static int __io_openat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe
const char __user *fname;
int ret;
- if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
- return -EINVAL;
- if (unlikely(sqe->ioprio || sqe->buf_index))
+ if (unlikely(sqe->buf_index))
return -EINVAL;
if (unlikely(req->flags & REQ_F_FIXED_FILE))
return -EBADF;
@@ -4611,6 +5439,61 @@ static int io_openat2_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
return __io_openat_prep(req, sqe);
}
+static int io_file_bitmap_get(struct io_ring_ctx *ctx)
+{
+ struct io_file_table *table = &ctx->file_table;
+ unsigned long nr = ctx->nr_user_files;
+ int ret;
+
+ if (table->alloc_hint >= nr)
+ table->alloc_hint = 0;
+
+ do {
+ ret = find_next_zero_bit(table->bitmap, nr, table->alloc_hint);
+ if (ret != nr) {
+ table->alloc_hint = ret + 1;
+ return ret;
+ }
+ if (!table->alloc_hint)
+ break;
+
+ nr = table->alloc_hint;
+ table->alloc_hint = 0;
+ } while (1);
+
+ return -ENFILE;
+}
+
+static int io_fixed_fd_install(struct io_kiocb *req, unsigned int issue_flags,
+ struct file *file, unsigned int file_slot)
+{
+ bool alloc_slot = file_slot == IORING_FILE_INDEX_ALLOC;
+ struct io_ring_ctx *ctx = req->ctx;
+ int ret;
+
+ if (alloc_slot) {
+ io_ring_submit_lock(ctx, issue_flags);
+ ret = io_file_bitmap_get(ctx);
+ if (unlikely(ret < 0)) {
+ io_ring_submit_unlock(ctx, issue_flags);
+ return ret;
+ }
+
+ file_slot = ret;
+ } else {
+ file_slot--;
+ }
+
+ ret = io_install_fixed_file(req, file, issue_flags, file_slot);
+ if (alloc_slot) {
+ io_ring_submit_unlock(ctx, issue_flags);
+ if (!ret)
+ return file_slot;
+ }
+
+ return ret;
+}
+
static int io_openat2(struct io_kiocb *req, unsigned int issue_flags)
{
struct open_flags op;
@@ -4666,8 +5549,8 @@ static int io_openat2(struct io_kiocb *req, unsigned int issue_flags)
if (!fixed)
fd_install(ret, file);
else
- ret = io_install_fixed_file(req, file, issue_flags,
- req->open.file_slot - 1);
+ ret = io_fixed_fd_install(req, issue_flags, file,
+ req->open.file_slot);
err:
putname(req->open.filename);
req->flags &= ~REQ_F_NEED_CLEANUP;
@@ -4688,7 +5571,7 @@ static int io_remove_buffers_prep(struct io_kiocb *req,
struct io_provide_buf *p = &req->pbuf;
u64 tmp;
- if (sqe->ioprio || sqe->rw_flags || sqe->addr || sqe->len || sqe->off ||
+ if (sqe->rw_flags || sqe->addr || sqe->len || sqe->off ||
sqe->splice_fd_in)
return -EINVAL;
@@ -4711,6 +5594,20 @@ static int __io_remove_buffers(struct io_ring_ctx *ctx,
if (!nbufs)
return 0;
+ if (bl->buf_nr_pages) {
+ int j;
+
+ i = bl->buf_ring->tail - bl->head;
+ for (j = 0; j < bl->buf_nr_pages; j++)
+ unpin_user_page(bl->buf_pages[j]);
+ kvfree(bl->buf_pages);
+ bl->buf_pages = NULL;
+ bl->buf_nr_pages = 0;
+ /* make sure it's seen as empty */
+ INIT_LIST_HEAD(&bl->buf_list);
+ return i;
+ }
+
/* the head kbuf is the list itself */
while (!list_empty(&bl->buf_list)) {
struct io_buffer *nxt;
@@ -4732,22 +5629,23 @@ static int io_remove_buffers(struct io_kiocb *req, unsigned int issue_flags)
struct io_ring_ctx *ctx = req->ctx;
struct io_buffer_list *bl;
int ret = 0;
- bool needs_lock = issue_flags & IO_URING_F_UNLOCKED;
-
- io_ring_submit_lock(ctx, needs_lock);
- lockdep_assert_held(&ctx->uring_lock);
+ io_ring_submit_lock(ctx, issue_flags);
ret = -ENOENT;
bl = io_buffer_get_list(ctx, p->bgid);
- if (bl)
- ret = __io_remove_buffers(ctx, bl, p->nbufs);
+ if (bl) {
+ ret = -EINVAL;
+ /* can't use provide/remove buffers command on mapped buffers */
+ if (!bl->buf_nr_pages)
+ ret = __io_remove_buffers(ctx, bl, p->nbufs);
+ }
if (ret < 0)
req_set_fail(req);
/* complete before unlock, IOPOLL may need the lock */
__io_req_complete(req, issue_flags, ret, 0);
- io_ring_submit_unlock(ctx, needs_lock);
+ io_ring_submit_unlock(ctx, issue_flags);
return 0;
}
@@ -4758,7 +5656,7 @@ static int io_provide_buffers_prep(struct io_kiocb *req,
struct io_provide_buf *p = &req->pbuf;
u64 tmp;
- if (sqe->ioprio || sqe->rw_flags || sqe->splice_fd_in)
+ if (sqe->rw_flags || sqe->splice_fd_in)
return -EINVAL;
tmp = READ_ONCE(sqe->fd);
@@ -4855,26 +5753,56 @@ static int io_add_buffers(struct io_ring_ctx *ctx, struct io_provide_buf *pbuf,
return i ? 0 : -ENOMEM;
}
+static __cold int io_init_bl_list(struct io_ring_ctx *ctx)
+{
+ int i;
+
+ ctx->io_bl = kcalloc(BGID_ARRAY, sizeof(struct io_buffer_list),
+ GFP_KERNEL);
+ if (!ctx->io_bl)
+ return -ENOMEM;
+
+ for (i = 0; i < BGID_ARRAY; i++) {
+ INIT_LIST_HEAD(&ctx->io_bl[i].buf_list);
+ ctx->io_bl[i].bgid = i;
+ }
+
+ return 0;
+}
+
static int io_provide_buffers(struct io_kiocb *req, unsigned int issue_flags)
{
struct io_provide_buf *p = &req->pbuf;
struct io_ring_ctx *ctx = req->ctx;
struct io_buffer_list *bl;
int ret = 0;
- bool needs_lock = issue_flags & IO_URING_F_UNLOCKED;
- io_ring_submit_lock(ctx, needs_lock);
+ io_ring_submit_lock(ctx, issue_flags);
- lockdep_assert_held(&ctx->uring_lock);
+ if (unlikely(p->bgid < BGID_ARRAY && !ctx->io_bl)) {
+ ret = io_init_bl_list(ctx);
+ if (ret)
+ goto err;
+ }
bl = io_buffer_get_list(ctx, p->bgid);
if (unlikely(!bl)) {
- bl = kmalloc(sizeof(*bl), GFP_KERNEL);
+ bl = kzalloc(sizeof(*bl), GFP_KERNEL);
if (!bl) {
ret = -ENOMEM;
goto err;
}
- io_buffer_add_list(ctx, bl, p->bgid);
+ INIT_LIST_HEAD(&bl->buf_list);
+ ret = io_buffer_add_list(ctx, bl, p->bgid);
+ if (ret) {
+ kfree(bl);
+ goto err;
+ }
+ }
+ /* can't add buffers via this command for a mapped buffer ring */
+ if (bl->buf_nr_pages) {
+ ret = -EINVAL;
+ goto err;
}
ret = io_add_buffers(ctx, p, bl);
@@ -4883,7 +5811,7 @@ err:
req_set_fail(req);
/* complete before unlock, IOPOLL may need the lock */
__io_req_complete(req, issue_flags, ret, 0);
- io_ring_submit_unlock(ctx, needs_lock);
+ io_ring_submit_unlock(ctx, issue_flags);
return 0;
}
@@ -4891,9 +5819,7 @@ static int io_epoll_ctl_prep(struct io_kiocb *req,
const struct io_uring_sqe *sqe)
{
#if defined(CONFIG_EPOLL)
- if (sqe->ioprio || sqe->buf_index || sqe->splice_fd_in)
- return -EINVAL;
- if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
+ if (sqe->buf_index || sqe->splice_fd_in)
return -EINVAL;
req->epoll.epfd = READ_ONCE(sqe->fd);
@@ -4937,9 +5863,7 @@ static int io_epoll_ctl(struct io_kiocb *req, unsigned int issue_flags)
static int io_madvise_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
#if defined(CONFIG_ADVISE_SYSCALLS) && defined(CONFIG_MMU)
- if (sqe->ioprio || sqe->buf_index || sqe->off || sqe->splice_fd_in)
- return -EINVAL;
- if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
+ if (sqe->buf_index || sqe->off || sqe->splice_fd_in)
return -EINVAL;
req->madvise.addr = READ_ONCE(sqe->addr);
@@ -4961,8 +5885,6 @@ static int io_madvise(struct io_kiocb *req, unsigned int issue_flags)
return -EAGAIN;
ret = do_madvise(current->mm, ma->addr, ma->len, ma->advice);
- if (ret < 0)
- req_set_fail(req);
io_req_complete(req, ret);
return 0;
#else
@@ -4972,9 +5894,7 @@ static int io_madvise(struct io_kiocb *req, unsigned int issue_flags)
static int io_fadvise_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
- if (sqe->ioprio || sqe->buf_index || sqe->addr || sqe->splice_fd_in)
- return -EINVAL;
- if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
+ if (sqe->buf_index || sqe->addr || sqe->splice_fd_in)
return -EINVAL;
req->fadvise.offset = READ_ONCE(sqe->off);
@@ -5010,9 +5930,7 @@ static int io_statx_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
const char __user *path;
- if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
- return -EINVAL;
- if (sqe->ioprio || sqe->buf_index || sqe->splice_fd_in)
+ if (sqe->buf_index || sqe->splice_fd_in)
return -EINVAL;
if (req->flags & REQ_F_FIXED_FILE)
return -EBADF;
@@ -5048,19 +5966,13 @@ static int io_statx(struct io_kiocb *req, unsigned int issue_flags)
ret = do_statx(ctx->dfd, ctx->filename, ctx->flags, ctx->mask,
ctx->buffer);
-
- if (ret < 0)
- req_set_fail(req);
io_req_complete(req, ret);
return 0;
}
static int io_close_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
- if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
- return -EINVAL;
- if (sqe->ioprio || sqe->off || sqe->addr || sqe->len ||
- sqe->rw_flags || sqe->buf_index)
+ if (sqe->off || sqe->addr || sqe->len || sqe->rw_flags || sqe->buf_index)
return -EINVAL;
if (req->flags & REQ_F_FIXED_FILE)
return -EBADF;
@@ -5092,7 +6004,8 @@ static int io_close(struct io_kiocb *req, unsigned int issue_flags)
spin_unlock(&files->file_lock);
goto err;
}
- file = fdt->fd[close->fd];
+ file = rcu_dereference_protected(fdt->fd[close->fd],
+ lockdep_is_held(&files->file_lock));
if (!file || file->f_op == &io_uring_fops) {
spin_unlock(&files->file_lock);
file = NULL;
@@ -5126,12 +6039,7 @@ err:
static int io_sfr_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
- struct io_ring_ctx *ctx = req->ctx;
-
- if (unlikely(ctx->flags & IORING_SETUP_IOPOLL))
- return -EINVAL;
- if (unlikely(sqe->addr || sqe->ioprio || sqe->buf_index ||
- sqe->splice_fd_in))
+ if (unlikely(sqe->addr || sqe->buf_index || sqe->splice_fd_in))
return -EINVAL;
req->sync.off = READ_ONCE(sqe->off);
@@ -5150,13 +6058,18 @@ static int io_sync_file_range(struct io_kiocb *req, unsigned int issue_flags)
ret = sync_file_range(req->file, req->sync.off, req->sync.len,
req->sync.flags);
- if (ret < 0)
- req_set_fail(req);
io_req_complete(req, ret);
return 0;
}
#if defined(CONFIG_NET)
+static bool io_net_retry(struct socket *sock, int flags)
+{
+ if (!(flags & MSG_WAITALL))
+ return false;
+ return sock->type == SOCK_STREAM || sock->type == SOCK_SEQPACKET;
+}
+
static int io_setup_async_msg(struct io_kiocb *req,
struct io_async_msghdr *kmsg)
{
@@ -5202,11 +6115,16 @@ static int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
struct io_sr_msg *sr = &req->sr_msg;
- if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
+ if (unlikely(sqe->file_index))
+ return -EINVAL;
+ if (unlikely(sqe->addr2 || sqe->file_index))
return -EINVAL;
sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr));
sr->len = READ_ONCE(sqe->len);
+ sr->flags = READ_ONCE(sqe->addr2);
+ if (sr->flags & ~IORING_RECVSEND_POLL_FIRST)
+ return -EINVAL;
sr->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL;
if (sr->msg_flags & MSG_DONTWAIT)
req->flags |= REQ_F_NOWAIT;
@@ -5215,12 +6133,14 @@ static int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
if (req->ctx->compat)
sr->msg_flags |= MSG_CMSG_COMPAT;
#endif
+ sr->done_io = 0;
return 0;
}
static int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags)
{
struct io_async_msghdr iomsg, *kmsg;
+ struct io_sr_msg *sr = &req->sr_msg;
struct socket *sock;
unsigned flags;
int min_ret = 0;
@@ -5239,7 +6159,11 @@ static int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags)
kmsg = &iomsg;
}
- flags = req->sr_msg.msg_flags;
+ if (!(req->flags & REQ_F_POLLED) &&
+ (sr->flags & IORING_RECVSEND_POLL_FIRST))
+ return io_setup_async_msg(req, kmsg);
+
+ flags = sr->msg_flags;
if (issue_flags & IO_URING_F_NONBLOCK)
flags |= MSG_DONTWAIT;
if (flags & MSG_WAITALL)
@@ -5252,12 +6176,21 @@ static int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags)
return io_setup_async_msg(req, kmsg);
if (ret == -ERESTARTSYS)
ret = -EINTR;
+ if (ret > 0 && io_net_retry(sock, flags)) {
+ sr->done_io += ret;
+ req->flags |= REQ_F_PARTIAL_IO;
+ return io_setup_async_msg(req, kmsg);
+ }
req_set_fail(req);
}
/* fast path, check for non-NULL to avoid function call */
if (kmsg->free_iov)
kfree(kmsg->free_iov);
req->flags &= ~REQ_F_NEED_CLEANUP;
+ if (ret >= 0)
+ ret += sr->done_io;
+ else if (sr->done_io)
+ ret = sr->done_io;
__io_req_complete(req, issue_flags, ret, 0);
return 0;
}
@@ -5272,6 +6205,10 @@ static int io_send(struct io_kiocb *req, unsigned int issue_flags)
int min_ret = 0;
int ret;
+ if (!(req->flags & REQ_F_POLLED) &&
+ (sr->flags & IORING_RECVSEND_POLL_FIRST))
+ return -EAGAIN;
+
sock = sock_from_file(req->file);
if (unlikely(!sock))
return -ENOTSOCK;
@@ -5285,7 +6222,7 @@ static int io_send(struct io_kiocb *req, unsigned int issue_flags)
msg.msg_controllen = 0;
msg.msg_namelen = 0;
- flags = req->sr_msg.msg_flags;
+ flags = sr->msg_flags;
if (issue_flags & IO_URING_F_NONBLOCK)
flags |= MSG_DONTWAIT;
if (flags & MSG_WAITALL)
@@ -5298,8 +6235,19 @@ static int io_send(struct io_kiocb *req, unsigned int issue_flags)
return -EAGAIN;
if (ret == -ERESTARTSYS)
ret = -EINTR;
+ if (ret > 0 && io_net_retry(sock, flags)) {
+ sr->len -= ret;
+ sr->buf += ret;
+ sr->done_io += ret;
+ req->flags |= REQ_F_PARTIAL_IO;
+ return -EAGAIN;
+ }
req_set_fail(req);
}
+ if (ret >= 0)
+ ret += sr->done_io;
+ else if (sr->done_io)
+ ret = sr->done_io;
__io_req_complete(req, issue_flags, ret, 0);
return 0;
}
@@ -5391,14 +6339,6 @@ static int io_recvmsg_copy_hdr(struct io_kiocb *req,
return __io_recvmsg_copy_hdr(req, iomsg);
}
-static struct io_buffer *io_recv_buffer_select(struct io_kiocb *req,
- unsigned int issue_flags)
-{
- struct io_sr_msg *sr = &req->sr_msg;
-
- return io_buffer_select(req, &sr->len, sr->bgid, issue_flags);
-}
-
static int io_recvmsg_prep_async(struct io_kiocb *req)
{
int ret;
@@ -5413,12 +6353,16 @@ static int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
struct io_sr_msg *sr = &req->sr_msg;
- if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
+ if (unlikely(sqe->file_index))
+ return -EINVAL;
+ if (unlikely(sqe->addr2 || sqe->file_index))
return -EINVAL;
sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr));
sr->len = READ_ONCE(sqe->len);
- sr->bgid = READ_ONCE(sqe->buf_group);
+ sr->flags = READ_ONCE(sqe->addr2);
+ if (sr->flags & ~IORING_RECVSEND_POLL_FIRST)
+ return -EINVAL;
sr->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL;
if (sr->msg_flags & MSG_DONTWAIT)
req->flags |= REQ_F_NOWAIT;
@@ -5431,19 +6375,12 @@ static int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
return 0;
}
-static bool io_net_retry(struct socket *sock, int flags)
-{
- if (!(flags & MSG_WAITALL))
- return false;
- return sock->type == SOCK_STREAM || sock->type == SOCK_SEQPACKET;
-}
-
static int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags)
{
struct io_async_msghdr iomsg, *kmsg;
struct io_sr_msg *sr = &req->sr_msg;
struct socket *sock;
- struct io_buffer *kbuf;
+ unsigned int cflags;
unsigned flags;
int ret, min_ret = 0;
bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
@@ -5461,24 +6398,30 @@ static int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags)
kmsg = &iomsg;
}
- if (req->flags & REQ_F_BUFFER_SELECT) {
- kbuf = io_recv_buffer_select(req, issue_flags);
- if (IS_ERR(kbuf))
- return PTR_ERR(kbuf);
- kmsg->fast_iov[0].iov_base = u64_to_user_ptr(kbuf->addr);
- kmsg->fast_iov[0].iov_len = req->sr_msg.len;
- iov_iter_init(&kmsg->msg.msg_iter, READ, kmsg->fast_iov,
- 1, req->sr_msg.len);
+ if (!(req->flags & REQ_F_POLLED) &&
+ (sr->flags & IORING_RECVSEND_POLL_FIRST))
+ return io_setup_async_msg(req, kmsg);
+
+ if (io_do_buffer_select(req)) {
+ void __user *buf;
+
+ buf = io_buffer_select(req, &sr->len, issue_flags);
+ if (!buf)
+ return -ENOBUFS;
+ kmsg->fast_iov[0].iov_base = buf;
+ kmsg->fast_iov[0].iov_len = sr->len;
+ iov_iter_init(&kmsg->msg.msg_iter, READ, kmsg->fast_iov, 1,
+ sr->len);
}
- flags = req->sr_msg.msg_flags;
+ flags = sr->msg_flags;
if (force_nonblock)
flags |= MSG_DONTWAIT;
if (flags & MSG_WAITALL)
min_ret = iov_iter_count(&kmsg->msg.msg_iter);
- ret = __sys_recvmsg_sock(sock, &kmsg->msg, req->sr_msg.umsg,
- kmsg->uaddr, flags);
+ kmsg->msg.msg_get_inq = 1;
+ ret = __sys_recvmsg_sock(sock, &kmsg->msg, sr->umsg, kmsg->uaddr, flags);
if (ret < min_ret) {
if (ret == -EAGAIN && force_nonblock)
return io_setup_async_msg(req, kmsg);
@@ -5502,45 +6445,54 @@ static int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags)
ret += sr->done_io;
else if (sr->done_io)
ret = sr->done_io;
- __io_req_complete(req, issue_flags, ret, io_put_kbuf(req, issue_flags));
+ cflags = io_put_kbuf(req, issue_flags);
+ if (kmsg->msg.msg_inq)
+ cflags |= IORING_CQE_F_SOCK_NONEMPTY;
+ __io_req_complete(req, issue_flags, ret, cflags);
return 0;
}
static int io_recv(struct io_kiocb *req, unsigned int issue_flags)
{
- struct io_buffer *kbuf;
struct io_sr_msg *sr = &req->sr_msg;
struct msghdr msg;
- void __user *buf = sr->buf;
struct socket *sock;
struct iovec iov;
+ unsigned int cflags;
unsigned flags;
int ret, min_ret = 0;
bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
+ if (!(req->flags & REQ_F_POLLED) &&
+ (sr->flags & IORING_RECVSEND_POLL_FIRST))
+ return -EAGAIN;
+
sock = sock_from_file(req->file);
if (unlikely(!sock))
return -ENOTSOCK;
- if (req->flags & REQ_F_BUFFER_SELECT) {
- kbuf = io_recv_buffer_select(req, issue_flags);
- if (IS_ERR(kbuf))
- return PTR_ERR(kbuf);
- buf = u64_to_user_ptr(kbuf->addr);
+ if (io_do_buffer_select(req)) {
+ void __user *buf;
+
+ buf = io_buffer_select(req, &sr->len, issue_flags);
+ if (!buf)
+ return -ENOBUFS;
+ sr->buf = buf;
}
- ret = import_single_range(READ, buf, sr->len, &iov, &msg.msg_iter);
+ ret = import_single_range(READ, sr->buf, sr->len, &iov, &msg.msg_iter);
if (unlikely(ret))
goto out_free;
msg.msg_name = NULL;
+ msg.msg_namelen = 0;
msg.msg_control = NULL;
+ msg.msg_get_inq = 1;
+ msg.msg_flags = 0;
msg.msg_controllen = 0;
- msg.msg_namelen = 0;
msg.msg_iocb = NULL;
- msg.msg_flags = 0;
- flags = req->sr_msg.msg_flags;
+ flags = sr->msg_flags;
if (force_nonblock)
flags |= MSG_DONTWAIT;
if (flags & MSG_WAITALL)
@@ -5569,36 +6521,49 @@ out_free:
ret += sr->done_io;
else if (sr->done_io)
ret = sr->done_io;
- __io_req_complete(req, issue_flags, ret, io_put_kbuf(req, issue_flags));
+ cflags = io_put_kbuf(req, issue_flags);
+ if (msg.msg_inq)
+ cflags |= IORING_CQE_F_SOCK_NONEMPTY;
+ __io_req_complete(req, issue_flags, ret, cflags);
return 0;
}
static int io_accept_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
struct io_accept *accept = &req->accept;
+ unsigned flags;
- if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
- return -EINVAL;
- if (sqe->ioprio || sqe->len || sqe->buf_index)
+ if (sqe->len || sqe->buf_index)
return -EINVAL;
accept->addr = u64_to_user_ptr(READ_ONCE(sqe->addr));
accept->addr_len = u64_to_user_ptr(READ_ONCE(sqe->addr2));
accept->flags = READ_ONCE(sqe->accept_flags);
accept->nofile = rlimit(RLIMIT_NOFILE);
+ flags = READ_ONCE(sqe->ioprio);
+ if (flags & ~IORING_ACCEPT_MULTISHOT)
+ return -EINVAL;
accept->file_slot = READ_ONCE(sqe->file_index);
- if (accept->file_slot && (accept->flags & SOCK_CLOEXEC))
- return -EINVAL;
+ if (accept->file_slot) {
+ if (accept->flags & SOCK_CLOEXEC)
+ return -EINVAL;
+ if (flags & IORING_ACCEPT_MULTISHOT &&
+ accept->file_slot != IORING_FILE_INDEX_ALLOC)
+ return -EINVAL;
+ }
if (accept->flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
return -EINVAL;
if (SOCK_NONBLOCK != O_NONBLOCK && (accept->flags & SOCK_NONBLOCK))
accept->flags = (accept->flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
+ if (flags & IORING_ACCEPT_MULTISHOT)
+ req->flags |= REQ_F_APOLL_MULTISHOT;
return 0;
}
static int io_accept(struct io_kiocb *req, unsigned int issue_flags)
{
+ struct io_ring_ctx *ctx = req->ctx;
struct io_accept *accept = &req->accept;
bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
unsigned int file_flags = force_nonblock ? O_NONBLOCK : 0;
@@ -5606,6 +6571,7 @@ static int io_accept(struct io_kiocb *req, unsigned int issue_flags)
struct file *file;
int ret, fd;
+retry:
if (!fixed) {
fd = __get_unused_fd_flags(accept->flags, accept->nofile);
if (unlikely(fd < 0))
@@ -5617,7 +6583,89 @@ static int io_accept(struct io_kiocb *req, unsigned int issue_flags)
if (!fixed)
put_unused_fd(fd);
ret = PTR_ERR(file);
- if (ret == -EAGAIN && force_nonblock)
+ if (ret == -EAGAIN && force_nonblock) {
+ /*
+ * if it's multishot and polled, we don't need to
+ * return EAGAIN to arm the poll infra since it
+ * has already been done
+ */
+ if ((req->flags & IO_APOLL_MULTI_POLLED) ==
+ IO_APOLL_MULTI_POLLED)
+ ret = 0;
+ return ret;
+ }
+ if (ret == -ERESTARTSYS)
+ ret = -EINTR;
+ req_set_fail(req);
+ } else if (!fixed) {
+ fd_install(fd, file);
+ ret = fd;
+ } else {
+ ret = io_fixed_fd_install(req, issue_flags, file,
+ accept->file_slot);
+ }
+
+ if (!(req->flags & REQ_F_APOLL_MULTISHOT)) {
+ __io_req_complete(req, issue_flags, ret, 0);
+ return 0;
+ }
+ if (ret >= 0) {
+ bool filled;
+
+ spin_lock(&ctx->completion_lock);
+ filled = io_fill_cqe_aux(ctx, req->cqe.user_data, ret,
+ IORING_CQE_F_MORE);
+ io_commit_cqring(ctx);
+ spin_unlock(&ctx->completion_lock);
+ if (filled) {
+ io_cqring_ev_posted(ctx);
+ goto retry;
+ }
+ ret = -ECANCELED;
+ }
+
+ return ret;
+}
+
+static int io_socket_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+{
+ struct io_socket *sock = &req->sock;
+
+ if (sqe->addr || sqe->rw_flags || sqe->buf_index)
+ return -EINVAL;
+
+ sock->domain = READ_ONCE(sqe->fd);
+ sock->type = READ_ONCE(sqe->off);
+ sock->protocol = READ_ONCE(sqe->len);
+ sock->file_slot = READ_ONCE(sqe->file_index);
+ sock->nofile = rlimit(RLIMIT_NOFILE);
+
+ sock->flags = sock->type & ~SOCK_TYPE_MASK;
+ if (sock->file_slot && (sock->flags & SOCK_CLOEXEC))
+ return -EINVAL;
+ if (sock->flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
+ return -EINVAL;
+ return 0;
+}
+
+static int io_socket(struct io_kiocb *req, unsigned int issue_flags)
+{
+ struct io_socket *sock = &req->sock;
+ bool fixed = !!sock->file_slot;
+ struct file *file;
+ int ret, fd;
+
+ if (!fixed) {
+ fd = __get_unused_fd_flags(sock->flags, sock->nofile);
+ if (unlikely(fd < 0))
+ return fd;
+ }
+ file = __sys_socket_file(sock->domain, sock->type, sock->protocol);
+ if (IS_ERR(file)) {
+ if (!fixed)
+ put_unused_fd(fd);
+ ret = PTR_ERR(file);
+ if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
return -EAGAIN;
if (ret == -ERESTARTSYS)
ret = -EINTR;
@@ -5627,7 +6675,7 @@ static int io_accept(struct io_kiocb *req, unsigned int issue_flags)
ret = fd;
} else {
ret = io_install_fixed_file(req, file, issue_flags,
- accept->file_slot - 1);
+ sock->file_slot - 1);
}
__io_req_complete(req, issue_flags, ret, 0);
return 0;
@@ -5645,10 +6693,7 @@ static int io_connect_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
struct io_connect *conn = &req->connect;
- if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
- return -EINVAL;
- if (sqe->ioprio || sqe->len || sqe->buf_index || sqe->rw_flags ||
- sqe->splice_fd_in)
+ if (sqe->len || sqe->buf_index || sqe->rw_flags || sqe->splice_fd_in)
return -EINVAL;
conn->addr = u64_to_user_ptr(READ_ONCE(sqe->addr));
@@ -5721,6 +6766,7 @@ IO_NETOP_PREP_ASYNC(sendmsg);
IO_NETOP_PREP_ASYNC(recvmsg);
IO_NETOP_PREP_ASYNC(connect);
IO_NETOP_PREP(accept);
+IO_NETOP_PREP(socket);
IO_NETOP_FN(send);
IO_NETOP_FN(recv);
#endif /* CONFIG_NET */
@@ -5771,7 +6817,7 @@ static void io_poll_req_insert(struct io_kiocb *req)
struct io_ring_ctx *ctx = req->ctx;
struct hlist_head *list;
- list = &ctx->cancel_hash[hash_long(req->user_data, ctx->cancel_hash_bits)];
+ list = &ctx->cancel_hash[hash_long(req->cqe.user_data, ctx->cancel_hash_bits)];
hlist_add_head(&req->hash_node, list);
}
@@ -5830,22 +6876,23 @@ static void io_poll_remove_entries(struct io_kiocb *req)
rcu_read_unlock();
}
+static int io_issue_sqe(struct io_kiocb *req, unsigned int issue_flags);
/*
* All poll tw should go through this. Checks for poll events, manages
* references, does rewait, etc.
*
* Returns a negative error on failure. >0 when no action require, which is
* either spurious wakeup or multishot CQE is served. 0 when it's done with
- * the request, then the mask is stored in req->result.
+ * the request, then the mask is stored in req->cqe.res.
*/
-static int io_poll_check_events(struct io_kiocb *req, bool locked)
+static int io_poll_check_events(struct io_kiocb *req, bool *locked)
{
struct io_ring_ctx *ctx = req->ctx;
- int v;
+ int v, ret;
/* req->task == current here, checking PF_EXITING is safe */
if (unlikely(req->task->flags & PF_EXITING))
- io_poll_mark_cancelled(req);
+ return -ECANCELED;
do {
v = atomic_read(&req->poll_refs);
@@ -5856,32 +6903,46 @@ static int io_poll_check_events(struct io_kiocb *req, bool locked)
if (v & IO_POLL_CANCEL_FLAG)
return -ECANCELED;
- if (!req->result) {
+ if (!req->cqe.res) {
struct poll_table_struct pt = { ._key = req->apoll_events };
unsigned flags = locked ? 0 : IO_URING_F_UNLOCKED;
if (unlikely(!io_assign_file(req, flags)))
return -EBADF;
- req->result = vfs_poll(req->file, &pt) & req->apoll_events;
+ req->cqe.res = vfs_poll(req->file, &pt) & req->apoll_events;
}
- /* multishot, just fill an CQE and proceed */
- if (req->result && !(req->apoll_events & EPOLLONESHOT)) {
- __poll_t mask = mangle_poll(req->result & req->apoll_events);
+ if ((unlikely(!req->cqe.res)))
+ continue;
+ if (req->apoll_events & EPOLLONESHOT)
+ return 0;
+
+ /* multishot, just fill a CQE and proceed */
+ if (!(req->flags & REQ_F_APOLL_MULTISHOT)) {
+ __poll_t mask = mangle_poll(req->cqe.res &
+ req->apoll_events);
bool filled;
spin_lock(&ctx->completion_lock);
- filled = io_fill_cqe_aux(ctx, req->user_data, mask,
- IORING_CQE_F_MORE);
+ filled = io_fill_cqe_aux(ctx, req->cqe.user_data,
+ mask, IORING_CQE_F_MORE);
io_commit_cqring(ctx);
spin_unlock(&ctx->completion_lock);
- if (unlikely(!filled))
- return -ECANCELED;
- io_cqring_ev_posted(ctx);
- } else if (req->result) {
- return 0;
+ if (filled) {
+ io_cqring_ev_posted(ctx);
+ continue;
+ }
+ return -ECANCELED;
}
+ io_tw_lock(req->ctx, locked);
+ if (unlikely(req->task->flags & PF_EXITING))
+ return -EFAULT;
+ ret = io_issue_sqe(req,
+ IO_URING_F_NONBLOCK|IO_URING_F_COMPLETE_DEFER);
+ if (ret)
+ return ret;
+
/*
* Release all references, retry if someone tried to restart
* task_work while we were executing it.
@@ -5896,21 +6957,21 @@ static void io_poll_task_func(struct io_kiocb *req, bool *locked)
struct io_ring_ctx *ctx = req->ctx;
int ret;
- ret = io_poll_check_events(req, *locked);
+ ret = io_poll_check_events(req, locked);
if (ret > 0)
return;
if (!ret) {
- req->result = mangle_poll(req->result & req->poll.events);
+ req->cqe.res = mangle_poll(req->cqe.res & req->poll.events);
} else {
- req->result = ret;
+ req->cqe.res = ret;
req_set_fail(req);
}
io_poll_remove_entries(req);
spin_lock(&ctx->completion_lock);
hash_del(&req->hash_node);
- __io_req_complete_post(req, req->result, 0);
+ __io_req_complete_post(req, req->cqe.res, 0);
io_commit_cqring(ctx);
spin_unlock(&ctx->completion_lock);
io_cqring_ev_posted(ctx);
@@ -5921,7 +6982,7 @@ static void io_apoll_task_func(struct io_kiocb *req, bool *locked)
struct io_ring_ctx *ctx = req->ctx;
int ret;
- ret = io_poll_check_events(req, *locked);
+ ret = io_poll_check_events(req, locked);
if (ret > 0)
return;
@@ -5936,9 +6997,9 @@ static void io_apoll_task_func(struct io_kiocb *req, bool *locked)
io_req_complete_failed(req, ret);
}
-static void __io_poll_execute(struct io_kiocb *req, int mask, int events)
+static void __io_poll_execute(struct io_kiocb *req, int mask, __poll_t events)
{
- req->result = mask;
+ req->cqe.res = mask;
/*
* This is useful for poll that is armed on behalf of another
* request, and where the wakeup path could be on a different
@@ -5951,11 +7012,12 @@ static void __io_poll_execute(struct io_kiocb *req, int mask, int events)
else
req->io_task_work.func = io_apoll_task_func;
- trace_io_uring_task_add(req->ctx, req, req->user_data, req->opcode, mask);
- io_req_task_work_add(req, false);
+ trace_io_uring_task_add(req->ctx, req, req->cqe.user_data, req->opcode, mask);
+ io_req_task_work_add(req);
}
-static inline void io_poll_execute(struct io_kiocb *req, int res, int events)
+static inline void io_poll_execute(struct io_kiocb *req, int res,
+ __poll_t events)
{
if (io_poll_get_ownership(req))
__io_poll_execute(req, res, events);
@@ -5970,6 +7032,7 @@ static void io_poll_cancel_req(struct io_kiocb *req)
#define wqe_to_req(wait) ((void *)((unsigned long) (wait)->private & ~1))
#define wqe_is_double(wait) ((unsigned long) (wait)->private & 1)
+#define IO_ASYNC_POLL_COMMON (EPOLLONESHOT | EPOLLPRI)
static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
void *key)
@@ -6004,7 +7067,7 @@ static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
}
/* for instances that support it check for an event match first */
- if (mask && !(mask & poll->events))
+ if (mask && !(mask & (poll->events & ~IO_ASYNC_POLL_COMMON)))
return 0;
if (io_poll_get_ownership(req)) {
@@ -6090,6 +7153,7 @@ static int __io_arm_poll_handler(struct io_kiocb *req,
int v;
INIT_HLIST_NODE(&req->hash_node);
+ req->work.cancel_seq = atomic_read(&ctx->cancel_seq);
io_init_poll_iocb(poll, mask, io_poll_wake);
poll->file = req->file;
@@ -6160,28 +7224,34 @@ static int io_arm_poll_handler(struct io_kiocb *req, unsigned issue_flags)
struct io_ring_ctx *ctx = req->ctx;
struct async_poll *apoll;
struct io_poll_table ipt;
- __poll_t mask = EPOLLONESHOT | POLLERR | POLLPRI;
+ __poll_t mask = POLLPRI | POLLERR;
int ret;
if (!def->pollin && !def->pollout)
return IO_APOLL_ABORTED;
- if (!file_can_poll(req->file) || (req->flags & REQ_F_POLLED))
+ if (!file_can_poll(req->file))
+ return IO_APOLL_ABORTED;
+ if ((req->flags & (REQ_F_POLLED|REQ_F_PARTIAL_IO)) == REQ_F_POLLED)
return IO_APOLL_ABORTED;
+ if (!(req->flags & REQ_F_APOLL_MULTISHOT))
+ mask |= EPOLLONESHOT;
if (def->pollin) {
- mask |= POLLIN | POLLRDNORM;
+ mask |= EPOLLIN | EPOLLRDNORM;
/* If reading from MSG_ERRQUEUE using recvmsg, ignore POLLIN */
if ((req->opcode == IORING_OP_RECVMSG) &&
(req->sr_msg.msg_flags & MSG_ERRQUEUE))
- mask &= ~POLLIN;
+ mask &= ~EPOLLIN;
} else {
- mask |= POLLOUT | POLLWRNORM;
+ mask |= EPOLLOUT | EPOLLWRNORM;
}
if (def->poll_exclusive)
mask |= EPOLLEXCLUSIVE;
- if (!(issue_flags & IO_URING_F_UNLOCKED) &&
- !list_empty(&ctx->apoll_cache)) {
+ if (req->flags & REQ_F_POLLED) {
+ apoll = req->apoll;
+ } else if (!(issue_flags & IO_URING_F_UNLOCKED) &&
+ !list_empty(&ctx->apoll_cache)) {
apoll = list_first_entry(&ctx->apoll_cache, struct async_poll,
poll.wait.entry);
list_del_init(&apoll->poll.wait.entry);
@@ -6201,7 +7271,7 @@ static int io_arm_poll_handler(struct io_kiocb *req, unsigned issue_flags)
if (ret || ipt.error)
return ret ? IO_APOLL_READY : IO_APOLL_ABORTED;
- trace_io_uring_poll_arm(ctx, req, req->user_data, req->opcode,
+ trace_io_uring_poll_arm(ctx, req, req->cqe.user_data, req->opcode,
mask, apoll->poll.events);
return IO_APOLL_OK;
}
@@ -6234,24 +7304,53 @@ static __cold bool io_poll_remove_all(struct io_ring_ctx *ctx,
return found;
}
-static struct io_kiocb *io_poll_find(struct io_ring_ctx *ctx, __u64 sqe_addr,
- bool poll_only)
+static struct io_kiocb *io_poll_find(struct io_ring_ctx *ctx, bool poll_only,
+ struct io_cancel_data *cd)
__must_hold(&ctx->completion_lock)
{
struct hlist_head *list;
struct io_kiocb *req;
- list = &ctx->cancel_hash[hash_long(sqe_addr, ctx->cancel_hash_bits)];
+ list = &ctx->cancel_hash[hash_long(cd->data, ctx->cancel_hash_bits)];
hlist_for_each_entry(req, list, hash_node) {
- if (sqe_addr != req->user_data)
+ if (cd->data != req->cqe.user_data)
continue;
if (poll_only && req->opcode != IORING_OP_POLL_ADD)
continue;
+ if (cd->flags & IORING_ASYNC_CANCEL_ALL) {
+ if (cd->seq == req->work.cancel_seq)
+ continue;
+ req->work.cancel_seq = cd->seq;
+ }
return req;
}
return NULL;
}
+static struct io_kiocb *io_poll_file_find(struct io_ring_ctx *ctx,
+ struct io_cancel_data *cd)
+ __must_hold(&ctx->completion_lock)
+{
+ struct io_kiocb *req;
+ int i;
+
+ for (i = 0; i < (1U << ctx->cancel_hash_bits); i++) {
+ struct hlist_head *list;
+
+ list = &ctx->cancel_hash[i];
+ hlist_for_each_entry(req, list, hash_node) {
+ if (!(cd->flags & IORING_ASYNC_CANCEL_ANY) &&
+ req->file != cd->file)
+ continue;
+ if (cd->seq == req->work.cancel_seq)
+ continue;
+ req->work.cancel_seq = cd->seq;
+ return req;
+ }
+ }
+ return NULL;
+}
+
static bool io_poll_disarm(struct io_kiocb *req)
__must_hold(&ctx->completion_lock)
{
@@ -6262,12 +7361,15 @@ static bool io_poll_disarm(struct io_kiocb *req)
return true;
}
-static int io_poll_cancel(struct io_ring_ctx *ctx, __u64 sqe_addr,
- bool poll_only)
+static int io_poll_cancel(struct io_ring_ctx *ctx, struct io_cancel_data *cd)
__must_hold(&ctx->completion_lock)
{
- struct io_kiocb *req = io_poll_find(ctx, sqe_addr, poll_only);
+ struct io_kiocb *req;
+ if (cd->flags & (IORING_ASYNC_CANCEL_FD|IORING_ASYNC_CANCEL_ANY))
+ req = io_poll_file_find(ctx, cd);
+ else
+ req = io_poll_find(ctx, false, cd);
if (!req)
return -ENOENT;
io_poll_cancel_req(req);
@@ -6294,9 +7396,7 @@ static int io_poll_update_prep(struct io_kiocb *req,
struct io_poll_update *upd = &req->poll_update;
u32 flags;
- if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
- return -EINVAL;
- if (sqe->ioprio || sqe->buf_index || sqe->splice_fd_in)
+ if (sqe->buf_index || sqe->splice_fd_in)
return -EINVAL;
flags = READ_ONCE(sqe->len);
if (flags & ~(IORING_POLL_UPDATE_EVENTS | IORING_POLL_UPDATE_USER_DATA |
@@ -6326,9 +7426,7 @@ static int io_poll_add_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe
struct io_poll_iocb *poll = &req->poll;
u32 flags;
- if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
- return -EINVAL;
- if (sqe->ioprio || sqe->buf_index || sqe->off || sqe->addr)
+ if (sqe->buf_index || sqe->off || sqe->addr)
return -EINVAL;
flags = READ_ONCE(sqe->len);
if (flags & ~IORING_POLL_ADD_MULTI)
@@ -6358,13 +7456,14 @@ static int io_poll_add(struct io_kiocb *req, unsigned int issue_flags)
static int io_poll_update(struct io_kiocb *req, unsigned int issue_flags)
{
+ struct io_cancel_data cd = { .data = req->poll_update.old_user_data, };
struct io_ring_ctx *ctx = req->ctx;
struct io_kiocb *preq;
int ret2, ret = 0;
bool locked;
spin_lock(&ctx->completion_lock);
- preq = io_poll_find(ctx, req->poll_update.old_user_data, true);
+ preq = io_poll_find(ctx, true, &cd);
if (!preq || !io_poll_disarm(preq)) {
spin_unlock(&ctx->completion_lock);
ret = preq ? -EALREADY : -ENOENT;
@@ -6380,7 +7479,7 @@ static int io_poll_update(struct io_kiocb *req, unsigned int issue_flags)
preq->poll.events |= IO_POLL_UNMASK;
}
if (req->poll_update.update_user_data)
- preq->user_data = req->poll_update.new_user_data;
+ preq->cqe.user_data = req->poll_update.new_user_data;
ret2 = io_poll_add(preq, issue_flags);
/* successfully updated, don't complete poll request */
@@ -6389,7 +7488,7 @@ static int io_poll_update(struct io_kiocb *req, unsigned int issue_flags)
}
req_set_fail(preq);
- preq->result = -ECANCELED;
+ preq->cqe.res = -ECANCELED;
locked = !(issue_flags & IO_URING_F_UNLOCKED);
io_req_task_complete(preq, &locked);
out:
@@ -6417,14 +7516,14 @@ static enum hrtimer_restart io_timeout_fn(struct hrtimer *timer)
if (!(data->flags & IORING_TIMEOUT_ETIME_SUCCESS))
req_set_fail(req);
- req->result = -ETIME;
+ req->cqe.res = -ETIME;
req->io_task_work.func = io_req_task_complete;
- io_req_task_work_add(req, false);
+ io_req_task_work_add(req);
return HRTIMER_NORESTART;
}
static struct io_kiocb *io_timeout_extract(struct io_ring_ctx *ctx,
- __u64 user_data)
+ struct io_cancel_data *cd)
__must_hold(&ctx->timeout_lock)
{
struct io_timeout_data *io;
@@ -6432,9 +7531,16 @@ static struct io_kiocb *io_timeout_extract(struct io_ring_ctx *ctx,
bool found = false;
list_for_each_entry(req, &ctx->timeout_list, timeout.list) {
- found = user_data == req->user_data;
- if (found)
- break;
+ if (!(cd->flags & IORING_ASYNC_CANCEL_ANY) &&
+ cd->data != req->cqe.user_data)
+ continue;
+ if (cd->flags & (IORING_ASYNC_CANCEL_ALL|IORING_ASYNC_CANCEL_ANY)) {
+ if (cd->seq == req->work.cancel_seq)
+ continue;
+ req->work.cancel_seq = cd->seq;
+ }
+ found = true;
+ break;
}
if (!found)
return ERR_PTR(-ENOENT);
@@ -6446,11 +7552,14 @@ static struct io_kiocb *io_timeout_extract(struct io_ring_ctx *ctx,
return req;
}
-static int io_timeout_cancel(struct io_ring_ctx *ctx, __u64 user_data)
+static int io_timeout_cancel(struct io_ring_ctx *ctx, struct io_cancel_data *cd)
__must_hold(&ctx->completion_lock)
- __must_hold(&ctx->timeout_lock)
{
- struct io_kiocb *req = io_timeout_extract(ctx, user_data);
+ struct io_kiocb *req;
+
+ spin_lock_irq(&ctx->timeout_lock);
+ req = io_timeout_extract(ctx, cd);
+ spin_unlock_irq(&ctx->timeout_lock);
if (IS_ERR(req))
return PTR_ERR(req);
@@ -6483,7 +7592,7 @@ static int io_linked_timeout_update(struct io_ring_ctx *ctx, __u64 user_data,
bool found = false;
list_for_each_entry(req, &ctx->ltimeout_list, timeout.list) {
- found = user_data == req->user_data;
+ found = user_data == req->cqe.user_data;
if (found)
break;
}
@@ -6503,7 +7612,8 @@ static int io_timeout_update(struct io_ring_ctx *ctx, __u64 user_data,
struct timespec64 *ts, enum hrtimer_mode mode)
__must_hold(&ctx->timeout_lock)
{
- struct io_kiocb *req = io_timeout_extract(ctx, user_data);
+ struct io_cancel_data cd = { .data = user_data, };
+ struct io_kiocb *req = io_timeout_extract(ctx, &cd);
struct io_timeout_data *data;
if (IS_ERR(req))
@@ -6523,11 +7633,9 @@ static int io_timeout_remove_prep(struct io_kiocb *req,
{
struct io_timeout_rem *tr = &req->timeout_rem;
- if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
- return -EINVAL;
if (unlikely(req->flags & (REQ_F_FIXED_FILE | REQ_F_BUFFER_SELECT)))
return -EINVAL;
- if (sqe->ioprio || sqe->buf_index || sqe->len || sqe->splice_fd_in)
+ if (sqe->buf_index || sqe->len || sqe->splice_fd_in)
return -EINVAL;
tr->ltimeout = false;
@@ -6568,10 +7676,10 @@ static int io_timeout_remove(struct io_kiocb *req, unsigned int issue_flags)
int ret;
if (!(req->timeout_rem.flags & IORING_TIMEOUT_UPDATE)) {
+ struct io_cancel_data cd = { .data = tr->addr, };
+
spin_lock(&ctx->completion_lock);
- spin_lock_irq(&ctx->timeout_lock);
- ret = io_timeout_cancel(ctx, tr->addr);
- spin_unlock_irq(&ctx->timeout_lock);
+ ret = io_timeout_cancel(ctx, &cd);
spin_unlock(&ctx->completion_lock);
} else {
enum hrtimer_mode mode = io_translate_timeout_mode(tr->flags);
@@ -6597,10 +7705,7 @@ static int io_timeout_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe,
unsigned flags;
u32 off = READ_ONCE(sqe->off);
- if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
- return -EINVAL;
- if (sqe->ioprio || sqe->buf_index || sqe->len != 1 ||
- sqe->splice_fd_in)
+ if (sqe->buf_index || sqe->len != 1 || sqe->splice_fd_in)
return -EINVAL;
if (off && is_timeout_link)
return -EINVAL;
@@ -6699,30 +7804,42 @@ add:
return 0;
}
-struct io_cancel_data {
- struct io_ring_ctx *ctx;
- u64 user_data;
-};
-
static bool io_cancel_cb(struct io_wq_work *work, void *data)
{
struct io_kiocb *req = container_of(work, struct io_kiocb, work);
struct io_cancel_data *cd = data;
- return req->ctx == cd->ctx && req->user_data == cd->user_data;
+ if (req->ctx != cd->ctx)
+ return false;
+ if (cd->flags & IORING_ASYNC_CANCEL_ANY) {
+ ;
+ } else if (cd->flags & IORING_ASYNC_CANCEL_FD) {
+ if (req->file != cd->file)
+ return false;
+ } else {
+ if (req->cqe.user_data != cd->data)
+ return false;
+ }
+ if (cd->flags & (IORING_ASYNC_CANCEL_ALL|IORING_ASYNC_CANCEL_ANY)) {
+ if (cd->seq == req->work.cancel_seq)
+ return false;
+ req->work.cancel_seq = cd->seq;
+ }
+ return true;
}
-static int io_async_cancel_one(struct io_uring_task *tctx, u64 user_data,
- struct io_ring_ctx *ctx)
+static int io_async_cancel_one(struct io_uring_task *tctx,
+ struct io_cancel_data *cd)
{
- struct io_cancel_data data = { .ctx = ctx, .user_data = user_data, };
enum io_wq_cancel cancel_ret;
int ret = 0;
+ bool all;
if (!tctx || !tctx->io_wq)
return -ENOENT;
- cancel_ret = io_wq_cancel_cb(tctx->io_wq, io_cancel_cb, &data, false);
+ all = cd->flags & (IORING_ASYNC_CANCEL_ALL|IORING_ASYNC_CANCEL_ANY);
+ cancel_ret = io_wq_cancel_cb(tctx->io_wq, io_cancel_cb, cd, all);
switch (cancel_ret) {
case IO_WQ_CANCEL_OK:
ret = 0;
@@ -6738,14 +7855,14 @@ static int io_async_cancel_one(struct io_uring_task *tctx, u64 user_data,
return ret;
}
-static int io_try_cancel_userdata(struct io_kiocb *req, u64 sqe_addr)
+static int io_try_cancel(struct io_kiocb *req, struct io_cancel_data *cd)
{
struct io_ring_ctx *ctx = req->ctx;
int ret;
WARN_ON_ONCE(!io_wq_current_is_worker() && req->task != current);
- ret = io_async_cancel_one(req->task->io_uring, sqe_addr, ctx);
+ ret = io_async_cancel_one(req->task->io_uring, cd);
/*
* Fall-through even for -EALREADY, as we may have poll armed
* that need unarming.
@@ -6754,56 +7871,98 @@ static int io_try_cancel_userdata(struct io_kiocb *req, u64 sqe_addr)
return 0;
spin_lock(&ctx->completion_lock);
- ret = io_poll_cancel(ctx, sqe_addr, false);
+ ret = io_poll_cancel(ctx, cd);
if (ret != -ENOENT)
goto out;
-
- spin_lock_irq(&ctx->timeout_lock);
- ret = io_timeout_cancel(ctx, sqe_addr);
- spin_unlock_irq(&ctx->timeout_lock);
+ if (!(cd->flags & IORING_ASYNC_CANCEL_FD))
+ ret = io_timeout_cancel(ctx, cd);
out:
spin_unlock(&ctx->completion_lock);
return ret;
}
+#define CANCEL_FLAGS (IORING_ASYNC_CANCEL_ALL | IORING_ASYNC_CANCEL_FD | \
+ IORING_ASYNC_CANCEL_ANY)
+
static int io_async_cancel_prep(struct io_kiocb *req,
const struct io_uring_sqe *sqe)
{
- if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
+ if (unlikely(req->flags & REQ_F_BUFFER_SELECT))
return -EINVAL;
- if (unlikely(req->flags & (REQ_F_FIXED_FILE | REQ_F_BUFFER_SELECT)))
- return -EINVAL;
- if (sqe->ioprio || sqe->off || sqe->len || sqe->cancel_flags ||
- sqe->splice_fd_in)
+ if (sqe->off || sqe->len || sqe->splice_fd_in)
return -EINVAL;
req->cancel.addr = READ_ONCE(sqe->addr);
+ req->cancel.flags = READ_ONCE(sqe->cancel_flags);
+ if (req->cancel.flags & ~CANCEL_FLAGS)
+ return -EINVAL;
+ if (req->cancel.flags & IORING_ASYNC_CANCEL_FD) {
+ if (req->cancel.flags & IORING_ASYNC_CANCEL_ANY)
+ return -EINVAL;
+ req->cancel.fd = READ_ONCE(sqe->fd);
+ }
+
return 0;
}
-static int io_async_cancel(struct io_kiocb *req, unsigned int issue_flags)
+static int __io_async_cancel(struct io_cancel_data *cd, struct io_kiocb *req,
+ unsigned int issue_flags)
{
- struct io_ring_ctx *ctx = req->ctx;
- u64 sqe_addr = req->cancel.addr;
- bool needs_lock = issue_flags & IO_URING_F_UNLOCKED;
+ bool all = cd->flags & (IORING_ASYNC_CANCEL_ALL|IORING_ASYNC_CANCEL_ANY);
+ struct io_ring_ctx *ctx = cd->ctx;
struct io_tctx_node *node;
- int ret;
+ int ret, nr = 0;
- ret = io_try_cancel_userdata(req, sqe_addr);
- if (ret != -ENOENT)
- goto done;
+ do {
+ ret = io_try_cancel(req, cd);
+ if (ret == -ENOENT)
+ break;
+ if (!all)
+ return ret;
+ nr++;
+ } while (1);
/* slow path, try all io-wq's */
- io_ring_submit_lock(ctx, needs_lock);
+ io_ring_submit_lock(ctx, issue_flags);
ret = -ENOENT;
list_for_each_entry(node, &ctx->tctx_list, ctx_node) {
struct io_uring_task *tctx = node->task->io_uring;
- ret = io_async_cancel_one(tctx, req->cancel.addr, ctx);
- if (ret != -ENOENT)
- break;
+ ret = io_async_cancel_one(tctx, cd);
+ if (ret != -ENOENT) {
+ if (!all)
+ break;
+ nr++;
+ }
}
- io_ring_submit_unlock(ctx, needs_lock);
+ io_ring_submit_unlock(ctx, issue_flags);
+ return all ? nr : ret;
+}
+
+static int io_async_cancel(struct io_kiocb *req, unsigned int issue_flags)
+{
+ struct io_cancel_data cd = {
+ .ctx = req->ctx,
+ .data = req->cancel.addr,
+ .flags = req->cancel.flags,
+ .seq = atomic_inc_return(&req->ctx->cancel_seq),
+ };
+ int ret;
+
+ if (cd.flags & IORING_ASYNC_CANCEL_FD) {
+ if (req->flags & REQ_F_FIXED_FILE)
+ req->file = io_file_get_fixed(req, req->cancel.fd,
+ issue_flags);
+ else
+ req->file = io_file_get_normal(req, req->cancel.fd);
+ if (!req->file) {
+ ret = -EBADF;
+ goto done;
+ }
+ cd.file = req->file;
+ }
+
+ ret = __io_async_cancel(&cd, req, issue_flags);
done:
if (ret < 0)
req_set_fail(req);
@@ -6816,7 +7975,7 @@ static int io_rsrc_update_prep(struct io_kiocb *req,
{
if (unlikely(req->flags & (REQ_F_FIXED_FILE | REQ_F_BUFFER_SELECT)))
return -EINVAL;
- if (sqe->ioprio || sqe->rw_flags || sqe->splice_fd_in)
+ if (sqe->rw_flags || sqe->splice_fd_in)
return -EINVAL;
req->rsrc_update.offset = READ_ONCE(sqe->off);
@@ -6830,7 +7989,6 @@ static int io_rsrc_update_prep(struct io_kiocb *req,
static int io_files_update(struct io_kiocb *req, unsigned int issue_flags)
{
struct io_ring_ctx *ctx = req->ctx;
- bool needs_lock = issue_flags & IO_URING_F_UNLOCKED;
struct io_uring_rsrc_update2 up;
int ret;
@@ -6841,10 +7999,10 @@ static int io_files_update(struct io_kiocb *req, unsigned int issue_flags)
up.resv = 0;
up.resv2 = 0;
- io_ring_submit_lock(ctx, needs_lock);
+ io_ring_submit_lock(ctx, issue_flags);
ret = __io_register_rsrc_update(ctx, IORING_RSRC_FILE,
&up, req->rsrc_update.nr_args);
- io_ring_submit_unlock(ctx, needs_lock);
+ io_ring_submit_unlock(ctx, issue_flags);
if (ret < 0)
req_set_fail(req);
@@ -6856,7 +8014,7 @@ static int io_req_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
switch (req->opcode) {
case IORING_OP_NOP:
- return 0;
+ return io_nop_prep(req, sqe);
case IORING_OP_READV:
case IORING_OP_READ_FIXED:
case IORING_OP_READ:
@@ -6930,6 +8088,18 @@ static int io_req_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
return io_linkat_prep(req, sqe);
case IORING_OP_MSG_RING:
return io_msg_ring_prep(req, sqe);
+ case IORING_OP_FSETXATTR:
+ return io_fsetxattr_prep(req, sqe);
+ case IORING_OP_SETXATTR:
+ return io_setxattr_prep(req, sqe);
+ case IORING_OP_FGETXATTR:
+ return io_fgetxattr_prep(req, sqe);
+ case IORING_OP_GETXATTR:
+ return io_getxattr_prep(req, sqe);
+ case IORING_OP_SOCKET:
+ return io_socket_prep(req, sqe);
+ case IORING_OP_URING_CMD:
+ return io_uring_cmd_prep(req, sqe);
}
printk_once(KERN_WARNING "io_uring: unhandled opcode %d\n",
@@ -6939,7 +8109,12 @@ static int io_req_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
static int io_req_prep_async(struct io_kiocb *req)
{
- if (!io_op_defs[req->opcode].needs_async_setup)
+ const struct io_op_def *def = &io_op_defs[req->opcode];
+
+ /* assign early for deferred execution for non-fixed file */
+ if (def->needs_file && !(req->flags & REQ_F_FIXED_FILE))
+ req->file = io_file_get_normal(req, req->cqe.fd);
+ if (!def->needs_async_setup)
return 0;
if (WARN_ON_ONCE(req_has_async_data(req)))
return -EFAULT;
@@ -6957,6 +8132,8 @@ static int io_req_prep_async(struct io_kiocb *req)
return io_recvmsg_prep_async(req);
case IORING_OP_CONNECT:
return io_connect_prep_async(req);
+ case IORING_OP_URING_CMD:
+ return io_uring_cmd_prep_async(req);
}
printk_once(KERN_WARNING "io_uring: prep_async() bad opcode %d\n",
req->opcode);
@@ -6966,9 +8143,10 @@ static int io_req_prep_async(struct io_kiocb *req)
static u32 io_get_sequence(struct io_kiocb *req)
{
u32 seq = req->ctx->cached_sq_head;
+ struct io_kiocb *cur;
/* need original cached_sq_head, but it was increased for each req */
- io_for_each_link(req, req)
+ io_for_each_link(cur, req)
seq--;
return seq;
}
@@ -7011,7 +8189,7 @@ fail:
goto queue;
}
- trace_io_uring_defer(ctx, req, req->user_data, req->opcode);
+ trace_io_uring_defer(ctx, req, req->cqe.user_data, req->opcode);
de->req = req;
de->seq = seq;
list_add_tail(&de->list, &ctx->defer_list);
@@ -7073,6 +8251,12 @@ static void io_clean_op(struct io_kiocb *req)
if (req->statx.filename)
putname(req->statx.filename);
break;
+ case IORING_OP_SETXATTR:
+ case IORING_OP_FSETXATTR:
+ case IORING_OP_GETXATTR:
+ case IORING_OP_FGETXATTR:
+ __io_xattr_finish(req);
+ break;
}
}
if ((req->flags & REQ_F_POLLED) && req->apoll) {
@@ -7095,15 +8279,11 @@ static bool io_assign_file(struct io_kiocb *req, unsigned int issue_flags)
return true;
if (req->flags & REQ_F_FIXED_FILE)
- req->file = io_file_get_fixed(req, req->fd, issue_flags);
+ req->file = io_file_get_fixed(req, req->cqe.fd, issue_flags);
else
- req->file = io_file_get_normal(req, req->fd);
- if (req->file)
- return true;
+ req->file = io_file_get_normal(req, req->cqe.fd);
- req_set_fail(req);
- req->result = -EBADF;
- return false;
+ return !!req->file;
}
static int io_issue_sqe(struct io_kiocb *req, unsigned int issue_flags)
@@ -7233,6 +8413,24 @@ static int io_issue_sqe(struct io_kiocb *req, unsigned int issue_flags)
case IORING_OP_MSG_RING:
ret = io_msg_ring(req, issue_flags);
break;
+ case IORING_OP_FSETXATTR:
+ ret = io_fsetxattr(req, issue_flags);
+ break;
+ case IORING_OP_SETXATTR:
+ ret = io_setxattr(req, issue_flags);
+ break;
+ case IORING_OP_FGETXATTR:
+ ret = io_fgetxattr(req, issue_flags);
+ break;
+ case IORING_OP_GETXATTR:
+ ret = io_getxattr(req, issue_flags);
+ break;
+ case IORING_OP_SOCKET:
+ ret = io_socket(req, issue_flags);
+ break;
+ case IORING_OP_URING_CMD:
+ ret = io_uring_cmd(req, issue_flags);
+ break;
default:
ret = -EINVAL;
break;
@@ -7266,7 +8464,6 @@ static void io_wq_submit_work(struct io_wq_work *work)
const struct io_op_def *def = &io_op_defs[req->opcode];
unsigned int issue_flags = IO_URING_F_UNLOCKED;
bool needs_poll = false;
- struct io_kiocb *timeout;
int ret = 0, err = -ECANCELED;
/* one will be dropped by ->io_free_work() after returning to io-wq */
@@ -7275,10 +8472,7 @@ static void io_wq_submit_work(struct io_wq_work *work)
else
req_ref_get(req);
- timeout = io_prep_linked_timeout(req);
- if (timeout)
- io_queue_linked_timeout(timeout);
-
+ io_arm_ltimeout(req);
/* either cancelled or io-wq is dying, so don't touch tctx->iowq */
if (work->flags & IO_WQ_WORK_CANCEL) {
@@ -7311,6 +8505,8 @@ fail:
* wait for request slots on the block side.
*/
if (!needs_poll) {
+ if (!(req->ctx->flags & IORING_SETUP_IOPOLL))
+ break;
cond_resched();
continue;
}
@@ -7356,8 +8552,7 @@ static inline struct file *io_file_get_fixed(struct io_kiocb *req, int fd,
struct file *file = NULL;
unsigned long file_ptr;
- if (issue_flags & IO_URING_F_UNLOCKED)
- mutex_lock(&ctx->uring_lock);
+ io_ring_submit_lock(ctx, issue_flags);
if (unlikely((unsigned int)fd >= ctx->nr_user_files))
goto out;
@@ -7368,9 +8563,9 @@ static inline struct file *io_file_get_fixed(struct io_kiocb *req, int fd,
/* mask in overlapping REQ_F and FFS bits */
req->flags |= (file_ptr << REQ_F_SUPPORT_NOWAIT_BIT);
io_req_set_rsrc_node(req, ctx, 0);
+ WARN_ON_ONCE(file && !test_bit(fd, ctx->file_table.bitmap));
out:
- if (issue_flags & IO_URING_F_UNLOCKED)
- mutex_unlock(&ctx->uring_lock);
+ io_ring_submit_unlock(ctx, issue_flags);
return file;
}
@@ -7391,7 +8586,7 @@ static struct file *io_file_get_normal(struct io_kiocb *req, int fd)
{
struct file *file = fget(fd);
- trace_io_uring_file_get(req->ctx, req, req->user_data, fd);
+ trace_io_uring_file_get(req->ctx, req, req->cqe.user_data, fd);
/* we don't allow fixed io_uring files */
if (file && file->f_op == &io_uring_fops)
@@ -7405,8 +8600,14 @@ static void io_req_task_link_timeout(struct io_kiocb *req, bool *locked)
int ret = -ENOENT;
if (prev) {
- if (!(req->task->flags & PF_EXITING))
- ret = io_try_cancel_userdata(req, prev->user_data);
+ if (!(req->task->flags & PF_EXITING)) {
+ struct io_cancel_data cd = {
+ .ctx = req->ctx,
+ .data = prev->cqe.user_data,
+ };
+
+ ret = io_try_cancel(req, &cd);
+ }
io_req_complete_post(req, ret ?: -ETIME, 0);
io_put_req(prev);
} else {
@@ -7440,7 +8641,7 @@ static enum hrtimer_restart io_link_timeout_fn(struct hrtimer *timer)
spin_unlock_irqrestore(&ctx->timeout_lock, flags);
req->io_task_work.func = io_req_task_link_timeout;
- io_req_task_work_add(req, false);
+ io_req_task_work_add(req);
return HRTIMER_NORESTART;
}
@@ -7466,10 +8667,17 @@ static void io_queue_linked_timeout(struct io_kiocb *req)
io_put_req(req);
}
-static void io_queue_sqe_arm_apoll(struct io_kiocb *req)
+static void io_queue_async(struct io_kiocb *req, int ret)
__must_hold(&req->ctx->uring_lock)
{
- struct io_kiocb *linked_timeout = io_prep_linked_timeout(req);
+ struct io_kiocb *linked_timeout;
+
+ if (ret != -EAGAIN || (req->flags & REQ_F_NOWAIT)) {
+ io_req_complete_failed(req, ret);
+ return;
+ }
+
+ linked_timeout = io_prep_linked_timeout(req);
switch (io_arm_poll_handler(req, 0)) {
case IO_APOLL_READY:
@@ -7480,7 +8688,7 @@ static void io_queue_sqe_arm_apoll(struct io_kiocb *req)
* Queued up for async execution, worker will release
* submit reference when the iocb is actually submitted.
*/
- io_queue_async_work(req, NULL);
+ io_queue_iowq(req, NULL);
break;
case IO_APOLL_OK:
break;
@@ -7490,10 +8698,9 @@ static void io_queue_sqe_arm_apoll(struct io_kiocb *req)
io_queue_linked_timeout(linked_timeout);
}
-static inline void __io_queue_sqe(struct io_kiocb *req)
+static inline void io_queue_sqe(struct io_kiocb *req)
__must_hold(&req->ctx->uring_lock)
{
- struct io_kiocb *linked_timeout;
int ret;
ret = io_issue_sqe(req, IO_URING_F_NONBLOCK|IO_URING_F_COMPLETE_DEFER);
@@ -7506,22 +8713,23 @@ static inline void __io_queue_sqe(struct io_kiocb *req)
* We async punt it if the file wasn't marked NOWAIT, or if the file
* doesn't support non-blocking read/write attempts
*/
- if (likely(!ret)) {
- linked_timeout = io_prep_linked_timeout(req);
- if (linked_timeout)
- io_queue_linked_timeout(linked_timeout);
- } else if (ret == -EAGAIN && !(req->flags & REQ_F_NOWAIT)) {
- io_queue_sqe_arm_apoll(req);
- } else {
- io_req_complete_failed(req, ret);
- }
+ if (likely(!ret))
+ io_arm_ltimeout(req);
+ else
+ io_queue_async(req, ret);
}
static void io_queue_sqe_fallback(struct io_kiocb *req)
__must_hold(&req->ctx->uring_lock)
{
- if (req->flags & REQ_F_FAIL) {
- io_req_complete_fail_submit(req);
+ if (unlikely(req->flags & REQ_F_FAIL)) {
+ /*
+ * We don't submit, fail them all, for that replace hardlinks
+ * with normal links. Extra REQ_F_LINK is tolerated.
+ */
+ req->flags &= ~REQ_F_HARDLINK;
+ req->flags |= REQ_F_LINK;
+ io_req_complete_failed(req, req->cqe.res);
} else if (unlikely(req->ctx->drain_active)) {
io_drain_req(req);
} else {
@@ -7530,19 +8738,10 @@ static void io_queue_sqe_fallback(struct io_kiocb *req)
if (unlikely(ret))
io_req_complete_failed(req, ret);
else
- io_queue_async_work(req, NULL);
+ io_queue_iowq(req, NULL);
}
}
-static inline void io_queue_sqe(struct io_kiocb *req)
- __must_hold(&req->ctx->uring_lock)
-{
- if (likely(!(req->flags & (REQ_F_FORCE_ASYNC | REQ_F_FAIL))))
- __io_queue_sqe(req);
- else
- io_queue_sqe_fallback(req);
-}
-
/*
* Check SQE restrictions (opcode and flags).
*
@@ -7597,9 +8796,9 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
req->opcode = opcode = READ_ONCE(sqe->opcode);
/* same numerical values with corresponding REQ_F_*, safe to copy */
req->flags = sqe_flags = READ_ONCE(sqe->flags);
- req->user_data = READ_ONCE(sqe->user_data);
+ req->cqe.user_data = READ_ONCE(sqe->user_data);
req->file = NULL;
- req->fixed_rsrc_refs = NULL;
+ req->rsrc_node = NULL;
req->task = current;
if (unlikely(opcode >= IORING_OP_LAST)) {
@@ -7610,9 +8809,11 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
/* enforce forwards compatibility on users */
if (sqe_flags & ~SQE_VALID_FLAGS)
return -EINVAL;
- if ((sqe_flags & IOSQE_BUFFER_SELECT) &&
- !io_op_defs[opcode].buffer_select)
- return -EOPNOTSUPP;
+ if (sqe_flags & IOSQE_BUFFER_SELECT) {
+ if (!io_op_defs[opcode].buffer_select)
+ return -EOPNOTSUPP;
+ req->buf_index = READ_ONCE(sqe->buf_group);
+ }
if (sqe_flags & IOSQE_CQE_SKIP_SUCCESS)
ctx->drain_disabled = true;
if (sqe_flags & IOSQE_IO_DRAIN) {
@@ -7635,10 +8836,15 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
}
}
+ if (!io_op_defs[opcode].ioprio && sqe->ioprio)
+ return -EINVAL;
+ if (!io_op_defs[opcode].iopoll && (ctx->flags & IORING_SETUP_IOPOLL))
+ return -EINVAL;
+
if (io_op_defs[opcode].needs_file) {
struct io_submit_state *state = &ctx->submit_state;
- req->fd = READ_ONCE(sqe->fd);
+ req->cqe.fd = READ_ONCE(sqe->fd);
/*
* Plug now if we have more than 2 IO left after this, and the
@@ -7670,7 +8876,44 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
return io_req_prep(req, sqe);
}
-static int io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
+static __cold int io_submit_fail_init(const struct io_uring_sqe *sqe,
+ struct io_kiocb *req, int ret)
+{
+ struct io_ring_ctx *ctx = req->ctx;
+ struct io_submit_link *link = &ctx->submit_state.link;
+ struct io_kiocb *head = link->head;
+
+ trace_io_uring_req_failed(sqe, ctx, req, ret);
+
+ /*
+ * Avoid breaking links in the middle as it renders links with SQPOLL
+ * unusable. Instead of failing eagerly, continue assembling the link if
+ * applicable and mark the head with REQ_F_FAIL. The link flushing code
+ * should find the flag and handle the rest.
+ */
+ req_fail_link_node(req, ret);
+ if (head && !(head->flags & REQ_F_FAIL))
+ req_fail_link_node(head, -ECANCELED);
+
+ if (!(req->flags & IO_REQ_LINK_FLAGS)) {
+ if (head) {
+ link->last->link = req;
+ link->head = NULL;
+ req = head;
+ }
+ io_queue_sqe_fallback(req);
+ return ret;
+ }
+
+ if (head)
+ link->last->link = req;
+ else
+ link->head = req;
+ link->last = req;
+ return 0;
+}
+
+static inline int io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
const struct io_uring_sqe *sqe)
__must_hold(&ctx->uring_lock)
{
@@ -7678,35 +8921,11 @@ static int io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
int ret;
ret = io_init_req(ctx, req, sqe);
- if (unlikely(ret)) {
- trace_io_uring_req_failed(sqe, ctx, req, ret);
-
- /* fail even hard links since we don't submit */
- if (link->head) {
- /*
- * we can judge a link req is failed or cancelled by if
- * REQ_F_FAIL is set, but the head is an exception since
- * it may be set REQ_F_FAIL because of other req's failure
- * so let's leverage req->result to distinguish if a head
- * is set REQ_F_FAIL because of its failure or other req's
- * failure so that we can set the correct ret code for it.
- * init result here to avoid affecting the normal path.
- */
- if (!(link->head->flags & REQ_F_FAIL))
- req_fail_link_node(link->head, -ECANCELED);
- } else if (!(req->flags & (REQ_F_LINK | REQ_F_HARDLINK))) {
- /*
- * the current req is a normal req, we should return
- * error and thus break the submittion loop.
- */
- io_req_complete_failed(req, ret);
- return ret;
- }
- req_fail_link_node(req, ret);
- }
+ if (unlikely(ret))
+ return io_submit_fail_init(sqe, req, ret);
/* don't need @sqe from now on */
- trace_io_uring_submit_sqe(ctx, req, req->user_data, req->opcode,
+ trace_io_uring_submit_sqe(ctx, req, req->cqe.user_data, req->opcode,
req->flags, true,
ctx->flags & IORING_SETUP_SQPOLL);
@@ -7717,29 +8936,32 @@ static int io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
* submitted sync once the chain is complete. If none of those
* conditions are true (normal request), then just queue it.
*/
- if (link->head) {
- struct io_kiocb *head = link->head;
-
- if (!(req->flags & REQ_F_FAIL)) {
- ret = io_req_prep_async(req);
- if (unlikely(ret)) {
- req_fail_link_node(req, ret);
- if (!(head->flags & REQ_F_FAIL))
- req_fail_link_node(head, -ECANCELED);
- }
- }
- trace_io_uring_link(ctx, req, head);
+ if (unlikely(link->head)) {
+ ret = io_req_prep_async(req);
+ if (unlikely(ret))
+ return io_submit_fail_init(sqe, req, ret);
+
+ trace_io_uring_link(ctx, req, link->head);
link->last->link = req;
link->last = req;
- if (req->flags & (REQ_F_LINK | REQ_F_HARDLINK))
+ if (req->flags & IO_REQ_LINK_FLAGS)
return 0;
- /* last request of a link, enqueue the link */
+ /* last request of the link, flush it */
+ req = link->head;
link->head = NULL;
- req = head;
- } else if (req->flags & (REQ_F_LINK | REQ_F_HARDLINK)) {
- link->head = req;
- link->last = req;
+ if (req->flags & (REQ_F_FORCE_ASYNC | REQ_F_FAIL))
+ goto fallback;
+
+ } else if (unlikely(req->flags & (IO_REQ_LINK_FLAGS |
+ REQ_F_FORCE_ASYNC | REQ_F_FAIL))) {
+ if (req->flags & IO_REQ_LINK_FLAGS) {
+ link->head = req;
+ link->last = req;
+ } else {
+fallback:
+ io_queue_sqe_fallback(req);
+ }
return 0;
}
@@ -7754,8 +8976,8 @@ static void io_submit_state_end(struct io_ring_ctx *ctx)
{
struct io_submit_state *state = &ctx->submit_state;
- if (state->link.head)
- io_queue_sqe(state->link.head);
+ if (unlikely(state->link.head))
+ io_queue_sqe_fallback(state->link.head);
/* flush only after queuing links as they can generate completions */
io_submit_flush_completions(ctx);
if (state->plug_started)
@@ -7809,8 +9031,12 @@ static const struct io_uring_sqe *io_get_sqe(struct io_ring_ctx *ctx)
* though the application is the one updating it.
*/
head = READ_ONCE(ctx->sq_array[sq_idx]);
- if (likely(head < ctx->sq_entries))
+ if (likely(head < ctx->sq_entries)) {
+ /* double index for 128-byte SQEs, twice as long */
+ if (ctx->flags & IORING_SETUP_SQE128)
+ head <<= 1;
return &ctx->sq_sqes[head];
+ }
/* drop invalid entries */
ctx->cq_extra--;
@@ -7823,54 +9049,52 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr)
__must_hold(&ctx->uring_lock)
{
unsigned int entries = io_sqring_entries(ctx);
- int submitted = 0;
+ unsigned int left;
+ int ret;
if (unlikely(!entries))
return 0;
/* make sure SQ entry isn't read before tail */
- nr = min3(nr, ctx->sq_entries, entries);
- io_get_task_refs(nr);
+ ret = left = min3(nr, ctx->sq_entries, entries);
+ io_get_task_refs(left);
+ io_submit_state_start(&ctx->submit_state, left);
- io_submit_state_start(&ctx->submit_state, nr);
do {
const struct io_uring_sqe *sqe;
struct io_kiocb *req;
- if (unlikely(!io_alloc_req_refill(ctx))) {
- if (!submitted)
- submitted = -EAGAIN;
+ if (unlikely(!io_alloc_req_refill(ctx)))
break;
- }
req = io_alloc_req(ctx);
sqe = io_get_sqe(ctx);
if (unlikely(!sqe)) {
- wq_stack_add_head(&req->comp_list, &ctx->submit_state.free_list);
+ io_req_add_to_cache(req, ctx);
break;
}
- /* will complete beyond this point, count as submitted */
- submitted++;
- if (io_submit_sqe(ctx, req, sqe)) {
- /*
- * Continue submitting even for sqe failure if the
- * ring was setup with IORING_SETUP_SUBMIT_ALL
- */
- if (!(ctx->flags & IORING_SETUP_SUBMIT_ALL))
- break;
- }
- } while (submitted < nr);
- if (unlikely(submitted != nr)) {
- int ref_used = (submitted == -EAGAIN) ? 0 : submitted;
- int unused = nr - ref_used;
+ /*
+ * Continue submitting even for sqe failure if the
+ * ring was setup with IORING_SETUP_SUBMIT_ALL
+ */
+ if (unlikely(io_submit_sqe(ctx, req, sqe)) &&
+ !(ctx->flags & IORING_SETUP_SUBMIT_ALL)) {
+ left--;
+ break;
+ }
+ } while (--left);
- current->io_uring->cached_refs += unused;
+ if (unlikely(left)) {
+ ret -= left;
+ /* try again if it submitted nothing and can't allocate a req */
+ if (!ret && io_req_cache_empty(ctx))
+ ret = -EAGAIN;
+ current->io_uring->cached_refs += left;
}
io_submit_state_end(ctx);
/* Commit SQ ring head once we've consumed and submitted all SQEs */
io_commit_sqring(ctx);
-
- return submitted;
+ return ret;
}
static inline bool io_sqd_events_pending(struct io_sq_data *sqd)
@@ -7878,23 +9102,6 @@ static inline bool io_sqd_events_pending(struct io_sq_data *sqd)
return READ_ONCE(sqd->state);
}
-static inline void io_ring_set_wakeup_flag(struct io_ring_ctx *ctx)
-{
- /* Tell userspace we may need a wakeup call */
- spin_lock(&ctx->completion_lock);
- WRITE_ONCE(ctx->rings->sq_flags,
- ctx->rings->sq_flags | IORING_SQ_NEED_WAKEUP);
- spin_unlock(&ctx->completion_lock);
-}
-
-static inline void io_ring_clear_wakeup_flag(struct io_ring_ctx *ctx)
-{
- spin_lock(&ctx->completion_lock);
- WRITE_ONCE(ctx->rings->sq_flags,
- ctx->rings->sq_flags & ~IORING_SQ_NEED_WAKEUP);
- spin_unlock(&ctx->completion_lock);
-}
-
static int __io_sq_thread(struct io_ring_ctx *ctx, bool cap_entries)
{
unsigned int to_submit;
@@ -8010,8 +9217,8 @@ static int io_sq_thread(void *data)
bool needs_sched = true;
list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) {
- io_ring_set_wakeup_flag(ctx);
-
+ atomic_or(IORING_SQ_NEED_WAKEUP,
+ &ctx->rings->sq_flags);
if ((ctx->flags & IORING_SETUP_IOPOLL) &&
!wq_list_empty(&ctx->iopoll_list)) {
needs_sched = false;
@@ -8022,7 +9229,7 @@ static int io_sq_thread(void *data)
* Ensure the store of the wakeup flag is not
* reordered with the load of the SQ tail
*/
- smp_mb();
+ smp_mb__after_atomic();
if (io_sqring_entries(ctx)) {
needs_sched = false;
@@ -8036,7 +9243,8 @@ static int io_sq_thread(void *data)
mutex_lock(&sqd->lock);
}
list_for_each_entry(ctx, &sqd->ctx_list, sqd_list)
- io_ring_clear_wakeup_flag(ctx);
+ atomic_andnot(IORING_SQ_NEED_WAKEUP,
+ &ctx->rings->sq_flags);
}
finish_wait(&sqd->wait, &wait);
@@ -8046,7 +9254,7 @@ static int io_sq_thread(void *data)
io_uring_cancel_generic(true, sqd);
sqd->thread = NULL;
list_for_each_entry(ctx, &sqd->ctx_list, sqd_list)
- io_ring_set_wakeup_flag(ctx);
+ atomic_or(IORING_SQ_NEED_WAKEUP, &ctx->rings->sq_flags);
io_run_task_work();
mutex_unlock(&sqd->lock);
@@ -8086,7 +9294,8 @@ static int io_wake_function(struct wait_queue_entry *curr, unsigned int mode,
* Cannot safely flush overflowed CQEs from here, ensure we wake up
* the task, and the next invocation will do it.
*/
- if (io_should_wake(iowq) || test_bit(0, &iowq->ctx->check_cq_overflow))
+ if (io_should_wake(iowq) ||
+ test_bit(IO_CHECK_CQ_OVERFLOW_BIT, &iowq->ctx->check_cq))
return autoremove_wake_function(curr, mode, wake_flags, key);
return -1;
}
@@ -8108,15 +9317,18 @@ static inline int io_cqring_wait_schedule(struct io_ring_ctx *ctx,
ktime_t timeout)
{
int ret;
+ unsigned long check_cq;
/* make sure we run task_work before checking for signals */
ret = io_run_task_work_sig();
if (ret || io_should_wake(iowq))
return ret;
+ check_cq = READ_ONCE(ctx->check_cq);
/* let the caller flush overflows, retry */
- if (test_bit(0, &ctx->check_cq_overflow))
+ if (check_cq & BIT(IO_CHECK_CQ_OVERFLOW_BIT))
return 1;
-
+ if (unlikely(check_cq & BIT(IO_CHECK_CQ_DROPPED_BIT)))
+ return -EBADR;
if (!schedule_hrtimeout(&timeout, HRTIMER_MODE_ABS))
return -ETIME;
return 1;
@@ -8181,10 +9393,10 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
prepare_to_wait_exclusive(&ctx->cq_wait, &iowq.wq,
TASK_INTERRUPTIBLE);
ret = io_cqring_wait_schedule(ctx, &iowq, timeout);
- finish_wait(&ctx->cq_wait, &iowq.wq);
cond_resched();
} while (ret > 0);
+ finish_wait(&ctx->cq_wait, &iowq.wq);
restore_saved_sigmask_unless(ret == -EINTR);
return READ_ONCE(rings->cq.head) == READ_ONCE(rings->cq.tail) ? ret : 0;
@@ -8422,17 +9634,57 @@ static bool io_alloc_file_tables(struct io_file_table *table, unsigned nr_files)
{
table->files = kvcalloc(nr_files, sizeof(table->files[0]),
GFP_KERNEL_ACCOUNT);
- return !!table->files;
+ if (unlikely(!table->files))
+ return false;
+
+ table->bitmap = bitmap_zalloc(nr_files, GFP_KERNEL_ACCOUNT);
+ if (unlikely(!table->bitmap)) {
+ kvfree(table->files);
+ return false;
+ }
+
+ return true;
}
static void io_free_file_tables(struct io_file_table *table)
{
kvfree(table->files);
+ bitmap_free(table->bitmap);
table->files = NULL;
+ table->bitmap = NULL;
+}
+
+static inline void io_file_bitmap_set(struct io_file_table *table, int bit)
+{
+ WARN_ON_ONCE(test_bit(bit, table->bitmap));
+ __set_bit(bit, table->bitmap);
+ if (bit == table->alloc_hint)
+ table->alloc_hint++;
+}
+
+static inline void io_file_bitmap_clear(struct io_file_table *table, int bit)
+{
+ __clear_bit(bit, table->bitmap);
+ table->alloc_hint = bit;
}
static void __io_sqe_files_unregister(struct io_ring_ctx *ctx)
{
+#if !defined(IO_URING_SCM_ALL)
+ int i;
+
+ for (i = 0; i < ctx->nr_user_files; i++) {
+ struct file *file = io_file_from_index(ctx, i);
+
+ if (!file)
+ continue;
+ if (io_fixed_file_slot(&ctx->file_table, i)->file_ptr & FFS_SCM)
+ continue;
+ io_file_bitmap_clear(&ctx->file_table, i);
+ fput(file);
+ }
+#endif
+
#if defined(CONFIG_UNIX)
if (ctx->ring_sock) {
struct sock *sock = ctx->ring_sock->sk;
@@ -8441,16 +9693,6 @@ static void __io_sqe_files_unregister(struct io_ring_ctx *ctx)
while ((skb = skb_dequeue(&sock->sk_receive_queue)) != NULL)
kfree_skb(skb);
}
-#else
- int i;
-
- for (i = 0; i < ctx->nr_user_files; i++) {
- struct file *file;
-
- file = io_file_from_index(ctx, i);
- if (file)
- fput(file);
- }
#endif
io_free_file_tables(&ctx->file_table);
io_rsrc_data_free(ctx->file_data);
@@ -8595,107 +9837,66 @@ static struct io_sq_data *io_get_sq_data(struct io_uring_params *p,
return sqd;
}
-#if defined(CONFIG_UNIX)
/*
* Ensure the UNIX gc is aware of our file set, so we are certain that
* the io_uring can be safely unregistered on process exit, even if we have
- * loops in the file referencing.
+ * loops in the file referencing. We account only files that can hold other
+ * files because otherwise they can't form a loop and so are not interesting
+ * for GC.
*/
-static int __io_sqe_files_scm(struct io_ring_ctx *ctx, int nr, int offset)
+static int io_scm_file_account(struct io_ring_ctx *ctx, struct file *file)
{
+#if defined(CONFIG_UNIX)
struct sock *sk = ctx->ring_sock->sk;
+ struct sk_buff_head *head = &sk->sk_receive_queue;
struct scm_fp_list *fpl;
struct sk_buff *skb;
- int i, nr_files;
- fpl = kzalloc(sizeof(*fpl), GFP_KERNEL);
- if (!fpl)
- return -ENOMEM;
-
- skb = alloc_skb(0, GFP_KERNEL);
- if (!skb) {
- kfree(fpl);
- return -ENOMEM;
- }
+ if (likely(!io_file_need_scm(file)))
+ return 0;
- skb->sk = sk;
+ /*
+ * See if we can merge this file into an existing skb SCM_RIGHTS
+ * file set. If there's no room, fall back to allocating a new skb
+ * and filling it in.
+ */
+ spin_lock_irq(&head->lock);
+ skb = skb_peek(head);
+ if (skb && UNIXCB(skb).fp->count < SCM_MAX_FD)
+ __skb_unlink(skb, head);
+ else
+ skb = NULL;
+ spin_unlock_irq(&head->lock);
- nr_files = 0;
- fpl->user = get_uid(current_user());
- for (i = 0; i < nr; i++) {
- struct file *file = io_file_from_index(ctx, i + offset);
+ if (!skb) {
+ fpl = kzalloc(sizeof(*fpl), GFP_KERNEL);
+ if (!fpl)
+ return -ENOMEM;
- if (!file)
- continue;
- fpl->fp[nr_files] = get_file(file);
- unix_inflight(fpl->user, fpl->fp[nr_files]);
- nr_files++;
- }
+ skb = alloc_skb(0, GFP_KERNEL);
+ if (!skb) {
+ kfree(fpl);
+ return -ENOMEM;
+ }
- if (nr_files) {
+ fpl->user = get_uid(current_user());
fpl->max = SCM_MAX_FD;
- fpl->count = nr_files;
+ fpl->count = 0;
+
UNIXCB(skb).fp = fpl;
+ skb->sk = sk;
skb->destructor = unix_destruct_scm;
refcount_add(skb->truesize, &sk->sk_wmem_alloc);
- skb_queue_head(&sk->sk_receive_queue, skb);
-
- for (i = 0; i < nr; i++) {
- struct file *file = io_file_from_index(ctx, i + offset);
-
- if (file)
- fput(file);
- }
- } else {
- kfree_skb(skb);
- free_uid(fpl->user);
- kfree(fpl);
- }
-
- return 0;
-}
-
-/*
- * If UNIX sockets are enabled, fd passing can cause a reference cycle which
- * causes regular reference counting to break down. We rely on the UNIX
- * garbage collection to take care of this problem for us.
- */
-static int io_sqe_files_scm(struct io_ring_ctx *ctx)
-{
- unsigned left, total;
- int ret = 0;
-
- total = 0;
- left = ctx->nr_user_files;
- while (left) {
- unsigned this_files = min_t(unsigned, left, SCM_MAX_FD);
-
- ret = __io_sqe_files_scm(ctx, this_files, total);
- if (ret)
- break;
- left -= this_files;
- total += this_files;
- }
-
- if (!ret)
- return 0;
-
- while (total < ctx->nr_user_files) {
- struct file *file = io_file_from_index(ctx, total);
-
- if (file)
- fput(file);
- total++;
}
- return ret;
-}
-#else
-static int io_sqe_files_scm(struct io_ring_ctx *ctx)
-{
+ fpl = UNIXCB(skb).fp;
+ fpl->fp[fpl->count++] = get_file(file);
+ unix_inflight(fpl->user, file);
+ skb_queue_head(head, skb);
+ fput(file);
+#endif
return 0;
}
-#endif
static void io_rsrc_file_put(struct io_ring_ctx *ctx, struct io_rsrc_put *prsrc)
{
@@ -8706,6 +9907,11 @@ static void io_rsrc_file_put(struct io_ring_ctx *ctx, struct io_rsrc_put *prsrc)
struct sk_buff *skb;
int i;
+ if (!io_file_need_scm(file)) {
+ fput(file);
+ return;
+ }
+
__skb_queue_head_init(&list);
/*
@@ -8770,15 +9976,17 @@ static void __io_rsrc_put_work(struct io_rsrc_node *ref_node)
list_del(&prsrc->list);
if (prsrc->tag) {
- bool lock_ring = ctx->flags & IORING_SETUP_IOPOLL;
+ if (ctx->flags & IORING_SETUP_IOPOLL)
+ mutex_lock(&ctx->uring_lock);
- io_ring_submit_lock(ctx, lock_ring);
spin_lock(&ctx->completion_lock);
io_fill_cqe_aux(ctx, prsrc->tag, 0, 0);
io_commit_cqring(ctx);
spin_unlock(&ctx->completion_lock);
io_cqring_ev_posted(ctx);
- io_ring_submit_unlock(ctx, lock_ring);
+
+ if (ctx->flags & IORING_SETUP_IOPOLL)
+ mutex_unlock(&ctx->uring_lock);
}
rsrc_data->do_put(ctx, prsrc);
@@ -8832,27 +10040,31 @@ static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg,
if (ret)
return ret;
- ret = -ENOMEM;
- if (!io_alloc_file_tables(&ctx->file_table, nr_args))
- goto out_free;
+ if (!io_alloc_file_tables(&ctx->file_table, nr_args)) {
+ io_rsrc_data_free(ctx->file_data);
+ ctx->file_data = NULL;
+ return -ENOMEM;
+ }
for (i = 0; i < nr_args; i++, ctx->nr_user_files++) {
- if (copy_from_user(&fd, &fds[i], sizeof(fd))) {
+ struct io_fixed_file *file_slot;
+
+ if (fds && copy_from_user(&fd, &fds[i], sizeof(fd))) {
ret = -EFAULT;
- goto out_fput;
+ goto fail;
}
/* allow sparse sets */
- if (fd == -1) {
+ if (!fds || fd == -1) {
ret = -EINVAL;
if (unlikely(*io_get_tag_slot(ctx->file_data, i)))
- goto out_fput;
+ goto fail;
continue;
}
file = fget(fd);
ret = -EBADF;
if (unlikely(!file))
- goto out_fput;
+ goto fail;
/*
* Don't allow io_uring instances to be registered. If UNIX
@@ -8863,74 +10075,23 @@ static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg,
*/
if (file->f_op == &io_uring_fops) {
fput(file);
- goto out_fput;
+ goto fail;
}
- io_fixed_file_set(io_fixed_file_slot(&ctx->file_table, i), file);
- }
-
- ret = io_sqe_files_scm(ctx);
- if (ret) {
- __io_sqe_files_unregister(ctx);
- return ret;
- }
-
- io_rsrc_node_switch(ctx, NULL);
- return ret;
-out_fput:
- for (i = 0; i < ctx->nr_user_files; i++) {
- file = io_file_from_index(ctx, i);
- if (file)
+ ret = io_scm_file_account(ctx, file);
+ if (ret) {
fput(file);
- }
- io_free_file_tables(&ctx->file_table);
- ctx->nr_user_files = 0;
-out_free:
- io_rsrc_data_free(ctx->file_data);
- ctx->file_data = NULL;
- return ret;
-}
-
-static int io_sqe_file_register(struct io_ring_ctx *ctx, struct file *file,
- int index)
-{
-#if defined(CONFIG_UNIX)
- struct sock *sock = ctx->ring_sock->sk;
- struct sk_buff_head *head = &sock->sk_receive_queue;
- struct sk_buff *skb;
-
- /*
- * See if we can merge this file into an existing skb SCM_RIGHTS
- * file set. If there's no room, fall back to allocating a new skb
- * and filling it in.
- */
- spin_lock_irq(&head->lock);
- skb = skb_peek(head);
- if (skb) {
- struct scm_fp_list *fpl = UNIXCB(skb).fp;
-
- if (fpl->count < SCM_MAX_FD) {
- __skb_unlink(skb, head);
- spin_unlock_irq(&head->lock);
- fpl->fp[fpl->count] = get_file(file);
- unix_inflight(fpl->user, fpl->fp[fpl->count]);
- fpl->count++;
- spin_lock_irq(&head->lock);
- __skb_queue_head(head, skb);
- } else {
- skb = NULL;
+ goto fail;
}
- }
- spin_unlock_irq(&head->lock);
-
- if (skb) {
- fput(file);
- return 0;
+ file_slot = io_fixed_file_slot(&ctx->file_table, i);
+ io_fixed_file_set(file_slot, file);
+ io_file_bitmap_set(&ctx->file_table, i);
}
- return __io_sqe_files_scm(ctx, 1, index);
-#else
+ io_rsrc_node_switch(ctx, NULL);
return 0;
-#endif
+fail:
+ __io_sqe_files_unregister(ctx);
+ return ret;
}
static int io_queue_rsrc_removal(struct io_rsrc_data *data, unsigned idx,
@@ -8954,12 +10115,11 @@ static int io_install_fixed_file(struct io_kiocb *req, struct file *file,
unsigned int issue_flags, u32 slot_index)
{
struct io_ring_ctx *ctx = req->ctx;
- bool needs_lock = issue_flags & IO_URING_F_UNLOCKED;
bool needs_switch = false;
struct io_fixed_file *file_slot;
int ret = -EBADF;
- io_ring_submit_lock(ctx, needs_lock);
+ io_ring_submit_lock(ctx, issue_flags);
if (file->f_op == &io_uring_fops)
goto err;
ret = -ENXIO;
@@ -8985,22 +10145,20 @@ static int io_install_fixed_file(struct io_kiocb *req, struct file *file,
if (ret)
goto err;
file_slot->file_ptr = 0;
+ io_file_bitmap_clear(&ctx->file_table, slot_index);
needs_switch = true;
}
- *io_get_tag_slot(ctx->file_data, slot_index) = 0;
- io_fixed_file_set(file_slot, file);
- ret = io_sqe_file_register(ctx, file, slot_index);
- if (ret) {
- file_slot->file_ptr = 0;
- goto err;
+ ret = io_scm_file_account(ctx, file);
+ if (!ret) {
+ *io_get_tag_slot(ctx->file_data, slot_index) = 0;
+ io_fixed_file_set(file_slot, file);
+ io_file_bitmap_set(&ctx->file_table, slot_index);
}
-
- ret = 0;
err:
if (needs_switch)
io_rsrc_node_switch(ctx, ctx->file_data);
- io_ring_submit_unlock(ctx, needs_lock);
+ io_ring_submit_unlock(ctx, issue_flags);
if (ret)
fput(file);
return ret;
@@ -9010,12 +10168,11 @@ static int io_close_fixed(struct io_kiocb *req, unsigned int issue_flags)
{
unsigned int offset = req->close.file_slot - 1;
struct io_ring_ctx *ctx = req->ctx;
- bool needs_lock = issue_flags & IO_URING_F_UNLOCKED;
struct io_fixed_file *file_slot;
struct file *file;
int ret;
- io_ring_submit_lock(ctx, needs_lock);
+ io_ring_submit_lock(ctx, issue_flags);
ret = -ENXIO;
if (unlikely(!ctx->file_data))
goto out;
@@ -9038,10 +10195,11 @@ static int io_close_fixed(struct io_kiocb *req, unsigned int issue_flags)
goto out;
file_slot->file_ptr = 0;
+ io_file_bitmap_clear(&ctx->file_table, offset);
io_rsrc_node_switch(ctx, ctx->file_data);
ret = 0;
out:
- io_ring_submit_unlock(ctx, needs_lock);
+ io_ring_submit_unlock(ctx, issue_flags);
return ret;
}
@@ -9087,6 +10245,7 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx,
if (err)
break;
file_slot->file_ptr = 0;
+ io_file_bitmap_clear(&ctx->file_table, i);
needs_switch = true;
}
if (fd != -1) {
@@ -9108,14 +10267,14 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx,
err = -EBADF;
break;
}
- *io_get_tag_slot(data, i) = tag;
- io_fixed_file_set(file_slot, file);
- err = io_sqe_file_register(ctx, file, i);
+ err = io_scm_file_account(ctx, file);
if (err) {
- file_slot->file_ptr = 0;
fput(file);
break;
}
+ *io_get_tag_slot(data, i) = tag;
+ io_fixed_file_set(file_slot, file);
+ io_file_bitmap_set(&ctx->file_table, i);
}
}
@@ -9195,7 +10354,7 @@ static __cold int io_uring_alloc_task_context(struct task_struct *task,
task->io_uring = tctx;
spin_lock_init(&tctx->task_lock);
INIT_WQ_LIST(&tctx->task_list);
- INIT_WQ_LIST(&tctx->prior_task_list);
+ INIT_WQ_LIST(&tctx->prio_task_list);
init_task_work(&tctx->task_work, tctx_task_work);
return 0;
}
@@ -9373,8 +10532,8 @@ static void *io_mem_alloc(size_t size)
return (void *) __get_free_pages(gfp, get_order(size));
}
-static unsigned long rings_size(unsigned sq_entries, unsigned cq_entries,
- size_t *sq_offset)
+static unsigned long rings_size(struct io_ring_ctx *ctx, unsigned int sq_entries,
+ unsigned int cq_entries, size_t *sq_offset)
{
struct io_rings *rings;
size_t off, sq_array_size;
@@ -9382,6 +10541,10 @@ static unsigned long rings_size(unsigned sq_entries, unsigned cq_entries,
off = struct_size(rings, cqes, cq_entries);
if (off == SIZE_MAX)
return SIZE_MAX;
+ if (ctx->flags & IORING_SETUP_CQE32) {
+ if (check_shl_overflow(off, 1, &off))
+ return SIZE_MAX;
+ }
#ifdef CONFIG_SMP
off = ALIGN(off, SMP_CACHE_BYTES);
@@ -9543,30 +10706,18 @@ static int io_buffer_account_pin(struct io_ring_ctx *ctx, struct page **pages,
return ret;
}
-static int io_sqe_buffer_register(struct io_ring_ctx *ctx, struct iovec *iov,
- struct io_mapped_ubuf **pimu,
- struct page **last_hpage)
+static struct page **io_pin_pages(unsigned long ubuf, unsigned long len,
+ int *npages)
{
- struct io_mapped_ubuf *imu = NULL;
+ unsigned long start, end, nr_pages;
struct vm_area_struct **vmas = NULL;
struct page **pages = NULL;
- unsigned long off, start, end, ubuf;
- size_t size;
- int ret, pret, nr_pages, i;
+ int i, pret, ret = -ENOMEM;
- if (!iov->iov_base) {
- *pimu = ctx->dummy_ubuf;
- return 0;
- }
-
- ubuf = (unsigned long) iov->iov_base;
- end = (ubuf + iov->iov_len + PAGE_SIZE - 1) >> PAGE_SHIFT;
+ end = (ubuf + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
start = ubuf >> PAGE_SHIFT;
nr_pages = end - start;
- *pimu = NULL;
- ret = -ENOMEM;
-
pages = kvmalloc_array(nr_pages, sizeof(struct page *), GFP_KERNEL);
if (!pages)
goto done;
@@ -9576,10 +10727,6 @@ static int io_sqe_buffer_register(struct io_ring_ctx *ctx, struct iovec *iov,
if (!vmas)
goto done;
- imu = kvmalloc(struct_size(imu, bvec, nr_pages), GFP_KERNEL);
- if (!imu)
- goto done;
-
ret = 0;
mmap_read_lock(current->mm);
pret = pin_user_pages(ubuf, nr_pages, FOLL_WRITE | FOLL_LONGTERM,
@@ -9597,6 +10744,7 @@ static int io_sqe_buffer_register(struct io_ring_ctx *ctx, struct iovec *iov,
break;
}
}
+ *npages = nr_pages;
} else {
ret = pret < 0 ? pret : -EFAULT;
}
@@ -9610,14 +10758,53 @@ static int io_sqe_buffer_register(struct io_ring_ctx *ctx, struct iovec *iov,
unpin_user_pages(pages, pret);
goto done;
}
+ ret = 0;
+done:
+ kvfree(vmas);
+ if (ret < 0) {
+ kvfree(pages);
+ pages = ERR_PTR(ret);
+ }
+ return pages;
+}
+
+static int io_sqe_buffer_register(struct io_ring_ctx *ctx, struct iovec *iov,
+ struct io_mapped_ubuf **pimu,
+ struct page **last_hpage)
+{
+ struct io_mapped_ubuf *imu = NULL;
+ struct page **pages = NULL;
+ unsigned long off;
+ size_t size;
+ int ret, nr_pages, i;
+
+ if (!iov->iov_base) {
+ *pimu = ctx->dummy_ubuf;
+ return 0;
+ }
+
+ *pimu = NULL;
+ ret = -ENOMEM;
+
+ pages = io_pin_pages((unsigned long) iov->iov_base, iov->iov_len,
+ &nr_pages);
+ if (IS_ERR(pages)) {
+ ret = PTR_ERR(pages);
+ pages = NULL;
+ goto done;
+ }
+
+ imu = kvmalloc(struct_size(imu, bvec, nr_pages), GFP_KERNEL);
+ if (!imu)
+ goto done;
- ret = io_buffer_account_pin(ctx, pages, pret, imu, last_hpage);
+ ret = io_buffer_account_pin(ctx, pages, nr_pages, imu, last_hpage);
if (ret) {
- unpin_user_pages(pages, pret);
+ unpin_user_pages(pages, nr_pages);
goto done;
}
- off = ubuf & ~PAGE_MASK;
+ off = (unsigned long) iov->iov_base & ~PAGE_MASK;
size = iov->iov_len;
for (i = 0; i < nr_pages; i++) {
size_t vec_len;
@@ -9630,8 +10817,8 @@ static int io_sqe_buffer_register(struct io_ring_ctx *ctx, struct iovec *iov,
size -= vec_len;
}
/* store original address for later verification */
- imu->ubuf = ubuf;
- imu->ubuf_end = ubuf + iov->iov_len;
+ imu->ubuf = (unsigned long) iov->iov_base;
+ imu->ubuf_end = imu->ubuf + iov->iov_len;
imu->nr_bvecs = nr_pages;
*pimu = imu;
ret = 0;
@@ -9639,7 +10826,6 @@ done:
if (ret)
kvfree(imu);
kvfree(pages);
- kvfree(vmas);
return ret;
}
@@ -9698,12 +10884,17 @@ static int io_sqe_buffers_register(struct io_ring_ctx *ctx, void __user *arg,
}
for (i = 0; i < nr_args; i++, ctx->nr_user_bufs++) {
- ret = io_copy_iov(ctx, &iov, arg, i);
- if (ret)
- break;
- ret = io_buffer_validate(&iov);
- if (ret)
- break;
+ if (arg) {
+ ret = io_copy_iov(ctx, &iov, arg, i);
+ if (ret)
+ break;
+ ret = io_buffer_validate(&iov);
+ if (ret)
+ break;
+ } else {
+ memset(&iov, 0, sizeof(iov));
+ }
+
if (!iov.iov_base && *io_get_tag_slot(data, i)) {
ret = -EINVAL;
break;
@@ -9842,19 +11033,19 @@ static int io_eventfd_unregister(struct io_ring_ctx *ctx)
static void io_destroy_buffers(struct io_ring_ctx *ctx)
{
+ struct io_buffer_list *bl;
+ unsigned long index;
int i;
- for (i = 0; i < (1U << IO_BUFFERS_HASH_BITS); i++) {
- struct list_head *list = &ctx->io_buffers[i];
-
- while (!list_empty(list)) {
- struct io_buffer_list *bl;
+ for (i = 0; i < BGID_ARRAY; i++) {
+ if (!ctx->io_bl)
+ break;
+ __io_remove_buffers(ctx, &ctx->io_bl[i], -1U);
+ }
- bl = list_first_entry(list, struct io_buffer_list, list);
- __io_remove_buffers(ctx, bl, -1U);
- list_del(&bl->list);
- kfree(bl);
- }
+ xa_for_each(&ctx->io_bl_xa, index, bl) {
+ xa_erase(&ctx->io_bl_xa, bl->bgid);
+ __io_remove_buffers(ctx, bl, -1U);
}
while (!list_empty(&ctx->io_buffers_pages)) {
@@ -9874,7 +11065,7 @@ static void io_req_caches_free(struct io_ring_ctx *ctx)
mutex_lock(&ctx->uring_lock);
io_flush_cached_locked_reqs(ctx, state);
- while (state->free_list.next) {
+ while (!io_req_cache_empty(ctx)) {
struct io_wq_work_node *node;
struct io_kiocb *req;
@@ -9963,7 +11154,8 @@ static __cold void io_ring_ctx_free(struct io_ring_ctx *ctx)
io_wq_put_hash(ctx->hash_map);
kfree(ctx->cancel_hash);
kfree(ctx->dummy_ubuf);
- kfree(ctx->io_buffers);
+ kfree(ctx->io_bl);
+ xa_destroy(&ctx->io_bl_xa);
kfree(ctx);
}
@@ -9994,7 +11186,8 @@ static __poll_t io_uring_poll(struct file *file, poll_table *wait)
* Users may get EPOLLIN meanwhile seeing nothing in cqring, this
* pushs them to do the flush.
*/
- if (io_cqring_events(ctx) || test_bit(0, &ctx->check_cq_overflow))
+ if (io_cqring_events(ctx) ||
+ test_bit(IO_CHECK_CQ_OVERFLOW_BIT, &ctx->check_cq))
mask |= EPOLLIN | EPOLLRDNORM;
return mask;
@@ -10126,8 +11319,7 @@ static __cold bool io_kill_timeouts(struct io_ring_ctx *ctx,
}
}
spin_unlock_irq(&ctx->timeout_lock);
- if (canceled != 0)
- io_commit_cqring(ctx);
+ io_commit_cqring(ctx);
spin_unlock(&ctx->completion_lock);
if (canceled != 0)
io_cqring_ev_posted(ctx);
@@ -10147,11 +11339,13 @@ static __cold void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx)
io_unregister_personality(ctx, index);
mutex_unlock(&ctx->uring_lock);
- io_kill_timeouts(ctx, NULL, true);
- io_poll_remove_all(ctx, NULL, true);
-
- /* if we failed setting up the ctx, we might not have any rings */
- io_iopoll_try_reap_events(ctx);
+ /* failed during ring init, it couldn't have issued any requests */
+ if (ctx->rings) {
+ io_kill_timeouts(ctx, NULL, true);
+ io_poll_remove_all(ctx, NULL, true);
+ /* if we failed setting up the ctx, we might not have any rings */
+ io_iopoll_try_reap_events(ctx);
+ }
INIT_WORK(&ctx->exit_work, io_ring_exit_work);
/*
@@ -10243,6 +11437,10 @@ static __cold void io_uring_try_cancel_requests(struct io_ring_ctx *ctx,
struct io_task_cancel cancel = { .task = task, .all = cancel_all, };
struct io_uring_task *tctx = task ? task->io_uring : NULL;
+ /* failed during ring init, it couldn't have issued any requests */
+ if (!ctx->rings)
+ return;
+
while (1) {
enum io_wq_cancel cret;
bool ret = false;
@@ -10585,7 +11783,7 @@ static int io_ringfd_unregister(struct io_ring_ctx *ctx, void __user *__arg,
ret = -EFAULT;
break;
}
- if (reg.resv || reg.offset >= IO_RINGFD_REG_MAX) {
+ if (reg.resv || reg.data || reg.offset >= IO_RINGFD_REG_MAX) {
ret = -EINVAL;
break;
}
@@ -10688,6 +11886,19 @@ static int io_sqpoll_wait_sq(struct io_ring_ctx *ctx)
return 0;
}
+static int io_validate_ext_arg(unsigned flags, const void __user *argp, size_t argsz)
+{
+ if (flags & IORING_ENTER_EXT_ARG) {
+ struct io_uring_getevents_arg arg;
+
+ if (argsz != sizeof(arg))
+ return -EINVAL;
+ if (copy_from_user(&arg, argp, sizeof(arg)))
+ return -EFAULT;
+ }
+ return 0;
+}
+
static int io_get_ext_arg(unsigned flags, const void __user *argp, size_t *argsz,
struct __kernel_timespec __user **ts,
const sigset_t __user **sig)
@@ -10725,7 +11936,6 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
size_t, argsz)
{
struct io_ring_ctx *ctx;
- int submitted = 0;
struct fd f;
long ret;
@@ -10788,39 +11998,64 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
if (ret)
goto out;
}
- submitted = to_submit;
+ ret = to_submit;
} else if (to_submit) {
ret = io_uring_add_tctx_node(ctx);
if (unlikely(ret))
goto out;
- mutex_lock(&ctx->uring_lock);
- submitted = io_submit_sqes(ctx, to_submit);
- mutex_unlock(&ctx->uring_lock);
- if (submitted != to_submit)
+ mutex_lock(&ctx->uring_lock);
+ ret = io_submit_sqes(ctx, to_submit);
+ if (ret != to_submit) {
+ mutex_unlock(&ctx->uring_lock);
goto out;
+ }
+ if ((flags & IORING_ENTER_GETEVENTS) && ctx->syscall_iopoll)
+ goto iopoll_locked;
+ mutex_unlock(&ctx->uring_lock);
}
if (flags & IORING_ENTER_GETEVENTS) {
- const sigset_t __user *sig;
- struct __kernel_timespec __user *ts;
-
- ret = io_get_ext_arg(flags, argp, &argsz, &ts, &sig);
- if (unlikely(ret))
- goto out;
+ int ret2;
+ if (ctx->syscall_iopoll) {
+ /*
+ * We disallow the app entering submit/complete with
+ * polling, but we still need to lock the ring to
+ * prevent racing with polled issue that got punted to
+ * a workqueue.
+ */
+ mutex_lock(&ctx->uring_lock);
+iopoll_locked:
+ ret2 = io_validate_ext_arg(flags, argp, argsz);
+ if (likely(!ret2)) {
+ min_complete = min(min_complete,
+ ctx->cq_entries);
+ ret2 = io_iopoll_check(ctx, min_complete);
+ }
+ mutex_unlock(&ctx->uring_lock);
+ } else {
+ const sigset_t __user *sig;
+ struct __kernel_timespec __user *ts;
+
+ ret2 = io_get_ext_arg(flags, argp, &argsz, &ts, &sig);
+ if (likely(!ret2)) {
+ min_complete = min(min_complete,
+ ctx->cq_entries);
+ ret2 = io_cqring_wait(ctx, min_complete, sig,
+ argsz, ts);
+ }
+ }
- min_complete = min(min_complete, ctx->cq_entries);
+ if (!ret) {
+ ret = ret2;
- /*
- * When SETUP_IOPOLL and SETUP_SQPOLL are both enabled, user
- * space applications don't need to do io completion events
- * polling again, they can rely on io_sq_thread to do polling
- * work, which can reduce cpu usage and uring_lock contention.
- */
- if (ctx->flags & IORING_SETUP_IOPOLL &&
- !(ctx->flags & IORING_SETUP_SQPOLL)) {
- ret = io_iopoll_check(ctx, min_complete);
- } else {
- ret = io_cqring_wait(ctx, min_complete, sig, argsz, ts);
+ /*
+ * EBADR indicates that one or more CQE were dropped.
+ * Once the user has been informed we can clear the bit
+ * as they are obviously ok with those drops.
+ */
+ if (unlikely(ret2 == -EBADR))
+ clear_bit(IO_CHECK_CQ_DROPPED_BIT,
+ &ctx->check_cq);
}
}
@@ -10829,7 +12064,7 @@ out:
out_fput:
if (!(flags & IORING_ENTER_REGISTERED_RING))
fdput(f);
- return submitted ? submitted : ret;
+ return ret;
}
#ifdef CONFIG_PROC_FS
@@ -10876,10 +12111,15 @@ static __cold void __io_uring_show_fdinfo(struct io_ring_ctx *ctx,
unsigned int sq_tail = READ_ONCE(r->sq.tail);
unsigned int cq_head = READ_ONCE(r->cq.head);
unsigned int cq_tail = READ_ONCE(r->cq.tail);
+ unsigned int cq_shift = 0;
unsigned int sq_entries, cq_entries;
bool has_lock;
+ bool is_cqe32 = (ctx->flags & IORING_SETUP_CQE32);
unsigned int i;
+ if (is_cqe32)
+ cq_shift = 1;
+
/*
* we may get imprecise sqe and cqe info if uring is actively running
* since we get cached_sq_head and cached_cq_tail without uring_lock
@@ -10912,11 +12152,18 @@ static __cold void __io_uring_show_fdinfo(struct io_ring_ctx *ctx,
cq_entries = min(cq_tail - cq_head, ctx->cq_entries);
for (i = 0; i < cq_entries; i++) {
unsigned int entry = i + cq_head;
- struct io_uring_cqe *cqe = &r->cqes[entry & cq_mask];
+ struct io_uring_cqe *cqe = &r->cqes[(entry & cq_mask) << cq_shift];
- seq_printf(m, "%5u: user_data:%llu, res:%d, flag:%x\n",
+ if (!is_cqe32) {
+ seq_printf(m, "%5u: user_data:%llu, res:%d, flag:%x\n",
entry & cq_mask, cqe->user_data, cqe->res,
cqe->flags);
+ } else {
+ seq_printf(m, "%5u: user_data:%llu, res:%d, flag:%x, "
+ "extra1:%llu, extra2:%llu\n",
+ entry & cq_mask, cqe->user_data, cqe->res,
+ cqe->flags, cqe->big_cqe[0], cqe->big_cqe[1]);
+ }
}
/*
@@ -11019,7 +12266,7 @@ static __cold int io_allocate_scq_urings(struct io_ring_ctx *ctx,
ctx->sq_entries = p->sq_entries;
ctx->cq_entries = p->cq_entries;
- size = rings_size(p->sq_entries, p->cq_entries, &sq_array_offset);
+ size = rings_size(ctx, p->sq_entries, p->cq_entries, &sq_array_offset);
if (size == SIZE_MAX)
return -EOVERFLOW;
@@ -11034,7 +12281,10 @@ static __cold int io_allocate_scq_urings(struct io_ring_ctx *ctx,
rings->sq_ring_entries = p->sq_entries;
rings->cq_ring_entries = p->cq_entries;
- size = array_size(sizeof(struct io_uring_sqe), p->sq_entries);
+ if (p->flags & IORING_SETUP_SQE128)
+ size = array_size(2 * sizeof(struct io_uring_sqe), p->sq_entries);
+ else
+ size = array_size(sizeof(struct io_uring_sqe), p->sq_entries);
if (size == SIZE_MAX) {
io_mem_free(ctx->rings);
ctx->rings = NULL;
@@ -11146,11 +12396,41 @@ static __cold int io_uring_create(unsigned entries, struct io_uring_params *p,
ctx = io_ring_ctx_alloc(p);
if (!ctx)
return -ENOMEM;
+
+ /*
+ * When SETUP_IOPOLL and SETUP_SQPOLL are both enabled, user
+ * space applications don't need to do io completion events
+ * polling again, they can rely on io_sq_thread to do polling
+ * work, which can reduce cpu usage and uring_lock contention.
+ */
+ if (ctx->flags & IORING_SETUP_IOPOLL &&
+ !(ctx->flags & IORING_SETUP_SQPOLL))
+ ctx->syscall_iopoll = 1;
+
ctx->compat = in_compat_syscall();
if (!capable(CAP_IPC_LOCK))
ctx->user = get_uid(current_user());
/*
+ * For SQPOLL, we just need a wakeup, always. For !SQPOLL, if
+ * COOP_TASKRUN is set, then IPIs are never needed by the app.
+ */
+ ret = -EINVAL;
+ if (ctx->flags & IORING_SETUP_SQPOLL) {
+ /* IPI related flags don't make sense with SQPOLL */
+ if (ctx->flags & (IORING_SETUP_COOP_TASKRUN |
+ IORING_SETUP_TASKRUN_FLAG))
+ goto err;
+ ctx->notify_method = TWA_SIGNAL_NO_IPI;
+ } else if (ctx->flags & IORING_SETUP_COOP_TASKRUN) {
+ ctx->notify_method = TWA_SIGNAL_NO_IPI;
+ } else {
+ if (ctx->flags & IORING_SETUP_TASKRUN_FLAG)
+ goto err;
+ ctx->notify_method = TWA_SIGNAL;
+ }
+
+ /*
* This is just grabbed for accounting purposes. When a process exits,
* the mm is exited and dropped before the files, hence we need to hang
* on to this mm purely for the purposes of being able to unaccount
@@ -11247,10 +12527,12 @@ static long io_uring_setup(u32 entries, struct io_uring_params __user *params)
if (p.flags & ~(IORING_SETUP_IOPOLL | IORING_SETUP_SQPOLL |
IORING_SETUP_SQ_AFF | IORING_SETUP_CQSIZE |
IORING_SETUP_CLAMP | IORING_SETUP_ATTACH_WQ |
- IORING_SETUP_R_DISABLED | IORING_SETUP_SUBMIT_ALL))
+ IORING_SETUP_R_DISABLED | IORING_SETUP_SUBMIT_ALL |
+ IORING_SETUP_COOP_TASKRUN | IORING_SETUP_TASKRUN_FLAG |
+ IORING_SETUP_SQE128 | IORING_SETUP_CQE32))
return -EINVAL;
- return io_uring_create(entries, &p, params);
+ return io_uring_create(entries, &p, params);
}
SYSCALL_DEFINE2(io_uring_setup, u32, entries,
@@ -11463,14 +12745,20 @@ static __cold int io_register_rsrc(struct io_ring_ctx *ctx, void __user *arg,
memset(&rr, 0, sizeof(rr));
if (copy_from_user(&rr, arg, size))
return -EFAULT;
- if (!rr.nr || rr.resv || rr.resv2)
+ if (!rr.nr || rr.resv2)
+ return -EINVAL;
+ if (rr.flags & ~IORING_RSRC_REGISTER_SPARSE)
return -EINVAL;
switch (type) {
case IORING_RSRC_FILE:
+ if (rr.flags & IORING_RSRC_REGISTER_SPARSE && rr.data)
+ break;
return io_sqe_files_register(ctx, u64_to_user_ptr(rr.data),
rr.nr, u64_to_user_ptr(rr.tags));
case IORING_RSRC_BUFFER:
+ if (rr.flags & IORING_RSRC_REGISTER_SPARSE && rr.data)
+ break;
return io_sqe_buffers_register(ctx, u64_to_user_ptr(rr.data),
rr.nr, u64_to_user_ptr(rr.tags));
}
@@ -11605,6 +12893,85 @@ err:
return ret;
}
+static int io_register_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg)
+{
+ struct io_uring_buf_ring *br;
+ struct io_uring_buf_reg reg;
+ struct io_buffer_list *bl;
+ struct page **pages;
+ int nr_pages;
+
+ if (copy_from_user(&reg, arg, sizeof(reg)))
+ return -EFAULT;
+
+ if (reg.pad || reg.resv[0] || reg.resv[1] || reg.resv[2])
+ return -EINVAL;
+ if (!reg.ring_addr)
+ return -EFAULT;
+ if (reg.ring_addr & ~PAGE_MASK)
+ return -EINVAL;
+ if (!is_power_of_2(reg.ring_entries))
+ return -EINVAL;
+
+ if (unlikely(reg.bgid < BGID_ARRAY && !ctx->io_bl)) {
+ int ret = io_init_bl_list(ctx);
+ if (ret)
+ return ret;
+ }
+
+ bl = io_buffer_get_list(ctx, reg.bgid);
+ if (bl) {
+ /* if mapped buffer ring OR classic exists, don't allow */
+ if (bl->buf_nr_pages || !list_empty(&bl->buf_list))
+ return -EEXIST;
+ } else {
+ bl = kzalloc(sizeof(*bl), GFP_KERNEL);
+ if (!bl)
+ return -ENOMEM;
+ }
+
+ pages = io_pin_pages(reg.ring_addr,
+ struct_size(br, bufs, reg.ring_entries),
+ &nr_pages);
+ if (IS_ERR(pages)) {
+ kfree(bl);
+ return PTR_ERR(pages);
+ }
+
+ br = page_address(pages[0]);
+ bl->buf_pages = pages;
+ bl->buf_nr_pages = nr_pages;
+ bl->nr_entries = reg.ring_entries;
+ bl->buf_ring = br;
+ bl->mask = reg.ring_entries - 1;
+ io_buffer_add_list(ctx, bl, reg.bgid);
+ return 0;
+}
+
+static int io_unregister_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg)
+{
+ struct io_uring_buf_reg reg;
+ struct io_buffer_list *bl;
+
+ if (copy_from_user(&reg, arg, sizeof(reg)))
+ return -EFAULT;
+ if (reg.pad || reg.resv[0] || reg.resv[1] || reg.resv[2])
+ return -EINVAL;
+
+ bl = io_buffer_get_list(ctx, reg.bgid);
+ if (!bl)
+ return -ENOENT;
+ if (!bl->buf_nr_pages)
+ return -EINVAL;
+
+ __io_remove_buffers(ctx, bl, -1U);
+ if (bl->bgid >= BGID_ARRAY) {
+ xa_erase(&ctx->io_bl_xa, bl->bgid);
+ kfree(bl);
+ }
+ return 0;
+}
+
static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
void __user *arg, unsigned nr_args)
__releases(ctx->uring_lock)
@@ -11630,6 +12997,9 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
switch (opcode) {
case IORING_REGISTER_BUFFERS:
+ ret = -EFAULT;
+ if (!arg)
+ break;
ret = io_sqe_buffers_register(ctx, arg, nr_args, NULL);
break;
case IORING_UNREGISTER_BUFFERS:
@@ -11639,6 +13009,9 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
ret = io_sqe_buffers_unregister(ctx);
break;
case IORING_REGISTER_FILES:
+ ret = -EFAULT;
+ if (!arg)
+ break;
ret = io_sqe_files_register(ctx, arg, nr_args, NULL);
break;
case IORING_UNREGISTER_FILES:
@@ -11733,6 +13106,18 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
case IORING_UNREGISTER_RING_FDS:
ret = io_ringfd_unregister(ctx, arg, nr_args);
break;
+ case IORING_REGISTER_PBUF_RING:
+ ret = -EINVAL;
+ if (!arg || nr_args != 1)
+ break;
+ ret = io_register_pbuf_ring(ctx, arg);
+ break;
+ case IORING_UNREGISTER_PBUF_RING:
+ ret = -EINVAL;
+ if (!arg || nr_args != 1)
+ break;
+ ret = io_unregister_pbuf_ring(ctx, arg);
+ break;
default:
ret = -EINVAL;
break;
@@ -11809,6 +13194,7 @@ static int __init io_uring_init(void)
BUILD_BUG_SQE_ELEM(42, __u16, personality);
BUILD_BUG_SQE_ELEM(44, __s32, splice_fd_in);
BUILD_BUG_SQE_ELEM(44, __u32, file_index);
+ BUILD_BUG_SQE_ELEM(48, __u64, addr3);
BUILD_BUG_ON(sizeof(struct io_uring_files_update) !=
sizeof(struct io_uring_rsrc_update));
@@ -11817,6 +13203,10 @@ static int __init io_uring_init(void)
/* ->buf_index is u16 */
BUILD_BUG_ON(IORING_MAX_REG_BUFFERS >= (1u << 16));
+ BUILD_BUG_ON(BGID_ARRAY * sizeof(struct io_buffer_list) > PAGE_SIZE);
+ BUILD_BUG_ON(offsetof(struct io_uring_buf_ring, bufs) != 0);
+ BUILD_BUG_ON(offsetof(struct io_uring_buf, resv) !=
+ offsetof(struct io_uring_buf_ring, tail));
/* should fit into one byte */
BUILD_BUG_ON(SQE_VALID_FLAGS >= (1 << 8));
@@ -11826,6 +13216,10 @@ static int __init io_uring_init(void)
BUILD_BUG_ON(ARRAY_SIZE(io_op_defs) != IORING_OP_LAST);
BUILD_BUG_ON(__REQ_F_LAST_BIT > 8 * sizeof(int));
+ BUILD_BUG_ON(sizeof(atomic_t) != sizeof(u32));
+
+ BUILD_BUG_ON(sizeof(struct io_uring_cmd) > 64);
+
req_cachep = KMEM_CACHE(io_kiocb, SLAB_HWCACHE_ALIGN | SLAB_PANIC |
SLAB_ACCOUNT);
return 0;
diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c
index b08f5dc31780..80f9b047aa1b 100644
--- a/fs/iomap/direct-io.c
+++ b/fs/iomap/direct-io.c
@@ -56,7 +56,8 @@ static void iomap_dio_submit_bio(const struct iomap_iter *iter,
{
atomic_inc(&dio->ref);
- if (dio->iocb->ki_flags & IOCB_HIPRI) {
+ /* Sync dio can't be polled reliably */
+ if ((dio->iocb->ki_flags & IOCB_HIPRI) && !is_sync_kiocb(dio->iocb)) {
bio_set_polled(bio, dio->iocb);
dio->submit.poll_bio = bio;
}
@@ -265,8 +266,7 @@ static loff_t iomap_dio_bio_iter(const struct iomap_iter *iter,
* cache flushes on IO completion.
*/
if (!(iomap->flags & (IOMAP_F_SHARED|IOMAP_F_DIRTY)) &&
- (dio->flags & IOMAP_DIO_WRITE_FUA) &&
- blk_queue_fua(bdev_get_queue(iomap->bdev)))
+ (dio->flags & IOMAP_DIO_WRITE_FUA) && bdev_fua(iomap->bdev))
use_fua = true;
}
@@ -654,9 +654,7 @@ __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
if (!READ_ONCE(dio->submit.waiter))
break;
- if (!dio->submit.poll_bio ||
- !bio_poll(dio->submit.poll_bio, NULL, 0))
- blk_io_schedule();
+ blk_io_schedule();
}
__set_current_state(TASK_RUNNING);
}
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index 5b9408e3b370..ac7f067b7bdd 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -488,7 +488,6 @@ void jbd2_journal_commit_transaction(journal_t *journal)
jbd2_journal_wait_updates(journal);
commit_transaction->t_state = T_SWITCH;
- write_unlock(&journal->j_state_lock);
J_ASSERT (atomic_read(&commit_transaction->t_outstanding_credits) <=
journal->j_max_transaction_buffers);
@@ -508,6 +507,8 @@ void jbd2_journal_commit_transaction(journal_t *journal)
* has reserved. This is consistent with the existing behaviour
* that multiple jbd2_journal_get_write_access() calls to the same
* buffer are perfectly permissible.
+ * We use journal->j_state_lock here to serialize processing of
+ * t_reserved_list with eviction of buffers from journal_unmap_buffer().
*/
while (commit_transaction->t_reserved_list) {
jh = commit_transaction->t_reserved_list;
@@ -527,6 +528,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
jbd2_journal_refile_buffer(journal, jh);
}
+ write_unlock(&journal->j_state_lock);
/*
* Now try to drop any written-back buffers from the journal's
* checkpoint lists. We do this *before* commit because it potentially
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index fcacafa4510d..c0cbeeaec2d1 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -1762,7 +1762,6 @@ static int __jbd2_journal_erase(journal_t *journal, unsigned int flags)
unsigned long block, log_offset; /* logical */
unsigned long long phys_block, block_start, block_stop; /* physical */
loff_t byte_start, byte_stop, byte_count;
- struct request_queue *q = bdev_get_queue(journal->j_dev);
/* flags must be set to either discard or zeroout */
if ((flags & ~JBD2_JOURNAL_FLUSH_VALID) || !flags ||
@@ -1770,10 +1769,8 @@ static int __jbd2_journal_erase(journal_t *journal, unsigned int flags)
(flags & JBD2_JOURNAL_FLUSH_ZEROOUT)))
return -EINVAL;
- if (!q)
- return -ENXIO;
-
- if ((flags & JBD2_JOURNAL_FLUSH_DISCARD) && !blk_queue_discard(q))
+ if ((flags & JBD2_JOURNAL_FLUSH_DISCARD) &&
+ !bdev_max_discard_sectors(journal->j_dev))
return -EOPNOTSUPP;
/*
@@ -1828,7 +1825,7 @@ static int __jbd2_journal_erase(journal_t *journal, unsigned int flags)
err = blkdev_issue_discard(journal->j_dev,
byte_start >> SECTOR_SHIFT,
byte_count >> SECTOR_SHIFT,
- GFP_NOFS, 0);
+ GFP_NOFS);
} else if (flags & JBD2_JOURNAL_FLUSH_ZEROOUT) {
err = blkdev_issue_zeroout(journal->j_dev,
byte_start >> SECTOR_SHIFT,
diff --git a/fs/jfs/ioctl.c b/fs/jfs/ioctl.c
index 03a845ab4f00..1e7b177ece60 100644
--- a/fs/jfs/ioctl.c
+++ b/fs/jfs/ioctl.c
@@ -110,14 +110,13 @@ long jfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
case FITRIM:
{
struct super_block *sb = inode->i_sb;
- struct request_queue *q = bdev_get_queue(sb->s_bdev);
struct fstrim_range range;
s64 ret = 0;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
- if (!blk_queue_discard(q)) {
+ if (!bdev_max_discard_sectors(sb->s_bdev)) {
jfs_warn("FITRIM not supported on device");
return -EOPNOTSUPP;
}
@@ -127,7 +126,7 @@ long jfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
return -EFAULT;
range.minlen = max_t(unsigned int, range.minlen,
- q->limits.discard_granularity);
+ bdev_discard_granularity(sb->s_bdev));
ret = jfs_ioc_trim(inode, &range);
if (ret < 0)
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index f1a13a74cddf..85d4f44f2ac4 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -372,19 +372,16 @@ static int parse_options(char *options, struct super_block *sb, s64 *newLVSize,
}
case Opt_discard:
- {
- struct request_queue *q = bdev_get_queue(sb->s_bdev);
/* if set to 1, even copying files will cause
* trimming :O
* -> user has more control over the online trimming
*/
sbi->minblks_trim = 64;
- if (blk_queue_discard(q))
+ if (bdev_max_discard_sectors(sb->s_bdev))
*flag |= JFS_DISCARD;
else
pr_err("JFS: discard option not supported on device\n");
break;
- }
case Opt_nodiscard:
*flag &= ~JFS_DISCARD;
@@ -392,10 +389,9 @@ static int parse_options(char *options, struct super_block *sb, s64 *newLVSize,
case Opt_discard_minblk:
{
- struct request_queue *q = bdev_get_queue(sb->s_bdev);
char *minblks_trim = args[0].from;
int rc;
- if (blk_queue_discard(q)) {
+ if (bdev_max_discard_sectors(sb->s_bdev)) {
*flag |= JFS_DISCARD;
rc = kstrtouint(minblks_trim, 0,
&sbi->minblks_trim);
diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c
index 61a8edc4ba8b..e205fde7163a 100644
--- a/fs/kernfs/dir.c
+++ b/fs/kernfs/dir.c
@@ -1406,7 +1406,12 @@ static void __kernfs_remove(struct kernfs_node *kn)
*/
void kernfs_remove(struct kernfs_node *kn)
{
- struct kernfs_root *root = kernfs_root(kn);
+ struct kernfs_root *root;
+
+ if (!kn)
+ return;
+
+ root = kernfs_root(kn);
down_write(&root->kernfs_rwsem);
__kernfs_remove(kn);
diff --git a/fs/ksmbd/misc.c b/fs/ksmbd/misc.c
index 60e7ac62c917..1e2076a53bed 100644
--- a/fs/ksmbd/misc.c
+++ b/fs/ksmbd/misc.c
@@ -158,19 +158,41 @@ out:
* Return : windows path string or error
*/
-char *convert_to_nt_pathname(char *filename)
+char *convert_to_nt_pathname(struct ksmbd_share_config *share,
+ struct path *path)
{
- char *ab_pathname;
+ char *pathname, *ab_pathname, *nt_pathname;
+ int share_path_len = share->path_sz;
- if (strlen(filename) == 0)
- filename = "\\";
+ pathname = kmalloc(PATH_MAX, GFP_KERNEL);
+ if (!pathname)
+ return ERR_PTR(-EACCES);
- ab_pathname = kstrdup(filename, GFP_KERNEL);
- if (!ab_pathname)
- return NULL;
+ ab_pathname = d_path(path, pathname, PATH_MAX);
+ if (IS_ERR(ab_pathname)) {
+ nt_pathname = ERR_PTR(-EACCES);
+ goto free_pathname;
+ }
+
+ if (strncmp(ab_pathname, share->path, share_path_len)) {
+ nt_pathname = ERR_PTR(-EACCES);
+ goto free_pathname;
+ }
+
+ nt_pathname = kzalloc(strlen(&ab_pathname[share_path_len]) + 2, GFP_KERNEL);
+ if (!nt_pathname) {
+ nt_pathname = ERR_PTR(-ENOMEM);
+ goto free_pathname;
+ }
+ if (ab_pathname[share_path_len] == '\0')
+ strcpy(nt_pathname, "/");
+ strcat(nt_pathname, &ab_pathname[share_path_len]);
+
+ ksmbd_conv_path_to_windows(nt_pathname);
- ksmbd_conv_path_to_windows(ab_pathname);
- return ab_pathname;
+free_pathname:
+ kfree(pathname);
+ return nt_pathname;
}
int get_nlink(struct kstat *st)
diff --git a/fs/ksmbd/misc.h b/fs/ksmbd/misc.h
index 253366bd0951..aae2a252945f 100644
--- a/fs/ksmbd/misc.h
+++ b/fs/ksmbd/misc.h
@@ -14,7 +14,8 @@ struct ksmbd_file;
int match_pattern(const char *str, size_t len, const char *pattern);
int ksmbd_validate_filename(char *filename);
int parse_stream_name(char *filename, char **stream_name, int *s_type);
-char *convert_to_nt_pathname(char *filename);
+char *convert_to_nt_pathname(struct ksmbd_share_config *share,
+ struct path *path);
int get_nlink(struct kstat *st);
void ksmbd_conv_path_to_unix(char *path);
void ksmbd_strip_last_slash(char *path);
diff --git a/fs/ksmbd/oplock.c b/fs/ksmbd/oplock.c
index 23871b18a429..8b5560574d4c 100644
--- a/fs/ksmbd/oplock.c
+++ b/fs/ksmbd/oplock.c
@@ -1694,33 +1694,3 @@ out:
read_unlock(&lease_list_lock);
return ret_op;
}
-
-int smb2_check_durable_oplock(struct ksmbd_file *fp,
- struct lease_ctx_info *lctx, char *name)
-{
- struct oplock_info *opinfo = opinfo_get(fp);
- int ret = 0;
-
- if (opinfo && opinfo->is_lease) {
- if (!lctx) {
- pr_err("open does not include lease\n");
- ret = -EBADF;
- goto out;
- }
- if (memcmp(opinfo->o_lease->lease_key, lctx->lease_key,
- SMB2_LEASE_KEY_SIZE)) {
- pr_err("invalid lease key\n");
- ret = -EBADF;
- goto out;
- }
- if (name && strcmp(fp->filename, name)) {
- pr_err("invalid name reconnect %s\n", name);
- ret = -EINVAL;
- goto out;
- }
- }
-out:
- if (opinfo)
- opinfo_put(opinfo);
- return ret;
-}
diff --git a/fs/ksmbd/oplock.h b/fs/ksmbd/oplock.h
index 0cf7a2b5bbc0..09753448f779 100644
--- a/fs/ksmbd/oplock.h
+++ b/fs/ksmbd/oplock.h
@@ -124,6 +124,4 @@ struct oplock_info *lookup_lease_in_table(struct ksmbd_conn *conn,
int find_same_lease_key(struct ksmbd_session *sess, struct ksmbd_inode *ci,
struct lease_ctx_info *lctx);
void destroy_lease_table(struct ksmbd_conn *conn);
-int smb2_check_durable_oplock(struct ksmbd_file *fp,
- struct lease_ctx_info *lctx, char *name);
#endif /* __KSMBD_OPLOCK_H */
diff --git a/fs/ksmbd/smb2pdu.c b/fs/ksmbd/smb2pdu.c
index 3bf6c56c654c..16c803a9d996 100644
--- a/fs/ksmbd/smb2pdu.c
+++ b/fs/ksmbd/smb2pdu.c
@@ -11,6 +11,7 @@
#include <linux/statfs.h>
#include <linux/ethtool.h>
#include <linux/falloc.h>
+#include <linux/mount.h>
#include "glob.h"
#include "smbfsctl.h"
@@ -2918,7 +2919,6 @@ int smb2_open(struct ksmbd_work *work)
goto err_out;
}
- fp->filename = name;
fp->cdoption = req->CreateDisposition;
fp->daccess = daccess;
fp->saccess = req->ShareAccess;
@@ -3270,14 +3270,13 @@ err_out1:
if (!rsp->hdr.Status)
rsp->hdr.Status = STATUS_UNEXPECTED_IO_ERROR;
- if (!fp || !fp->filename)
- kfree(name);
if (fp)
ksmbd_fd_put(work, fp);
smb2_set_err_rsp(work);
ksmbd_debug(SMB, "Error response: %x\n", rsp->hdr.Status);
}
+ kfree(name);
kfree(lc);
return 0;
@@ -3895,8 +3894,6 @@ int smb2_query_dir(struct ksmbd_work *work)
ksmbd_debug(SMB, "Search pattern is %s\n", srch_ptr);
}
- ksmbd_debug(SMB, "Directory name is %s\n", dir_fp->filename);
-
if (srch_flag & SMB2_REOPEN || srch_flag & SMB2_RESTART_SCANS) {
ksmbd_debug(SMB, "Restart directory scan\n");
generic_file_llseek(dir_fp->filp, 0, SEEK_SET);
@@ -4390,9 +4387,9 @@ static int get_file_all_info(struct ksmbd_work *work,
return -EACCES;
}
- filename = convert_to_nt_pathname(fp->filename);
- if (!filename)
- return -ENOMEM;
+ filename = convert_to_nt_pathname(work->tcon->share_conf, &fp->filp->f_path);
+ if (IS_ERR(filename))
+ return PTR_ERR(filename);
inode = file_inode(fp->filp);
generic_fillattr(file_mnt_user_ns(fp->filp), inode, &stat);
@@ -4999,15 +4996,17 @@ static int smb2_get_info_filesystem(struct ksmbd_work *work,
case FS_SECTOR_SIZE_INFORMATION:
{
struct smb3_fs_ss_info *info;
+ unsigned int sector_size =
+ min_t(unsigned int, path.mnt->mnt_sb->s_blocksize, 4096);
info = (struct smb3_fs_ss_info *)(rsp->Buffer);
- info->LogicalBytesPerSector = cpu_to_le32(stfs.f_bsize);
+ info->LogicalBytesPerSector = cpu_to_le32(sector_size);
info->PhysicalBytesPerSectorForAtomicity =
- cpu_to_le32(stfs.f_bsize);
- info->PhysicalBytesPerSectorForPerf = cpu_to_le32(stfs.f_bsize);
+ cpu_to_le32(sector_size);
+ info->PhysicalBytesPerSectorForPerf = cpu_to_le32(sector_size);
info->FSEffPhysicalBytesPerSectorForAtomicity =
- cpu_to_le32(stfs.f_bsize);
+ cpu_to_le32(sector_size);
info->Flags = cpu_to_le32(SSINFO_FLAGS_ALIGNED_DEVICE |
SSINFO_FLAGS_PARTITION_ALIGNED_ON_DEVICE);
info->ByteOffsetForSectorAlignment = 0;
@@ -5683,8 +5682,7 @@ static int set_file_allocation_info(struct ksmbd_work *work,
size = i_size_read(inode);
rc = ksmbd_vfs_truncate(work, fp, alloc_blks * 512);
if (rc) {
- pr_err("truncate failed! filename : %s, err %d\n",
- fp->filename, rc);
+ pr_err("truncate failed!, err %d\n", rc);
return rc;
}
if (size < alloc_blks * 512)
@@ -5714,12 +5712,10 @@ static int set_end_of_file_info(struct ksmbd_work *work, struct ksmbd_file *fp,
* truncated range.
*/
if (inode->i_sb->s_magic != MSDOS_SUPER_MAGIC) {
- ksmbd_debug(SMB, "filename : %s truncated to newsize %lld\n",
- fp->filename, newsize);
+ ksmbd_debug(SMB, "truncated to newsize %lld\n", newsize);
rc = ksmbd_vfs_truncate(work, fp, newsize);
if (rc) {
- ksmbd_debug(SMB, "truncate failed! filename : %s err %d\n",
- fp->filename, rc);
+ ksmbd_debug(SMB, "truncate failed!, err %d\n", rc);
if (rc != -EAGAIN)
rc = -EBADF;
return rc;
@@ -5765,8 +5761,10 @@ static int set_rename_info(struct ksmbd_work *work, struct ksmbd_file *fp,
if (parent_fp) {
if (parent_fp->daccess & FILE_DELETE_LE) {
pr_err("parent dir is opened with delete access\n");
+ ksmbd_fd_put(work, parent_fp);
return -ESHARE;
}
+ ksmbd_fd_put(work, parent_fp);
}
next:
return smb2_rename(work, fp, user_ns, rename_info,
diff --git a/fs/ksmbd/vfs.c b/fs/ksmbd/vfs.c
index 9cebb6ba555b..dcdd07c6efff 100644
--- a/fs/ksmbd/vfs.c
+++ b/fs/ksmbd/vfs.c
@@ -398,8 +398,7 @@ int ksmbd_vfs_read(struct ksmbd_work *work, struct ksmbd_file *fp, size_t count,
nbytes = kernel_read(filp, rbuf, count, pos);
if (nbytes < 0) {
- pr_err("smb read failed for (%s), err = %zd\n",
- fp->filename, nbytes);
+ pr_err("smb read failed, err = %zd\n", nbytes);
return nbytes;
}
@@ -875,8 +874,7 @@ int ksmbd_vfs_truncate(struct ksmbd_work *work,
err = vfs_truncate(&filp->f_path, size);
if (err)
- pr_err("truncate failed for filename : %s err %d\n",
- fp->filename, err);
+ pr_err("truncate failed, err %d\n", err);
return err;
}
diff --git a/fs/ksmbd/vfs_cache.c b/fs/ksmbd/vfs_cache.c
index 29c1db66bd0f..c4d59d2735f0 100644
--- a/fs/ksmbd/vfs_cache.c
+++ b/fs/ksmbd/vfs_cache.c
@@ -328,7 +328,6 @@ static void __ksmbd_close_fd(struct ksmbd_file_table *ft, struct ksmbd_file *fp)
kfree(smb_lock);
}
- kfree(fp->filename);
if (ksmbd_stream_fd(fp))
kfree(fp->stream.name);
kmem_cache_free(filp_cache, fp);
@@ -497,6 +496,7 @@ struct ksmbd_file *ksmbd_lookup_fd_inode(struct inode *inode)
list_for_each_entry(lfp, &ci->m_fp_list, node) {
if (inode == file_inode(lfp->filp)) {
atomic_dec(&ci->m_count);
+ lfp = ksmbd_fp_get(lfp);
read_unlock(&ci->m_lock);
return lfp;
}
diff --git a/fs/ksmbd/vfs_cache.h b/fs/ksmbd/vfs_cache.h
index 36239ce31afd..fcb13413fa8d 100644
--- a/fs/ksmbd/vfs_cache.h
+++ b/fs/ksmbd/vfs_cache.h
@@ -62,7 +62,6 @@ struct ksmbd_inode {
struct ksmbd_file {
struct file *filp;
- char *filename;
u64 persistent_id;
u64 volatile_id;
diff --git a/fs/namespace.c b/fs/namespace.c
index a0a36bfa3aa0..afe2b64b14f1 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -4058,10 +4058,22 @@ static int mount_setattr_prepare(struct mount_kattr *kattr, struct mount *mnt)
if (err) {
struct mount *p;
- for (p = mnt; p != m; p = next_mnt(p, mnt)) {
+ /*
+ * If we had to call mnt_hold_writers() MNT_WRITE_HOLD will
+ * be set in @mnt_flags. The loop unsets MNT_WRITE_HOLD for all
+ * mounts and needs to take care to include the first mount.
+ */
+ for (p = mnt; p; p = next_mnt(p, mnt)) {
/* If we had to hold writers unblock them. */
if (p->mnt.mnt_flags & MNT_WRITE_HOLD)
mnt_unhold_writers(p);
+
+ /*
+ * We're done once the first mount we changed got
+ * MNT_WRITE_HOLD unset.
+ */
+ if (p == m)
+ break;
}
}
return err;
diff --git a/fs/nfs/fs_context.c b/fs/nfs/fs_context.c
index e2d59bb5e6bb..9a16897e8dc6 100644
--- a/fs/nfs/fs_context.c
+++ b/fs/nfs/fs_context.c
@@ -517,7 +517,7 @@ static int nfs_fs_context_parse_param(struct fs_context *fc,
if (result.negated)
ctx->flags &= ~NFS_MOUNT_SOFTREVAL;
else
- ctx->flags &= NFS_MOUNT_SOFTREVAL;
+ ctx->flags |= NFS_MOUNT_SOFTREVAL;
break;
case Opt_posix:
if (result.negated)
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 16106f805ffa..a79f66432bd3 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -363,6 +363,14 @@ static void nfs4_setup_readdir(u64 cookie, __be32 *verifier, struct dentry *dent
kunmap_atomic(start);
}
+static void nfs4_fattr_set_prechange(struct nfs_fattr *fattr, u64 version)
+{
+ if (!(fattr->valid & NFS_ATTR_FATTR_PRECHANGE)) {
+ fattr->pre_change_attr = version;
+ fattr->valid |= NFS_ATTR_FATTR_PRECHANGE;
+ }
+}
+
static void nfs4_test_and_free_stateid(struct nfs_server *server,
nfs4_stateid *stateid,
const struct cred *cred)
@@ -6553,7 +6561,9 @@ static void nfs4_delegreturn_release(void *calldata)
pnfs_roc_release(&data->lr.arg, &data->lr.res,
data->res.lr_ret);
if (inode) {
- nfs_post_op_update_inode_force_wcc(inode, &data->fattr);
+ nfs4_fattr_set_prechange(&data->fattr,
+ inode_peek_iversion_raw(inode));
+ nfs_refresh_inode(inode, &data->fattr);
nfs_iput_and_deactive(inode);
}
kfree(calldata);
diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c
index fec194a666f4..87e1004b606d 100644
--- a/fs/nilfs2/ioctl.c
+++ b/fs/nilfs2/ioctl.c
@@ -1052,20 +1052,20 @@ out:
static int nilfs_ioctl_trim_fs(struct inode *inode, void __user *argp)
{
struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
- struct request_queue *q = bdev_get_queue(nilfs->ns_bdev);
struct fstrim_range range;
int ret;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
- if (!blk_queue_discard(q))
+ if (!bdev_max_discard_sectors(nilfs->ns_bdev))
return -EOPNOTSUPP;
if (copy_from_user(&range, argp, sizeof(range)))
return -EFAULT;
- range.minlen = max_t(u64, range.minlen, q->limits.discard_granularity);
+ range.minlen = max_t(u64, range.minlen,
+ bdev_discard_granularity(nilfs->ns_bdev));
down_read(&nilfs->ns_segctor_sem);
ret = nilfs_sufile_trim_fs(nilfs->ns_sufile, &range);
diff --git a/fs/nilfs2/sufile.c b/fs/nilfs2/sufile.c
index e385cca2004a..77ff8e95421f 100644
--- a/fs/nilfs2/sufile.c
+++ b/fs/nilfs2/sufile.c
@@ -1100,7 +1100,7 @@ int nilfs_sufile_trim_fs(struct inode *sufile, struct fstrim_range *range)
ret = blkdev_issue_discard(nilfs->ns_bdev,
start * sects_per_block,
nblocks * sects_per_block,
- GFP_NOFS, 0);
+ GFP_NOFS);
if (ret < 0) {
put_bh(su_bh);
goto out_sem;
@@ -1134,7 +1134,7 @@ int nilfs_sufile_trim_fs(struct inode *sufile, struct fstrim_range *range)
ret = blkdev_issue_discard(nilfs->ns_bdev,
start * sects_per_block,
nblocks * sects_per_block,
- GFP_NOFS, 0);
+ GFP_NOFS);
if (!ret)
ndiscarded += nblocks;
}
diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c
index dd48a8f74d57..3b4a079c9617 100644
--- a/fs/nilfs2/the_nilfs.c
+++ b/fs/nilfs2/the_nilfs.c
@@ -672,7 +672,7 @@ int nilfs_discard_segments(struct the_nilfs *nilfs, __u64 *segnump,
ret = blkdev_issue_discard(nilfs->ns_bdev,
start * sects_per_block,
nblocks * sects_per_block,
- GFP_NOFS, 0);
+ GFP_NOFS);
if (ret < 0)
return ret;
nblocks = 0;
@@ -682,7 +682,7 @@ int nilfs_discard_segments(struct the_nilfs *nilfs, __u64 *segnump,
ret = blkdev_issue_discard(nilfs->ns_bdev,
start * sects_per_block,
nblocks * sects_per_block,
- GFP_NOFS, 0);
+ GFP_NOFS);
return ret;
}
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index 9b32b76a9c30..a792e21c5309 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -1657,6 +1657,19 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
else
mnt = path.mnt;
+ /*
+ * FAN_RENAME is not allowed on non-dir (for now).
+ * We shouldn't have allowed setting any dirent events in mask of
+ * non-dir, but because we always allowed it, error only if group
+ * was initialized with the new flag FAN_REPORT_TARGET_FID.
+ */
+ ret = -ENOTDIR;
+ if (inode && !S_ISDIR(inode->i_mode) &&
+ ((mask & FAN_RENAME) ||
+ ((mask & FANOTIFY_DIRENT_EVENTS) &&
+ FAN_GROUP_FLAG(group, FAN_REPORT_TARGET_FID))))
+ goto path_put_and_out;
+
/* Mask out FAN_EVENT_ON_CHILD flag for sb/mount/non-dir marks */
if (mnt || !S_ISDIR(inode->i_mode)) {
mask &= ~FAN_EVENT_ON_CHILD;
diff --git a/fs/ntfs3/file.c b/fs/ntfs3/file.c
index 787b53b984ee..15806eeae217 100644
--- a/fs/ntfs3/file.c
+++ b/fs/ntfs3/file.c
@@ -22,20 +22,20 @@ static int ntfs_ioctl_fitrim(struct ntfs_sb_info *sbi, unsigned long arg)
{
struct fstrim_range __user *user_range;
struct fstrim_range range;
- struct request_queue *q = bdev_get_queue(sbi->sb->s_bdev);
int err;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
- if (!blk_queue_discard(q))
+ if (!bdev_max_discard_sectors(sbi->sb->s_bdev))
return -EOPNOTSUPP;
user_range = (struct fstrim_range __user *)arg;
if (copy_from_user(&range, user_range, sizeof(range)))
return -EFAULT;
- range.minlen = max_t(u32, range.minlen, q->limits.discard_granularity);
+ range.minlen = max_t(u32, range.minlen,
+ bdev_discard_granularity(sbi->sb->s_bdev));
err = ntfs_trim_fs(sbi, &range);
if (err < 0)
diff --git a/fs/ntfs3/super.c b/fs/ntfs3/super.c
index 278dcf502410..5781b9e8e3d8 100644
--- a/fs/ntfs3/super.c
+++ b/fs/ntfs3/super.c
@@ -882,7 +882,6 @@ static int ntfs_fill_super(struct super_block *sb, struct fs_context *fc)
int err;
struct ntfs_sb_info *sbi = sb->s_fs_info;
struct block_device *bdev = sb->s_bdev;
- struct request_queue *rq;
struct inode *inode;
struct ntfs_inode *ni;
size_t i, tt;
@@ -912,15 +911,14 @@ static int ntfs_fill_super(struct super_block *sb, struct fs_context *fc)
goto out;
}
- rq = bdev_get_queue(bdev);
- if (blk_queue_discard(rq) && rq->limits.discard_granularity) {
- sbi->discard_granularity = rq->limits.discard_granularity;
+ if (bdev_max_discard_sectors(bdev) && bdev_discard_granularity(bdev)) {
+ sbi->discard_granularity = bdev_discard_granularity(bdev);
sbi->discard_granularity_mask_inv =
~(u64)(sbi->discard_granularity - 1);
}
/* Parse boot. */
- err = ntfs_init_from_boot(sb, rq ? queue_logical_block_size(rq) : 512,
+ err = ntfs_init_from_boot(sb, bdev_logical_block_size(bdev),
bdev_nr_bytes(bdev));
if (err)
goto out;
@@ -1335,7 +1333,7 @@ int ntfs_discard(struct ntfs_sb_info *sbi, CLST lcn, CLST len)
return 0;
err = blkdev_issue_discard(sb->s_bdev, start >> 9, (end - start) >> 9,
- GFP_NOFS, 0);
+ GFP_NOFS);
if (err == -EOPNOTSUPP)
sbi->flags |= NTFS_FLAGS_NODISCARD;
diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c
index f59461d85da4..afd54ec66103 100644
--- a/fs/ocfs2/ioctl.c
+++ b/fs/ocfs2/ioctl.c
@@ -903,20 +903,19 @@ long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
case FITRIM:
{
struct super_block *sb = inode->i_sb;
- struct request_queue *q = bdev_get_queue(sb->s_bdev);
struct fstrim_range range;
int ret = 0;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
- if (!blk_queue_discard(q))
+ if (!bdev_max_discard_sectors(sb->s_bdev))
return -EOPNOTSUPP;
if (copy_from_user(&range, argp, sizeof(range)))
return -EFAULT;
- range.minlen = max_t(u64, q->limits.discard_granularity,
+ range.minlen = max_t(u64, bdev_discard_granularity(sb->s_bdev),
range.minlen);
ret = ocfs2_trim_fs(sb, &range);
if (ret < 0)
diff --git a/fs/pipe.c b/fs/pipe.c
index 9648ac15164a..e140ea150bbb 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -804,7 +804,7 @@ struct pipe_inode_info *alloc_pipe_info(void)
if (too_many_pipe_buffers_hard(user_bufs) && pipe_is_unprivileged_user())
goto out_revert_acct;
- pipe->bufs = kvcalloc(pipe_bufs, sizeof(struct pipe_buffer),
+ pipe->bufs = kcalloc(pipe_bufs, sizeof(struct pipe_buffer),
GFP_KERNEL_ACCOUNT);
if (pipe->bufs) {
@@ -849,7 +849,7 @@ void free_pipe_info(struct pipe_inode_info *pipe)
#endif
if (pipe->tmp_page)
__free_page(pipe->tmp_page);
- kvfree(pipe->bufs);
+ kfree(pipe->bufs);
kfree(pipe);
}
@@ -1264,7 +1264,8 @@ int pipe_resize_ring(struct pipe_inode_info *pipe, unsigned int nr_slots)
if (nr_slots < n)
return -EBUSY;
- bufs = kvcalloc(nr_slots, sizeof(*bufs), GFP_KERNEL_ACCOUNT);
+ bufs = kcalloc(nr_slots, sizeof(*bufs),
+ GFP_KERNEL_ACCOUNT | __GFP_NOWARN);
if (unlikely(!bufs))
return -ENOMEM;
@@ -1291,7 +1292,7 @@ int pipe_resize_ring(struct pipe_inode_info *pipe, unsigned int nr_slots)
head = n;
tail = 0;
- kvfree(pipe->bufs);
+ kfree(pipe->bufs);
pipe->bufs = bufs;
pipe->ring_size = nr_slots;
if (pipe->max_usage > nr_slots)
diff --git a/fs/posix_acl.c b/fs/posix_acl.c
index 80acb6885cf9..962d32468eb4 100644
--- a/fs/posix_acl.c
+++ b/fs/posix_acl.c
@@ -759,9 +759,14 @@ static void posix_acl_fix_xattr_userns(
}
void posix_acl_fix_xattr_from_user(struct user_namespace *mnt_userns,
+ struct inode *inode,
void *value, size_t size)
{
struct user_namespace *user_ns = current_user_ns();
+
+ /* Leave ids untouched on non-idmapped mounts. */
+ if (no_idmapping(mnt_userns, i_user_ns(inode)))
+ mnt_userns = &init_user_ns;
if ((user_ns == &init_user_ns) && (mnt_userns == &init_user_ns))
return;
posix_acl_fix_xattr_userns(&init_user_ns, user_ns, mnt_userns, value,
@@ -769,9 +774,14 @@ void posix_acl_fix_xattr_from_user(struct user_namespace *mnt_userns,
}
void posix_acl_fix_xattr_to_user(struct user_namespace *mnt_userns,
+ struct inode *inode,
void *value, size_t size)
{
struct user_namespace *user_ns = current_user_ns();
+
+ /* Leave ids untouched on non-idmapped mounts. */
+ if (no_idmapping(mnt_userns, i_user_ns(inode)))
+ mnt_userns = &init_user_ns;
if ((user_ns == &init_user_ns) && (mnt_userns == &init_user_ns))
return;
posix_acl_fix_xattr_userns(user_ns, &init_user_ns, mnt_userns, value,
diff --git a/fs/proc/fd.c b/fs/proc/fd.c
index 172c86270b31..913bef0d2a36 100644
--- a/fs/proc/fd.c
+++ b/fs/proc/fd.c
@@ -72,7 +72,7 @@ out:
return 0;
}
-static int seq_fdinfo_open(struct inode *inode, struct file *file)
+static int proc_fdinfo_access_allowed(struct inode *inode)
{
bool allowed = false;
struct task_struct *task = get_proc_task(inode);
@@ -86,6 +86,16 @@ static int seq_fdinfo_open(struct inode *inode, struct file *file)
if (!allowed)
return -EACCES;
+ return 0;
+}
+
+static int seq_fdinfo_open(struct inode *inode, struct file *file)
+{
+ int ret = proc_fdinfo_access_allowed(inode);
+
+ if (ret)
+ return ret;
+
return single_open(file, seq_show, inode);
}
@@ -348,12 +358,23 @@ static int proc_readfdinfo(struct file *file, struct dir_context *ctx)
proc_fdinfo_instantiate);
}
+static int proc_open_fdinfo(struct inode *inode, struct file *file)
+{
+ int ret = proc_fdinfo_access_allowed(inode);
+
+ if (ret)
+ return ret;
+
+ return 0;
+}
+
const struct inode_operations proc_fdinfo_inode_operations = {
.lookup = proc_lookupfdinfo,
.setattr = proc_setattr,
};
const struct file_operations proc_fdinfo_operations = {
+ .open = proc_open_fdinfo,
.read = generic_read_dir,
.iterate_shared = proc_readfdinfo,
.llseek = generic_file_llseek,
diff --git a/fs/squashfs/block.c b/fs/squashfs/block.c
index 622c844f6d11..8879d052f96c 100644
--- a/fs/squashfs/block.c
+++ b/fs/squashfs/block.c
@@ -86,17 +86,10 @@ static int squashfs_bio_read(struct super_block *sb, u64 index, int length,
int error, i;
struct bio *bio;
- if (page_count <= BIO_MAX_VECS) {
- bio = bio_alloc(sb->s_bdev, page_count, REQ_OP_READ, GFP_NOIO);
- } else {
- bio = bio_kmalloc(GFP_NOIO, page_count);
- bio_set_dev(bio, sb->s_bdev);
- bio->bi_opf = REQ_OP_READ;
- }
-
+ bio = bio_kmalloc(page_count, GFP_NOIO);
if (!bio)
return -ENOMEM;
-
+ bio_init(bio, sb->s_bdev, bio->bi_inline_vecs, page_count, REQ_OP_READ);
bio->bi_iter.bi_sector = block * (msblk->devblksize >> SECTOR_SHIFT);
for (i = 0; i < page_count; ++i) {
@@ -126,7 +119,8 @@ static int squashfs_bio_read(struct super_block *sb, u64 index, int length,
out_free_bio:
bio_free_pages(bio);
- bio_put(bio);
+ bio_uninit(bio);
+ kfree(bio);
return error;
}
@@ -190,7 +184,8 @@ int squashfs_read_data(struct super_block *sb, u64 index, int length,
length |= data[0] << 8;
}
bio_free_pages(bio);
- bio_put(bio);
+ bio_uninit(bio);
+ kfree(bio);
compressed = SQUASHFS_COMPRESSED(length);
length = SQUASHFS_COMPRESSED_SIZE(length);
@@ -224,7 +219,8 @@ int squashfs_read_data(struct super_block *sb, u64 index, int length,
out_free_bio:
bio_free_pages(bio);
- bio_put(bio);
+ bio_uninit(bio);
+ kfree(bio);
out:
if (res < 0) {
ERROR("Failed to read block 0x%llx: %d\n", index, res);
diff --git a/fs/super.c b/fs/super.c
index f1d4a193602d..60f57c7bc0a6 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -1204,7 +1204,7 @@ static int set_bdev_super(struct super_block *s, void *data)
s->s_dev = s->s_bdev->bd_dev;
s->s_bdi = bdi_get(s->s_bdev->bd_disk->bdi);
- if (blk_queue_stable_writes(s->s_bdev->bd_disk->queue))
+ if (bdev_stable_writes(s->s_bdev))
s->s_iflags |= SB_I_STABLE_WRITES;
return 0;
}
diff --git a/fs/udf/namei.c b/fs/udf/namei.c
index 0ed4861b038f..b3d5f97f16cd 100644
--- a/fs/udf/namei.c
+++ b/fs/udf/namei.c
@@ -75,11 +75,11 @@ int udf_write_fi(struct inode *inode, struct fileIdentDesc *cfi,
if (fileident) {
if (adinicb || (offset + lfi < 0)) {
- memcpy(udf_get_fi_ident(sfi), fileident, lfi);
+ memcpy(sfi->impUse + liu, fileident, lfi);
} else if (offset >= 0) {
memcpy(fibh->ebh->b_data + offset, fileident, lfi);
} else {
- memcpy(udf_get_fi_ident(sfi), fileident, -offset);
+ memcpy(sfi->impUse + liu, fileident, -offset);
memcpy(fibh->ebh->b_data, fileident - offset,
lfi + offset);
}
@@ -88,11 +88,11 @@ int udf_write_fi(struct inode *inode, struct fileIdentDesc *cfi,
offset += lfi;
if (adinicb || (offset + padlen < 0)) {
- memset(udf_get_fi_ident(sfi) + lfi, 0x00, padlen);
+ memset(sfi->impUse + liu + lfi, 0x00, padlen);
} else if (offset >= 0) {
memset(fibh->ebh->b_data + offset, 0x00, padlen);
} else {
- memset(udf_get_fi_ident(sfi) + lfi, 0x00, -offset);
+ memset(sfi->impUse + liu + lfi, 0x00, -offset);
memset(fibh->ebh->b_data, 0x00, padlen + offset);
}
diff --git a/fs/xattr.c b/fs/xattr.c
index 5c8c5175b385..e8dd03e4561e 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -25,6 +25,8 @@
#include <linux/uaccess.h>
+#include "internal.h"
+
static const char *
strcmp_prefix(const char *a, const char *a_prefix)
{
@@ -539,43 +541,76 @@ EXPORT_SYMBOL_GPL(vfs_removexattr);
/*
* Extended attribute SET operations
*/
-static long
-setxattr(struct user_namespace *mnt_userns, struct dentry *d,
- const char __user *name, const void __user *value, size_t size,
- int flags)
+
+int setxattr_copy(const char __user *name, struct xattr_ctx *ctx)
{
int error;
- void *kvalue = NULL;
- char kname[XATTR_NAME_MAX + 1];
- if (flags & ~(XATTR_CREATE|XATTR_REPLACE))
+ if (ctx->flags & ~(XATTR_CREATE|XATTR_REPLACE))
return -EINVAL;
- error = strncpy_from_user(kname, name, sizeof(kname));
- if (error == 0 || error == sizeof(kname))
- error = -ERANGE;
+ error = strncpy_from_user(ctx->kname->name, name,
+ sizeof(ctx->kname->name));
+ if (error == 0 || error == sizeof(ctx->kname->name))
+ return -ERANGE;
if (error < 0)
return error;
- if (size) {
- if (size > XATTR_SIZE_MAX)
+ error = 0;
+ if (ctx->size) {
+ if (ctx->size > XATTR_SIZE_MAX)
return -E2BIG;
- kvalue = kvmalloc(size, GFP_KERNEL);
- if (!kvalue)
- return -ENOMEM;
- if (copy_from_user(kvalue, value, size)) {
- error = -EFAULT;
- goto out;
+
+ ctx->kvalue = vmemdup_user(ctx->cvalue, ctx->size);
+ if (IS_ERR(ctx->kvalue)) {
+ error = PTR_ERR(ctx->kvalue);
+ ctx->kvalue = NULL;
}
- if ((strcmp(kname, XATTR_NAME_POSIX_ACL_ACCESS) == 0) ||
- (strcmp(kname, XATTR_NAME_POSIX_ACL_DEFAULT) == 0))
- posix_acl_fix_xattr_from_user(mnt_userns, kvalue, size);
}
- error = vfs_setxattr(mnt_userns, d, kname, kvalue, size, flags);
-out:
- kvfree(kvalue);
+ return error;
+}
+
+static void setxattr_convert(struct user_namespace *mnt_userns,
+ struct dentry *d, struct xattr_ctx *ctx)
+{
+ if (ctx->size &&
+ ((strcmp(ctx->kname->name, XATTR_NAME_POSIX_ACL_ACCESS) == 0) ||
+ (strcmp(ctx->kname->name, XATTR_NAME_POSIX_ACL_DEFAULT) == 0)))
+ posix_acl_fix_xattr_from_user(mnt_userns, d_inode(d),
+ ctx->kvalue, ctx->size);
+}
+
+int do_setxattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+ struct xattr_ctx *ctx)
+{
+ setxattr_convert(mnt_userns, dentry, ctx);
+ return vfs_setxattr(mnt_userns, dentry, ctx->kname->name,
+ ctx->kvalue, ctx->size, ctx->flags);
+}
+
+static long
+setxattr(struct user_namespace *mnt_userns, struct dentry *d,
+ const char __user *name, const void __user *value, size_t size,
+ int flags)
+{
+ struct xattr_name kname;
+ struct xattr_ctx ctx = {
+ .cvalue = value,
+ .kvalue = NULL,
+ .size = size,
+ .kname = &kname,
+ .flags = flags,
+ };
+ int error;
+ error = setxattr_copy(name, &ctx);
+ if (error)
+ return error;
+
+ error = do_setxattr(mnt_userns, d, &ctx);
+
+ kvfree(ctx.kvalue);
return error;
}
@@ -641,43 +676,61 @@ SYSCALL_DEFINE5(fsetxattr, int, fd, const char __user *, name,
/*
* Extended attribute GET operations
*/
-static ssize_t
-getxattr(struct user_namespace *mnt_userns, struct dentry *d,
- const char __user *name, void __user *value, size_t size)
+ssize_t
+do_getxattr(struct user_namespace *mnt_userns, struct dentry *d,
+ struct xattr_ctx *ctx)
{
ssize_t error;
- void *kvalue = NULL;
- char kname[XATTR_NAME_MAX + 1];
-
- error = strncpy_from_user(kname, name, sizeof(kname));
- if (error == 0 || error == sizeof(kname))
- error = -ERANGE;
- if (error < 0)
- return error;
+ char *kname = ctx->kname->name;
- if (size) {
- if (size > XATTR_SIZE_MAX)
- size = XATTR_SIZE_MAX;
- kvalue = kvzalloc(size, GFP_KERNEL);
- if (!kvalue)
+ if (ctx->size) {
+ if (ctx->size > XATTR_SIZE_MAX)
+ ctx->size = XATTR_SIZE_MAX;
+ ctx->kvalue = kvzalloc(ctx->size, GFP_KERNEL);
+ if (!ctx->kvalue)
return -ENOMEM;
}
- error = vfs_getxattr(mnt_userns, d, kname, kvalue, size);
+ error = vfs_getxattr(mnt_userns, d, kname, ctx->kvalue, ctx->size);
if (error > 0) {
if ((strcmp(kname, XATTR_NAME_POSIX_ACL_ACCESS) == 0) ||
(strcmp(kname, XATTR_NAME_POSIX_ACL_DEFAULT) == 0))
- posix_acl_fix_xattr_to_user(mnt_userns, kvalue, error);
- if (size && copy_to_user(value, kvalue, error))
+ posix_acl_fix_xattr_to_user(mnt_userns, d_inode(d),
+ ctx->kvalue, error);
+ if (ctx->size && copy_to_user(ctx->value, ctx->kvalue, error))
error = -EFAULT;
- } else if (error == -ERANGE && size >= XATTR_SIZE_MAX) {
+ } else if (error == -ERANGE && ctx->size >= XATTR_SIZE_MAX) {
/* The file system tried to returned a value bigger
than XATTR_SIZE_MAX bytes. Not possible. */
error = -E2BIG;
}
- kvfree(kvalue);
+ return error;
+}
+
+static ssize_t
+getxattr(struct user_namespace *mnt_userns, struct dentry *d,
+ const char __user *name, void __user *value, size_t size)
+{
+ ssize_t error;
+ struct xattr_name kname;
+ struct xattr_ctx ctx = {
+ .value = value,
+ .kvalue = NULL,
+ .size = size,
+ .kname = &kname,
+ .flags = 0,
+ };
+
+ error = strncpy_from_user(kname.name, name, sizeof(kname.name));
+ if (error == 0 || error == sizeof(kname.name))
+ error = -ERANGE;
+ if (error < 0)
+ return error;
+
+ error = do_getxattr(mnt_userns, d, &ctx);
+ kvfree(ctx.kvalue);
return error;
}
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index e1afb9e503e1..bf4e60871068 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -406,7 +406,7 @@ xfs_buf_alloc_pages(
STATIC int
_xfs_buf_map_pages(
struct xfs_buf *bp,
- uint flags)
+ xfs_buf_flags_t flags)
{
ASSERT(bp->b_flags & _XBF_PAGES);
if (bp->b_page_count == 1) {
@@ -868,7 +868,7 @@ xfs_buf_read_uncached(
struct xfs_buftarg *target,
xfs_daddr_t daddr,
size_t numblks,
- int flags,
+ xfs_buf_flags_t flags,
struct xfs_buf **bpp,
const struct xfs_buf_ops *ops)
{
@@ -903,7 +903,7 @@ int
xfs_buf_get_uncached(
struct xfs_buftarg *target,
size_t numblks,
- int flags,
+ xfs_buf_flags_t flags,
struct xfs_buf **bpp)
{
int error;
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
index edcb6254fa6a..1ee3056ff9cf 100644
--- a/fs/xfs/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h
@@ -22,28 +22,28 @@ struct xfs_buf;
#define XFS_BUF_DADDR_NULL ((xfs_daddr_t) (-1LL))
-#define XBF_READ (1 << 0) /* buffer intended for reading from device */
-#define XBF_WRITE (1 << 1) /* buffer intended for writing to device */
-#define XBF_READ_AHEAD (1 << 2) /* asynchronous read-ahead */
-#define XBF_NO_IOACCT (1 << 3) /* bypass I/O accounting (non-LRU bufs) */
-#define XBF_ASYNC (1 << 4) /* initiator will not wait for completion */
-#define XBF_DONE (1 << 5) /* all pages in the buffer uptodate */
-#define XBF_STALE (1 << 6) /* buffer has been staled, do not find it */
-#define XBF_WRITE_FAIL (1 << 7) /* async writes have failed on this buffer */
+#define XBF_READ (1u << 0) /* buffer intended for reading from device */
+#define XBF_WRITE (1u << 1) /* buffer intended for writing to device */
+#define XBF_READ_AHEAD (1u << 2) /* asynchronous read-ahead */
+#define XBF_NO_IOACCT (1u << 3) /* bypass I/O accounting (non-LRU bufs) */
+#define XBF_ASYNC (1u << 4) /* initiator will not wait for completion */
+#define XBF_DONE (1u << 5) /* all pages in the buffer uptodate */
+#define XBF_STALE (1u << 6) /* buffer has been staled, do not find it */
+#define XBF_WRITE_FAIL (1u << 7) /* async writes have failed on this buffer */
/* buffer type flags for write callbacks */
-#define _XBF_INODES (1 << 16)/* inode buffer */
-#define _XBF_DQUOTS (1 << 17)/* dquot buffer */
-#define _XBF_LOGRECOVERY (1 << 18)/* log recovery buffer */
+#define _XBF_INODES (1u << 16)/* inode buffer */
+#define _XBF_DQUOTS (1u << 17)/* dquot buffer */
+#define _XBF_LOGRECOVERY (1u << 18)/* log recovery buffer */
/* flags used only internally */
-#define _XBF_PAGES (1 << 20)/* backed by refcounted pages */
-#define _XBF_KMEM (1 << 21)/* backed by heap memory */
-#define _XBF_DELWRI_Q (1 << 22)/* buffer on a delwri queue */
+#define _XBF_PAGES (1u << 20)/* backed by refcounted pages */
+#define _XBF_KMEM (1u << 21)/* backed by heap memory */
+#define _XBF_DELWRI_Q (1u << 22)/* buffer on a delwri queue */
/* flags used only as arguments to access routines */
-#define XBF_TRYLOCK (1 << 30)/* lock requested, but do not wait */
-#define XBF_UNMAPPED (1 << 31)/* do not map the buffer */
+#define XBF_TRYLOCK (1u << 30)/* lock requested, but do not wait */
+#define XBF_UNMAPPED (1u << 31)/* do not map the buffer */
typedef unsigned int xfs_buf_flags_t;
@@ -58,7 +58,7 @@ typedef unsigned int xfs_buf_flags_t;
{ XBF_WRITE_FAIL, "WRITE_FAIL" }, \
{ _XBF_INODES, "INODES" }, \
{ _XBF_DQUOTS, "DQUOTS" }, \
- { _XBF_LOGRECOVERY, "LOG_RECOVERY" }, \
+ { _XBF_LOGRECOVERY, "LOG_RECOVERY" }, \
{ _XBF_PAGES, "PAGES" }, \
{ _XBF_KMEM, "KMEM" }, \
{ _XBF_DELWRI_Q, "DELWRI_Q" }, \
@@ -247,11 +247,11 @@ xfs_buf_readahead(
return xfs_buf_readahead_map(target, &map, 1, ops);
}
-int xfs_buf_get_uncached(struct xfs_buftarg *target, size_t numblks, int flags,
- struct xfs_buf **bpp);
+int xfs_buf_get_uncached(struct xfs_buftarg *target, size_t numblks,
+ xfs_buf_flags_t flags, struct xfs_buf **bpp);
int xfs_buf_read_uncached(struct xfs_buftarg *target, xfs_daddr_t daddr,
- size_t numblks, int flags, struct xfs_buf **bpp,
- const struct xfs_buf_ops *ops);
+ size_t numblks, xfs_buf_flags_t flags, struct xfs_buf **bpp,
+ const struct xfs_buf_ops *ops);
int _xfs_buf_read(struct xfs_buf *bp, xfs_buf_flags_t flags);
void xfs_buf_hold(struct xfs_buf *bp);
diff --git a/fs/xfs/xfs_discard.c b/fs/xfs/xfs_discard.c
index 0191de8ce9ce..c6fe3f6ebb6b 100644
--- a/fs/xfs/xfs_discard.c
+++ b/fs/xfs/xfs_discard.c
@@ -114,7 +114,7 @@ xfs_trim_extents(
}
trace_xfs_discard_extent(mp, agno, fbno, flen);
- error = blkdev_issue_discard(bdev, dbno, dlen, GFP_NOFS, 0);
+ error = blkdev_issue_discard(bdev, dbno, dlen, GFP_NOFS);
if (error)
goto out_del_cursor;
*blocks_trimmed += flen;
@@ -152,8 +152,8 @@ xfs_ioc_trim(
struct xfs_mount *mp,
struct fstrim_range __user *urange)
{
- struct request_queue *q = bdev_get_queue(mp->m_ddev_targp->bt_bdev);
- unsigned int granularity = q->limits.discard_granularity;
+ unsigned int granularity =
+ bdev_discard_granularity(mp->m_ddev_targp->bt_bdev);
struct fstrim_range range;
xfs_daddr_t start, end, minlen;
xfs_agnumber_t start_agno, end_agno, agno;
@@ -162,7 +162,7 @@ xfs_ioc_trim(
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
- if (!blk_queue_discard(q))
+ if (!bdev_max_discard_sectors(mp->m_ddev_targp->bt_bdev))
return -EOPNOTSUPP;
/*
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 9de6205fe134..39ae53efb3ab 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -2594,14 +2594,13 @@ xfs_ifree_cluster(
}
/*
- * This is called to return an inode to the inode free list.
- * The inode should already be truncated to 0 length and have
- * no pages associated with it. This routine also assumes that
- * the inode is already a part of the transaction.
+ * This is called to return an inode to the inode free list. The inode should
+ * already be truncated to 0 length and have no pages associated with it. This
+ * routine also assumes that the inode is already a part of the transaction.
*
- * The on-disk copy of the inode will have been added to the list
- * of unlinked inodes in the AGI. We need to remove the inode from
- * that list atomically with respect to freeing it here.
+ * The on-disk copy of the inode will have been added to the list of unlinked
+ * inodes in the AGI. We need to remove the inode from that list atomically with
+ * respect to freeing it here.
*/
int
xfs_ifree(
@@ -2623,13 +2622,16 @@ xfs_ifree(
pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
/*
- * Pull the on-disk inode from the AGI unlinked list.
+ * Free the inode first so that we guarantee that the AGI lock is going
+ * to be taken before we remove the inode from the unlinked list. This
+ * makes the AGI lock -> unlinked list modification order the same as
+ * used in O_TMPFILE creation.
*/
- error = xfs_iunlink_remove(tp, pag, ip);
+ error = xfs_difree(tp, pag, ip->i_ino, &xic);
if (error)
- goto out;
+ return error;
- error = xfs_difree(tp, pag, ip->i_ino, &xic);
+ error = xfs_iunlink_remove(tp, pag, ip);
if (error)
goto out;
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c
index ba57323bfdce..c9f55e4f0957 100644
--- a/fs/xfs/xfs_log_cil.c
+++ b/fs/xfs/xfs_log_cil.c
@@ -605,7 +605,7 @@ xlog_discard_busy_extents(
error = __blkdev_issue_discard(mp->m_ddev_targp->bt_bdev,
XFS_AGB_TO_DADDR(mp, busyp->agno, busyp->bno),
XFS_FSB_TO_BB(mp, busyp->length),
- GFP_NOFS, 0, &bio);
+ GFP_NOFS, &bio);
if (error && error != -EOPNOTSUPP) {
xfs_info(mp,
"discard failed for extent [0x%llx,%u], error %d",
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 54be9d64093e..a276b8111f63 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -1608,14 +1608,10 @@ xfs_fs_fill_super(
goto out_filestream_unmount;
}
- if (xfs_has_discard(mp)) {
- struct request_queue *q = bdev_get_queue(sb->s_bdev);
-
- if (!blk_queue_discard(q)) {
- xfs_warn(mp, "mounting with \"discard\" option, but "
- "the device does not support discard");
- mp->m_features &= ~XFS_FEAT_DISCARD;
- }
+ if (xfs_has_discard(mp) && !bdev_max_discard_sectors(sb->s_bdev)) {
+ xfs_warn(mp,
+ "mounting with \"discard\" option, but the device does not support discard");
+ mp->m_features &= ~XFS_FEAT_DISCARD;
}
if (xfs_has_reflink(mp)) {
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index de177842b951..0c82673238f4 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -175,7 +175,7 @@ xfs_trans_get_buf(
struct xfs_buftarg *target,
xfs_daddr_t blkno,
int numblks,
- uint flags,
+ xfs_buf_flags_t flags,
struct xfs_buf **bpp)
{
DEFINE_SINGLE_BUF_MAP(map, blkno, numblks);
diff --git a/fs/zonefs/Makefile b/fs/zonefs/Makefile
index 33c1a4f1132e..9fe54f5319f2 100644
--- a/fs/zonefs/Makefile
+++ b/fs/zonefs/Makefile
@@ -3,4 +3,4 @@ ccflags-y += -I$(src)
obj-$(CONFIG_ZONEFS_FS) += zonefs.o
-zonefs-y := super.o
+zonefs-y := super.o sysfs.o
diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c
index 3614c7834007..b3b0b71fdf6c 100644
--- a/fs/zonefs/super.c
+++ b/fs/zonefs/super.c
@@ -27,6 +27,39 @@
#define CREATE_TRACE_POINTS
#include "trace.h"
+/*
+ * Manage the active zone count. Called with zi->i_truncate_mutex held.
+ */
+static void zonefs_account_active(struct inode *inode)
+{
+ struct zonefs_sb_info *sbi = ZONEFS_SB(inode->i_sb);
+ struct zonefs_inode_info *zi = ZONEFS_I(inode);
+
+ lockdep_assert_held(&zi->i_truncate_mutex);
+
+ if (zi->i_ztype != ZONEFS_ZTYPE_SEQ)
+ return;
+
+ /*
+ * If the zone is active, that is, if it is explicitly open or
+ * partially written, check if it was already accounted as active.
+ */
+ if ((zi->i_flags & ZONEFS_ZONE_OPEN) ||
+ (zi->i_wpoffset > 0 && zi->i_wpoffset < zi->i_max_size)) {
+ if (!(zi->i_flags & ZONEFS_ZONE_ACTIVE)) {
+ zi->i_flags |= ZONEFS_ZONE_ACTIVE;
+ atomic_inc(&sbi->s_active_seq_files);
+ }
+ return;
+ }
+
+ /* The zone is not active. If it was, update the active count */
+ if (zi->i_flags & ZONEFS_ZONE_ACTIVE) {
+ zi->i_flags &= ~ZONEFS_ZONE_ACTIVE;
+ atomic_dec(&sbi->s_active_seq_files);
+ }
+}
+
static inline int zonefs_zone_mgmt(struct inode *inode,
enum req_opf op)
{
@@ -35,6 +68,17 @@ static inline int zonefs_zone_mgmt(struct inode *inode,
lockdep_assert_held(&zi->i_truncate_mutex);
+ /*
+ * With ZNS drives, closing an explicitly open zone that has not been
+ * written will change the zone state to "closed", that is, the zone
+ * will remain active. Since this can then cause failure of explicit
+ * open operation on other zones if the drive active zone resources
+ * are exceeded, make sure that the zone does not remain active by
+ * resetting it.
+ */
+ if (op == REQ_OP_ZONE_CLOSE && !zi->i_wpoffset)
+ op = REQ_OP_ZONE_RESET;
+
trace_zonefs_zone_mgmt(inode, op);
ret = blkdev_zone_mgmt(inode->i_sb->s_bdev, op, zi->i_zsector,
zi->i_zone_size >> SECTOR_SHIFT, GFP_NOFS);
@@ -57,8 +101,13 @@ static inline void zonefs_i_size_write(struct inode *inode, loff_t isize)
* A full zone is no longer open/active and does not need
* explicit closing.
*/
- if (isize >= zi->i_max_size)
- zi->i_flags &= ~ZONEFS_ZONE_OPEN;
+ if (isize >= zi->i_max_size) {
+ struct zonefs_sb_info *sbi = ZONEFS_SB(inode->i_sb);
+
+ if (zi->i_flags & ZONEFS_ZONE_ACTIVE)
+ atomic_dec(&sbi->s_active_seq_files);
+ zi->i_flags &= ~(ZONEFS_ZONE_OPEN | ZONEFS_ZONE_ACTIVE);
+ }
}
static int zonefs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
@@ -386,6 +435,7 @@ static int zonefs_io_error_cb(struct blk_zone *zone, unsigned int idx,
zonefs_update_stats(inode, data_size);
zonefs_i_size_write(inode, data_size);
zi->i_wpoffset = data_size;
+ zonefs_account_active(inode);
return 0;
}
@@ -497,6 +547,7 @@ static int zonefs_file_truncate(struct inode *inode, loff_t isize)
zonefs_update_stats(inode, isize);
truncate_setsize(inode, isize);
zi->i_wpoffset = isize;
+ zonefs_account_active(inode);
unlock:
mutex_unlock(&zi->i_truncate_mutex);
@@ -678,13 +729,12 @@ static ssize_t zonefs_file_dio_append(struct kiocb *iocb, struct iov_iter *from)
struct inode *inode = file_inode(iocb->ki_filp);
struct zonefs_inode_info *zi = ZONEFS_I(inode);
struct block_device *bdev = inode->i_sb->s_bdev;
- unsigned int max;
+ unsigned int max = bdev_max_zone_append_sectors(bdev);
struct bio *bio;
ssize_t size;
int nr_pages;
ssize_t ret;
- max = queue_max_zone_append_sectors(bdev_get_queue(bdev));
max = ALIGN_DOWN(max << SECTOR_SHIFT, inode->i_sb->s_blocksize);
iov_iter_truncate(from, max);
@@ -855,8 +905,15 @@ static ssize_t zonefs_file_dio_write(struct kiocb *iocb, struct iov_iter *from)
(ret > 0 || ret == -EIOCBQUEUED)) {
if (ret > 0)
count = ret;
+
+ /*
+ * Update the zone write pointer offset assuming the write
+ * operation succeeded. If it did not, the error recovery path
+ * will correct it. Also do active seq file accounting.
+ */
mutex_lock(&zi->i_truncate_mutex);
zi->i_wpoffset += count;
+ zonefs_account_active(inode);
mutex_unlock(&zi->i_truncate_mutex);
}
@@ -998,13 +1055,13 @@ inode_unlock:
return ret;
}
-static inline bool zonefs_file_use_exp_open(struct inode *inode, struct file *file)
+/*
+ * Write open accounting is done only for sequential files.
+ */
+static inline bool zonefs_seq_file_need_wro(struct inode *inode,
+ struct file *file)
{
struct zonefs_inode_info *zi = ZONEFS_I(inode);
- struct zonefs_sb_info *sbi = ZONEFS_SB(inode->i_sb);
-
- if (!(sbi->s_mount_opts & ZONEFS_MNTOPT_EXPLICIT_OPEN))
- return false;
if (zi->i_ztype != ZONEFS_ZTYPE_SEQ)
return false;
@@ -1015,28 +1072,34 @@ static inline bool zonefs_file_use_exp_open(struct inode *inode, struct file *fi
return true;
}
-static int zonefs_open_zone(struct inode *inode)
+static int zonefs_seq_file_write_open(struct inode *inode)
{
struct zonefs_inode_info *zi = ZONEFS_I(inode);
- struct zonefs_sb_info *sbi = ZONEFS_SB(inode->i_sb);
int ret = 0;
mutex_lock(&zi->i_truncate_mutex);
if (!zi->i_wr_refcnt) {
- if (atomic_inc_return(&sbi->s_open_zones) > sbi->s_max_open_zones) {
- atomic_dec(&sbi->s_open_zones);
- ret = -EBUSY;
- goto unlock;
- }
+ struct zonefs_sb_info *sbi = ZONEFS_SB(inode->i_sb);
+ unsigned int wro = atomic_inc_return(&sbi->s_wro_seq_files);
- if (i_size_read(inode) < zi->i_max_size) {
- ret = zonefs_zone_mgmt(inode, REQ_OP_ZONE_OPEN);
- if (ret) {
- atomic_dec(&sbi->s_open_zones);
+ if (sbi->s_mount_opts & ZONEFS_MNTOPT_EXPLICIT_OPEN) {
+
+ if (wro > sbi->s_max_wro_seq_files) {
+ atomic_dec(&sbi->s_wro_seq_files);
+ ret = -EBUSY;
goto unlock;
}
- zi->i_flags |= ZONEFS_ZONE_OPEN;
+
+ if (i_size_read(inode) < zi->i_max_size) {
+ ret = zonefs_zone_mgmt(inode, REQ_OP_ZONE_OPEN);
+ if (ret) {
+ atomic_dec(&sbi->s_wro_seq_files);
+ goto unlock;
+ }
+ zi->i_flags |= ZONEFS_ZONE_OPEN;
+ zonefs_account_active(inode);
+ }
}
}
@@ -1056,30 +1119,31 @@ static int zonefs_file_open(struct inode *inode, struct file *file)
if (ret)
return ret;
- if (zonefs_file_use_exp_open(inode, file))
- return zonefs_open_zone(inode);
+ if (zonefs_seq_file_need_wro(inode, file))
+ return zonefs_seq_file_write_open(inode);
return 0;
}
-static void zonefs_close_zone(struct inode *inode)
+static void zonefs_seq_file_write_close(struct inode *inode)
{
struct zonefs_inode_info *zi = ZONEFS_I(inode);
+ struct super_block *sb = inode->i_sb;
+ struct zonefs_sb_info *sbi = ZONEFS_SB(sb);
int ret = 0;
mutex_lock(&zi->i_truncate_mutex);
- zi->i_wr_refcnt--;
- if (!zi->i_wr_refcnt) {
- struct zonefs_sb_info *sbi = ZONEFS_SB(inode->i_sb);
- struct super_block *sb = inode->i_sb;
- /*
- * If the file zone is full, it is not open anymore and we only
- * need to decrement the open count.
- */
- if (!(zi->i_flags & ZONEFS_ZONE_OPEN))
- goto dec;
+ zi->i_wr_refcnt--;
+ if (zi->i_wr_refcnt)
+ goto unlock;
+ /*
+ * The file zone may not be open anymore (e.g. the file was truncated to
+ * its maximum size or it was fully written). For this case, we only
+ * need to decrement the write open count.
+ */
+ if (zi->i_flags & ZONEFS_ZONE_OPEN) {
ret = zonefs_zone_mgmt(inode, REQ_OP_ZONE_CLOSE);
if (ret) {
__zonefs_io_error(inode, false);
@@ -1091,14 +1155,23 @@ static void zonefs_close_zone(struct inode *inode)
*/
if (zi->i_flags & ZONEFS_ZONE_OPEN &&
!(sb->s_flags & SB_RDONLY)) {
- zonefs_warn(sb, "closing zone failed, remounting filesystem read-only\n");
+ zonefs_warn(sb,
+ "closing zone at %llu failed %d\n",
+ zi->i_zsector, ret);
+ zonefs_warn(sb,
+ "remounting filesystem read-only\n");
sb->s_flags |= SB_RDONLY;
}
+ goto unlock;
}
+
zi->i_flags &= ~ZONEFS_ZONE_OPEN;
-dec:
- atomic_dec(&sbi->s_open_zones);
+ zonefs_account_active(inode);
}
+
+ atomic_dec(&sbi->s_wro_seq_files);
+
+unlock:
mutex_unlock(&zi->i_truncate_mutex);
}
@@ -1110,8 +1183,8 @@ static int zonefs_file_release(struct inode *inode, struct file *file)
* the zone has gone offline or read-only). Make sure we don't fail the
* close(2) for user-space.
*/
- if (zonefs_file_use_exp_open(inode, file))
- zonefs_close_zone(inode);
+ if (zonefs_seq_file_need_wro(inode, file))
+ zonefs_seq_file_write_close(inode);
return 0;
}
@@ -1142,6 +1215,7 @@ static struct inode *zonefs_alloc_inode(struct super_block *sb)
inode_init_once(&zi->i_vnode);
mutex_init(&zi->i_truncate_mutex);
zi->i_wr_refcnt = 0;
+ zi->i_flags = 0;
return &zi->i_vnode;
}
@@ -1293,12 +1367,13 @@ static void zonefs_init_dir_inode(struct inode *parent, struct inode *inode,
inc_nlink(parent);
}
-static void zonefs_init_file_inode(struct inode *inode, struct blk_zone *zone,
- enum zonefs_ztype type)
+static int zonefs_init_file_inode(struct inode *inode, struct blk_zone *zone,
+ enum zonefs_ztype type)
{
struct super_block *sb = inode->i_sb;
struct zonefs_sb_info *sbi = ZONEFS_SB(sb);
struct zonefs_inode_info *zi = ZONEFS_I(inode);
+ int ret;
inode->i_ino = zone->start >> sbi->s_zone_sectors_shift;
inode->i_mode = S_IFREG | sbi->s_perm;
@@ -1323,6 +1398,29 @@ static void zonefs_init_file_inode(struct inode *inode, struct blk_zone *zone,
sb->s_maxbytes = max(zi->i_max_size, sb->s_maxbytes);
sbi->s_blocks += zi->i_max_size >> sb->s_blocksize_bits;
sbi->s_used_blocks += zi->i_wpoffset >> sb->s_blocksize_bits;
+
+ mutex_lock(&zi->i_truncate_mutex);
+
+ /*
+ * For sequential zones, make sure that any open zone is closed first
+ * to ensure that the initial number of open zones is 0, in sync with
+ * the open zone accounting done when the mount option
+ * ZONEFS_MNTOPT_EXPLICIT_OPEN is used.
+ */
+ if (type == ZONEFS_ZTYPE_SEQ &&
+ (zone->cond == BLK_ZONE_COND_IMP_OPEN ||
+ zone->cond == BLK_ZONE_COND_EXP_OPEN)) {
+ ret = zonefs_zone_mgmt(inode, REQ_OP_ZONE_CLOSE);
+ if (ret)
+ goto unlock;
+ }
+
+ zonefs_account_active(inode);
+
+unlock:
+ mutex_unlock(&zi->i_truncate_mutex);
+
+ return 0;
}
static struct dentry *zonefs_create_inode(struct dentry *parent,
@@ -1332,6 +1430,7 @@ static struct dentry *zonefs_create_inode(struct dentry *parent,
struct inode *dir = d_inode(parent);
struct dentry *dentry;
struct inode *inode;
+ int ret;
dentry = d_alloc_name(parent, name);
if (!dentry)
@@ -1342,10 +1441,16 @@ static struct dentry *zonefs_create_inode(struct dentry *parent,
goto dput;
inode->i_ctime = inode->i_mtime = inode->i_atime = dir->i_ctime;
- if (zone)
- zonefs_init_file_inode(inode, zone, type);
- else
+ if (zone) {
+ ret = zonefs_init_file_inode(inode, zone, type);
+ if (ret) {
+ iput(inode);
+ goto dput;
+ }
+ } else {
zonefs_init_dir_inode(dir, inode, type);
+ }
+
d_add(dentry, inode);
dir->i_size++;
@@ -1652,14 +1757,18 @@ static int zonefs_fill_super(struct super_block *sb, void *data, int silent)
sbi->s_gid = GLOBAL_ROOT_GID;
sbi->s_perm = 0640;
sbi->s_mount_opts = ZONEFS_MNTOPT_ERRORS_RO;
- sbi->s_max_open_zones = bdev_max_open_zones(sb->s_bdev);
- atomic_set(&sbi->s_open_zones, 0);
- if (!sbi->s_max_open_zones &&
+
+ atomic_set(&sbi->s_wro_seq_files, 0);
+ sbi->s_max_wro_seq_files = bdev_max_open_zones(sb->s_bdev);
+ if (!sbi->s_max_wro_seq_files &&
sbi->s_mount_opts & ZONEFS_MNTOPT_EXPLICIT_OPEN) {
zonefs_info(sb, "No open zones limit. Ignoring explicit_open mount option\n");
sbi->s_mount_opts &= ~ZONEFS_MNTOPT_EXPLICIT_OPEN;
}
+ atomic_set(&sbi->s_active_seq_files, 0);
+ sbi->s_max_active_seq_files = bdev_max_active_zones(sb->s_bdev);
+
ret = zonefs_read_super(sb);
if (ret)
return ret;
@@ -1674,6 +1783,10 @@ static int zonefs_fill_super(struct super_block *sb, void *data, int silent)
if (ret)
goto cleanup;
+ ret = zonefs_sysfs_register(sb);
+ if (ret)
+ goto cleanup;
+
zonefs_info(sb, "Mounting %u zones",
blkdev_nr_zones(sb->s_bdev->bd_disk));
@@ -1719,6 +1832,8 @@ static void zonefs_kill_super(struct super_block *sb)
if (sb->s_root)
d_genocide(sb->s_root);
+
+ zonefs_sysfs_unregister(sb);
kill_block_super(sb);
kfree(sbi);
}
@@ -1766,16 +1881,26 @@ static int __init zonefs_init(void)
return ret;
ret = register_filesystem(&zonefs_type);
- if (ret) {
- zonefs_destroy_inodecache();
- return ret;
- }
+ if (ret)
+ goto destroy_inodecache;
+
+ ret = zonefs_sysfs_init();
+ if (ret)
+ goto unregister_fs;
return 0;
+
+unregister_fs:
+ unregister_filesystem(&zonefs_type);
+destroy_inodecache:
+ zonefs_destroy_inodecache();
+
+ return ret;
}
static void __exit zonefs_exit(void)
{
+ zonefs_sysfs_exit();
zonefs_destroy_inodecache();
unregister_filesystem(&zonefs_type);
}
diff --git a/fs/zonefs/sysfs.c b/fs/zonefs/sysfs.c
new file mode 100644
index 000000000000..9cb6755ce39a
--- /dev/null
+++ b/fs/zonefs/sysfs.c
@@ -0,0 +1,139 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Simple file system for zoned block devices exposing zones as files.
+ *
+ * Copyright (C) 2022 Western Digital Corporation or its affiliates.
+ */
+#include <linux/fs.h>
+#include <linux/seq_file.h>
+#include <linux/blkdev.h>
+
+#include "zonefs.h"
+
+struct zonefs_sysfs_attr {
+ struct attribute attr;
+ ssize_t (*show)(struct zonefs_sb_info *sbi, char *buf);
+};
+
+static inline struct zonefs_sysfs_attr *to_attr(struct attribute *attr)
+{
+ return container_of(attr, struct zonefs_sysfs_attr, attr);
+}
+
+#define ZONEFS_SYSFS_ATTR_RO(name) \
+static struct zonefs_sysfs_attr zonefs_sysfs_attr_##name = __ATTR_RO(name)
+
+#define ATTR_LIST(name) &zonefs_sysfs_attr_##name.attr
+
+static ssize_t zonefs_sysfs_attr_show(struct kobject *kobj,
+ struct attribute *attr, char *buf)
+{
+ struct zonefs_sb_info *sbi =
+ container_of(kobj, struct zonefs_sb_info, s_kobj);
+ struct zonefs_sysfs_attr *zonefs_attr =
+ container_of(attr, struct zonefs_sysfs_attr, attr);
+
+ if (!zonefs_attr->show)
+ return 0;
+
+ return zonefs_attr->show(sbi, buf);
+}
+
+static ssize_t max_wro_seq_files_show(struct zonefs_sb_info *sbi, char *buf)
+{
+ return sysfs_emit(buf, "%u\n", sbi->s_max_wro_seq_files);
+}
+ZONEFS_SYSFS_ATTR_RO(max_wro_seq_files);
+
+static ssize_t nr_wro_seq_files_show(struct zonefs_sb_info *sbi, char *buf)
+{
+ return sysfs_emit(buf, "%d\n", atomic_read(&sbi->s_wro_seq_files));
+}
+ZONEFS_SYSFS_ATTR_RO(nr_wro_seq_files);
+
+static ssize_t max_active_seq_files_show(struct zonefs_sb_info *sbi, char *buf)
+{
+ return sysfs_emit(buf, "%u\n", sbi->s_max_active_seq_files);
+}
+ZONEFS_SYSFS_ATTR_RO(max_active_seq_files);
+
+static ssize_t nr_active_seq_files_show(struct zonefs_sb_info *sbi, char *buf)
+{
+ return sysfs_emit(buf, "%d\n", atomic_read(&sbi->s_active_seq_files));
+}
+ZONEFS_SYSFS_ATTR_RO(nr_active_seq_files);
+
+static struct attribute *zonefs_sysfs_attrs[] = {
+ ATTR_LIST(max_wro_seq_files),
+ ATTR_LIST(nr_wro_seq_files),
+ ATTR_LIST(max_active_seq_files),
+ ATTR_LIST(nr_active_seq_files),
+ NULL,
+};
+ATTRIBUTE_GROUPS(zonefs_sysfs);
+
+static void zonefs_sysfs_sb_release(struct kobject *kobj)
+{
+ struct zonefs_sb_info *sbi =
+ container_of(kobj, struct zonefs_sb_info, s_kobj);
+
+ complete(&sbi->s_kobj_unregister);
+}
+
+static const struct sysfs_ops zonefs_sysfs_attr_ops = {
+ .show = zonefs_sysfs_attr_show,
+};
+
+static struct kobj_type zonefs_sb_ktype = {
+ .default_groups = zonefs_sysfs_groups,
+ .sysfs_ops = &zonefs_sysfs_attr_ops,
+ .release = zonefs_sysfs_sb_release,
+};
+
+static struct kobject *zonefs_sysfs_root;
+
+int zonefs_sysfs_register(struct super_block *sb)
+{
+ struct zonefs_sb_info *sbi = ZONEFS_SB(sb);
+ int ret;
+
+ init_completion(&sbi->s_kobj_unregister);
+ ret = kobject_init_and_add(&sbi->s_kobj, &zonefs_sb_ktype,
+ zonefs_sysfs_root, "%s", sb->s_id);
+ if (ret) {
+ kobject_put(&sbi->s_kobj);
+ wait_for_completion(&sbi->s_kobj_unregister);
+ return ret;
+ }
+
+ sbi->s_sysfs_registered = true;
+
+ return 0;
+}
+
+void zonefs_sysfs_unregister(struct super_block *sb)
+{
+ struct zonefs_sb_info *sbi = ZONEFS_SB(sb);
+
+ if (!sbi || !sbi->s_sysfs_registered)
+ return;
+
+ kobject_del(&sbi->s_kobj);
+ kobject_put(&sbi->s_kobj);
+ wait_for_completion(&sbi->s_kobj_unregister);
+}
+
+int __init zonefs_sysfs_init(void)
+{
+ zonefs_sysfs_root = kobject_create_and_add("zonefs", fs_kobj);
+ if (!zonefs_sysfs_root)
+ return -ENOMEM;
+
+ return 0;
+}
+
+void zonefs_sysfs_exit(void)
+{
+ kobject_put(zonefs_sysfs_root);
+ zonefs_sysfs_root = NULL;
+}
diff --git a/fs/zonefs/zonefs.h b/fs/zonefs/zonefs.h
index 7b147907c328..4b3de66c3233 100644
--- a/fs/zonefs/zonefs.h
+++ b/fs/zonefs/zonefs.h
@@ -12,6 +12,7 @@
#include <linux/uuid.h>
#include <linux/mutex.h>
#include <linux/rwsem.h>
+#include <linux/kobject.h>
/*
* Maximum length of file names: this only needs to be large enough to fit
@@ -39,6 +40,7 @@ static inline enum zonefs_ztype zonefs_zone_type(struct blk_zone *zone)
}
#define ZONEFS_ZONE_OPEN (1 << 0)
+#define ZONEFS_ZONE_ACTIVE (1 << 1)
/*
* In-memory inode data.
@@ -182,8 +184,15 @@ struct zonefs_sb_info {
loff_t s_blocks;
loff_t s_used_blocks;
- unsigned int s_max_open_zones;
- atomic_t s_open_zones;
+ unsigned int s_max_wro_seq_files;
+ atomic_t s_wro_seq_files;
+
+ unsigned int s_max_active_seq_files;
+ atomic_t s_active_seq_files;
+
+ bool s_sysfs_registered;
+ struct kobject s_kobj;
+ struct completion s_kobj_unregister;
};
static inline struct zonefs_sb_info *ZONEFS_SB(struct super_block *sb)
@@ -198,4 +207,9 @@ static inline struct zonefs_sb_info *ZONEFS_SB(struct super_block *sb)
#define zonefs_warn(sb, format, args...) \
pr_warn("zonefs (%s) WARNING: " format, sb->s_id, ## args)
+int zonefs_sysfs_register(struct super_block *sb);
+void zonefs_sysfs_unregister(struct super_block *sb);
+int zonefs_sysfs_init(void);
+void zonefs_sysfs_exit(void);
+
#endif
diff --git a/include/asm-generic/bug.h b/include/asm-generic/bug.h
index edb0e2a602a8..ba1f860af38b 100644
--- a/include/asm-generic/bug.h
+++ b/include/asm-generic/bug.h
@@ -21,6 +21,12 @@
#include <linux/panic.h>
#include <linux/printk.h>
+struct warn_args;
+struct pt_regs;
+
+void __warn(const char *file, int line, void *caller, unsigned taint,
+ struct pt_regs *regs, struct warn_args *args);
+
#ifdef CONFIG_BUG
#ifdef CONFIG_GENERIC_BUG
@@ -110,11 +116,6 @@ extern __printf(1, 2) void __warn_printk(const char *fmt, ...);
#endif
/* used internally by panic.c */
-struct warn_args;
-struct pt_regs;
-
-void __warn(const char *file, int line, void *caller, unsigned taint,
- struct pt_regs *regs, struct warn_args *args);
#ifndef WARN_ON
#define WARN_ON(condition) ({ \
diff --git a/include/dt-bindings/clock/microchip,mpfs-clock.h b/include/dt-bindings/clock/microchip,mpfs-clock.h
index 73f2a9324857..4048669bf756 100644
--- a/include/dt-bindings/clock/microchip,mpfs-clock.h
+++ b/include/dt-bindings/clock/microchip,mpfs-clock.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
/*
* Daire McNamara,<daire.mcnamara@microchip.com>
- * Copyright (C) 2020 Microchip Technology Inc. All rights reserved.
+ * Copyright (C) 2020-2022 Microchip Technology Inc. All rights reserved.
*/
#ifndef _DT_BINDINGS_CLK_MICROCHIP_MPFS_H_
@@ -42,4 +42,7 @@
#define CLK_ATHENA 31
#define CLK_CFM 32
+#define CLK_RTCREF 33
+#define CLK_MSSPLL 34
+
#endif /* _DT_BINDINGS_CLK_MICROCHIP_MPFS_H_ */
diff --git a/include/linux/audit.h b/include/linux/audit.h
index d06134ac6245..cece70231138 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -339,7 +339,7 @@ static inline void audit_uring_entry(u8 op)
}
static inline void audit_uring_exit(int success, long code)
{
- if (unlikely(!audit_dummy_context()))
+ if (unlikely(audit_context()))
__audit_uring_exit(success, code);
}
static inline void audit_syscall_entry(int major, unsigned long a0,
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index 87ce24d238f3..2bd073fa6bb5 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -17,8 +17,6 @@
#include <linux/backing-dev-defs.h>
#include <linux/slab.h>
-struct blkcg;
-
static inline struct backing_dev_info *bdi_get(struct backing_dev_info *bdi)
{
kref_get(&bdi->refcnt);
@@ -154,7 +152,7 @@ struct bdi_writeback *wb_get_create(struct backing_dev_info *bdi,
struct cgroup_subsys_state *memcg_css,
gfp_t gfp);
void wb_memcg_offline(struct mem_cgroup *memcg);
-void wb_blkcg_offline(struct blkcg *blkcg);
+void wb_blkcg_offline(struct cgroup_subsys_state *css);
/**
* inode_cgwb_enabled - test whether cgroup writeback is enabled on an inode
@@ -378,7 +376,7 @@ static inline void wb_memcg_offline(struct mem_cgroup *memcg)
{
}
-static inline void wb_blkcg_offline(struct blkcg *blkcg)
+static inline void wb_blkcg_offline(struct cgroup_subsys_state *css)
{
}
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 278cc81cc1e7..1cf3738ef1ea 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -269,6 +269,7 @@ struct folio_iter {
size_t offset;
size_t length;
/* private: for use by the iterator */
+ struct folio *_next;
size_t _seg_count;
int _i;
};
@@ -283,6 +284,7 @@ static inline void bio_first_folio(struct folio_iter *fi, struct bio *bio,
PAGE_SIZE * (bvec->bv_page - &fi->folio->page);
fi->_seg_count = bvec->bv_len;
fi->length = min(folio_size(fi->folio) - fi->offset, fi->_seg_count);
+ fi->_next = folio_next(fi->folio);
fi->_i = i;
}
@@ -290,9 +292,10 @@ static inline void bio_next_folio(struct folio_iter *fi, struct bio *bio)
{
fi->_seg_count -= fi->length;
if (fi->_seg_count) {
- fi->folio = folio_next(fi->folio);
+ fi->folio = fi->_next;
fi->offset = 0;
fi->length = min(folio_size(fi->folio), fi->_seg_count);
+ fi->_next = folio_next(fi->folio);
} else if (fi->_i + 1 < bio->bi_vcnt) {
bio_first_folio(fi, bio, fi->_i + 1);
} else {
@@ -405,9 +408,7 @@ extern int bioset_init_from_src(struct bio_set *bs, struct bio_set *src);
struct bio *bio_alloc_bioset(struct block_device *bdev, unsigned short nr_vecs,
unsigned int opf, gfp_t gfp_mask,
struct bio_set *bs);
-struct bio *bio_alloc_kiocb(struct kiocb *kiocb, struct block_device *bdev,
- unsigned short nr_vecs, unsigned int opf, struct bio_set *bs);
-struct bio *bio_kmalloc(gfp_t gfp_mask, unsigned short nr_iovecs);
+struct bio *bio_kmalloc(unsigned short nr_vecs, gfp_t gfp_mask);
extern void bio_put(struct bio *);
struct bio *bio_alloc_clone(struct block_device *bdev, struct bio *bio_src,
@@ -782,6 +783,12 @@ static inline void bio_set_polled(struct bio *bio, struct kiocb *kiocb)
bio->bi_opf |= REQ_NOWAIT;
}
+static inline void bio_clear_polled(struct bio *bio)
+{
+ /* can't support alloc cache if we turn off polling */
+ bio->bi_opf &= ~(REQ_POLLED | REQ_ALLOC_CACHE);
+}
+
struct bio *blk_next_bio(struct bio *bio, struct block_device *bdev,
unsigned int nr_pages, unsigned int opf, gfp_t gfp);
diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h
index 652cd05b0924..9f40dbc65f82 100644
--- a/include/linux/blk-cgroup.h
+++ b/include/linux/blk-cgroup.h
@@ -14,265 +14,39 @@
* Nauman Rafique <nauman@google.com>
*/
-#include <linux/cgroup.h>
-#include <linux/percpu.h>
-#include <linux/percpu_counter.h>
-#include <linux/u64_stats_sync.h>
-#include <linux/seq_file.h>
-#include <linux/radix-tree.h>
-#include <linux/blkdev.h>
-#include <linux/atomic.h>
-#include <linux/kthread.h>
-#include <linux/fs.h>
+#include <linux/types.h>
+
+struct bio;
+struct cgroup_subsys_state;
+struct request_queue;
#define FC_APPID_LEN 129
#ifdef CONFIG_BLK_CGROUP
-
-enum blkg_iostat_type {
- BLKG_IOSTAT_READ,
- BLKG_IOSTAT_WRITE,
- BLKG_IOSTAT_DISCARD,
-
- BLKG_IOSTAT_NR,
-};
-
-struct blkcg_gq;
-struct blkg_policy_data;
-
-struct blkcg {
- struct cgroup_subsys_state css;
- spinlock_t lock;
- refcount_t online_pin;
-
- struct radix_tree_root blkg_tree;
- struct blkcg_gq __rcu *blkg_hint;
- struct hlist_head blkg_list;
-
- struct blkcg_policy_data *cpd[BLKCG_MAX_POLS];
-
- struct list_head all_blkcgs_node;
-#ifdef CONFIG_BLK_CGROUP_FC_APPID
- char fc_app_id[FC_APPID_LEN];
-#endif
-#ifdef CONFIG_CGROUP_WRITEBACK
- struct list_head cgwb_list;
-#endif
-};
-
-struct blkg_iostat {
- u64 bytes[BLKG_IOSTAT_NR];
- u64 ios[BLKG_IOSTAT_NR];
-};
-
-struct blkg_iostat_set {
- struct u64_stats_sync sync;
- struct blkg_iostat cur;
- struct blkg_iostat last;
-};
-
-/* association between a blk cgroup and a request queue */
-struct blkcg_gq {
- /* Pointer to the associated request_queue */
- struct request_queue *q;
- struct list_head q_node;
- struct hlist_node blkcg_node;
- struct blkcg *blkcg;
-
- /* all non-root blkcg_gq's are guaranteed to have access to parent */
- struct blkcg_gq *parent;
-
- /* reference count */
- struct percpu_ref refcnt;
-
- /* is this blkg online? protected by both blkcg and q locks */
- bool online;
-
- struct blkg_iostat_set __percpu *iostat_cpu;
- struct blkg_iostat_set iostat;
-
- struct blkg_policy_data *pd[BLKCG_MAX_POLS];
-
- spinlock_t async_bio_lock;
- struct bio_list async_bios;
- union {
- struct work_struct async_bio_work;
- struct work_struct free_work;
- };
-
- atomic_t use_delay;
- atomic64_t delay_nsec;
- atomic64_t delay_start;
- u64 last_delay;
- int last_use;
-
- struct rcu_head rcu_head;
-};
-
extern struct cgroup_subsys_state * const blkcg_root_css;
-void blkcg_destroy_blkgs(struct blkcg *blkcg);
void blkcg_schedule_throttle(struct request_queue *q, bool use_memdelay);
void blkcg_maybe_throttle_current(void);
-
-static inline struct blkcg *css_to_blkcg(struct cgroup_subsys_state *css)
-{
- return css ? container_of(css, struct blkcg, css) : NULL;
-}
-
-/**
- * bio_blkcg - grab the blkcg associated with a bio
- * @bio: target bio
- *
- * This returns the blkcg associated with a bio, %NULL if not associated.
- * Callers are expected to either handle %NULL or know association has been
- * done prior to calling this.
- */
-static inline struct blkcg *bio_blkcg(struct bio *bio)
-{
- if (bio && bio->bi_blkg)
- return bio->bi_blkg->blkcg;
- return NULL;
-}
-
-static inline bool blk_cgroup_congested(void)
-{
- struct cgroup_subsys_state *css;
- bool ret = false;
-
- rcu_read_lock();
- css = kthread_blkcg();
- if (!css)
- css = task_css(current, io_cgrp_id);
- while (css) {
- if (atomic_read(&css->cgroup->congestion_count)) {
- ret = true;
- break;
- }
- css = css->parent;
- }
- rcu_read_unlock();
- return ret;
-}
-
-/**
- * blkcg_parent - get the parent of a blkcg
- * @blkcg: blkcg of interest
- *
- * Return the parent blkcg of @blkcg. Can be called anytime.
- */
-static inline struct blkcg *blkcg_parent(struct blkcg *blkcg)
-{
- return css_to_blkcg(blkcg->css.parent);
-}
-
-/**
- * blkcg_pin_online - pin online state
- * @blkcg: blkcg of interest
- *
- * While pinned, a blkcg is kept online. This is primarily used to
- * impedance-match blkg and cgwb lifetimes so that blkg doesn't go offline
- * while an associated cgwb is still active.
- */
-static inline void blkcg_pin_online(struct blkcg *blkcg)
-{
- refcount_inc(&blkcg->online_pin);
-}
-
-/**
- * blkcg_unpin_online - unpin online state
- * @blkcg: blkcg of interest
- *
- * This is primarily used to impedance-match blkg and cgwb lifetimes so
- * that blkg doesn't go offline while an associated cgwb is still active.
- * When this count goes to zero, all active cgwbs have finished so the
- * blkcg can continue destruction by calling blkcg_destroy_blkgs().
- */
-static inline void blkcg_unpin_online(struct blkcg *blkcg)
-{
- do {
- if (!refcount_dec_and_test(&blkcg->online_pin))
- break;
- blkcg_destroy_blkgs(blkcg);
- blkcg = blkcg_parent(blkcg);
- } while (blkcg);
-}
+bool blk_cgroup_congested(void);
+void blkcg_pin_online(struct cgroup_subsys_state *blkcg_css);
+void blkcg_unpin_online(struct cgroup_subsys_state *blkcg_css);
+struct list_head *blkcg_get_cgwb_list(struct cgroup_subsys_state *css);
+struct cgroup_subsys_state *bio_blkcg_css(struct bio *bio);
#else /* CONFIG_BLK_CGROUP */
-struct blkcg {
-};
-
-struct blkcg_gq {
-};
-
#define blkcg_root_css ((struct cgroup_subsys_state *)ERR_PTR(-EINVAL))
static inline void blkcg_maybe_throttle_current(void) { }
static inline bool blk_cgroup_congested(void) { return false; }
-
-#ifdef CONFIG_BLOCK
static inline void blkcg_schedule_throttle(struct request_queue *q, bool use_memdelay) { }
-static inline struct blkcg *bio_blkcg(struct bio *bio) { return NULL; }
-#endif /* CONFIG_BLOCK */
-
-#endif /* CONFIG_BLK_CGROUP */
-
-#ifdef CONFIG_BLK_CGROUP_FC_APPID
-/*
- * Sets the fc_app_id field associted to blkcg
- * @app_id: application identifier
- * @cgrp_id: cgroup id
- * @app_id_len: size of application identifier
- */
-static inline int blkcg_set_fc_appid(char *app_id, u64 cgrp_id, size_t app_id_len)
-{
- struct cgroup *cgrp;
- struct cgroup_subsys_state *css;
- struct blkcg *blkcg;
- int ret = 0;
-
- if (app_id_len > FC_APPID_LEN)
- return -EINVAL;
-
- cgrp = cgroup_get_from_id(cgrp_id);
- if (!cgrp)
- return -ENOENT;
- css = cgroup_get_e_css(cgrp, &io_cgrp_subsys);
- if (!css) {
- ret = -ENOENT;
- goto out_cgrp_put;
- }
- blkcg = css_to_blkcg(css);
- /*
- * There is a slight race condition on setting the appid.
- * Worst case an I/O may not find the right id.
- * This is no different from the I/O we let pass while obtaining
- * the vmid from the fabric.
- * Adding the overhead of a lock is not necessary.
- */
- strlcpy(blkcg->fc_app_id, app_id, app_id_len);
- css_put(css);
-out_cgrp_put:
- cgroup_put(cgrp);
- return ret;
-}
-
-/**
- * blkcg_get_fc_appid - get the fc app identifier associated with a bio
- * @bio: target bio
- *
- * On success return the fc_app_id, on failure return NULL
- */
-static inline char *blkcg_get_fc_appid(struct bio *bio)
+static inline struct cgroup_subsys_state *bio_blkcg_css(struct bio *bio)
{
- if (bio && bio->bi_blkg &&
- (bio->bi_blkg->blkcg->fc_app_id[0] != '\0'))
- return bio->bi_blkg->blkcg->fc_app_id;
return NULL;
}
-#else
-static inline int blkcg_set_fc_appid(char *buf, u64 id, size_t len) { return -EINVAL; }
-static inline char *blkcg_get_fc_appid(struct bio *bio) { return NULL; }
-#endif /*CONFIG_BLK_CGROUP_FC_APPID*/
+#endif /* CONFIG_BLK_CGROUP */
+
+int blkcg_set_fc_appid(char *app_id, u64 cgrp_id, size_t app_id_len);
+char *blkcg_get_fc_appid(struct bio *bio);
+
#endif /* _BLK_CGROUP_H */
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 7aa5c54901a9..9f07061418db 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -163,7 +163,6 @@ struct request {
struct rb_node rb_node; /* sort/lookup */
struct bio_vec special_vec;
void *completion_data;
- int error_count; /* for legacy drivers, don't use */
};
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 1973ef9bd40f..c007d58d2703 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -44,7 +44,7 @@ struct block_device {
unsigned long bd_stamp;
bool bd_read_only; /* read-only policy */
dev_t bd_dev;
- int bd_openers;
+ atomic_t bd_openers;
struct inode * bd_inode; /* will die */
struct super_block * bd_super;
void * bd_claiming;
@@ -246,9 +246,8 @@ typedef unsigned int blk_qc_t;
struct bio {
struct bio *bi_next; /* request queue link */
struct block_device *bi_bdev;
- unsigned int bi_opf; /* bottom bits req flags,
- * top bits REQ_OP. Use
- * accessors.
+ unsigned int bi_opf; /* bottom bits REQ_OP, top bits
+ * req_flags.
*/
unsigned short bi_flags; /* BIO_* below */
unsigned short bi_ioprio;
@@ -329,7 +328,6 @@ enum {
BIO_QOS_MERGED, /* but went through rq_qos merge path */
BIO_REMAPPED,
BIO_ZONE_WRITE_LOCKED, /* Owns a zoned device zone write lock */
- BIO_PERCPU_CACHE, /* can participate in per-cpu alloc cache */
BIO_FLAG_LAST
};
@@ -409,15 +407,17 @@ enum req_flag_bits {
* work item to avoid such priority inversions.
*/
__REQ_CGROUP_PUNT,
+ __REQ_POLLED, /* caller polls for completion using bio_poll */
+ __REQ_ALLOC_CACHE, /* allocate IO from cache if available */
+ __REQ_SWAP, /* swap I/O */
+ __REQ_DRV, /* for driver use */
- /* command specific flags for REQ_OP_WRITE_ZEROES: */
+ /*
+ * Command specific flags, keep last:
+ */
+ /* for REQ_OP_WRITE_ZEROES: */
__REQ_NOUNMAP, /* do not free blocks when zeroing */
- __REQ_POLLED, /* caller polls for completion using bio_poll */
-
- /* for driver use */
- __REQ_DRV,
- __REQ_SWAP, /* swapping request. */
__REQ_NR_BITS, /* stops here */
};
@@ -439,6 +439,7 @@ enum req_flag_bits {
#define REQ_NOUNMAP (1ULL << __REQ_NOUNMAP)
#define REQ_POLLED (1ULL << __REQ_POLLED)
+#define REQ_ALLOC_CACHE (1ULL << __REQ_ALLOC_CACHE)
#define REQ_DRV (1ULL << __REQ_DRV)
#define REQ_SWAP (1ULL << __REQ_SWAP)
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 60d016138997..5bdf2ac9142c 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -176,6 +176,21 @@ static inline bool disk_live(struct gendisk *disk)
return !inode_unhashed(disk->part0->bd_inode);
}
+/**
+ * disk_openers - returns how many openers are there for a disk
+ * @disk: disk to check
+ *
+ * This returns the number of openers for a disk. Note that this value is only
+ * stable if disk->open_mutex is held.
+ *
+ * Note: Due to a quirk in the block layer open code, each open partition is
+ * only counted once even if there are multiple openers.
+ */
+static inline unsigned int disk_openers(struct gendisk *disk)
+{
+ return atomic_read(&disk->part0->bd_openers);
+}
+
/*
* The gendisk is refcounted by the part0 block_device, and the bd_device
* therein is also used for device model presentation in sysfs.
@@ -248,6 +263,7 @@ struct queue_limits {
unsigned int io_opt;
unsigned int max_discard_sectors;
unsigned int max_hw_discard_sectors;
+ unsigned int max_secure_erase_sectors;
unsigned int max_write_zeroes_sectors;
unsigned int max_zone_append_sectors;
unsigned int discard_granularity;
@@ -540,10 +556,8 @@ struct request_queue {
#define QUEUE_FLAG_NONROT 6 /* non-rotational device (SSD) */
#define QUEUE_FLAG_VIRT QUEUE_FLAG_NONROT /* paravirt device */
#define QUEUE_FLAG_IO_STAT 7 /* do disk/partitions IO accounting */
-#define QUEUE_FLAG_DISCARD 8 /* supports DISCARD */
#define QUEUE_FLAG_NOXMERGES 9 /* No extended merges */
#define QUEUE_FLAG_ADD_RANDOM 10 /* Contributes to random pool */
-#define QUEUE_FLAG_SECERASE 11 /* supports secure erase */
#define QUEUE_FLAG_SAME_FORCE 12 /* force complete on same CPU */
#define QUEUE_FLAG_DEAD 13 /* queue tear-down finished */
#define QUEUE_FLAG_INIT_DONE 14 /* queue is initialized */
@@ -582,11 +596,8 @@ bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q);
test_bit(QUEUE_FLAG_STABLE_WRITES, &(q)->queue_flags)
#define blk_queue_io_stat(q) test_bit(QUEUE_FLAG_IO_STAT, &(q)->queue_flags)
#define blk_queue_add_random(q) test_bit(QUEUE_FLAG_ADD_RANDOM, &(q)->queue_flags)
-#define blk_queue_discard(q) test_bit(QUEUE_FLAG_DISCARD, &(q)->queue_flags)
#define blk_queue_zone_resetall(q) \
test_bit(QUEUE_FLAG_ZONE_RESETALL, &(q)->queue_flags)
-#define blk_queue_secure_erase(q) \
- (test_bit(QUEUE_FLAG_SECERASE, &(q)->queue_flags))
#define blk_queue_dax(q) test_bit(QUEUE_FLAG_DAX, &(q)->queue_flags)
#define blk_queue_pci_p2pdma(q) \
test_bit(QUEUE_FLAG_PCI_P2PDMA, &(q)->queue_flags)
@@ -602,7 +613,6 @@ bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q);
REQ_FAILFAST_DRIVER))
#define blk_queue_quiesced(q) test_bit(QUEUE_FLAG_QUIESCED, &(q)->queue_flags)
#define blk_queue_pm_only(q) atomic_read(&(q)->pm_only)
-#define blk_queue_fua(q) test_bit(QUEUE_FLAG_FUA, &(q)->queue_flags)
#define blk_queue_registered(q) test_bit(QUEUE_FLAG_REGISTERED, &(q)->queue_flags)
#define blk_queue_nowait(q) test_bit(QUEUE_FLAG_NOWAIT, &(q)->queue_flags)
@@ -950,6 +960,8 @@ extern void blk_queue_chunk_sectors(struct request_queue *, unsigned int);
extern void blk_queue_max_segments(struct request_queue *, unsigned short);
extern void blk_queue_max_discard_segments(struct request_queue *,
unsigned short);
+void blk_queue_max_secure_erase_sectors(struct request_queue *q,
+ unsigned int max_sectors);
extern void blk_queue_max_segment_size(struct request_queue *, unsigned int);
extern void blk_queue_max_discard_sectors(struct request_queue *q,
unsigned int max_discard_sectors);
@@ -1090,13 +1102,12 @@ static inline long nr_blockdev_pages(void)
extern void blk_io_schedule(void);
-#define BLKDEV_DISCARD_SECURE (1 << 0) /* issue a secure erase */
-
-extern int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
- sector_t nr_sects, gfp_t gfp_mask, unsigned long flags);
-extern int __blkdev_issue_discard(struct block_device *bdev, sector_t sector,
- sector_t nr_sects, gfp_t gfp_mask, int flags,
- struct bio **biop);
+int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
+ sector_t nr_sects, gfp_t gfp_mask);
+int __blkdev_issue_discard(struct block_device *bdev, sector_t sector,
+ sector_t nr_sects, gfp_t gfp_mask, struct bio **biop);
+int blkdev_issue_secure_erase(struct block_device *bdev, sector_t sector,
+ sector_t nr_sects, gfp_t gfp);
#define BLKDEV_ZERO_NOUNMAP (1 << 0) /* do not free blocks */
#define BLKDEV_ZERO_NOFALLBACK (1 << 1) /* don't write explicit zeroes */
@@ -1115,7 +1126,7 @@ static inline int sb_issue_discard(struct super_block *sb, sector_t block,
SECTOR_SHIFT),
nr_blocks << (sb->s_blocksize_bits -
SECTOR_SHIFT),
- gfp_mask, flags);
+ gfp_mask);
}
static inline int sb_issue_zeroout(struct super_block *sb, sector_t block,
sector_t nr_blocks, gfp_t gfp_mask)
@@ -1189,6 +1200,12 @@ static inline unsigned int queue_max_zone_append_sectors(const struct request_qu
return min(l->max_zone_append_sectors, l->max_sectors);
}
+static inline unsigned int
+bdev_max_zone_append_sectors(struct block_device *bdev)
+{
+ return queue_max_zone_append_sectors(bdev_get_queue(bdev));
+}
+
static inline unsigned queue_logical_block_size(const struct request_queue *q)
{
int retval = 512;
@@ -1246,84 +1263,54 @@ bdev_zone_write_granularity(struct block_device *bdev)
return queue_zone_write_granularity(bdev_get_queue(bdev));
}
-static inline int queue_alignment_offset(const struct request_queue *q)
-{
- if (q->limits.misaligned)
- return -1;
+int bdev_alignment_offset(struct block_device *bdev);
+unsigned int bdev_discard_alignment(struct block_device *bdev);
- return q->limits.alignment_offset;
+static inline unsigned int bdev_max_discard_sectors(struct block_device *bdev)
+{
+ return bdev_get_queue(bdev)->limits.max_discard_sectors;
}
-static inline int queue_limit_alignment_offset(struct queue_limits *lim, sector_t sector)
+static inline unsigned int bdev_discard_granularity(struct block_device *bdev)
{
- unsigned int granularity = max(lim->physical_block_size, lim->io_min);
- unsigned int alignment = sector_div(sector, granularity >> SECTOR_SHIFT)
- << SECTOR_SHIFT;
+ return bdev_get_queue(bdev)->limits.discard_granularity;
+}
- return (granularity + lim->alignment_offset - alignment) % granularity;
+static inline unsigned int
+bdev_max_secure_erase_sectors(struct block_device *bdev)
+{
+ return bdev_get_queue(bdev)->limits.max_secure_erase_sectors;
}
-static inline int bdev_alignment_offset(struct block_device *bdev)
+static inline unsigned int bdev_write_zeroes_sectors(struct block_device *bdev)
{
struct request_queue *q = bdev_get_queue(bdev);
- if (q->limits.misaligned)
- return -1;
- if (bdev_is_partition(bdev))
- return queue_limit_alignment_offset(&q->limits,
- bdev->bd_start_sect);
- return q->limits.alignment_offset;
+ if (q)
+ return q->limits.max_write_zeroes_sectors;
+
+ return 0;
}
-static inline int queue_discard_alignment(const struct request_queue *q)
+static inline bool bdev_nonrot(struct block_device *bdev)
{
- if (q->limits.discard_misaligned)
- return -1;
-
- return q->limits.discard_alignment;
+ return blk_queue_nonrot(bdev_get_queue(bdev));
}
-static inline int queue_limit_discard_alignment(struct queue_limits *lim, sector_t sector)
+static inline bool bdev_stable_writes(struct block_device *bdev)
{
- unsigned int alignment, granularity, offset;
-
- if (!lim->max_discard_sectors)
- return 0;
-
- /* Why are these in bytes, not sectors? */
- alignment = lim->discard_alignment >> SECTOR_SHIFT;
- granularity = lim->discard_granularity >> SECTOR_SHIFT;
- if (!granularity)
- return 0;
-
- /* Offset of the partition start in 'granularity' sectors */
- offset = sector_div(sector, granularity);
-
- /* And why do we do this modulus *again* in blkdev_issue_discard()? */
- offset = (granularity + alignment - offset) % granularity;
-
- /* Turn it back into bytes, gaah */
- return offset << SECTOR_SHIFT;
+ return test_bit(QUEUE_FLAG_STABLE_WRITES,
+ &bdev_get_queue(bdev)->queue_flags);
}
-static inline int bdev_discard_alignment(struct block_device *bdev)
+static inline bool bdev_write_cache(struct block_device *bdev)
{
- struct request_queue *q = bdev_get_queue(bdev);
-
- if (bdev_is_partition(bdev))
- return queue_limit_discard_alignment(&q->limits,
- bdev->bd_start_sect);
- return q->limits.discard_alignment;
+ return test_bit(QUEUE_FLAG_WC, &bdev_get_queue(bdev)->queue_flags);
}
-static inline unsigned int bdev_write_zeroes_sectors(struct block_device *bdev)
+static inline bool bdev_fua(struct block_device *bdev)
{
- struct request_queue *q = bdev_get_queue(bdev);
-
- if (q)
- return q->limits.max_write_zeroes_sectors;
-
- return 0;
+ return test_bit(QUEUE_FLAG_FUA, &bdev_get_queue(bdev)->queue_flags);
}
static inline enum blk_zoned_model bdev_zoned_model(struct block_device *bdev)
@@ -1491,9 +1478,10 @@ static inline void blk_wake_io_task(struct task_struct *waiter)
wake_up_process(waiter);
}
-unsigned long disk_start_io_acct(struct gendisk *disk, unsigned int sectors,
- unsigned int op);
-void disk_end_io_acct(struct gendisk *disk, unsigned int op,
+unsigned long bdev_start_io_acct(struct block_device *bdev,
+ unsigned int sectors, unsigned int op,
+ unsigned long start_time);
+void bdev_end_io_acct(struct block_device *bdev, unsigned int op,
unsigned long start_time);
void bio_start_io_acct_time(struct bio *bio, unsigned long start_time);
diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h
index 22501a293fa5..623e22492afa 100644
--- a/include/linux/blktrace_api.h
+++ b/include/linux/blktrace_api.h
@@ -27,12 +27,10 @@ struct blk_trace {
atomic_t dropped;
};
-struct blkcg;
-
extern int blk_trace_ioctl(struct block_device *, unsigned, char __user *);
extern void blk_trace_shutdown(struct request_queue *);
-extern __printf(3, 4)
-void __trace_note_message(struct blk_trace *, struct blkcg *blkcg, const char *fmt, ...);
+__printf(3, 4) void __blk_trace_note_message(struct blk_trace *bt,
+ struct cgroup_subsys_state *css, const char *fmt, ...);
/**
* blk_add_trace_msg - Add a (simple) message to the blktrace stream
@@ -47,14 +45,14 @@ void __trace_note_message(struct blk_trace *, struct blkcg *blkcg, const char *f
* NOTE: Can not use 'static inline' due to presence of var args...
*
**/
-#define blk_add_cgroup_trace_msg(q, cg, fmt, ...) \
+#define blk_add_cgroup_trace_msg(q, css, fmt, ...) \
do { \
struct blk_trace *bt; \
\
rcu_read_lock(); \
bt = rcu_dereference((q)->blk_trace); \
if (unlikely(bt)) \
- __trace_note_message(bt, cg, fmt, ##__VA_ARGS__);\
+ __blk_trace_note_message(bt, css, fmt, ##__VA_ARGS__);\
rcu_read_unlock(); \
} while (0)
#define blk_add_trace_msg(q, fmt, ...) \
diff --git a/include/linux/cdrom.h b/include/linux/cdrom.h
index 0a89f111e00e..67caa909e3e6 100644
--- a/include/linux/cdrom.h
+++ b/include/linux/cdrom.h
@@ -77,7 +77,6 @@ struct cdrom_device_ops {
int (*tray_move) (struct cdrom_device_info *, int);
int (*lock_door) (struct cdrom_device_info *, int);
int (*select_speed) (struct cdrom_device_info *, int);
- int (*select_disc) (struct cdrom_device_info *, int);
int (*get_last_session) (struct cdrom_device_info *,
struct cdrom_multisession *);
int (*get_mcn) (struct cdrom_device_info *,
diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h
index 3431011f364d..cba8a6ffc329 100644
--- a/include/linux/ceph/osd_client.h
+++ b/include/linux/ceph/osd_client.h
@@ -287,6 +287,9 @@ struct ceph_osd_linger_request {
rados_watcherrcb_t errcb;
void *data;
+ struct ceph_pagelist *request_pl;
+ struct page **notify_id_pages;
+
struct page ***preply_pages;
size_t *preply_len;
};
diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index 9cf51e41e697..54dc2f9a2d56 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -167,7 +167,7 @@ static inline int suspend_disable_secondary_cpus(void) { return 0; }
static inline void suspend_enable_secondary_cpus(void) { }
#endif /* !CONFIG_PM_SLEEP_SMP */
-void cpu_startup_entry(enum cpuhp_state state);
+void __noreturn cpu_startup_entry(enum cpuhp_state state);
void cpu_idle_poll_ctrl(bool enable);
diff --git a/include/linux/dma-buf-map.h b/include/linux/dma-buf-map.h
deleted file mode 100644
index 19fa0b5ae5ec..000000000000
--- a/include/linux/dma-buf-map.h
+++ /dev/null
@@ -1,266 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Pointer to dma-buf-mapped memory, plus helpers.
- */
-
-#ifndef __DMA_BUF_MAP_H__
-#define __DMA_BUF_MAP_H__
-
-#include <linux/io.h>
-#include <linux/string.h>
-
-/**
- * DOC: overview
- *
- * Calling dma-buf's vmap operation returns a pointer to the buffer's memory.
- * Depending on the location of the buffer, users may have to access it with
- * I/O operations or memory load/store operations. For example, copying to
- * system memory could be done with memcpy(), copying to I/O memory would be
- * done with memcpy_toio().
- *
- * .. code-block:: c
- *
- * void *vaddr = ...; // pointer to system memory
- * memcpy(vaddr, src, len);
- *
- * void *vaddr_iomem = ...; // pointer to I/O memory
- * memcpy_toio(vaddr, _iomem, src, len);
- *
- * When using dma-buf's vmap operation, the returned pointer is encoded as
- * :c:type:`struct dma_buf_map <dma_buf_map>`.
- * :c:type:`struct dma_buf_map <dma_buf_map>` stores the buffer's address in
- * system or I/O memory and a flag that signals the required method of
- * accessing the buffer. Use the returned instance and the helper functions
- * to access the buffer's memory in the correct way.
- *
- * The type :c:type:`struct dma_buf_map <dma_buf_map>` and its helpers are
- * actually independent from the dma-buf infrastructure. When sharing buffers
- * among devices, drivers have to know the location of the memory to access
- * the buffers in a safe way. :c:type:`struct dma_buf_map <dma_buf_map>`
- * solves this problem for dma-buf and its users. If other drivers or
- * sub-systems require similar functionality, the type could be generalized
- * and moved to a more prominent header file.
- *
- * Open-coding access to :c:type:`struct dma_buf_map <dma_buf_map>` is
- * considered bad style. Rather then accessing its fields directly, use one
- * of the provided helper functions, or implement your own. For example,
- * instances of :c:type:`struct dma_buf_map <dma_buf_map>` can be initialized
- * statically with DMA_BUF_MAP_INIT_VADDR(), or at runtime with
- * dma_buf_map_set_vaddr(). These helpers will set an address in system memory.
- *
- * .. code-block:: c
- *
- * struct dma_buf_map map = DMA_BUF_MAP_INIT_VADDR(0xdeadbeaf);
- *
- * dma_buf_map_set_vaddr(&map, 0xdeadbeaf);
- *
- * To set an address in I/O memory, use dma_buf_map_set_vaddr_iomem().
- *
- * .. code-block:: c
- *
- * dma_buf_map_set_vaddr_iomem(&map, 0xdeadbeaf);
- *
- * Instances of struct dma_buf_map do not have to be cleaned up, but
- * can be cleared to NULL with dma_buf_map_clear(). Cleared mappings
- * always refer to system memory.
- *
- * .. code-block:: c
- *
- * dma_buf_map_clear(&map);
- *
- * Test if a mapping is valid with either dma_buf_map_is_set() or
- * dma_buf_map_is_null().
- *
- * .. code-block:: c
- *
- * if (dma_buf_map_is_set(&map) != dma_buf_map_is_null(&map))
- * // always true
- *
- * Instances of :c:type:`struct dma_buf_map <dma_buf_map>` can be compared
- * for equality with dma_buf_map_is_equal(). Mappings the point to different
- * memory spaces, system or I/O, are never equal. That's even true if both
- * spaces are located in the same address space, both mappings contain the
- * same address value, or both mappings refer to NULL.
- *
- * .. code-block:: c
- *
- * struct dma_buf_map sys_map; // refers to system memory
- * struct dma_buf_map io_map; // refers to I/O memory
- *
- * if (dma_buf_map_is_equal(&sys_map, &io_map))
- * // always false
- *
- * A set up instance of struct dma_buf_map can be used to access or manipulate
- * the buffer memory. Depending on the location of the memory, the provided
- * helpers will pick the correct operations. Data can be copied into the memory
- * with dma_buf_map_memcpy_to(). The address can be manipulated with
- * dma_buf_map_incr().
- *
- * .. code-block:: c
- *
- * const void *src = ...; // source buffer
- * size_t len = ...; // length of src
- *
- * dma_buf_map_memcpy_to(&map, src, len);
- * dma_buf_map_incr(&map, len); // go to first byte after the memcpy
- */
-
-/**
- * struct dma_buf_map - Pointer to vmap'ed dma-buf memory.
- * @vaddr_iomem: The buffer's address if in I/O memory
- * @vaddr: The buffer's address if in system memory
- * @is_iomem: True if the dma-buf memory is located in I/O
- * memory, or false otherwise.
- */
-struct dma_buf_map {
- union {
- void __iomem *vaddr_iomem;
- void *vaddr;
- };
- bool is_iomem;
-};
-
-/**
- * DMA_BUF_MAP_INIT_VADDR - Initializes struct dma_buf_map to an address in system memory
- * @vaddr_: A system-memory address
- */
-#define DMA_BUF_MAP_INIT_VADDR(vaddr_) \
- { \
- .vaddr = (vaddr_), \
- .is_iomem = false, \
- }
-
-/**
- * dma_buf_map_set_vaddr - Sets a dma-buf mapping structure to an address in system memory
- * @map: The dma-buf mapping structure
- * @vaddr: A system-memory address
- *
- * Sets the address and clears the I/O-memory flag.
- */
-static inline void dma_buf_map_set_vaddr(struct dma_buf_map *map, void *vaddr)
-{
- map->vaddr = vaddr;
- map->is_iomem = false;
-}
-
-/**
- * dma_buf_map_set_vaddr_iomem - Sets a dma-buf mapping structure to an address in I/O memory
- * @map: The dma-buf mapping structure
- * @vaddr_iomem: An I/O-memory address
- *
- * Sets the address and the I/O-memory flag.
- */
-static inline void dma_buf_map_set_vaddr_iomem(struct dma_buf_map *map,
- void __iomem *vaddr_iomem)
-{
- map->vaddr_iomem = vaddr_iomem;
- map->is_iomem = true;
-}
-
-/**
- * dma_buf_map_is_equal - Compares two dma-buf mapping structures for equality
- * @lhs: The dma-buf mapping structure
- * @rhs: A dma-buf mapping structure to compare with
- *
- * Two dma-buf mapping structures are equal if they both refer to the same type of memory
- * and to the same address within that memory.
- *
- * Returns:
- * True is both structures are equal, or false otherwise.
- */
-static inline bool dma_buf_map_is_equal(const struct dma_buf_map *lhs,
- const struct dma_buf_map *rhs)
-{
- if (lhs->is_iomem != rhs->is_iomem)
- return false;
- else if (lhs->is_iomem)
- return lhs->vaddr_iomem == rhs->vaddr_iomem;
- else
- return lhs->vaddr == rhs->vaddr;
-}
-
-/**
- * dma_buf_map_is_null - Tests for a dma-buf mapping to be NULL
- * @map: The dma-buf mapping structure
- *
- * Depending on the state of struct dma_buf_map.is_iomem, tests if the
- * mapping is NULL.
- *
- * Returns:
- * True if the mapping is NULL, or false otherwise.
- */
-static inline bool dma_buf_map_is_null(const struct dma_buf_map *map)
-{
- if (map->is_iomem)
- return !map->vaddr_iomem;
- return !map->vaddr;
-}
-
-/**
- * dma_buf_map_is_set - Tests is the dma-buf mapping has been set
- * @map: The dma-buf mapping structure
- *
- * Depending on the state of struct dma_buf_map.is_iomem, tests if the
- * mapping has been set.
- *
- * Returns:
- * True if the mapping is been set, or false otherwise.
- */
-static inline bool dma_buf_map_is_set(const struct dma_buf_map *map)
-{
- return !dma_buf_map_is_null(map);
-}
-
-/**
- * dma_buf_map_clear - Clears a dma-buf mapping structure
- * @map: The dma-buf mapping structure
- *
- * Clears all fields to zero; including struct dma_buf_map.is_iomem. So
- * mapping structures that were set to point to I/O memory are reset for
- * system memory. Pointers are cleared to NULL. This is the default.
- */
-static inline void dma_buf_map_clear(struct dma_buf_map *map)
-{
- if (map->is_iomem) {
- map->vaddr_iomem = NULL;
- map->is_iomem = false;
- } else {
- map->vaddr = NULL;
- }
-}
-
-/**
- * dma_buf_map_memcpy_to - Memcpy into dma-buf mapping
- * @dst: The dma-buf mapping structure
- * @src: The source buffer
- * @len: The number of byte in src
- *
- * Copies data into a dma-buf mapping. The source buffer is in system
- * memory. Depending on the buffer's location, the helper picks the correct
- * method of accessing the memory.
- */
-static inline void dma_buf_map_memcpy_to(struct dma_buf_map *dst, const void *src, size_t len)
-{
- if (dst->is_iomem)
- memcpy_toio(dst->vaddr_iomem, src, len);
- else
- memcpy(dst->vaddr, src, len);
-}
-
-/**
- * dma_buf_map_incr - Increments the address stored in a dma-buf mapping
- * @map: The dma-buf mapping structure
- * @incr: The number of bytes to increment
- *
- * Increments the address stored in a dma-buf mapping. Depending on the
- * buffer's location, the correct value will be updated.
- */
-static inline void dma_buf_map_incr(struct dma_buf_map *map, size_t incr)
-{
- if (map->is_iomem)
- map->vaddr_iomem += incr;
- else
- map->vaddr += incr;
-}
-
-#endif /* __DMA_BUF_MAP_H__ */
diff --git a/include/linux/efi.h b/include/linux/efi.h
index ccd4d3f91c98..0412304ce34e 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -213,6 +213,8 @@ struct capsule_info {
size_t page_bytes_remain;
};
+int efi_capsule_setup_info(struct capsule_info *cap_info, void *kbuff,
+ size_t hdr_bytes);
int __efi_capsule_setup_info(struct capsule_info *cap_info);
/*
@@ -383,6 +385,7 @@ void efi_native_runtime_setup(void);
#define EFI_LOAD_FILE_PROTOCOL_GUID EFI_GUID(0x56ec3091, 0x954c, 0x11d2, 0x8e, 0x3f, 0x00, 0xa0, 0xc9, 0x69, 0x72, 0x3b)
#define EFI_LOAD_FILE2_PROTOCOL_GUID EFI_GUID(0x4006c0c1, 0xfcb3, 0x403e, 0x99, 0x6d, 0x4a, 0x6c, 0x87, 0x24, 0xe0, 0x6d)
#define EFI_RT_PROPERTIES_TABLE_GUID EFI_GUID(0xeb66918a, 0x7eef, 0x402a, 0x84, 0x2e, 0x93, 0x1d, 0x21, 0xc3, 0x8a, 0xe9)
+#define EFI_DXE_SERVICES_TABLE_GUID EFI_GUID(0x05ad34ba, 0x6f02, 0x4214, 0x95, 0x2e, 0x4d, 0xa0, 0x39, 0x8e, 0x2b, 0xb9)
#define EFI_IMAGE_SECURITY_DATABASE_GUID EFI_GUID(0xd719b2cb, 0x3d3a, 0x4596, 0xa3, 0xbc, 0xda, 0xd0, 0x0e, 0x67, 0x65, 0x6f)
#define EFI_SHIM_LOCK_GUID EFI_GUID(0x605dab50, 0xe046, 0x4300, 0xab, 0xb6, 0x3d, 0xd8, 0x10, 0xdd, 0x8b, 0x23)
@@ -405,6 +408,20 @@ void efi_native_runtime_setup(void);
#define LINUX_EFI_MEMRESERVE_TABLE_GUID EFI_GUID(0x888eb0c6, 0x8ede, 0x4ff5, 0xa8, 0xf0, 0x9a, 0xee, 0x5c, 0xb9, 0x77, 0xc2)
#define LINUX_EFI_INITRD_MEDIA_GUID EFI_GUID(0x5568e427, 0x68fc, 0x4f3d, 0xac, 0x74, 0xca, 0x55, 0x52, 0x31, 0xcc, 0x68)
#define LINUX_EFI_MOK_VARIABLE_TABLE_GUID EFI_GUID(0xc451ed2b, 0x9694, 0x45d3, 0xba, 0xba, 0xed, 0x9f, 0x89, 0x88, 0xa3, 0x89)
+#define LINUX_EFI_COCO_SECRET_AREA_GUID EFI_GUID(0xadf956ad, 0xe98c, 0x484c, 0xae, 0x11, 0xb5, 0x1c, 0x7d, 0x33, 0x64, 0x47)
+
+#define RISCV_EFI_BOOT_PROTOCOL_GUID EFI_GUID(0xccd15fec, 0x6f73, 0x4eec, 0x83, 0x95, 0x3e, 0x69, 0xe4, 0xb9, 0x40, 0xbf)
+
+/*
+ * This GUID may be installed onto the kernel image's handle as a NULL protocol
+ * to signal to the stub that the placement of the image should be respected,
+ * and moving the image in physical memory is undesirable. To ensure
+ * compatibility with 64k pages kernels with virtually mapped stacks, and to
+ * avoid defeating physical randomization, this protocol should only be
+ * installed if the image was placed at a randomized 128k aligned address in
+ * memory.
+ */
+#define LINUX_EFI_LOADED_IMAGE_FIXED_GUID EFI_GUID(0xf5a37b6d, 0x3344, 0x42a5, 0xb6, 0xbb, 0x97, 0x86, 0x48, 0xc1, 0x89, 0x0a)
/* OEM GUIDs */
#define DELLEMC_EFI_RCI2_TABLE_GUID EFI_GUID(0x2d9f28a2, 0xa886, 0x456a, 0x97, 0xa8, 0xf1, 0x1e, 0xf2, 0x4f, 0xf4, 0x55)
@@ -435,6 +452,7 @@ typedef struct {
} efi_config_table_type_t;
#define EFI_SYSTEM_TABLE_SIGNATURE ((u64)0x5453595320494249ULL)
+#define EFI_DXE_SERVICES_TABLE_SIGNATURE ((u64)0x565245535f455844ULL)
#define EFI_2_30_SYSTEM_TABLE_REVISION ((2 << 16) | (30))
#define EFI_2_20_SYSTEM_TABLE_REVISION ((2 << 16) | (20))
@@ -596,6 +614,7 @@ extern struct efi {
unsigned long tpm_log; /* TPM2 Event Log table */
unsigned long tpm_final_log; /* TPM2 Final Events Log table */
unsigned long mokvar_table; /* MOK variable config table */
+ unsigned long coco_secret; /* Confidential computing secret table */
efi_get_time_t *get_time;
efi_set_time_t *set_time;
@@ -1335,4 +1354,12 @@ extern void efifb_setup_from_dmi(struct screen_info *si, const char *opt);
static inline void efifb_setup_from_dmi(struct screen_info *si, const char *opt) { }
#endif
+struct linux_efi_coco_secret_area {
+ u64 base_pa;
+ u64 size;
+};
+
+/* Header of a populated EFI secret area */
+#define EFI_SECRET_TABLE_HEADER_GUID EFI_GUID(0x1e74f542, 0x71dd, 0x4d66, 0x96, 0x3e, 0xef, 0x42, 0x87, 0xff, 0x17, 0x3b)
+
#endif /* _LINUX_EFI_H */
diff --git a/include/linux/fs.h b/include/linux/fs.h
index bbde95387a23..87b5af1d9fbe 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1953,6 +1953,7 @@ struct dir_context {
#define REMAP_FILE_ADVISORY (REMAP_FILE_CAN_SHORTEN)
struct iov_iter;
+struct io_uring_cmd;
struct file_operations {
struct module *owner;
@@ -1995,6 +1996,7 @@ struct file_operations {
struct file *file_out, loff_t pos_out,
loff_t len, unsigned int remap_flags);
int (*fadvise)(struct file *, loff_t, loff_t, int);
+ int (*uring_cmd)(struct io_uring_cmd *ioucmd, unsigned int issue_flags);
} __randomize_layout;
struct inode_operations {
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 53c1b6082a4c..ac2a1d758a80 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -169,6 +169,7 @@ long hugetlb_unreserve_pages(struct inode *inode, long start, long end,
long freed);
bool isolate_huge_page(struct page *page, struct list_head *list);
int get_hwpoison_huge_page(struct page *page, bool *hugetlb);
+int get_huge_page_for_hwpoison(unsigned long pfn, int flags);
void putback_active_hugepage(struct page *page);
void move_hugetlb_state(struct page *oldpage, struct page *newpage, int reason);
void free_huge_page(struct page *page);
@@ -378,6 +379,11 @@ static inline int get_hwpoison_huge_page(struct page *page, bool *hugetlb)
return 0;
}
+static inline int get_huge_page_for_hwpoison(unsigned long pfn, int flags)
+{
+ return 0;
+}
+
static inline void putback_active_hugepage(struct page *page)
{
}
diff --git a/include/linux/io_uring.h b/include/linux/io_uring.h
index 1814e698d861..4a2f6cc5a492 100644
--- a/include/linux/io_uring.h
+++ b/include/linux/io_uring.h
@@ -5,11 +5,37 @@
#include <linux/sched.h>
#include <linux/xarray.h>
+enum io_uring_cmd_flags {
+ IO_URING_F_COMPLETE_DEFER = 1,
+ IO_URING_F_UNLOCKED = 2,
+ /* int's last bit, sign checks are usually faster than a bit test */
+ IO_URING_F_NONBLOCK = INT_MIN,
+
+ /* ctx state flags, for URING_CMD */
+ IO_URING_F_SQE128 = 4,
+ IO_URING_F_CQE32 = 8,
+ IO_URING_F_IOPOLL = 16,
+};
+
+struct io_uring_cmd {
+ struct file *file;
+ const void *cmd;
+ /* callback to defer completions to task context */
+ void (*task_work_cb)(struct io_uring_cmd *cmd);
+ u32 cmd_op;
+ u32 pad;
+ u8 pdu[32]; /* available inline for free use */
+};
+
#if defined(CONFIG_IO_URING)
+void io_uring_cmd_done(struct io_uring_cmd *cmd, ssize_t ret, ssize_t res2);
+void io_uring_cmd_complete_in_task(struct io_uring_cmd *ioucmd,
+ void (*task_work_cb)(struct io_uring_cmd *));
struct sock *io_uring_get_socket(struct file *file);
void __io_uring_cancel(bool cancel_all);
void __io_uring_free(struct task_struct *tsk);
void io_uring_unreg_ringfd(void);
+const char *io_uring_get_opcode(u8 opcode);
static inline void io_uring_files_cancel(void)
{
@@ -29,6 +55,14 @@ static inline void io_uring_free(struct task_struct *tsk)
__io_uring_free(tsk);
}
#else
+static inline void io_uring_cmd_done(struct io_uring_cmd *cmd, ssize_t ret,
+ ssize_t ret2)
+{
+}
+static inline void io_uring_cmd_complete_in_task(struct io_uring_cmd *ioucmd,
+ void (*task_work_cb)(struct io_uring_cmd *))
+{
+}
static inline struct sock *io_uring_get_socket(struct file *file)
{
return NULL;
@@ -42,6 +76,10 @@ static inline void io_uring_files_cancel(void)
static inline void io_uring_free(struct task_struct *tsk)
{
}
+static inline const char *io_uring_get_opcode(u8 opcode)
+{
+ return "";
+}
#endif
#endif
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index a890428bcc1a..fe6efb24d151 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -285,7 +285,7 @@ static inline char *hex_byte_pack_upper(char *buf, u8 byte)
return buf;
}
-extern int hex_to_bin(char ch);
+extern int hex_to_bin(unsigned char ch);
extern int __must_check hex2bin(u8 *dst, const char *src, size_t count);
extern char *bin2hex(char *dst, const void *src, size_t count);
diff --git a/include/linux/kthread.h b/include/linux/kthread.h
index de5d75bafd66..30e5bec81d2b 100644
--- a/include/linux/kthread.h
+++ b/include/linux/kthread.h
@@ -222,9 +222,5 @@ void kthread_associate_blkcg(struct cgroup_subsys_state *css);
struct cgroup_subsys_state *kthread_blkcg(void);
#else
static inline void kthread_associate_blkcg(struct cgroup_subsys_state *css) { }
-static inline struct cgroup_subsys_state *kthread_blkcg(void)
-{
- return NULL;
-}
#endif
#endif /* _LINUX_KTHREAD_H */
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 3f9b22c4983a..34eed5f85ed6 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -315,7 +315,10 @@ struct kvm_vcpu {
int cpu;
int vcpu_id; /* id given by userspace at creation */
int vcpu_idx; /* index in kvm->vcpus array */
- int srcu_idx;
+ int ____srcu_idx; /* Don't use this directly. You've been warned. */
+#ifdef CONFIG_PROVE_RCU
+ int srcu_depth;
+#endif
int mode;
u64 requests;
unsigned long guest_debug;
@@ -840,6 +843,25 @@ static inline void kvm_vm_bugged(struct kvm *kvm)
unlikely(__ret); \
})
+static inline void kvm_vcpu_srcu_read_lock(struct kvm_vcpu *vcpu)
+{
+#ifdef CONFIG_PROVE_RCU
+ WARN_ONCE(vcpu->srcu_depth++,
+ "KVM: Illegal vCPU srcu_idx LOCK, depth=%d", vcpu->srcu_depth - 1);
+#endif
+ vcpu->____srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+}
+
+static inline void kvm_vcpu_srcu_read_unlock(struct kvm_vcpu *vcpu)
+{
+ srcu_read_unlock(&vcpu->kvm->srcu, vcpu->____srcu_idx);
+
+#ifdef CONFIG_PROVE_RCU
+ WARN_ONCE(--vcpu->srcu_depth,
+ "KVM: Illegal vCPU srcu_idx UNLOCK, depth=%d", vcpu->srcu_depth);
+#endif
+}
+
static inline bool kvm_dirty_log_manual_protect_and_init_set(struct kvm *kvm)
{
return !!(kvm->manual_dirty_log_protect & KVM_DIRTY_LOG_INITIALLY_SET);
@@ -2197,6 +2219,8 @@ static inline long kvm_arch_vcpu_async_ioctl(struct file *filp,
void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
unsigned long start, unsigned long end);
+void kvm_arch_guest_memory_reclaimed(struct kvm *kvm);
+
#ifdef CONFIG_HAVE_KVM_VCPU_RUN_PID_CHANGE
int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu);
#else
diff --git a/include/linux/libata.h b/include/linux/libata.h
index 9b1d3d8b1252..732de9014626 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -820,7 +820,6 @@ struct ata_port {
unsigned int cbl; /* cable type; ATA_CBL_xxx */
struct ata_queued_cmd qcmd[ATA_MAX_QUEUE + 1];
- unsigned long sas_tag_allocated; /* for sas tag allocation only */
u64 qc_active;
int nr_active_links; /* #links with active qcs */
unsigned int sas_last_tag; /* track next tag hw expects */
@@ -1111,7 +1110,7 @@ extern void ata_unpack_xfermask(unsigned long xfer_mask,
unsigned long *udma_mask);
extern u8 ata_xfer_mask2mode(unsigned long xfer_mask);
extern unsigned long ata_xfer_mode2mask(u8 xfer_mode);
-extern int ata_xfer_mode2shift(unsigned long xfer_mode);
+extern int ata_xfer_mode2shift(u8 xfer_mode);
extern const char *ata_mode_string(unsigned long xfer_mask);
extern unsigned long ata_id_xfermask(const u16 *id);
extern int ata_std_qc_defer(struct ata_queued_cmd *qc);
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index a68dce3873fc..89b14729d59f 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -1012,6 +1012,7 @@ static inline unsigned long lruvec_page_state_local(struct lruvec *lruvec,
}
void mem_cgroup_flush_stats(void);
+void mem_cgroup_flush_stats_delayed(void);
void __mod_memcg_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx,
int val);
@@ -1455,6 +1456,10 @@ static inline void mem_cgroup_flush_stats(void)
{
}
+static inline void mem_cgroup_flush_stats_delayed(void)
+{
+}
+
static inline void __mod_memcg_lruvec_state(struct lruvec *lruvec,
enum node_stat_item idx, int val)
{
diff --git a/include/linux/mm.h b/include/linux/mm.h
index e34edb775334..9f44254af8ce 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -3197,6 +3197,14 @@ extern int sysctl_memory_failure_recovery;
extern void shake_page(struct page *p);
extern atomic_long_t num_poisoned_pages __read_mostly;
extern int soft_offline_page(unsigned long pfn, int flags);
+#ifdef CONFIG_MEMORY_FAILURE
+extern int __get_huge_page_for_hwpoison(unsigned long pfn, int flags);
+#else
+static inline int __get_huge_page_for_hwpoison(unsigned long pfn, int flags)
+{
+ return 0;
+}
+#endif
#ifndef arch_memory_failure
static inline int arch_memory_failure(unsigned long pfn, int flags)
diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
index 151607e9d64a..955aee14b0f7 100644
--- a/include/linux/mtd/mtd.h
+++ b/include/linux/mtd/mtd.h
@@ -389,10 +389,8 @@ struct mtd_info {
/* List of partitions attached to this MTD device */
struct list_head partitions;
- union {
- struct mtd_part part;
- struct mtd_master master;
- };
+ struct mtd_part part;
+ struct mtd_master master;
};
static inline struct mtd_info *mtd_get_master(struct mtd_info *mtd)
diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h
index 2c6b9e416225..7c2d77d75a88 100644
--- a/include/linux/netdev_features.h
+++ b/include/linux/netdev_features.h
@@ -169,7 +169,7 @@ enum {
#define NETIF_F_HW_HSR_FWD __NETIF_F(HW_HSR_FWD)
#define NETIF_F_HW_HSR_DUP __NETIF_F(HW_HSR_DUP)
-/* Finds the next feature with the highest number of the range of start till 0.
+/* Finds the next feature with the highest number of the range of start-1 till 0.
*/
static inline int find_next_netdev_feature(u64 feature, unsigned long start)
{
@@ -188,7 +188,7 @@ static inline int find_next_netdev_feature(u64 feature, unsigned long start)
for ((bit) = find_next_netdev_feature((mask_addr), \
NETDEV_FEATURE_COUNT); \
(bit) >= 0; \
- (bit) = find_next_netdev_feature((mask_addr), (bit) - 1))
+ (bit) = find_next_netdev_feature((mask_addr), (bit)))
/* Features valid for ethtool to change */
/* = all defined minus driver/device-class-related */
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 59e27a2b7bf0..f736c020cde2 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -199,10 +199,10 @@ struct net_device_stats {
* Try to fit them in a single cache line, for dev_get_stats() sake.
*/
struct net_device_core_stats {
- local_t rx_dropped;
- local_t tx_dropped;
- local_t rx_nohandler;
-} __aligned(4 * sizeof(local_t));
+ unsigned long rx_dropped;
+ unsigned long tx_dropped;
+ unsigned long rx_nohandler;
+} __aligned(4 * sizeof(unsigned long));
#include <linux/cache.h>
#include <linux/skbuff.h>
@@ -900,7 +900,7 @@ struct net_device_path_stack {
struct net_device_path_ctx {
const struct net_device *dev;
- const u8 *daddr;
+ u8 daddr[ETH_ALEN];
int num_vlans;
struct {
@@ -3843,15 +3843,15 @@ static __always_inline bool __is_skb_forwardable(const struct net_device *dev,
return false;
}
-struct net_device_core_stats *netdev_core_stats_alloc(struct net_device *dev);
+struct net_device_core_stats __percpu *netdev_core_stats_alloc(struct net_device *dev);
-static inline struct net_device_core_stats *dev_core_stats(struct net_device *dev)
+static inline struct net_device_core_stats __percpu *dev_core_stats(struct net_device *dev)
{
/* This READ_ONCE() pairs with the write in netdev_core_stats_alloc() */
struct net_device_core_stats __percpu *p = READ_ONCE(dev->core_stats);
if (likely(p))
- return this_cpu_ptr(p);
+ return p;
return netdev_core_stats_alloc(dev);
}
@@ -3859,14 +3859,11 @@ static inline struct net_device_core_stats *dev_core_stats(struct net_device *de
#define DEV_CORE_STATS_INC(FIELD) \
static inline void dev_core_stats_##FIELD##_inc(struct net_device *dev) \
{ \
- struct net_device_core_stats *p; \
+ struct net_device_core_stats __percpu *p; \
\
- preempt_disable(); \
p = dev_core_stats(dev); \
- \
if (p) \
- local_inc(&p->FIELD); \
- preempt_enable(); \
+ this_cpu_inc(p->FIELD); \
}
DEV_CORE_STATS_INC(rx_dropped)
DEV_CORE_STATS_INC(tx_dropped)
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index f626a445d1a8..29ec3e3481ff 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -137,6 +137,7 @@ enum {
NVME_REG_CMBMSC = 0x0050, /* Controller Memory Buffer Memory
* Space Control
*/
+ NVME_REG_CRTO = 0x0068, /* Controller Ready Timeouts */
NVME_REG_PMRCAP = 0x0e00, /* Persistent Memory Capabilities */
NVME_REG_PMRCTL = 0x0e04, /* Persistent Memory Region Control */
NVME_REG_PMRSTS = 0x0e08, /* Persistent Memory Region Status */
@@ -161,6 +162,9 @@ enum {
#define NVME_CMB_BIR(cmbloc) ((cmbloc) & 0x7)
#define NVME_CMB_OFST(cmbloc) (((cmbloc) >> 12) & 0xfffff)
+#define NVME_CRTO_CRIMT(crto) ((crto) >> 16)
+#define NVME_CRTO_CRWMT(crto) ((crto) & 0xffff)
+
enum {
NVME_CMBSZ_SQS = 1 << 0,
NVME_CMBSZ_CQS = 1 << 1,
@@ -204,8 +208,10 @@ enum {
NVME_CC_SHN_MASK = 3 << NVME_CC_SHN_SHIFT,
NVME_CC_IOSQES = NVME_NVM_IOSQES << NVME_CC_IOSQES_SHIFT,
NVME_CC_IOCQES = NVME_NVM_IOCQES << NVME_CC_IOCQES_SHIFT,
- NVME_CAP_CSS_NVM = 1 << 0,
- NVME_CAP_CSS_CSI = 1 << 6,
+ NVME_CC_CRIME = 1 << 24,
+};
+
+enum {
NVME_CSTS_RDY = 1 << 0,
NVME_CSTS_CFS = 1 << 1,
NVME_CSTS_NSSRO = 1 << 4,
@@ -214,10 +220,23 @@ enum {
NVME_CSTS_SHST_OCCUR = 1 << 2,
NVME_CSTS_SHST_CMPLT = 2 << 2,
NVME_CSTS_SHST_MASK = 3 << 2,
+};
+
+enum {
NVME_CMBMSC_CRE = 1 << 0,
NVME_CMBMSC_CMSE = 1 << 1,
};
+enum {
+ NVME_CAP_CSS_NVM = 1 << 0,
+ NVME_CAP_CSS_CSI = 1 << 6,
+};
+
+enum {
+ NVME_CAP_CRMS_CRIMS = 1ULL << 59,
+ NVME_CAP_CRMS_CRWMS = 1ULL << 60,
+};
+
struct nvme_id_power_state {
__le16 max_power; /* centiwatts */
__u8 rsvd2;
@@ -405,6 +424,21 @@ struct nvme_id_ns {
__u8 vs[3712];
};
+/* I/O Command Set Independent Identify Namespace Data Structure */
+struct nvme_id_ns_cs_indep {
+ __u8 nsfeat;
+ __u8 nmic;
+ __u8 rescap;
+ __u8 fpi;
+ __le32 anagrpid;
+ __u8 nsattr;
+ __u8 rsvd9;
+ __le16 nvmsetid;
+ __le16 endgid;
+ __u8 nstat;
+ __u8 rsvd15[4081];
+};
+
struct nvme_zns_lbafe {
__le64 zsze;
__u8 zdes;
@@ -469,6 +503,7 @@ enum {
NVME_ID_CNS_NS_DESC_LIST = 0x03,
NVME_ID_CNS_CS_NS = 0x05,
NVME_ID_CNS_CS_CTRL = 0x06,
+ NVME_ID_CNS_NS_CS_INDEP = 0x08,
NVME_ID_CNS_NS_PRESENT_LIST = 0x10,
NVME_ID_CNS_NS_PRESENT = 0x11,
NVME_ID_CNS_CTRL_NS_LIST = 0x12,
@@ -523,6 +558,10 @@ enum {
};
enum {
+ NVME_NSTAT_NRDY = 1 << 0,
+};
+
+enum {
NVME_NVM_NS_16B_GUARD = 0,
NVME_NVM_NS_32B_GUARD = 1,
NVME_NVM_NS_64B_GUARD = 2,
@@ -1583,6 +1622,7 @@ enum {
NVME_SC_NS_WRITE_PROTECTED = 0x20,
NVME_SC_CMD_INTERRUPTED = 0x21,
NVME_SC_TRANSIENT_TR_ERR = 0x22,
+ NVME_SC_ADMIN_COMMAND_MEDIA_NOT_READY = 0x24,
NVME_SC_INVALID_IO_CMD_SET = 0x2C,
NVME_SC_LBA_RANGE = 0x80,
@@ -1679,9 +1719,11 @@ enum {
/*
* Path-related Errors:
*/
+ NVME_SC_INTERNAL_PATH_ERROR = 0x300,
NVME_SC_ANA_PERSISTENT_LOSS = 0x301,
NVME_SC_ANA_INACCESSIBLE = 0x302,
NVME_SC_ANA_TRANSITION = 0x303,
+ NVME_SC_CTRL_PATH_ERROR = 0x360,
NVME_SC_HOST_PATH_ERROR = 0x370,
NVME_SC_HOST_ABORTED_CMD = 0x371,
diff --git a/include/linux/posix_acl_xattr.h b/include/linux/posix_acl_xattr.h
index 060e8d203181..1766e1de6956 100644
--- a/include/linux/posix_acl_xattr.h
+++ b/include/linux/posix_acl_xattr.h
@@ -34,15 +34,19 @@ posix_acl_xattr_count(size_t size)
#ifdef CONFIG_FS_POSIX_ACL
void posix_acl_fix_xattr_from_user(struct user_namespace *mnt_userns,
+ struct inode *inode,
void *value, size_t size);
void posix_acl_fix_xattr_to_user(struct user_namespace *mnt_userns,
+ struct inode *inode,
void *value, size_t size);
#else
static inline void posix_acl_fix_xattr_from_user(struct user_namespace *mnt_userns,
+ struct inode *inode,
void *value, size_t size)
{
}
static inline void posix_acl_fix_xattr_to_user(struct user_namespace *mnt_userns,
+ struct inode *inode,
void *value, size_t size)
{
}
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index e7c39c200e2b..1a32036c918c 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -196,6 +196,7 @@ void synchronize_rcu_tasks_rude(void);
void exit_tasks_rcu_start(void);
void exit_tasks_rcu_finish(void);
#else /* #ifdef CONFIG_TASKS_RCU_GENERIC */
+#define rcu_tasks_classic_qs(t, preempt) do { } while (0)
#define rcu_tasks_qs(t, preempt) do { } while (0)
#define rcu_note_voluntary_context_switch(t) do { } while (0)
#define call_rcu_tasks call_rcu
diff --git a/include/linux/sched.h b/include/linux/sched.h
index d5e3c00b74e1..b3278f8184d5 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1443,6 +1443,7 @@ struct task_struct {
int pagefault_disabled;
#ifdef CONFIG_MMU
struct task_struct *oom_reaper_list;
+ struct timer_list oom_reaper_timer;
#endif
#ifdef CONFIG_VMAP_STACK
struct vm_struct *stack_vm_area;
@@ -2117,6 +2118,47 @@ static inline void cond_resched_rcu(void)
#endif
}
+#ifdef CONFIG_PREEMPT_DYNAMIC
+
+extern bool preempt_model_none(void);
+extern bool preempt_model_voluntary(void);
+extern bool preempt_model_full(void);
+
+#else
+
+static inline bool preempt_model_none(void)
+{
+ return IS_ENABLED(CONFIG_PREEMPT_NONE);
+}
+static inline bool preempt_model_voluntary(void)
+{
+ return IS_ENABLED(CONFIG_PREEMPT_VOLUNTARY);
+}
+static inline bool preempt_model_full(void)
+{
+ return IS_ENABLED(CONFIG_PREEMPT);
+}
+
+#endif
+
+static inline bool preempt_model_rt(void)
+{
+ return IS_ENABLED(CONFIG_PREEMPT_RT);
+}
+
+/*
+ * Does the preemption model allow non-cooperative preemption?
+ *
+ * For !CONFIG_PREEMPT_DYNAMIC kernels this is an exact match with
+ * CONFIG_PREEMPTION; for CONFIG_PREEMPT_DYNAMIC this doesn't work as the
+ * kernel is *built* with CONFIG_PREEMPTION=y but may run with e.g. the
+ * PREEMPT_NONE model.
+ */
+static inline bool preempt_model_preemptible(void)
+{
+ return preempt_model_full() || preempt_model_rt();
+}
+
/*
* Does a critical section need to be broken due to another
* task waiting?: (technically does not depend on CONFIG_PREEMPTION,
diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h
index a80356e9dc69..1ad1f4bfa025 100644
--- a/include/linux/sched/mm.h
+++ b/include/linux/sched/mm.h
@@ -136,6 +136,14 @@ static inline void mm_update_next_owner(struct mm_struct *mm)
#endif /* CONFIG_MEMCG */
#ifdef CONFIG_MMU
+#ifndef arch_get_mmap_end
+#define arch_get_mmap_end(addr) (TASK_SIZE)
+#endif
+
+#ifndef arch_get_mmap_base
+#define arch_get_mmap_base(addr, base) (base)
+#endif
+
extern void arch_pick_mmap_layout(struct mm_struct *mm,
struct rlimit *rlim_stack);
extern unsigned long
diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h
index 3c8b34876744..66b689f6cfcb 100644
--- a/include/linux/sched/signal.h
+++ b/include/linux/sched/signal.h
@@ -356,13 +356,22 @@ static inline void clear_notify_signal(void)
}
/*
+ * Returns 'true' if kick_process() is needed to force a transition from
+ * user -> kernel to guarantee expedient run of TWA_SIGNAL based task_work.
+ */
+static inline bool __set_notify_signal(struct task_struct *task)
+{
+ return !test_and_set_tsk_thread_flag(task, TIF_NOTIFY_SIGNAL) &&
+ !wake_up_state(task, TASK_INTERRUPTIBLE);
+}
+
+/*
* Called to break out of interruptible wait loops, and enter the
* exit_to_user_mode_loop().
*/
static inline void set_notify_signal(struct task_struct *task)
{
- if (!test_and_set_tsk_thread_flag(task, TIF_NOTIFY_SIGNAL) &&
- !wake_up_state(task, TASK_INTERRUPTIBLE))
+ if (__set_notify_signal(task))
kick_process(task);
}
diff --git a/include/linux/socket.h b/include/linux/socket.h
index 6f85f5d957ef..17311ad9f9af 100644
--- a/include/linux/socket.h
+++ b/include/linux/socket.h
@@ -50,6 +50,9 @@ struct linger {
struct msghdr {
void *msg_name; /* ptr to socket address structure */
int msg_namelen; /* size of socket address structure */
+
+ int msg_inq; /* output, data left in socket */
+
struct iov_iter msg_iter; /* data */
/*
@@ -62,8 +65,9 @@ struct msghdr {
void __user *msg_control_user;
};
bool msg_control_is_user : 1;
- __kernel_size_t msg_controllen; /* ancillary data buffer length */
+ bool msg_get_inq : 1;/* return INQ after receive */
unsigned int msg_flags; /* flags on received message */
+ __kernel_size_t msg_controllen; /* ancillary data buffer length */
struct kiocb *msg_iocb; /* ptr to iocb for async requests */
};
@@ -434,6 +438,7 @@ extern struct file *do_accept(struct file *file, unsigned file_flags,
extern int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr,
int __user *upeer_addrlen, int flags);
extern int __sys_socket(int family, int type, int protocol);
+extern struct file *__sys_socket_file(int family, int type, int protocol);
extern int __sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen);
extern int __sys_connect_file(struct file *file, struct sockaddr_storage *addr,
int addrlen, int file_flags);
diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h
index cb1f4351e8ba..e3014319d1ad 100644
--- a/include/linux/srcutree.h
+++ b/include/linux/srcutree.h
@@ -47,11 +47,9 @@ struct srcu_data {
*/
struct srcu_node {
spinlock_t __private lock;
- unsigned long srcu_have_cbs[4]; /* GP seq for children */
- /* having CBs, but only */
- /* is > ->srcu_gq_seq. */
- unsigned long srcu_data_have_cbs[4]; /* Which srcu_data structs */
- /* have CBs for given GP? */
+ unsigned long srcu_have_cbs[4]; /* GP seq for children having CBs, but only */
+ /* if greater than ->srcu_gq_seq. */
+ unsigned long srcu_data_have_cbs[4]; /* Which srcu_data structs have CBs for given GP? */
unsigned long srcu_gp_seq_needed_exp; /* Furthest future exp GP. */
struct srcu_node *srcu_parent; /* Next up in tree. */
int grplo; /* Least CPU for node. */
@@ -62,18 +60,24 @@ struct srcu_node {
* Per-SRCU-domain structure, similar in function to rcu_state.
*/
struct srcu_struct {
- struct srcu_node node[NUM_RCU_NODES]; /* Combining tree. */
+ struct srcu_node *node; /* Combining tree. */
struct srcu_node *level[RCU_NUM_LVLS + 1];
/* First node at each level. */
+ int srcu_size_state; /* Small-to-big transition state. */
struct mutex srcu_cb_mutex; /* Serialize CB preparation. */
- spinlock_t __private lock; /* Protect counters */
+ spinlock_t __private lock; /* Protect counters and size state. */
struct mutex srcu_gp_mutex; /* Serialize GP work. */
unsigned int srcu_idx; /* Current rdr array element. */
unsigned long srcu_gp_seq; /* Grace-period seq #. */
unsigned long srcu_gp_seq_needed; /* Latest gp_seq needed. */
unsigned long srcu_gp_seq_needed_exp; /* Furthest future exp GP. */
+ unsigned long srcu_gp_start; /* Last GP start timestamp (jiffies) */
unsigned long srcu_last_gp_end; /* Last GP end timestamp (ns) */
+ unsigned long srcu_size_jiffies; /* Current contention-measurement interval. */
+ unsigned long srcu_n_lock_retries; /* Contention events in current interval. */
+ unsigned long srcu_n_exp_nodelay; /* # expedited no-delays in current GP phase. */
struct srcu_data __percpu *sda; /* Per-CPU srcu_data array. */
+ bool sda_is_static; /* May ->sda be passed to free_percpu()? */
unsigned long srcu_barrier_seq; /* srcu_barrier seq #. */
struct mutex srcu_barrier_mutex; /* Serialize barrier ops. */
struct completion srcu_barrier_completion;
@@ -81,10 +85,23 @@ struct srcu_struct {
atomic_t srcu_barrier_cpu_cnt; /* # CPUs not yet posting a */
/* callback for the barrier */
/* operation. */
+ unsigned long reschedule_jiffies;
+ unsigned long reschedule_count;
struct delayed_work work;
struct lockdep_map dep_map;
};
+/* Values for size state variable (->srcu_size_state). */
+#define SRCU_SIZE_SMALL 0
+#define SRCU_SIZE_ALLOC 1
+#define SRCU_SIZE_WAIT_BARRIER 2
+#define SRCU_SIZE_WAIT_CALL 3
+#define SRCU_SIZE_WAIT_CBS1 4
+#define SRCU_SIZE_WAIT_CBS2 5
+#define SRCU_SIZE_WAIT_CBS3 6
+#define SRCU_SIZE_WAIT_CBS4 7
+#define SRCU_SIZE_BIG 8
+
/* Values for state variable (bottom bits of ->srcu_gp_seq). */
#define SRCU_STATE_IDLE 0
#define SRCU_STATE_SCAN1 1
@@ -121,6 +138,7 @@ struct srcu_struct {
#ifdef MODULE
# define __DEFINE_SRCU(name, is_static) \
is_static struct srcu_struct name; \
+ extern struct srcu_struct * const __srcu_struct_##name; \
struct srcu_struct * const __srcu_struct_##name \
__section("___srcu_struct_ptrs") = &name
#else
diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h
index 24eea1b05ca2..29917850f079 100644
--- a/include/linux/stmmac.h
+++ b/include/linux/stmmac.h
@@ -270,5 +270,6 @@ struct plat_stmmacenet_data {
int msi_rx_base_vec;
int msi_tx_base_vec;
bool use_phy_wol;
+ bool sph_disable;
};
#endif
diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h
index 267b7aeaf1a6..90501404fa49 100644
--- a/include/linux/sunrpc/clnt.h
+++ b/include/linux/sunrpc/clnt.h
@@ -160,6 +160,7 @@ struct rpc_add_xprt_test {
#define RPC_CLNT_CREATE_NO_RETRANS_TIMEOUT (1UL << 9)
#define RPC_CLNT_CREATE_SOFTERR (1UL << 10)
#define RPC_CLNT_CREATE_REUSEPORT (1UL << 11)
+#define RPC_CLNT_CREATE_CONNECTED (1UL << 12)
struct rpc_clnt *rpc_create(struct rpc_create_args *args);
struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *,
diff --git a/include/linux/task_work.h b/include/linux/task_work.h
index 897494b597ba..795ef5a68429 100644
--- a/include/linux/task_work.h
+++ b/include/linux/task_work.h
@@ -17,6 +17,7 @@ enum task_work_notify_mode {
TWA_NONE,
TWA_RESUME,
TWA_SIGNAL,
+ TWA_SIGNAL_NO_IPI,
};
static inline bool task_work_pending(struct task_struct *task)
diff --git a/include/linux/torture.h b/include/linux/torture.h
index 63fa4196e51c..7038104463e4 100644
--- a/include/linux/torture.h
+++ b/include/linux/torture.h
@@ -118,7 +118,7 @@ void _torture_stop_kthread(char *m, struct task_struct **tp);
_torture_stop_kthread("Stopping " #n " task", &(tp))
#ifdef CONFIG_PREEMPTION
-#define torture_preempt_schedule() preempt_schedule()
+#define torture_preempt_schedule() __preempt_schedule()
#else
#define torture_preempt_schedule() do { } while (0)
#endif
diff --git a/include/linux/usb/pd_bdo.h b/include/linux/usb/pd_bdo.h
index 033fe3e17141..7c25b88d79f9 100644
--- a/include/linux/usb/pd_bdo.h
+++ b/include/linux/usb/pd_bdo.h
@@ -15,7 +15,7 @@
#define BDO_MODE_CARRIER2 (5 << 28)
#define BDO_MODE_CARRIER3 (6 << 28)
#define BDO_MODE_EYE (7 << 28)
-#define BDO_MODE_TESTDATA (8 << 28)
+#define BDO_MODE_TESTDATA (8U << 28)
#define BDO_MODE_MASK(mode) ((mode) & 0xf0000000)
diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index 3b1df7da402d..b159c2789961 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -26,7 +26,7 @@ struct notifier_block; /* in notifier.h */
#define VM_KASAN 0x00000080 /* has allocated kasan shadow memory */
#define VM_FLUSH_RESET_PERMS 0x00000100 /* reset direct map and flush TLB on unmap, can't be freed in atomic context */
#define VM_MAP_PUT_PAGES 0x00000200 /* put pages and free array in vfree */
-#define VM_NO_HUGE_VMAP 0x00000400 /* force PAGE_SIZE pte mapping */
+#define VM_ALLOW_HUGE_VMAP 0x00000400 /* Allow for huge pages on archs with HAVE_ARCH_HUGE_VMALLOC */
#if (defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)) && \
!defined(CONFIG_KASAN_VMALLOC)
@@ -153,7 +153,7 @@ extern void *__vmalloc_node_range(unsigned long size, unsigned long align,
const void *caller) __alloc_size(1);
void *__vmalloc_node(unsigned long size, unsigned long align, gfp_t gfp_mask,
int node, const void *caller) __alloc_size(1);
-void *vmalloc_no_huge(unsigned long size) __alloc_size(1);
+void *vmalloc_huge(unsigned long size, gfp_t gfp_mask) __alloc_size(1);
extern void *__vmalloc_array(size_t n, size_t size, gfp_t flags) __alloc_size(1, 2);
extern void *vmalloc_array(size_t n, size_t size) __alloc_size(1, 2);
diff --git a/include/memory/renesas-rpc-if.h b/include/memory/renesas-rpc-if.h
index 7c93f5177532..9c0ad64b8d29 100644
--- a/include/memory/renesas-rpc-if.h
+++ b/include/memory/renesas-rpc-if.h
@@ -72,6 +72,7 @@ struct rpcif {
enum rpcif_type type;
enum rpcif_data_dir dir;
u8 bus_size;
+ u8 xfer_size;
void *buffer;
u32 xferlen;
u32 smcr;
diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index 5cb095b09a94..69ef31cea582 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -578,6 +578,7 @@ enum {
#define HCI_ERROR_CONNECTION_TIMEOUT 0x08
#define HCI_ERROR_REJ_LIMITED_RESOURCES 0x0d
#define HCI_ERROR_REJ_BAD_ADDR 0x0f
+#define HCI_ERROR_INVALID_PARAMETERS 0x12
#define HCI_ERROR_REMOTE_USER_TERM 0x13
#define HCI_ERROR_REMOTE_LOW_RESOURCES 0x14
#define HCI_ERROR_REMOTE_POWER_OFF 0x15
diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index d5377740e99c..62d7b81b1cb7 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -36,6 +36,9 @@
/* HCI priority */
#define HCI_PRIO_MAX 7
+/* HCI maximum id value */
+#define HCI_MAX_ID 10000
+
/* HCI Core structures */
struct inquiry_data {
bdaddr_t bdaddr;
@@ -1156,7 +1159,7 @@ int hci_conn_switch_role(struct hci_conn *conn, __u8 role);
void hci_conn_enter_active_mode(struct hci_conn *conn, __u8 force_active);
-void hci_le_conn_failed(struct hci_conn *conn, u8 status);
+void hci_conn_failed(struct hci_conn *conn, u8 status);
/*
* hci_conn_get() and hci_conn_put() are used to control the life-time of an
diff --git a/include/net/esp.h b/include/net/esp.h
index 90cd02ff77ef..9c5637d41d95 100644
--- a/include/net/esp.h
+++ b/include/net/esp.h
@@ -4,8 +4,6 @@
#include <linux/skbuff.h>
-#define ESP_SKB_FRAG_MAXSIZE (PAGE_SIZE << SKB_FRAG_PAGE_ORDER)
-
struct ip_esp_hdr;
static inline struct ip_esp_hdr *ip_esp_hdr(const struct sk_buff *skb)
diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h
index f72ec113ae56..98e1ec1a14f0 100644
--- a/include/net/inet_hashtables.h
+++ b/include/net/inet_hashtables.h
@@ -425,7 +425,7 @@ static inline void sk_rcv_saddr_set(struct sock *sk, __be32 addr)
}
int __inet_hash_connect(struct inet_timewait_death_row *death_row,
- struct sock *sk, u32 port_offset,
+ struct sock *sk, u64 port_offset,
int (*check_established)(struct inet_timewait_death_row *,
struct sock *, __u16,
struct inet_timewait_sock **));
diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h
index 463ae5d33eb0..5b47545f22d3 100644
--- a/include/net/inet_timewait_sock.h
+++ b/include/net/inet_timewait_sock.h
@@ -71,7 +71,6 @@ struct inet_timewait_sock {
tw_tos : 8;
u32 tw_txhash;
u32 tw_priority;
- u32 tw_bslot; /* bind bucket slot */
struct timer_list tw_timer;
struct inet_bind_bucket *tw_tb;
};
@@ -110,6 +109,8 @@ static inline void inet_twsk_reschedule(struct inet_timewait_sock *tw, int timeo
void inet_twsk_deschedule_put(struct inet_timewait_sock *tw);
+void inet_twsk_purge(struct inet_hashinfo *hashinfo, int family);
+
static inline
struct net *twsk_net(const struct inet_timewait_sock *twsk)
{
diff --git a/include/net/ip.h b/include/net/ip.h
index 3984f2c39c4b..0161137914cf 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -56,6 +56,7 @@ struct inet_skb_parm {
#define IPSKB_DOREDIRECT BIT(5)
#define IPSKB_FRAG_PMTU BIT(6)
#define IPSKB_L3SLAVE BIT(7)
+#define IPSKB_NOPOLICY BIT(8)
u16 frag_max_size;
};
diff --git a/include/net/ip6_tunnel.h b/include/net/ip6_tunnel.h
index a38c4f1e4e5c..74b369bddf49 100644
--- a/include/net/ip6_tunnel.h
+++ b/include/net/ip6_tunnel.h
@@ -58,7 +58,7 @@ struct ip6_tnl {
/* These fields used only by GRE */
__u32 i_seqno; /* The last seen seqno */
- __u32 o_seqno; /* The last output seqno */
+ atomic_t o_seqno; /* The last output seqno */
int hlen; /* tun_hlen + encap_hlen */
int tun_hlen; /* Precalculated header length */
int encap_hlen; /* Encap header length (FOU,GUE) */
diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h
index 0219fe907b26..c24fa934221d 100644
--- a/include/net/ip_tunnels.h
+++ b/include/net/ip_tunnels.h
@@ -116,7 +116,7 @@ struct ip_tunnel {
/* These four fields used only by GRE */
u32 i_seqno; /* The last seen seqno */
- u32 o_seqno; /* The last output seqno */
+ atomic_t o_seqno; /* The last output seqno */
int tun_hlen; /* Precalculated header length */
/* These four fields used only by ERSPAN */
@@ -243,11 +243,18 @@ static inline __be32 tunnel_id_to_key32(__be64 tun_id)
static inline void ip_tunnel_init_flow(struct flowi4 *fl4,
int proto,
__be32 daddr, __be32 saddr,
- __be32 key, __u8 tos, int oif,
+ __be32 key, __u8 tos,
+ struct net *net, int oif,
__u32 mark, __u32 tun_inner_hash)
{
memset(fl4, 0, sizeof(*fl4));
- fl4->flowi4_oif = oif;
+
+ if (oif) {
+ fl4->flowi4_l3mdev = l3mdev_master_upper_ifindex_by_index_rcu(net, oif);
+ /* Legacy VRF/l3mdev use case */
+ fl4->flowi4_oif = fl4->flowi4_l3mdev ? 0 : oif;
+ }
+
fl4->daddr = daddr;
fl4->saddr = saddr;
fl4->flowi4_tos = tos;
diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h
index 3d83b64471d3..b4af4837d80b 100644
--- a/include/net/netns/ipv6.h
+++ b/include/net/netns/ipv6.h
@@ -75,8 +75,8 @@ struct netns_ipv6 {
struct list_head fib6_walkers;
rwlock_t fib6_walker_lock;
spinlock_t fib6_gc_lock;
- unsigned int ip6_rt_gc_expire;
- unsigned long ip6_rt_last_gc;
+ atomic_t ip6_rt_gc_expire;
+ unsigned long ip6_rt_last_gc;
unsigned char flowlabel_has_excl;
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
bool fib6_has_custom_rules;
diff --git a/include/net/secure_seq.h b/include/net/secure_seq.h
index d7d2495f83c2..dac91aa38c5a 100644
--- a/include/net/secure_seq.h
+++ b/include/net/secure_seq.h
@@ -4,8 +4,8 @@
#include <linux/types.h>
-u32 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport);
-u32 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr,
+u64 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport);
+u64 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr,
__be16 dport);
u32 secure_tcp_seq(__be32 saddr, __be32 daddr,
__be16 sport, __be16 dport);
diff --git a/include/net/tc_act/tc_pedit.h b/include/net/tc_act/tc_pedit.h
index 748cf87a4d7e..3e02709a1df6 100644
--- a/include/net/tc_act/tc_pedit.h
+++ b/include/net/tc_act/tc_pedit.h
@@ -14,6 +14,7 @@ struct tcf_pedit {
struct tc_action common;
unsigned char tcfp_nkeys;
unsigned char tcfp_flags;
+ u32 tcfp_off_max_hint;
struct tc_pedit_key *tcfp_keys;
struct tcf_pedit_key_ex *tcfp_keys_ex;
};
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 70ca4a5e330a..cc1295037533 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -480,6 +480,7 @@ int __cookie_v4_check(const struct iphdr *iph, const struct tcphdr *th,
u32 cookie);
struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb);
struct request_sock *cookie_tcp_reqsk_alloc(const struct request_sock_ops *ops,
+ const struct tcp_request_sock_ops *af_ops,
struct sock *sk, struct sk_buff *skb);
#ifdef CONFIG_SYN_COOKIES
@@ -620,6 +621,7 @@ void tcp_synack_rtt_meas(struct sock *sk, struct request_sock *req);
void tcp_reset(struct sock *sk, struct sk_buff *skb);
void tcp_skb_mark_lost_uncond_verify(struct tcp_sock *tp, struct sk_buff *skb);
void tcp_fin(struct sock *sk);
+void tcp_check_space(struct sock *sk);
/* tcp_timer.c */
void tcp_init_xmit_timers(struct sock *);
@@ -1042,6 +1044,7 @@ struct rate_sample {
int losses; /* number of packets marked lost upon ACK */
u32 acked_sacked; /* number of packets newly (S)ACKed upon ACK */
u32 prior_in_flight; /* in flight before this ACK */
+ u32 last_end_seq; /* end_seq of most recently ACKed packet */
bool is_app_limited; /* is sample from packet with bubble in pipe? */
bool is_retrans; /* is sample from retransmission? */
bool is_ack_delayed; /* is this (likely) a delayed ACK? */
@@ -1164,6 +1167,11 @@ void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost,
bool is_sack_reneg, struct rate_sample *rs);
void tcp_rate_check_app_limited(struct sock *sk);
+static inline bool tcp_skb_sent_after(u64 t1, u64 t2, u32 seq1, u32 seq2)
+{
+ return t1 > t2 || (t1 == t2 && after(seq1, seq2));
+}
+
/* These functions determine how the current flow behaves in respect of SACK
* handling. SACK is negotiated with the peer, and therefore it can vary
* between different flows.
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 6fb899ff5afc..d2efddce65d4 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -1093,6 +1093,18 @@ static inline bool __xfrm_check_nopolicy(struct net *net, struct sk_buff *skb,
return false;
}
+static inline bool __xfrm_check_dev_nopolicy(struct sk_buff *skb,
+ int dir, unsigned short family)
+{
+ if (dir != XFRM_POLICY_OUT && family == AF_INET) {
+ /* same dst may be used for traffic originating from
+ * devices with different policy settings.
+ */
+ return IPCB(skb)->flags & IPSKB_NOPOLICY;
+ }
+ return skb_dst(skb) && (skb_dst(skb)->flags & DST_NOPOLICY);
+}
+
static inline int __xfrm_policy_check2(struct sock *sk, int dir,
struct sk_buff *skb,
unsigned int family, int reverse)
@@ -1104,7 +1116,7 @@ static inline int __xfrm_policy_check2(struct sock *sk, int dir,
return __xfrm_policy_check(sk, ndir, skb, family);
return __xfrm_check_nopolicy(net, skb, dir) ||
- (skb_dst(skb) && (skb_dst(skb)->flags & DST_NOPOLICY)) ||
+ __xfrm_check_dev_nopolicy(skb, dir, family) ||
__xfrm_policy_check(sk, ndir, skb, family);
}
diff --git a/include/net/xsk_buff_pool.h b/include/net/xsk_buff_pool.h
index 5554ee75e7da..647722e847b4 100644
--- a/include/net/xsk_buff_pool.h
+++ b/include/net/xsk_buff_pool.h
@@ -97,6 +97,7 @@ int xp_assign_dev(struct xsk_buff_pool *pool, struct net_device *dev,
u16 queue_id, u16 flags);
int xp_assign_dev_shared(struct xsk_buff_pool *pool, struct xdp_umem *umem,
struct net_device *dev, u16 queue_id);
+int xp_alloc_tx_descs(struct xsk_buff_pool *pool, struct xdp_sock *xs);
void xp_destroy(struct xsk_buff_pool *pool);
void xp_get_pool(struct xsk_buff_pool *pool);
bool xp_put_pool(struct xsk_buff_pool *pool);
diff --git a/include/soc/mscc/ocelot_vcap.h b/include/soc/mscc/ocelot_vcap.h
index 7b2bf9b1fe69..de26c992f821 100644
--- a/include/soc/mscc/ocelot_vcap.h
+++ b/include/soc/mscc/ocelot_vcap.h
@@ -681,7 +681,6 @@ struct ocelot_vcap_id {
struct ocelot_vcap_filter {
struct list_head list;
- struct list_head trap_list;
enum ocelot_vcap_filter_type type;
int block_id;
@@ -695,6 +694,7 @@ struct ocelot_vcap_filter {
struct ocelot_vcap_stats stats;
/* For VCAP IS1 and IS2 */
bool take_ts;
+ bool is_trap;
unsigned long ingress_port_mask;
/* For VCAP ES0 */
struct ocelot_vcap_port ingress_port;
diff --git a/include/sound/soc-component.h b/include/sound/soc-component.h
index a52080407b98..766dc6f009c0 100644
--- a/include/sound/soc-component.h
+++ b/include/sound/soc-component.h
@@ -179,6 +179,10 @@ struct snd_soc_component_driver {
struct snd_pcm_hw_params *params);
bool use_dai_pcm_id; /* use DAI link PCM ID as PCM device number */
int be_pcm_base; /* base device ID for all BE PCMs */
+
+#ifdef CONFIG_DEBUG_FS
+ const char *debugfs_prefix;
+#endif
};
struct snd_soc_component {
diff --git a/include/target/target_core_backend.h b/include/target/target_core_backend.h
index 675f3a1fe613..773963a1e0b5 100644
--- a/include/target/target_core_backend.h
+++ b/include/target/target_core_backend.h
@@ -14,7 +14,7 @@
#define TRANSPORT_FLAG_PASSTHROUGH_ALUA 0x2
#define TRANSPORT_FLAG_PASSTHROUGH_PGR 0x4
-struct request_queue;
+struct block_device;
struct scatterlist;
struct target_backend_ops {
@@ -117,7 +117,7 @@ sense_reason_t passthrough_parse_cdb(struct se_cmd *cmd,
bool target_sense_desc_format(struct se_device *dev);
sector_t target_to_linux_sector(struct se_device *dev, sector_t lb);
bool target_configure_unmap_from_queue(struct se_dev_attrib *attrib,
- struct request_queue *q);
+ struct block_device *bdev);
static inline bool target_dev_configured(struct se_device *se_dev)
{
diff --git a/include/trace/events/io_uring.h b/include/trace/events/io_uring.h
index cddf5b6fbeb4..66fcc5a1a5b1 100644
--- a/include/trace/events/io_uring.h
+++ b/include/trace/events/io_uring.h
@@ -7,6 +7,7 @@
#include <linux/tracepoint.h>
#include <uapi/linux/io_uring.h>
+#include <linux/io_uring.h>
struct io_wq_work;
@@ -147,7 +148,7 @@ TRACE_EVENT(io_uring_queue_async_work,
TP_PROTO(void *ctx, void * req, unsigned long long user_data, u8 opcode,
unsigned int flags, struct io_wq_work *work, int rw),
- TP_ARGS(ctx, req, user_data, flags, opcode, work, rw),
+ TP_ARGS(ctx, req, user_data, opcode, flags, work, rw),
TP_STRUCT__entry (
__field( void *, ctx )
@@ -169,8 +170,9 @@ TRACE_EVENT(io_uring_queue_async_work,
__entry->rw = rw;
),
- TP_printk("ring %p, request %p, user_data 0x%llx, opcode %d, flags 0x%x, %s queue, work %p",
- __entry->ctx, __entry->req, __entry->user_data, __entry->opcode,
+ TP_printk("ring %p, request %p, user_data 0x%llx, opcode %s, flags 0x%x, %s queue, work %p",
+ __entry->ctx, __entry->req, __entry->user_data,
+ io_uring_get_opcode(__entry->opcode),
__entry->flags, __entry->rw ? "hashed" : "normal", __entry->work)
);
@@ -205,8 +207,9 @@ TRACE_EVENT(io_uring_defer,
__entry->opcode = opcode;
),
- TP_printk("ring %p, request %p, user_data 0x%llx, opcode %d",
- __entry->ctx, __entry->req, __entry->data, __entry->opcode)
+ TP_printk("ring %p, request %p, user_data 0x%llx, opcode %s",
+ __entry->ctx, __entry->req, __entry->data,
+ io_uring_get_opcode(__entry->opcode))
);
/**
@@ -305,9 +308,9 @@ TRACE_EVENT(io_uring_fail_link,
__entry->link = link;
),
- TP_printk("ring %p, request %p, user_data 0x%llx, opcode %d, link %p",
- __entry->ctx, __entry->req, __entry->user_data, __entry->opcode,
- __entry->link)
+ TP_printk("ring %p, request %p, user_data 0x%llx, opcode %s, link %p",
+ __entry->ctx, __entry->req, __entry->user_data,
+ io_uring_get_opcode(__entry->opcode), __entry->link)
);
/**
@@ -318,13 +321,16 @@ TRACE_EVENT(io_uring_fail_link,
* @user_data: user data associated with the request
* @res: result of the request
* @cflags: completion flags
+ * @extra1: extra 64-bit data for CQE32
+ * @extra2: extra 64-bit data for CQE32
*
*/
TRACE_EVENT(io_uring_complete,
- TP_PROTO(void *ctx, void *req, u64 user_data, int res, unsigned cflags),
+ TP_PROTO(void *ctx, void *req, u64 user_data, int res, unsigned cflags,
+ u64 extra1, u64 extra2),
- TP_ARGS(ctx, req, user_data, res, cflags),
+ TP_ARGS(ctx, req, user_data, res, cflags, extra1, extra2),
TP_STRUCT__entry (
__field( void *, ctx )
@@ -332,6 +338,8 @@ TRACE_EVENT(io_uring_complete,
__field( u64, user_data )
__field( int, res )
__field( unsigned, cflags )
+ __field( u64, extra1 )
+ __field( u64, extra2 )
),
TP_fast_assign(
@@ -340,12 +348,17 @@ TRACE_EVENT(io_uring_complete,
__entry->user_data = user_data;
__entry->res = res;
__entry->cflags = cflags;
+ __entry->extra1 = extra1;
+ __entry->extra2 = extra2;
),
- TP_printk("ring %p, req %p, user_data 0x%llx, result %d, cflags 0x%x",
+ TP_printk("ring %p, req %p, user_data 0x%llx, result %d, cflags 0x%x "
+ "extra1 %llu extra2 %llu ",
__entry->ctx, __entry->req,
__entry->user_data,
- __entry->res, __entry->cflags)
+ __entry->res, __entry->cflags,
+ (unsigned long long) __entry->extra1,
+ (unsigned long long) __entry->extra2)
);
/**
@@ -389,9 +402,9 @@ TRACE_EVENT(io_uring_submit_sqe,
__entry->sq_thread = sq_thread;
),
- TP_printk("ring %p, req %p, user_data 0x%llx, opcode %d, flags 0x%x, "
+ TP_printk("ring %p, req %p, user_data 0x%llx, opcode %s, flags 0x%x, "
"non block %d, sq_thread %d", __entry->ctx, __entry->req,
- __entry->user_data, __entry->opcode,
+ __entry->user_data, io_uring_get_opcode(__entry->opcode),
__entry->flags, __entry->force_nonblock, __entry->sq_thread)
);
@@ -433,8 +446,9 @@ TRACE_EVENT(io_uring_poll_arm,
__entry->events = events;
),
- TP_printk("ring %p, req %p, user_data 0x%llx, opcode %d, mask 0x%x, events 0x%x",
- __entry->ctx, __entry->req, __entry->user_data, __entry->opcode,
+ TP_printk("ring %p, req %p, user_data 0x%llx, opcode %s, mask 0x%x, events 0x%x",
+ __entry->ctx, __entry->req, __entry->user_data,
+ io_uring_get_opcode(__entry->opcode),
__entry->mask, __entry->events)
);
@@ -470,8 +484,9 @@ TRACE_EVENT(io_uring_task_add,
__entry->mask = mask;
),
- TP_printk("ring %p, req %p, user_data 0x%llx, opcode %d, mask %x",
- __entry->ctx, __entry->req, __entry->user_data, __entry->opcode,
+ TP_printk("ring %p, req %p, user_data 0x%llx, opcode %s, mask %x",
+ __entry->ctx, __entry->req, __entry->user_data,
+ io_uring_get_opcode(__entry->opcode),
__entry->mask)
);
@@ -506,7 +521,7 @@ TRACE_EVENT(io_uring_req_failed,
__field( u16, personality )
__field( u32, file_index )
__field( u64, pad1 )
- __field( u64, pad2 )
+ __field( u64, addr3 )
__field( int, error )
),
@@ -520,27 +535,69 @@ TRACE_EVENT(io_uring_req_failed,
__entry->off = sqe->off;
__entry->addr = sqe->addr;
__entry->len = sqe->len;
- __entry->op_flags = sqe->rw_flags;
+ __entry->op_flags = sqe->poll32_events;
__entry->buf_index = sqe->buf_index;
__entry->personality = sqe->personality;
__entry->file_index = sqe->file_index;
__entry->pad1 = sqe->__pad2[0];
- __entry->pad2 = sqe->__pad2[1];
+ __entry->addr3 = sqe->addr3;
__entry->error = error;
),
TP_printk("ring %p, req %p, user_data 0x%llx, "
- "op %d, flags 0x%x, prio=%d, off=%llu, addr=%llu, "
+ "opcode %s, flags 0x%x, prio=%d, off=%llu, addr=%llu, "
"len=%u, rw_flags=0x%x, buf_index=%d, "
- "personality=%d, file_index=%d, pad=0x%llx/%llx, error=%d",
+ "personality=%d, file_index=%d, pad=0x%llx, addr3=%llx, "
+ "error=%d",
__entry->ctx, __entry->req, __entry->user_data,
- __entry->opcode, __entry->flags, __entry->ioprio,
+ io_uring_get_opcode(__entry->opcode),
+ __entry->flags, __entry->ioprio,
(unsigned long long)__entry->off,
(unsigned long long) __entry->addr, __entry->len,
__entry->op_flags,
__entry->buf_index, __entry->personality, __entry->file_index,
(unsigned long long) __entry->pad1,
- (unsigned long long) __entry->pad2, __entry->error)
+ (unsigned long long) __entry->addr3, __entry->error)
+);
+
+
+/*
+ * io_uring_cqe_overflow - a CQE overflowed
+ *
+ * @ctx: pointer to a ring context structure
+ * @user_data: user data associated with the request
+ * @res: CQE result
+ * @cflags: CQE flags
+ * @ocqe: pointer to the overflow cqe (if available)
+ *
+ */
+TRACE_EVENT(io_uring_cqe_overflow,
+
+ TP_PROTO(void *ctx, unsigned long long user_data, s32 res, u32 cflags,
+ void *ocqe),
+
+ TP_ARGS(ctx, user_data, res, cflags, ocqe),
+
+ TP_STRUCT__entry (
+ __field( void *, ctx )
+ __field( unsigned long long, user_data )
+ __field( s32, res )
+ __field( u32, cflags )
+ __field( void *, ocqe )
+ ),
+
+ TP_fast_assign(
+ __entry->ctx = ctx;
+ __entry->user_data = user_data;
+ __entry->res = res;
+ __entry->cflags = cflags;
+ __entry->ocqe = ocqe;
+ ),
+
+ TP_printk("ring %p, user_data 0x%llx, res %d, flags %x, "
+ "overflow_cqe %p",
+ __entry->ctx, __entry->user_data, __entry->res,
+ __entry->cflags, __entry->ocqe)
);
#endif /* _TRACE_IO_URING_H */
diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h
index 65e786756321..fbb99a61f714 100644
--- a/include/trace/events/sched.h
+++ b/include/trace/events/sched.h
@@ -222,11 +222,11 @@ static inline long __trace_sched_switch_state(bool preempt,
TRACE_EVENT(sched_switch,
TP_PROTO(bool preempt,
- unsigned int prev_state,
struct task_struct *prev,
- struct task_struct *next),
+ struct task_struct *next,
+ unsigned int prev_state),
- TP_ARGS(preempt, prev_state, prev, next),
+ TP_ARGS(preempt, prev, next, prev_state),
TP_STRUCT__entry(
__array( char, prev_comm, TASK_COMM_LEN )
diff --git a/include/uapi/linux/cdrom.h b/include/uapi/linux/cdrom.h
index 804ff8d98f71..011e594e4a0d 100644
--- a/include/uapi/linux/cdrom.h
+++ b/include/uapi/linux/cdrom.h
@@ -103,7 +103,7 @@
#define CDROMREADALL 0x5318 /* read all 2646 bytes */
/*
- * These ioctls are (now) only in ide-cd.c for controlling
+ * These ioctls were only in (now removed) ide-cd.c for controlling
* drive spindown time. They should be implemented in the
* Uniform driver, via generic packet commands, GPCMD_MODE_SELECT_10,
* GPCMD_MODE_SENSE_10 and the GPMODE_POWER_PAGE...
diff --git a/include/uapi/linux/dma-buf.h b/include/uapi/linux/dma-buf.h
index 8e4a2ca0bcbf..b1523cb8ab30 100644
--- a/include/uapi/linux/dma-buf.h
+++ b/include/uapi/linux/dma-buf.h
@@ -92,7 +92,7 @@ struct dma_buf_sync {
* between them in actual uapi, they're just different numbers.
*/
#define DMA_BUF_SET_NAME _IOW(DMA_BUF_BASE, 1, const char *)
-#define DMA_BUF_SET_NAME_A _IOW(DMA_BUF_BASE, 1, u32)
-#define DMA_BUF_SET_NAME_B _IOW(DMA_BUF_BASE, 1, u64)
+#define DMA_BUF_SET_NAME_A _IOW(DMA_BUF_BASE, 1, __u32)
+#define DMA_BUF_SET_NAME_B _IOW(DMA_BUF_BASE, 1, __u64)
#endif
diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h
index 787c657bfae8..7ce993e6786c 100644
--- a/include/uapi/linux/elf.h
+++ b/include/uapi/linux/elf.h
@@ -42,7 +42,7 @@ typedef __s64 Elf64_Sxword;
/* ARM MTE memory tag segment type */
-#define PT_ARM_MEMTAG_MTE (PT_LOPROC + 0x1)
+#define PT_AARCH64_MEMTAG_MTE (PT_LOPROC + 0x2)
/*
* Extended Numbering
diff --git a/include/uapi/linux/fb.h b/include/uapi/linux/fb.h
index 4c14e8be7267..3a49913d006c 100644
--- a/include/uapi/linux/fb.h
+++ b/include/uapi/linux/fb.h
@@ -182,7 +182,7 @@ struct fb_fix_screeninfo {
*
* For pseudocolor: offset and length should be the same for all color
* components. Offset specifies the position of the least significant bit
- * of the pallette index in a pixel value. Length indicates the number
+ * of the palette index in a pixel value. Length indicates the number
* of available palette entries (i.e. # of entries = 1 << length).
*/
struct fb_bitfield {
diff --git a/include/uapi/linux/input-event-codes.h b/include/uapi/linux/input-event-codes.h
index 7989d9483ea7..dff8e7f17074 100644
--- a/include/uapi/linux/input-event-codes.h
+++ b/include/uapi/linux/input-event-codes.h
@@ -662,6 +662,27 @@
/* Select an area of screen to be copied */
#define KEY_SELECTIVE_SCREENSHOT 0x27a
+/* Move the focus to the next or previous user controllable element within a UI container */
+#define KEY_NEXT_ELEMENT 0x27b
+#define KEY_PREVIOUS_ELEMENT 0x27c
+
+/* Toggle Autopilot engagement */
+#define KEY_AUTOPILOT_ENGAGE_TOGGLE 0x27d
+
+/* Shortcut Keys */
+#define KEY_MARK_WAYPOINT 0x27e
+#define KEY_SOS 0x27f
+#define KEY_NAV_CHART 0x280
+#define KEY_FISHING_CHART 0x281
+#define KEY_SINGLE_RANGE_RADAR 0x282
+#define KEY_DUAL_RANGE_RADAR 0x283
+#define KEY_RADAR_OVERLAY 0x284
+#define KEY_TRADITIONAL_SONAR 0x285
+#define KEY_CLEARVU_SONAR 0x286
+#define KEY_SIDEVU_SONAR 0x287
+#define KEY_NAV_INFO 0x288
+#define KEY_BRIGHTNESS_MENU 0x289
+
/*
* Some keyboards have keys which do not have a defined meaning, these keys
* are intended to be programmed / bound to macros by the user. For most
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 1845cf7c80ba..53e7dae92e42 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -22,6 +22,7 @@ struct io_uring_sqe {
union {
__u64 off; /* offset into file */
__u64 addr2;
+ __u32 cmd_op;
};
union {
__u64 addr; /* pointer to buffer or iovecs */
@@ -45,6 +46,7 @@ struct io_uring_sqe {
__u32 rename_flags;
__u32 unlink_flags;
__u32 hardlink_flags;
+ __u32 xattr_flags;
};
__u64 user_data; /* data to be passed back at completion time */
/* pack this to avoid bogus arm OABI complaints */
@@ -60,9 +62,28 @@ struct io_uring_sqe {
__s32 splice_fd_in;
__u32 file_index;
};
- __u64 __pad2[2];
+ union {
+ struct {
+ __u64 addr3;
+ __u64 __pad2[1];
+ };
+ /*
+ * If the ring is initialized with IORING_SETUP_SQE128, then
+ * this field is used for 80 bytes of arbitrary command data
+ */
+ __u8 cmd[0];
+ };
};
+/*
+ * If sqe->file_index is set to this for opcodes that instantiate a new
+ * direct descriptor (like openat/openat2/accept), then io_uring will allocate
+ * an available direct descriptor instead of having the application pass one
+ * in. The picked direct descriptor will be returned in cqe->res, or -ENFILE
+ * if the space is full.
+ */
+#define IORING_FILE_INDEX_ALLOC (~0U)
+
enum {
IOSQE_FIXED_FILE_BIT,
IOSQE_IO_DRAIN_BIT,
@@ -102,8 +123,25 @@ enum {
#define IORING_SETUP_ATTACH_WQ (1U << 5) /* attach to existing wq */
#define IORING_SETUP_R_DISABLED (1U << 6) /* start with ring disabled */
#define IORING_SETUP_SUBMIT_ALL (1U << 7) /* continue submit on error */
+/*
+ * Cooperative task running. When requests complete, they often require
+ * forcing the submitter to transition to the kernel to complete. If this
+ * flag is set, work will be done when the task transitions anyway, rather
+ * than force an inter-processor interrupt reschedule. This avoids interrupting
+ * a task running in userspace, and saves an IPI.
+ */
+#define IORING_SETUP_COOP_TASKRUN (1U << 8)
+/*
+ * If COOP_TASKRUN is set, get notified if task work is available for
+ * running and a kernel transition would be needed to run it. This sets
+ * IORING_SQ_TASKRUN in the sq ring flags. Not valid with COOP_TASKRUN.
+ */
+#define IORING_SETUP_TASKRUN_FLAG (1U << 9)
-enum {
+#define IORING_SETUP_SQE128 (1U << 10) /* SQEs are 128 byte */
+#define IORING_SETUP_CQE32 (1U << 11) /* CQEs are 32 byte */
+
+enum io_uring_op {
IORING_OP_NOP,
IORING_OP_READV,
IORING_OP_WRITEV,
@@ -145,6 +183,12 @@ enum {
IORING_OP_SYMLINKAT,
IORING_OP_LINKAT,
IORING_OP_MSG_RING,
+ IORING_OP_FSETXATTR,
+ IORING_OP_SETXATTR,
+ IORING_OP_FGETXATTR,
+ IORING_OP_GETXATTR,
+ IORING_OP_SOCKET,
+ IORING_OP_URING_CMD,
/* this goes last, obviously */
IORING_OP_LAST,
@@ -188,12 +232,45 @@ enum {
#define IORING_POLL_UPDATE_USER_DATA (1U << 2)
/*
+ * ASYNC_CANCEL flags.
+ *
+ * IORING_ASYNC_CANCEL_ALL Cancel all requests that match the given key
+ * IORING_ASYNC_CANCEL_FD Key off 'fd' for cancelation rather than the
+ * request 'user_data'
+ * IORING_ASYNC_CANCEL_ANY Match any request
+ */
+#define IORING_ASYNC_CANCEL_ALL (1U << 0)
+#define IORING_ASYNC_CANCEL_FD (1U << 1)
+#define IORING_ASYNC_CANCEL_ANY (1U << 2)
+
+/*
+ * send/sendmsg and recv/recvmsg flags (sqe->addr2)
+ *
+ * IORING_RECVSEND_POLL_FIRST If set, instead of first attempting to send
+ * or receive and arm poll if that yields an
+ * -EAGAIN result, arm poll upfront and skip
+ * the initial transfer attempt.
+ */
+#define IORING_RECVSEND_POLL_FIRST (1U << 0)
+
+/*
+ * accept flags stored in sqe->ioprio
+ */
+#define IORING_ACCEPT_MULTISHOT (1U << 0)
+
+/*
* IO completion data structure (Completion Queue Entry)
*/
struct io_uring_cqe {
__u64 user_data; /* sqe->data submission passed back */
__s32 res; /* result code for this event */
__u32 flags;
+
+ /*
+ * If the ring is initialized with IORING_SETUP_CQE32, then this field
+ * contains 16-bytes of padding, doubling the size of the CQE.
+ */
+ __u64 big_cqe[];
};
/*
@@ -201,9 +278,11 @@ struct io_uring_cqe {
*
* IORING_CQE_F_BUFFER If set, the upper 16 bits are the buffer ID
* IORING_CQE_F_MORE If set, parent SQE will generate more CQE entries
+ * IORING_CQE_F_SOCK_NONEMPTY If set, more data to read after socket recv
*/
#define IORING_CQE_F_BUFFER (1U << 0)
#define IORING_CQE_F_MORE (1U << 1)
+#define IORING_CQE_F_SOCK_NONEMPTY (1U << 2)
enum {
IORING_CQE_BUFFER_SHIFT = 16,
@@ -236,6 +315,7 @@ struct io_sqring_offsets {
*/
#define IORING_SQ_NEED_WAKEUP (1U << 0) /* needs io_uring_enter wakeup */
#define IORING_SQ_CQ_OVERFLOW (1U << 1) /* CQ ring is overflown */
+#define IORING_SQ_TASKRUN (1U << 2) /* task should enter the kernel */
struct io_cqring_offsets {
__u32 head;
@@ -333,6 +413,10 @@ enum {
IORING_REGISTER_RING_FDS = 20,
IORING_UNREGISTER_RING_FDS = 21,
+ /* register ring based provide buffer group */
+ IORING_REGISTER_PBUF_RING = 22,
+ IORING_UNREGISTER_PBUF_RING = 23,
+
/* this goes last */
IORING_REGISTER_LAST
};
@@ -350,9 +434,15 @@ struct io_uring_files_update {
__aligned_u64 /* __s32 * */ fds;
};
+/*
+ * Register a fully sparse file space, rather than pass in an array of all
+ * -1 file descriptors.
+ */
+#define IORING_RSRC_REGISTER_SPARSE (1U << 0)
+
struct io_uring_rsrc_register {
__u32 nr;
- __u32 resv;
+ __u32 flags;
__u64 resv2;
__aligned_u64 data;
__aligned_u64 tags;
@@ -404,6 +494,38 @@ struct io_uring_restriction {
__u32 resv2[3];
};
+struct io_uring_buf {
+ __u64 addr;
+ __u32 len;
+ __u16 bid;
+ __u16 resv;
+};
+
+struct io_uring_buf_ring {
+ union {
+ /*
+ * To avoid spilling into more pages than we need to, the
+ * ring tail is overlaid with the io_uring_buf->resv field.
+ */
+ struct {
+ __u64 resv1;
+ __u32 resv2;
+ __u16 resv3;
+ __u16 tail;
+ };
+ struct io_uring_buf bufs[0];
+ };
+};
+
+/* argument for IORING_(UN)REGISTER_PBUF_RING */
+struct io_uring_buf_reg {
+ __u64 ring_addr;
+ __u32 ring_entries;
+ __u16 bgid;
+ __u16 pad;
+ __u64 resv[3];
+};
+
/*
* io_uring_restriction->opcode values
*/
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 91a6fe4e02c0..6a184d260c7f 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -445,7 +445,13 @@ struct kvm_run {
#define KVM_SYSTEM_EVENT_RESET 2
#define KVM_SYSTEM_EVENT_CRASH 3
__u32 type;
- __u64 flags;
+ __u32 ndata;
+ union {
+#ifndef __KERNEL__
+ __u64 flags;
+#endif
+ __u64 data[16];
+ };
} system_event;
/* KVM_EXIT_S390_STSI */
struct {
@@ -1144,6 +1150,8 @@ struct kvm_ppc_resize_hpt {
#define KVM_CAP_S390_MEM_OP_EXTENSION 211
#define KVM_CAP_PMU_CAPABILITY 212
#define KVM_CAP_DISABLE_QUIRKS2 213
+/* #define KVM_CAP_VM_TSC_CONTROL 214 */
+#define KVM_CAP_SYSTEM_EVENT_DATA 215
#ifdef KVM_CAP_IRQ_ROUTING
diff --git a/include/uapi/linux/loop.h b/include/uapi/linux/loop.h
index 98e60801195e..6f63527dd2ed 100644
--- a/include/uapi/linux/loop.h
+++ b/include/uapi/linux/loop.h
@@ -1,11 +1,6 @@
/* SPDX-License-Identifier: GPL-1.0+ WITH Linux-syscall-note */
/*
- * include/linux/loop.h
- *
- * Written by Theodore Ts'o, 3/29/93.
- *
- * Copyright 1993 by Theodore Ts'o. Redistribution of this file is
- * permitted under the GNU General Public License.
+ * Copyright 1993 by Theodore Ts'o.
*/
#ifndef _UAPI_LINUX_LOOP_H
#define _UAPI_LINUX_LOOP_H
diff --git a/include/uapi/linux/nvme_ioctl.h b/include/uapi/linux/nvme_ioctl.h
index b2e43185e3b5..2f76cba67166 100644
--- a/include/uapi/linux/nvme_ioctl.h
+++ b/include/uapi/linux/nvme_ioctl.h
@@ -70,6 +70,28 @@ struct nvme_passthru_cmd64 {
__u64 result;
};
+/* same as struct nvme_passthru_cmd64, minus the 8b result field */
+struct nvme_uring_cmd {
+ __u8 opcode;
+ __u8 flags;
+ __u16 rsvd1;
+ __u32 nsid;
+ __u32 cdw2;
+ __u32 cdw3;
+ __u64 metadata;
+ __u64 addr;
+ __u32 metadata_len;
+ __u32 data_len;
+ __u32 cdw10;
+ __u32 cdw11;
+ __u32 cdw12;
+ __u32 cdw13;
+ __u32 cdw14;
+ __u32 cdw15;
+ __u32 timeout_ms;
+ __u32 rsvd2;
+};
+
#define nvme_admin_cmd nvme_passthru_cmd
#define NVME_IOCTL_ID _IO('N', 0x40)
@@ -83,4 +105,10 @@ struct nvme_passthru_cmd64 {
#define NVME_IOCTL_IO64_CMD _IOWR('N', 0x48, struct nvme_passthru_cmd64)
#define NVME_IOCTL_IO64_CMD_VEC _IOWR('N', 0x49, struct nvme_passthru_cmd64)
+/* io_uring async commands: */
+#define NVME_URING_CMD_IO _IOWR('N', 0x80, struct nvme_uring_cmd)
+#define NVME_URING_CMD_IO_VEC _IOWR('N', 0x81, struct nvme_uring_cmd)
+#define NVME_URING_CMD_ADMIN _IOWR('N', 0x82, struct nvme_uring_cmd)
+#define NVME_URING_CMD_ADMIN_VEC _IOWR('N', 0x83, struct nvme_uring_cmd)
+
#endif /* _UAPI_LINUX_NVME_IOCTL_H */
diff --git a/include/uapi/linux/rfkill.h b/include/uapi/linux/rfkill.h
index 283c5a7b3f2c..db6c8588c1d0 100644
--- a/include/uapi/linux/rfkill.h
+++ b/include/uapi/linux/rfkill.h
@@ -184,7 +184,7 @@ struct rfkill_event_ext {
#define RFKILL_IOC_NOINPUT 1
#define RFKILL_IOCTL_NOINPUT _IO(RFKILL_IOC_MAGIC, RFKILL_IOC_NOINPUT)
#define RFKILL_IOC_MAX_SIZE 2
-#define RFKILL_IOCTL_MAX_SIZE _IOW(RFKILL_IOC_MAGIC, RFKILL_IOC_EXT_SIZE, __u32)
+#define RFKILL_IOCTL_MAX_SIZE _IOW(RFKILL_IOC_MAGIC, RFKILL_IOC_MAX_SIZE, __u32)
/* and that's all userspace gets */
diff --git a/include/uapi/linux/virtio_ids.h b/include/uapi/linux/virtio_ids.h
index 80d76b75bccd..7aa2eb766205 100644
--- a/include/uapi/linux/virtio_ids.h
+++ b/include/uapi/linux/virtio_ids.h
@@ -73,12 +73,12 @@
* Virtio Transitional IDs
*/
-#define VIRTIO_TRANS_ID_NET 1000 /* transitional virtio net */
-#define VIRTIO_TRANS_ID_BLOCK 1001 /* transitional virtio block */
-#define VIRTIO_TRANS_ID_BALLOON 1002 /* transitional virtio balloon */
-#define VIRTIO_TRANS_ID_CONSOLE 1003 /* transitional virtio console */
-#define VIRTIO_TRANS_ID_SCSI 1004 /* transitional virtio SCSI */
-#define VIRTIO_TRANS_ID_RNG 1005 /* transitional virtio rng */
-#define VIRTIO_TRANS_ID_9P 1009 /* transitional virtio 9p console */
+#define VIRTIO_TRANS_ID_NET 0x1000 /* transitional virtio net */
+#define VIRTIO_TRANS_ID_BLOCK 0x1001 /* transitional virtio block */
+#define VIRTIO_TRANS_ID_BALLOON 0x1002 /* transitional virtio balloon */
+#define VIRTIO_TRANS_ID_CONSOLE 0x1003 /* transitional virtio console */
+#define VIRTIO_TRANS_ID_SCSI 0x1004 /* transitional virtio SCSI */
+#define VIRTIO_TRANS_ID_RNG 0x1005 /* transitional virtio rng */
+#define VIRTIO_TRANS_ID_9P 0x1009 /* transitional virtio 9p console */
#endif /* _LINUX_VIRTIO_IDS_H */
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index ea2ee1181921..f3a2abd6d1a1 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -1959,6 +1959,12 @@ void __audit_uring_exit(int success, long code)
{
struct audit_context *ctx = audit_context();
+ if (ctx->dummy) {
+ if (ctx->context != AUDIT_CTX_URING)
+ return;
+ goto out;
+ }
+
if (ctx->context == AUDIT_CTX_SYSCALL) {
/*
* NOTE: See the note in __audit_uring_entry() about the case
diff --git a/kernel/bpf/Kconfig b/kernel/bpf/Kconfig
index d56ee177d5f8..2dfe1079f772 100644
--- a/kernel/bpf/Kconfig
+++ b/kernel/bpf/Kconfig
@@ -27,6 +27,7 @@ config BPF_SYSCALL
bool "Enable bpf() system call"
select BPF
select IRQ_WORK
+ select TASKS_RCU if PREEMPTION
select TASKS_TRACE_RCU
select BINARY_PRINTF
select NET_SOCK_MSG if NET
diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
index 9390bfd9f1cd..71a418858a5e 100644
--- a/kernel/cgroup/cpuset.c
+++ b/kernel/cgroup/cpuset.c
@@ -3390,8 +3390,11 @@ static struct notifier_block cpuset_track_online_nodes_nb = {
*/
void __init cpuset_init_smp(void)
{
- cpumask_copy(top_cpuset.cpus_allowed, cpu_active_mask);
- top_cpuset.mems_allowed = node_states[N_MEMORY];
+ /*
+ * cpus_allowd/mems_allowed set to v2 values in the initial
+ * cpuset_bind() call will be reset to v1 values in another
+ * cpuset_bind() call when v1 cpuset is mounted.
+ */
top_cpuset.old_mems_allowed = top_cpuset.mems_allowed;
cpumask_copy(top_cpuset.effective_cpus, cpu_active_mask);
diff --git a/kernel/cpu.c b/kernel/cpu.c
index d0a9aa0b42e8..02a77ac08399 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -716,14 +716,6 @@ static int cpuhp_up_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st,
/*
* The cpu hotplug threads manage the bringup and teardown of the cpus
*/
-static void cpuhp_create(unsigned int cpu)
-{
- struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
-
- init_completion(&st->done_up);
- init_completion(&st->done_down);
-}
-
static int cpuhp_should_run(unsigned int cpu)
{
struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
@@ -883,15 +875,27 @@ static int cpuhp_kick_ap_work(unsigned int cpu)
static struct smp_hotplug_thread cpuhp_threads = {
.store = &cpuhp_state.thread,
- .create = &cpuhp_create,
.thread_should_run = cpuhp_should_run,
.thread_fn = cpuhp_thread_fun,
.thread_comm = "cpuhp/%u",
.selfparking = true,
};
+static __init void cpuhp_init_state(void)
+{
+ struct cpuhp_cpu_state *st;
+ int cpu;
+
+ for_each_possible_cpu(cpu) {
+ st = per_cpu_ptr(&cpuhp_state, cpu);
+ init_completion(&st->done_up);
+ init_completion(&st->done_down);
+ }
+}
+
void __init cpuhp_threads_init(void)
{
+ cpuhp_init_state();
BUG_ON(smpboot_register_percpu_thread(&cpuhp_threads));
kthread_unpark(this_cpu_read(cpuhp_state.thread));
}
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 23bb19716ad3..7f1e4c5897e7 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -6247,7 +6247,7 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
again:
mutex_lock(&event->mmap_mutex);
if (event->rb) {
- if (event->rb->nr_pages != nr_pages) {
+ if (data_page_nr(event->rb) != nr_pages) {
ret = -EINVAL;
goto unlock;
}
@@ -12217,6 +12217,9 @@ SYSCALL_DEFINE5(perf_event_open,
* Do not allow to attach to a group in a different task
* or CPU context. If we're moving SW events, we'll fix
* this up later, so allow that.
+ *
+ * Racy, not holding group_leader->ctx->mutex, see comment with
+ * perf_event_ctx_lock().
*/
if (!move_group && group_leader->ctx != ctx)
goto err_context;
@@ -12282,6 +12285,7 @@ SYSCALL_DEFINE5(perf_event_open,
} else {
perf_event_ctx_unlock(group_leader, gctx);
move_group = 0;
+ goto not_move_group;
}
}
@@ -12298,7 +12302,17 @@ SYSCALL_DEFINE5(perf_event_open,
}
} else {
mutex_lock(&ctx->mutex);
+
+ /*
+ * Now that we hold ctx->lock, (re)validate group_leader->ctx == ctx,
+ * see the group_leader && !move_group test earlier.
+ */
+ if (group_leader && group_leader->ctx != ctx) {
+ err = -EINVAL;
+ goto err_locked;
+ }
}
+not_move_group:
if (ctx->task == TASK_TOMBSTONE) {
err = -ESRCH;
diff --git a/kernel/events/internal.h b/kernel/events/internal.h
index 082832738c8f..5150d5f84c03 100644
--- a/kernel/events/internal.h
+++ b/kernel/events/internal.h
@@ -116,6 +116,11 @@ static inline int page_order(struct perf_buffer *rb)
}
#endif
+static inline int data_page_nr(struct perf_buffer *rb)
+{
+ return rb->nr_pages << page_order(rb);
+}
+
static inline unsigned long perf_data_size(struct perf_buffer *rb)
{
return rb->nr_pages << (PAGE_SHIFT + page_order(rb));
diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
index 52868716ec35..fb35b926024c 100644
--- a/kernel/events/ring_buffer.c
+++ b/kernel/events/ring_buffer.c
@@ -859,11 +859,6 @@ void rb_free(struct perf_buffer *rb)
}
#else
-static int data_page_nr(struct perf_buffer *rb)
-{
- return rb->nr_pages << page_order(rb);
-}
-
static struct page *
__perf_mmap_to_page(struct perf_buffer *rb, unsigned long pgoff)
{
diff --git a/kernel/fork.c b/kernel/fork.c
index 9796897560ab..35a3beff140b 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -792,6 +792,7 @@ void __mmdrop(struct mm_struct *mm)
mmu_notifier_subscriptions_destroy(mm);
check_mm(mm);
put_user_ns(mm->user_ns);
+ mm_pasid_drop(mm);
free_mm(mm);
}
EXPORT_SYMBOL_GPL(__mmdrop);
@@ -1190,7 +1191,6 @@ static inline void __mmput(struct mm_struct *mm)
}
if (mm->binfmt)
module_put(mm->binfmt->module);
- mm_pasid_drop(mm);
mmdrop(mm);
}
diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h
index 99cbdf55a8bd..f09c60393e55 100644
--- a/kernel/irq/internals.h
+++ b/kernel/irq/internals.h
@@ -29,12 +29,14 @@ extern struct irqaction chained_action;
* IRQTF_WARNED - warning "IRQ_WAKE_THREAD w/o thread_fn" has been printed
* IRQTF_AFFINITY - irq thread is requested to adjust affinity
* IRQTF_FORCED_THREAD - irq action is force threaded
+ * IRQTF_READY - signals that irq thread is ready
*/
enum {
IRQTF_RUNTHREAD,
IRQTF_WARNED,
IRQTF_AFFINITY,
IRQTF_FORCED_THREAD,
+ IRQTF_READY,
};
/*
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index 939d21cd55c3..d323b180b0f3 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -407,6 +407,7 @@ static struct irq_desc *alloc_desc(int irq, int node, unsigned int flags,
lockdep_set_class(&desc->lock, &irq_desc_lock_class);
mutex_init(&desc->request_mutex);
init_rcu_head(&desc->rcu);
+ init_waitqueue_head(&desc->wait_for_threads);
desc_set_defaults(irq, desc, node, affinity, owner);
irqd_set(&desc->irq_data, flags);
@@ -575,6 +576,7 @@ int __init early_irq_init(void)
raw_spin_lock_init(&desc[i].lock);
lockdep_set_class(&desc[i].lock, &irq_desc_lock_class);
mutex_init(&desc[i].request_mutex);
+ init_waitqueue_head(&desc[i].wait_for_threads);
desc_set_defaults(i, &desc[i], node, NULL, NULL);
}
return arch_early_irq_init();
@@ -699,7 +701,6 @@ EXPORT_SYMBOL_GPL(generic_handle_irq_safe);
*/
int generic_handle_domain_irq(struct irq_domain *domain, unsigned int hwirq)
{
- WARN_ON_ONCE(!in_hardirq());
return handle_irq_desc(irq_resolve_mapping(domain, hwirq));
}
EXPORT_SYMBOL_GPL(generic_handle_domain_irq);
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index f1d5a94c6c9f..8c396319d5ac 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -1264,6 +1264,31 @@ static void irq_wake_secondary(struct irq_desc *desc, struct irqaction *action)
}
/*
+ * Internal function to notify that a interrupt thread is ready.
+ */
+static void irq_thread_set_ready(struct irq_desc *desc,
+ struct irqaction *action)
+{
+ set_bit(IRQTF_READY, &action->thread_flags);
+ wake_up(&desc->wait_for_threads);
+}
+
+/*
+ * Internal function to wake up a interrupt thread and wait until it is
+ * ready.
+ */
+static void wake_up_and_wait_for_irq_thread_ready(struct irq_desc *desc,
+ struct irqaction *action)
+{
+ if (!action || !action->thread)
+ return;
+
+ wake_up_process(action->thread);
+ wait_event(desc->wait_for_threads,
+ test_bit(IRQTF_READY, &action->thread_flags));
+}
+
+/*
* Interrupt handler thread
*/
static int irq_thread(void *data)
@@ -1274,6 +1299,8 @@ static int irq_thread(void *data)
irqreturn_t (*handler_fn)(struct irq_desc *desc,
struct irqaction *action);
+ irq_thread_set_ready(desc, action);
+
sched_set_fifo(current);
if (force_irqthreads() && test_bit(IRQTF_FORCED_THREAD,
@@ -1698,8 +1725,6 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
}
if (!shared) {
- init_waitqueue_head(&desc->wait_for_threads);
-
/* Setup the type (level, edge polarity) if configured: */
if (new->flags & IRQF_TRIGGER_MASK) {
ret = __irq_set_trigger(desc,
@@ -1795,14 +1820,8 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
irq_setup_timings(desc, new);
- /*
- * Strictly no need to wake it up, but hung_task complains
- * when no hard interrupt wakes the thread up.
- */
- if (new->thread)
- wake_up_process(new->thread);
- if (new->secondary)
- wake_up_process(new->secondary->thread);
+ wake_up_and_wait_for_irq_thread_ready(desc, new);
+ wake_up_and_wait_for_irq_thread_ready(desc, new->secondary);
register_irq_proc(irq, desc);
new->dir = NULL;
diff --git a/kernel/kcov.c b/kernel/kcov.c
index 475524bd900a..b3732b210593 100644
--- a/kernel/kcov.c
+++ b/kernel/kcov.c
@@ -475,8 +475,11 @@ static int kcov_mmap(struct file *filep, struct vm_area_struct *vma)
vma->vm_flags |= VM_DONTEXPAND;
for (off = 0; off < size; off += PAGE_SIZE) {
page = vmalloc_to_page(kcov->area + off);
- if (vm_insert_page(vma, vma->vm_start + off, page))
- WARN_ONCE(1, "vm_insert_page() failed");
+ res = vm_insert_page(vma, vma->vm_start + off, page);
+ if (res) {
+ pr_warn_once("kcov: vm_insert_page() failed\n");
+ return res;
+ }
}
return 0;
exit:
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index dbe57df2e199..dd58c0be9ce2 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -2126,7 +2126,7 @@ static void kretprobe_rethook_handler(struct rethook_node *rh, void *data,
struct kprobe_ctlblk *kcb;
/* The data must NOT be null. This means rethook data structure is broken. */
- if (WARN_ON_ONCE(!data))
+ if (WARN_ON_ONCE(!data) || !rp->handler)
return;
__this_cpu_write(current_kprobe, &rp->kp);
diff --git a/kernel/kthread.c b/kernel/kthread.c
index 50265f69a135..544fd4097406 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -1522,5 +1522,4 @@ struct cgroup_subsys_state *kthread_blkcg(void)
}
return NULL;
}
-EXPORT_SYMBOL(kthread_blkcg);
#endif
diff --git a/kernel/rcu/Kconfig b/kernel/rcu/Kconfig
index bf8e341e75b4..1c630e573548 100644
--- a/kernel/rcu/Kconfig
+++ b/kernel/rcu/Kconfig
@@ -77,31 +77,56 @@ config TASKS_RCU_GENERIC
This option enables generic infrastructure code supporting
task-based RCU implementations. Not for manual selection.
+config FORCE_TASKS_RCU
+ bool "Force selection of TASKS_RCU"
+ depends on RCU_EXPERT
+ select TASKS_RCU
+ default n
+ help
+ This option force-enables a task-based RCU implementation
+ that uses only voluntary context switch (not preemption!),
+ idle, and user-mode execution as quiescent states. Not for
+ manual selection in most cases.
+
config TASKS_RCU
- def_bool PREEMPTION
+ bool
+ default n
+ select IRQ_WORK
+
+config FORCE_TASKS_RUDE_RCU
+ bool "Force selection of Tasks Rude RCU"
+ depends on RCU_EXPERT
+ select TASKS_RUDE_RCU
+ default n
help
- This option enables a task-based RCU implementation that uses
- only voluntary context switch (not preemption!), idle, and
- user-mode execution as quiescent states. Not for manual selection.
+ This option force-enables a task-based RCU implementation
+ that uses only context switch (including preemption) and
+ user-mode execution as quiescent states. It forces IPIs and
+ context switches on all online CPUs, including idle ones,
+ so use with caution. Not for manual selection in most cases.
config TASKS_RUDE_RCU
- def_bool 0
+ bool
+ default n
+ select IRQ_WORK
+
+config FORCE_TASKS_TRACE_RCU
+ bool "Force selection of Tasks Trace RCU"
+ depends on RCU_EXPERT
+ select TASKS_TRACE_RCU
+ default n
help
This option enables a task-based RCU implementation that uses
- only context switch (including preemption) and user-mode
- execution as quiescent states. It forces IPIs and context
- switches on all online CPUs, including idle ones, so use
- with caution.
+ explicit rcu_read_lock_trace() read-side markers, and allows
+ these readers to appear in the idle loop as well as on the
+ CPU hotplug code paths. It can force IPIs on online CPUs,
+ including idle ones, so use with caution. Not for manual
+ selection in most cases.
config TASKS_TRACE_RCU
- def_bool 0
+ bool
+ default n
select IRQ_WORK
- help
- This option enables a task-based RCU implementation that uses
- explicit rcu_read_lock_trace() read-side markers, and allows
- these readers to appear in the idle loop as well as on the CPU
- hotplug code paths. It can force IPIs on online CPUs, including
- idle ones, so use with caution.
config RCU_STALL_COMMON
def_bool TREE_RCU
@@ -195,6 +220,20 @@ config RCU_BOOST_DELAY
Accept the default if unsure.
+config RCU_EXP_KTHREAD
+ bool "Perform RCU expedited work in a real-time kthread"
+ depends on RCU_BOOST && RCU_EXPERT
+ default !PREEMPT_RT && NR_CPUS <= 32
+ help
+ Use this option to further reduce the latencies of expedited
+ grace periods at the expense of being more disruptive.
+
+ This option is disabled by default on PREEMPT_RT=y kernels which
+ disable expedited grace periods after boot by unconditionally
+ setting rcupdate.rcu_normal_after_boot=1.
+
+ Accept the default if unsure.
+
config RCU_NOCB_CPU
bool "Offload RCU callback processing from boot-selected CPUs"
depends on TREE_RCU
@@ -225,7 +264,7 @@ config RCU_NOCB_CPU
config TASKS_TRACE_RCU_READ_MB
bool "Tasks Trace RCU readers use memory barriers in user and idle"
- depends on RCU_EXPERT
+ depends on RCU_EXPERT && TASKS_TRACE_RCU
default PREEMPT_RT || NR_CPUS < 8
help
Use this option to further reduce the number of IPIs sent
diff --git a/kernel/rcu/Kconfig.debug b/kernel/rcu/Kconfig.debug
index 4fd64999300f..9b64e55d4f61 100644
--- a/kernel/rcu/Kconfig.debug
+++ b/kernel/rcu/Kconfig.debug
@@ -28,9 +28,6 @@ config RCU_SCALE_TEST
depends on DEBUG_KERNEL
select TORTURE_TEST
select SRCU
- select TASKS_RCU
- select TASKS_RUDE_RCU
- select TASKS_TRACE_RCU
default n
help
This option provides a kernel module that runs performance
@@ -47,9 +44,6 @@ config RCU_TORTURE_TEST
depends on DEBUG_KERNEL
select TORTURE_TEST
select SRCU
- select TASKS_RCU
- select TASKS_RUDE_RCU
- select TASKS_TRACE_RCU
default n
help
This option provides a kernel module that runs torture tests
@@ -66,9 +60,6 @@ config RCU_REF_SCALE_TEST
depends on DEBUG_KERNEL
select TORTURE_TEST
select SRCU
- select TASKS_RCU
- select TASKS_RUDE_RCU
- select TASKS_TRACE_RCU
default n
help
This option provides a kernel module that runs performance tests
@@ -91,6 +82,20 @@ config RCU_CPU_STALL_TIMEOUT
RCU grace period persists, additional CPU stall warnings are
printed at more widely spaced intervals.
+config RCU_EXP_CPU_STALL_TIMEOUT
+ int "Expedited RCU CPU stall timeout in milliseconds"
+ depends on RCU_STALL_COMMON
+ range 0 21000
+ default 20 if ANDROID
+ default 0 if !ANDROID
+ help
+ If a given expedited RCU grace period extends more than the
+ specified number of milliseconds, a CPU stall warning is printed.
+ If the RCU grace period persists, additional CPU stall warnings
+ are printed at more widely spaced intervals. A value of zero
+ says to use the RCU_CPU_STALL_TIMEOUT value converted from
+ seconds to milliseconds.
+
config RCU_TRACE
bool "Enable tracing for RCU"
depends on DEBUG_KERNEL
diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h
index 24b5f2c2de87..152492d52715 100644
--- a/kernel/rcu/rcu.h
+++ b/kernel/rcu/rcu.h
@@ -210,7 +210,9 @@ static inline bool rcu_stall_is_suppressed_at_boot(void)
extern int rcu_cpu_stall_ftrace_dump;
extern int rcu_cpu_stall_suppress;
extern int rcu_cpu_stall_timeout;
+extern int rcu_exp_cpu_stall_timeout;
int rcu_jiffies_till_stall_check(void);
+int rcu_exp_jiffies_till_stall_check(void);
static inline bool rcu_stall_is_suppressed(void)
{
@@ -523,6 +525,8 @@ static inline bool rcu_check_boost_fail(unsigned long gp_state, int *cpup) { ret
static inline void show_rcu_gp_kthreads(void) { }
static inline int rcu_get_gp_kthreads_prio(void) { return 0; }
static inline void rcu_fwd_progress_check(unsigned long j) { }
+static inline void rcu_gp_slow_register(atomic_t *rgssp) { }
+static inline void rcu_gp_slow_unregister(atomic_t *rgssp) { }
#else /* #ifdef CONFIG_TINY_RCU */
bool rcu_dynticks_zero_in_eqs(int cpu, int *vp);
unsigned long rcu_get_gp_seq(void);
@@ -534,14 +538,19 @@ int rcu_get_gp_kthreads_prio(void);
void rcu_fwd_progress_check(unsigned long j);
void rcu_force_quiescent_state(void);
extern struct workqueue_struct *rcu_gp_wq;
+#ifdef CONFIG_RCU_EXP_KTHREAD
+extern struct kthread_worker *rcu_exp_gp_kworker;
+extern struct kthread_worker *rcu_exp_par_gp_kworker;
+#else /* !CONFIG_RCU_EXP_KTHREAD */
extern struct workqueue_struct *rcu_par_gp_wq;
+#endif /* CONFIG_RCU_EXP_KTHREAD */
+void rcu_gp_slow_register(atomic_t *rgssp);
+void rcu_gp_slow_unregister(atomic_t *rgssp);
#endif /* #else #ifdef CONFIG_TINY_RCU */
#ifdef CONFIG_RCU_NOCB_CPU
-bool rcu_is_nocb_cpu(int cpu);
void rcu_bind_current_to_nocb(void);
#else
-static inline bool rcu_is_nocb_cpu(int cpu) { return false; }
static inline void rcu_bind_current_to_nocb(void) { }
#endif
diff --git a/kernel/rcu/rcu_segcblist.c b/kernel/rcu/rcu_segcblist.c
index 81145c3ece25..c54ea2b6a36b 100644
--- a/kernel/rcu/rcu_segcblist.c
+++ b/kernel/rcu/rcu_segcblist.c
@@ -505,10 +505,10 @@ void rcu_segcblist_advance(struct rcu_segcblist *rsclp, unsigned long seq)
WRITE_ONCE(rsclp->tails[j], rsclp->tails[RCU_DONE_TAIL]);
/*
- * Callbacks moved, so clean up the misordered ->tails[] pointers
- * that now point into the middle of the list of ready-to-invoke
- * callbacks. The overall effect is to copy down the later pointers
- * into the gap that was created by the now-ready segments.
+ * Callbacks moved, so there might be an empty RCU_WAIT_TAIL
+ * and a non-empty RCU_NEXT_READY_TAIL. If so, copy the
+ * RCU_NEXT_READY_TAIL segment to fill the RCU_WAIT_TAIL gap
+ * created by the now-ready-to-invoke segments.
*/
for (j = RCU_WAIT_TAIL; i < RCU_NEXT_TAIL; i++, j++) {
if (rsclp->tails[j] == rsclp->tails[RCU_NEXT_TAIL])
diff --git a/kernel/rcu/rcuscale.c b/kernel/rcu/rcuscale.c
index 5e4f1f83d38e..277a5bfb37d4 100644
--- a/kernel/rcu/rcuscale.c
+++ b/kernel/rcu/rcuscale.c
@@ -268,6 +268,8 @@ static struct rcu_scale_ops srcud_ops = {
.name = "srcud"
};
+#ifdef CONFIG_TASKS_RCU
+
/*
* Definitions for RCU-tasks scalability testing.
*/
@@ -295,6 +297,16 @@ static struct rcu_scale_ops tasks_ops = {
.name = "tasks"
};
+#define TASKS_OPS &tasks_ops,
+
+#else // #ifdef CONFIG_TASKS_RCU
+
+#define TASKS_OPS
+
+#endif // #else // #ifdef CONFIG_TASKS_RCU
+
+#ifdef CONFIG_TASKS_TRACE_RCU
+
/*
* Definitions for RCU-tasks-trace scalability testing.
*/
@@ -324,6 +336,14 @@ static struct rcu_scale_ops tasks_tracing_ops = {
.name = "tasks-tracing"
};
+#define TASKS_TRACING_OPS &tasks_tracing_ops,
+
+#else // #ifdef CONFIG_TASKS_TRACE_RCU
+
+#define TASKS_TRACING_OPS
+
+#endif // #else // #ifdef CONFIG_TASKS_TRACE_RCU
+
static unsigned long rcuscale_seq_diff(unsigned long new, unsigned long old)
{
if (!cur_ops->gp_diff)
@@ -797,7 +817,7 @@ rcu_scale_init(void)
long i;
int firsterr = 0;
static struct rcu_scale_ops *scale_ops[] = {
- &rcu_ops, &srcu_ops, &srcud_ops, &tasks_ops, &tasks_tracing_ops
+ &rcu_ops, &srcu_ops, &srcud_ops, TASKS_OPS TASKS_TRACING_OPS
};
if (!torture_init_begin(scale_type, verbose))
diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
index 55d049c39608..7120165a9342 100644
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -738,6 +738,50 @@ static struct rcu_torture_ops busted_srcud_ops = {
};
/*
+ * Definitions for trivial CONFIG_PREEMPT=n-only torture testing.
+ * This implementation does not necessarily work well with CPU hotplug.
+ */
+
+static void synchronize_rcu_trivial(void)
+{
+ int cpu;
+
+ for_each_online_cpu(cpu) {
+ rcutorture_sched_setaffinity(current->pid, cpumask_of(cpu));
+ WARN_ON_ONCE(raw_smp_processor_id() != cpu);
+ }
+}
+
+static int rcu_torture_read_lock_trivial(void) __acquires(RCU)
+{
+ preempt_disable();
+ return 0;
+}
+
+static void rcu_torture_read_unlock_trivial(int idx) __releases(RCU)
+{
+ preempt_enable();
+}
+
+static struct rcu_torture_ops trivial_ops = {
+ .ttype = RCU_TRIVIAL_FLAVOR,
+ .init = rcu_sync_torture_init,
+ .readlock = rcu_torture_read_lock_trivial,
+ .read_delay = rcu_read_delay, /* just reuse rcu's version. */
+ .readunlock = rcu_torture_read_unlock_trivial,
+ .readlock_held = torture_readlock_not_held,
+ .get_gp_seq = rcu_no_completed,
+ .sync = synchronize_rcu_trivial,
+ .exp_sync = synchronize_rcu_trivial,
+ .fqs = NULL,
+ .stats = NULL,
+ .irq_capable = 1,
+ .name = "trivial"
+};
+
+#ifdef CONFIG_TASKS_RCU
+
+/*
* Definitions for RCU-tasks torture testing.
*/
@@ -780,47 +824,16 @@ static struct rcu_torture_ops tasks_ops = {
.name = "tasks"
};
-/*
- * Definitions for trivial CONFIG_PREEMPT=n-only torture testing.
- * This implementation does not necessarily work well with CPU hotplug.
- */
+#define TASKS_OPS &tasks_ops,
-static void synchronize_rcu_trivial(void)
-{
- int cpu;
+#else // #ifdef CONFIG_TASKS_RCU
- for_each_online_cpu(cpu) {
- rcutorture_sched_setaffinity(current->pid, cpumask_of(cpu));
- WARN_ON_ONCE(raw_smp_processor_id() != cpu);
- }
-}
+#define TASKS_OPS
-static int rcu_torture_read_lock_trivial(void) __acquires(RCU)
-{
- preempt_disable();
- return 0;
-}
+#endif // #else #ifdef CONFIG_TASKS_RCU
-static void rcu_torture_read_unlock_trivial(int idx) __releases(RCU)
-{
- preempt_enable();
-}
-static struct rcu_torture_ops trivial_ops = {
- .ttype = RCU_TRIVIAL_FLAVOR,
- .init = rcu_sync_torture_init,
- .readlock = rcu_torture_read_lock_trivial,
- .read_delay = rcu_read_delay, /* just reuse rcu's version. */
- .readunlock = rcu_torture_read_unlock_trivial,
- .readlock_held = torture_readlock_not_held,
- .get_gp_seq = rcu_no_completed,
- .sync = synchronize_rcu_trivial,
- .exp_sync = synchronize_rcu_trivial,
- .fqs = NULL,
- .stats = NULL,
- .irq_capable = 1,
- .name = "trivial"
-};
+#ifdef CONFIG_TASKS_RUDE_RCU
/*
* Definitions for rude RCU-tasks torture testing.
@@ -851,6 +864,17 @@ static struct rcu_torture_ops tasks_rude_ops = {
.name = "tasks-rude"
};
+#define TASKS_RUDE_OPS &tasks_rude_ops,
+
+#else // #ifdef CONFIG_TASKS_RUDE_RCU
+
+#define TASKS_RUDE_OPS
+
+#endif // #else #ifdef CONFIG_TASKS_RUDE_RCU
+
+
+#ifdef CONFIG_TASKS_TRACE_RCU
+
/*
* Definitions for tracing RCU-tasks torture testing.
*/
@@ -893,6 +917,15 @@ static struct rcu_torture_ops tasks_tracing_ops = {
.name = "tasks-tracing"
};
+#define TASKS_TRACING_OPS &tasks_tracing_ops,
+
+#else // #ifdef CONFIG_TASKS_TRACE_RCU
+
+#define TASKS_TRACING_OPS
+
+#endif // #else #ifdef CONFIG_TASKS_TRACE_RCU
+
+
static unsigned long rcutorture_seq_diff(unsigned long new, unsigned long old)
{
if (!cur_ops->gp_diff)
@@ -1178,7 +1211,7 @@ rcu_torture_writer(void *arg)
" GP expediting controlled from boot/sysfs for %s.\n",
torture_type, cur_ops->name);
if (WARN_ONCE(nsynctypes == 0,
- "rcu_torture_writer: No update-side primitives.\n")) {
+ "%s: No update-side primitives.\n", __func__)) {
/*
* No updates primitives, so don't try updating.
* The resulting test won't be testing much, hence the
@@ -1186,6 +1219,7 @@ rcu_torture_writer(void *arg)
*/
rcu_torture_writer_state = RTWS_STOPPING;
torture_kthread_stopping("rcu_torture_writer");
+ return 0;
}
do {
@@ -1322,6 +1356,17 @@ rcu_torture_fakewriter(void *arg)
VERBOSE_TOROUT_STRING("rcu_torture_fakewriter task started");
set_user_nice(current, MAX_NICE);
+ if (WARN_ONCE(nsynctypes == 0,
+ "%s: No update-side primitives.\n", __func__)) {
+ /*
+ * No updates primitives, so don't try updating.
+ * The resulting test won't be testing much, hence the
+ * above WARN_ONCE().
+ */
+ torture_kthread_stopping("rcu_torture_fakewriter");
+ return 0;
+ }
+
do {
torture_hrtimeout_jiffies(torture_random(&rand) % 10, &rand);
if (cur_ops->cb_barrier != NULL &&
@@ -2916,10 +2961,12 @@ rcu_torture_cleanup(void)
pr_info("%s: Invoking %pS().\n", __func__, cur_ops->cb_barrier);
cur_ops->cb_barrier();
}
+ rcu_gp_slow_unregister(NULL);
return;
}
if (!cur_ops) {
torture_cleanup_end();
+ rcu_gp_slow_unregister(NULL);
return;
}
@@ -3016,6 +3063,7 @@ rcu_torture_cleanup(void)
else
rcu_torture_print_module_parms(cur_ops, "End of test: SUCCESS");
torture_cleanup_end();
+ rcu_gp_slow_unregister(&rcu_fwd_cb_nodelay);
}
#ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD
@@ -3096,9 +3144,9 @@ rcu_torture_init(void)
int flags = 0;
unsigned long gp_seq = 0;
static struct rcu_torture_ops *torture_ops[] = {
- &rcu_ops, &rcu_busted_ops, &srcu_ops, &srcud_ops,
- &busted_srcud_ops, &tasks_ops, &tasks_rude_ops,
- &tasks_tracing_ops, &trivial_ops,
+ &rcu_ops, &rcu_busted_ops, &srcu_ops, &srcud_ops, &busted_srcud_ops,
+ TASKS_OPS TASKS_RUDE_OPS TASKS_TRACING_OPS
+ &trivial_ops,
};
if (!torture_init_begin(torture_type, verbose))
@@ -3320,6 +3368,7 @@ rcu_torture_init(void)
if (object_debug)
rcu_test_debug_objects();
torture_init_end();
+ rcu_gp_slow_register(&rcu_fwd_cb_nodelay);
return 0;
unwind:
diff --git a/kernel/rcu/refscale.c b/kernel/rcu/refscale.c
index 5489ff7f478e..909644abee67 100644
--- a/kernel/rcu/refscale.c
+++ b/kernel/rcu/refscale.c
@@ -207,6 +207,8 @@ static struct ref_scale_ops srcu_ops = {
.name = "srcu"
};
+#ifdef CONFIG_TASKS_RCU
+
// Definitions for RCU Tasks ref scale testing: Empty read markers.
// These definitions also work for RCU Rude readers.
static void rcu_tasks_ref_scale_read_section(const int nloops)
@@ -232,6 +234,16 @@ static struct ref_scale_ops rcu_tasks_ops = {
.name = "rcu-tasks"
};
+#define RCU_TASKS_OPS &rcu_tasks_ops,
+
+#else // #ifdef CONFIG_TASKS_RCU
+
+#define RCU_TASKS_OPS
+
+#endif // #else // #ifdef CONFIG_TASKS_RCU
+
+#ifdef CONFIG_TASKS_TRACE_RCU
+
// Definitions for RCU Tasks Trace ref scale testing.
static void rcu_trace_ref_scale_read_section(const int nloops)
{
@@ -261,6 +273,14 @@ static struct ref_scale_ops rcu_trace_ops = {
.name = "rcu-trace"
};
+#define RCU_TRACE_OPS &rcu_trace_ops,
+
+#else // #ifdef CONFIG_TASKS_TRACE_RCU
+
+#define RCU_TRACE_OPS
+
+#endif // #else // #ifdef CONFIG_TASKS_TRACE_RCU
+
// Definitions for reference count
static atomic_t refcnt;
@@ -790,7 +810,7 @@ ref_scale_init(void)
long i;
int firsterr = 0;
static struct ref_scale_ops *scale_ops[] = {
- &rcu_ops, &srcu_ops, &rcu_trace_ops, &rcu_tasks_ops, &refcnt_ops, &rwlock_ops,
+ &rcu_ops, &srcu_ops, RCU_TRACE_OPS RCU_TASKS_OPS &refcnt_ops, &rwlock_ops,
&rwsem_ops, &lock_ops, &lock_irq_ops, &acqrel_ops, &clock_ops,
};
diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c
index 6833d8887181..50ba70f019de 100644
--- a/kernel/rcu/srcutree.c
+++ b/kernel/rcu/srcutree.c
@@ -24,6 +24,7 @@
#include <linux/smp.h>
#include <linux/delay.h>
#include <linux/module.h>
+#include <linux/slab.h>
#include <linux/srcu.h>
#include "rcu.h"
@@ -38,6 +39,35 @@ module_param(exp_holdoff, ulong, 0444);
static ulong counter_wrap_check = (ULONG_MAX >> 2);
module_param(counter_wrap_check, ulong, 0444);
+/*
+ * Control conversion to SRCU_SIZE_BIG:
+ * 0: Don't convert at all.
+ * 1: Convert at init_srcu_struct() time.
+ * 2: Convert when rcutorture invokes srcu_torture_stats_print().
+ * 3: Decide at boot time based on system shape (default).
+ * 0x1x: Convert when excessive contention encountered.
+ */
+#define SRCU_SIZING_NONE 0
+#define SRCU_SIZING_INIT 1
+#define SRCU_SIZING_TORTURE 2
+#define SRCU_SIZING_AUTO 3
+#define SRCU_SIZING_CONTEND 0x10
+#define SRCU_SIZING_IS(x) ((convert_to_big & ~SRCU_SIZING_CONTEND) == x)
+#define SRCU_SIZING_IS_NONE() (SRCU_SIZING_IS(SRCU_SIZING_NONE))
+#define SRCU_SIZING_IS_INIT() (SRCU_SIZING_IS(SRCU_SIZING_INIT))
+#define SRCU_SIZING_IS_TORTURE() (SRCU_SIZING_IS(SRCU_SIZING_TORTURE))
+#define SRCU_SIZING_IS_CONTEND() (convert_to_big & SRCU_SIZING_CONTEND)
+static int convert_to_big = SRCU_SIZING_AUTO;
+module_param(convert_to_big, int, 0444);
+
+/* Number of CPUs to trigger init_srcu_struct()-time transition to big. */
+static int big_cpu_lim __read_mostly = 128;
+module_param(big_cpu_lim, int, 0444);
+
+/* Contention events per jiffy to initiate transition to big. */
+static int small_contention_lim __read_mostly = 100;
+module_param(small_contention_lim, int, 0444);
+
/* Early-boot callback-management, so early that no lock is required! */
static LIST_HEAD(srcu_boot_list);
static bool __read_mostly srcu_init_done;
@@ -48,39 +78,90 @@ static void process_srcu(struct work_struct *work);
static void srcu_delay_timer(struct timer_list *t);
/* Wrappers for lock acquisition and release, see raw_spin_lock_rcu_node(). */
-#define spin_lock_rcu_node(p) \
-do { \
- spin_lock(&ACCESS_PRIVATE(p, lock)); \
- smp_mb__after_unlock_lock(); \
+#define spin_lock_rcu_node(p) \
+do { \
+ spin_lock(&ACCESS_PRIVATE(p, lock)); \
+ smp_mb__after_unlock_lock(); \
} while (0)
#define spin_unlock_rcu_node(p) spin_unlock(&ACCESS_PRIVATE(p, lock))
-#define spin_lock_irq_rcu_node(p) \
-do { \
- spin_lock_irq(&ACCESS_PRIVATE(p, lock)); \
- smp_mb__after_unlock_lock(); \
+#define spin_lock_irq_rcu_node(p) \
+do { \
+ spin_lock_irq(&ACCESS_PRIVATE(p, lock)); \
+ smp_mb__after_unlock_lock(); \
} while (0)
-#define spin_unlock_irq_rcu_node(p) \
+#define spin_unlock_irq_rcu_node(p) \
spin_unlock_irq(&ACCESS_PRIVATE(p, lock))
-#define spin_lock_irqsave_rcu_node(p, flags) \
-do { \
- spin_lock_irqsave(&ACCESS_PRIVATE(p, lock), flags); \
- smp_mb__after_unlock_lock(); \
+#define spin_lock_irqsave_rcu_node(p, flags) \
+do { \
+ spin_lock_irqsave(&ACCESS_PRIVATE(p, lock), flags); \
+ smp_mb__after_unlock_lock(); \
} while (0)
-#define spin_unlock_irqrestore_rcu_node(p, flags) \
- spin_unlock_irqrestore(&ACCESS_PRIVATE(p, lock), flags) \
+#define spin_trylock_irqsave_rcu_node(p, flags) \
+({ \
+ bool ___locked = spin_trylock_irqsave(&ACCESS_PRIVATE(p, lock), flags); \
+ \
+ if (___locked) \
+ smp_mb__after_unlock_lock(); \
+ ___locked; \
+})
+
+#define spin_unlock_irqrestore_rcu_node(p, flags) \
+ spin_unlock_irqrestore(&ACCESS_PRIVATE(p, lock), flags) \
/*
- * Initialize SRCU combining tree. Note that statically allocated
+ * Initialize SRCU per-CPU data. Note that statically allocated
* srcu_struct structures might already have srcu_read_lock() and
* srcu_read_unlock() running against them. So if the is_static parameter
* is set, don't initialize ->srcu_lock_count[] and ->srcu_unlock_count[].
*/
-static void init_srcu_struct_nodes(struct srcu_struct *ssp)
+static void init_srcu_struct_data(struct srcu_struct *ssp)
+{
+ int cpu;
+ struct srcu_data *sdp;
+
+ /*
+ * Initialize the per-CPU srcu_data array, which feeds into the
+ * leaves of the srcu_node tree.
+ */
+ WARN_ON_ONCE(ARRAY_SIZE(sdp->srcu_lock_count) !=
+ ARRAY_SIZE(sdp->srcu_unlock_count));
+ for_each_possible_cpu(cpu) {
+ sdp = per_cpu_ptr(ssp->sda, cpu);
+ spin_lock_init(&ACCESS_PRIVATE(sdp, lock));
+ rcu_segcblist_init(&sdp->srcu_cblist);
+ sdp->srcu_cblist_invoking = false;
+ sdp->srcu_gp_seq_needed = ssp->srcu_gp_seq;
+ sdp->srcu_gp_seq_needed_exp = ssp->srcu_gp_seq;
+ sdp->mynode = NULL;
+ sdp->cpu = cpu;
+ INIT_WORK(&sdp->work, srcu_invoke_callbacks);
+ timer_setup(&sdp->delay_work, srcu_delay_timer, 0);
+ sdp->ssp = ssp;
+ }
+}
+
+/* Invalid seq state, used during snp node initialization */
+#define SRCU_SNP_INIT_SEQ 0x2
+
+/*
+ * Check whether sequence number corresponding to snp node,
+ * is invalid.
+ */
+static inline bool srcu_invl_snp_seq(unsigned long s)
+{
+ return rcu_seq_state(s) == SRCU_SNP_INIT_SEQ;
+}
+
+/*
+ * Allocated and initialize SRCU combining tree. Returns @true if
+ * allocation succeeded and @false otherwise.
+ */
+static bool init_srcu_struct_nodes(struct srcu_struct *ssp, gfp_t gfp_flags)
{
int cpu;
int i;
@@ -92,6 +173,9 @@ static void init_srcu_struct_nodes(struct srcu_struct *ssp)
/* Initialize geometry if it has not already been initialized. */
rcu_init_geometry();
+ ssp->node = kcalloc(rcu_num_nodes, sizeof(*ssp->node), gfp_flags);
+ if (!ssp->node)
+ return false;
/* Work out the overall tree geometry. */
ssp->level[0] = &ssp->node[0];
@@ -105,10 +189,10 @@ static void init_srcu_struct_nodes(struct srcu_struct *ssp)
WARN_ON_ONCE(ARRAY_SIZE(snp->srcu_have_cbs) !=
ARRAY_SIZE(snp->srcu_data_have_cbs));
for (i = 0; i < ARRAY_SIZE(snp->srcu_have_cbs); i++) {
- snp->srcu_have_cbs[i] = 0;
+ snp->srcu_have_cbs[i] = SRCU_SNP_INIT_SEQ;
snp->srcu_data_have_cbs[i] = 0;
}
- snp->srcu_gp_seq_needed_exp = 0;
+ snp->srcu_gp_seq_needed_exp = SRCU_SNP_INIT_SEQ;
snp->grplo = -1;
snp->grphi = -1;
if (snp == &ssp->node[0]) {
@@ -129,39 +213,31 @@ static void init_srcu_struct_nodes(struct srcu_struct *ssp)
* Initialize the per-CPU srcu_data array, which feeds into the
* leaves of the srcu_node tree.
*/
- WARN_ON_ONCE(ARRAY_SIZE(sdp->srcu_lock_count) !=
- ARRAY_SIZE(sdp->srcu_unlock_count));
level = rcu_num_lvls - 1;
snp_first = ssp->level[level];
for_each_possible_cpu(cpu) {
sdp = per_cpu_ptr(ssp->sda, cpu);
- spin_lock_init(&ACCESS_PRIVATE(sdp, lock));
- rcu_segcblist_init(&sdp->srcu_cblist);
- sdp->srcu_cblist_invoking = false;
- sdp->srcu_gp_seq_needed = ssp->srcu_gp_seq;
- sdp->srcu_gp_seq_needed_exp = ssp->srcu_gp_seq;
sdp->mynode = &snp_first[cpu / levelspread[level]];
for (snp = sdp->mynode; snp != NULL; snp = snp->srcu_parent) {
if (snp->grplo < 0)
snp->grplo = cpu;
snp->grphi = cpu;
}
- sdp->cpu = cpu;
- INIT_WORK(&sdp->work, srcu_invoke_callbacks);
- timer_setup(&sdp->delay_work, srcu_delay_timer, 0);
- sdp->ssp = ssp;
sdp->grpmask = 1 << (cpu - sdp->mynode->grplo);
}
+ smp_store_release(&ssp->srcu_size_state, SRCU_SIZE_WAIT_BARRIER);
+ return true;
}
/*
* Initialize non-compile-time initialized fields, including the
- * associated srcu_node and srcu_data structures. The is_static
- * parameter is passed through to init_srcu_struct_nodes(), and
- * also tells us that ->sda has already been wired up to srcu_data.
+ * associated srcu_node and srcu_data structures. The is_static parameter
+ * tells us that ->sda has already been wired up to srcu_data.
*/
static int init_srcu_struct_fields(struct srcu_struct *ssp, bool is_static)
{
+ ssp->srcu_size_state = SRCU_SIZE_SMALL;
+ ssp->node = NULL;
mutex_init(&ssp->srcu_cb_mutex);
mutex_init(&ssp->srcu_gp_mutex);
ssp->srcu_idx = 0;
@@ -170,13 +246,25 @@ static int init_srcu_struct_fields(struct srcu_struct *ssp, bool is_static)
mutex_init(&ssp->srcu_barrier_mutex);
atomic_set(&ssp->srcu_barrier_cpu_cnt, 0);
INIT_DELAYED_WORK(&ssp->work, process_srcu);
+ ssp->sda_is_static = is_static;
if (!is_static)
ssp->sda = alloc_percpu(struct srcu_data);
if (!ssp->sda)
return -ENOMEM;
- init_srcu_struct_nodes(ssp);
+ init_srcu_struct_data(ssp);
ssp->srcu_gp_seq_needed_exp = 0;
ssp->srcu_last_gp_end = ktime_get_mono_fast_ns();
+ if (READ_ONCE(ssp->srcu_size_state) == SRCU_SIZE_SMALL && SRCU_SIZING_IS_INIT()) {
+ if (!init_srcu_struct_nodes(ssp, GFP_ATOMIC)) {
+ if (!ssp->sda_is_static) {
+ free_percpu(ssp->sda);
+ ssp->sda = NULL;
+ return -ENOMEM;
+ }
+ } else {
+ WRITE_ONCE(ssp->srcu_size_state, SRCU_SIZE_BIG);
+ }
+ }
smp_store_release(&ssp->srcu_gp_seq_needed, 0); /* Init done. */
return 0;
}
@@ -214,6 +302,86 @@ EXPORT_SYMBOL_GPL(init_srcu_struct);
#endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */
/*
+ * Initiate a transition to SRCU_SIZE_BIG with lock held.
+ */
+static void __srcu_transition_to_big(struct srcu_struct *ssp)
+{
+ lockdep_assert_held(&ACCESS_PRIVATE(ssp, lock));
+ smp_store_release(&ssp->srcu_size_state, SRCU_SIZE_ALLOC);
+}
+
+/*
+ * Initiate an idempotent transition to SRCU_SIZE_BIG.
+ */
+static void srcu_transition_to_big(struct srcu_struct *ssp)
+{
+ unsigned long flags;
+
+ /* Double-checked locking on ->srcu_size-state. */
+ if (smp_load_acquire(&ssp->srcu_size_state) != SRCU_SIZE_SMALL)
+ return;
+ spin_lock_irqsave_rcu_node(ssp, flags);
+ if (smp_load_acquire(&ssp->srcu_size_state) != SRCU_SIZE_SMALL) {
+ spin_unlock_irqrestore_rcu_node(ssp, flags);
+ return;
+ }
+ __srcu_transition_to_big(ssp);
+ spin_unlock_irqrestore_rcu_node(ssp, flags);
+}
+
+/*
+ * Check to see if the just-encountered contention event justifies
+ * a transition to SRCU_SIZE_BIG.
+ */
+static void spin_lock_irqsave_check_contention(struct srcu_struct *ssp)
+{
+ unsigned long j;
+
+ if (!SRCU_SIZING_IS_CONTEND() || ssp->srcu_size_state)
+ return;
+ j = jiffies;
+ if (ssp->srcu_size_jiffies != j) {
+ ssp->srcu_size_jiffies = j;
+ ssp->srcu_n_lock_retries = 0;
+ }
+ if (++ssp->srcu_n_lock_retries <= small_contention_lim)
+ return;
+ __srcu_transition_to_big(ssp);
+}
+
+/*
+ * Acquire the specified srcu_data structure's ->lock, but check for
+ * excessive contention, which results in initiation of a transition
+ * to SRCU_SIZE_BIG. But only if the srcutree.convert_to_big module
+ * parameter permits this.
+ */
+static void spin_lock_irqsave_sdp_contention(struct srcu_data *sdp, unsigned long *flags)
+{
+ struct srcu_struct *ssp = sdp->ssp;
+
+ if (spin_trylock_irqsave_rcu_node(sdp, *flags))
+ return;
+ spin_lock_irqsave_rcu_node(ssp, *flags);
+ spin_lock_irqsave_check_contention(ssp);
+ spin_unlock_irqrestore_rcu_node(ssp, *flags);
+ spin_lock_irqsave_rcu_node(sdp, *flags);
+}
+
+/*
+ * Acquire the specified srcu_struct structure's ->lock, but check for
+ * excessive contention, which results in initiation of a transition
+ * to SRCU_SIZE_BIG. But only if the srcutree.convert_to_big module
+ * parameter permits this.
+ */
+static void spin_lock_irqsave_ssp_contention(struct srcu_struct *ssp, unsigned long *flags)
+{
+ if (spin_trylock_irqsave_rcu_node(ssp, *flags))
+ return;
+ spin_lock_irqsave_rcu_node(ssp, *flags);
+ spin_lock_irqsave_check_contention(ssp);
+}
+
+/*
* First-use initialization of statically allocated srcu_struct
* structure. Wiring up the combining tree is more than can be
* done with compile-time initialization, so this check is added
@@ -343,7 +511,10 @@ static bool srcu_readers_active(struct srcu_struct *ssp)
return sum;
}
-#define SRCU_INTERVAL 1
+#define SRCU_INTERVAL 1 // Base delay if no expedited GPs pending.
+#define SRCU_MAX_INTERVAL 10 // Maximum incremental delay from slow readers.
+#define SRCU_MAX_NODELAY_PHASE 1 // Maximum per-GP-phase consecutive no-delay instances.
+#define SRCU_MAX_NODELAY 100 // Maximum consecutive no-delay instances.
/*
* Return grace-period delay, zero if there are expedited grace
@@ -351,10 +522,18 @@ static bool srcu_readers_active(struct srcu_struct *ssp)
*/
static unsigned long srcu_get_delay(struct srcu_struct *ssp)
{
- if (ULONG_CMP_LT(READ_ONCE(ssp->srcu_gp_seq),
- READ_ONCE(ssp->srcu_gp_seq_needed_exp)))
- return 0;
- return SRCU_INTERVAL;
+ unsigned long jbase = SRCU_INTERVAL;
+
+ if (ULONG_CMP_LT(READ_ONCE(ssp->srcu_gp_seq), READ_ONCE(ssp->srcu_gp_seq_needed_exp)))
+ jbase = 0;
+ if (rcu_seq_state(READ_ONCE(ssp->srcu_gp_seq)))
+ jbase += jiffies - READ_ONCE(ssp->srcu_gp_start);
+ if (!jbase) {
+ WRITE_ONCE(ssp->srcu_n_exp_nodelay, READ_ONCE(ssp->srcu_n_exp_nodelay) + 1);
+ if (READ_ONCE(ssp->srcu_n_exp_nodelay) > SRCU_MAX_NODELAY_PHASE)
+ jbase = 1;
+ }
+ return jbase > SRCU_MAX_INTERVAL ? SRCU_MAX_INTERVAL : jbase;
}
/**
@@ -382,13 +561,20 @@ void cleanup_srcu_struct(struct srcu_struct *ssp)
return; /* Forgot srcu_barrier(), so just leak it! */
}
if (WARN_ON(rcu_seq_state(READ_ONCE(ssp->srcu_gp_seq)) != SRCU_STATE_IDLE) ||
+ WARN_ON(rcu_seq_current(&ssp->srcu_gp_seq) != ssp->srcu_gp_seq_needed) ||
WARN_ON(srcu_readers_active(ssp))) {
- pr_info("%s: Active srcu_struct %p state: %d\n",
- __func__, ssp, rcu_seq_state(READ_ONCE(ssp->srcu_gp_seq)));
+ pr_info("%s: Active srcu_struct %p read state: %d gp state: %lu/%lu\n",
+ __func__, ssp, rcu_seq_state(READ_ONCE(ssp->srcu_gp_seq)),
+ rcu_seq_current(&ssp->srcu_gp_seq), ssp->srcu_gp_seq_needed);
return; /* Caller forgot to stop doing call_srcu()? */
}
- free_percpu(ssp->sda);
- ssp->sda = NULL;
+ if (!ssp->sda_is_static) {
+ free_percpu(ssp->sda);
+ ssp->sda = NULL;
+ }
+ kfree(ssp->node);
+ ssp->node = NULL;
+ ssp->srcu_size_state = SRCU_SIZE_SMALL;
}
EXPORT_SYMBOL_GPL(cleanup_srcu_struct);
@@ -434,9 +620,13 @@ EXPORT_SYMBOL_GPL(__srcu_read_unlock);
*/
static void srcu_gp_start(struct srcu_struct *ssp)
{
- struct srcu_data *sdp = this_cpu_ptr(ssp->sda);
+ struct srcu_data *sdp;
int state;
+ if (smp_load_acquire(&ssp->srcu_size_state) < SRCU_SIZE_WAIT_BARRIER)
+ sdp = per_cpu_ptr(ssp->sda, 0);
+ else
+ sdp = this_cpu_ptr(ssp->sda);
lockdep_assert_held(&ACCESS_PRIVATE(ssp, lock));
WARN_ON_ONCE(ULONG_CMP_GE(ssp->srcu_gp_seq, ssp->srcu_gp_seq_needed));
spin_lock_rcu_node(sdp); /* Interrupts already disabled. */
@@ -445,6 +635,8 @@ static void srcu_gp_start(struct srcu_struct *ssp)
(void)rcu_segcblist_accelerate(&sdp->srcu_cblist,
rcu_seq_snap(&ssp->srcu_gp_seq));
spin_unlock_rcu_node(sdp); /* Interrupts remain disabled. */
+ WRITE_ONCE(ssp->srcu_gp_start, jiffies);
+ WRITE_ONCE(ssp->srcu_n_exp_nodelay, 0);
smp_mb(); /* Order prior store to ->srcu_gp_seq_needed vs. GP start. */
rcu_seq_start(&ssp->srcu_gp_seq);
state = rcu_seq_state(ssp->srcu_gp_seq);
@@ -517,7 +709,9 @@ static void srcu_gp_end(struct srcu_struct *ssp)
int idx;
unsigned long mask;
struct srcu_data *sdp;
+ unsigned long sgsne;
struct srcu_node *snp;
+ int ss_state;
/* Prevent more than one additional grace period. */
mutex_lock(&ssp->srcu_cb_mutex);
@@ -526,7 +720,7 @@ static void srcu_gp_end(struct srcu_struct *ssp)
spin_lock_irq_rcu_node(ssp);
idx = rcu_seq_state(ssp->srcu_gp_seq);
WARN_ON_ONCE(idx != SRCU_STATE_SCAN2);
- cbdelay = srcu_get_delay(ssp);
+ cbdelay = !!srcu_get_delay(ssp);
WRITE_ONCE(ssp->srcu_last_gp_end, ktime_get_mono_fast_ns());
rcu_seq_end(&ssp->srcu_gp_seq);
gpseq = rcu_seq_current(&ssp->srcu_gp_seq);
@@ -537,38 +731,45 @@ static void srcu_gp_end(struct srcu_struct *ssp)
/* A new grace period can start at this point. But only one. */
/* Initiate callback invocation as needed. */
- idx = rcu_seq_ctr(gpseq) % ARRAY_SIZE(snp->srcu_have_cbs);
- srcu_for_each_node_breadth_first(ssp, snp) {
- spin_lock_irq_rcu_node(snp);
- cbs = false;
- last_lvl = snp >= ssp->level[rcu_num_lvls - 1];
- if (last_lvl)
- cbs = snp->srcu_have_cbs[idx] == gpseq;
- snp->srcu_have_cbs[idx] = gpseq;
- rcu_seq_set_state(&snp->srcu_have_cbs[idx], 1);
- if (ULONG_CMP_LT(snp->srcu_gp_seq_needed_exp, gpseq))
- WRITE_ONCE(snp->srcu_gp_seq_needed_exp, gpseq);
- mask = snp->srcu_data_have_cbs[idx];
- snp->srcu_data_have_cbs[idx] = 0;
- spin_unlock_irq_rcu_node(snp);
- if (cbs)
- srcu_schedule_cbs_snp(ssp, snp, mask, cbdelay);
-
- /* Occasionally prevent srcu_data counter wrap. */
- if (!(gpseq & counter_wrap_check) && last_lvl)
- for (cpu = snp->grplo; cpu <= snp->grphi; cpu++) {
- sdp = per_cpu_ptr(ssp->sda, cpu);
- spin_lock_irqsave_rcu_node(sdp, flags);
- if (ULONG_CMP_GE(gpseq,
- sdp->srcu_gp_seq_needed + 100))
- sdp->srcu_gp_seq_needed = gpseq;
- if (ULONG_CMP_GE(gpseq,
- sdp->srcu_gp_seq_needed_exp + 100))
- sdp->srcu_gp_seq_needed_exp = gpseq;
- spin_unlock_irqrestore_rcu_node(sdp, flags);
- }
+ ss_state = smp_load_acquire(&ssp->srcu_size_state);
+ if (ss_state < SRCU_SIZE_WAIT_BARRIER) {
+ srcu_schedule_cbs_sdp(per_cpu_ptr(ssp->sda, 0), cbdelay);
+ } else {
+ idx = rcu_seq_ctr(gpseq) % ARRAY_SIZE(snp->srcu_have_cbs);
+ srcu_for_each_node_breadth_first(ssp, snp) {
+ spin_lock_irq_rcu_node(snp);
+ cbs = false;
+ last_lvl = snp >= ssp->level[rcu_num_lvls - 1];
+ if (last_lvl)
+ cbs = ss_state < SRCU_SIZE_BIG || snp->srcu_have_cbs[idx] == gpseq;
+ snp->srcu_have_cbs[idx] = gpseq;
+ rcu_seq_set_state(&snp->srcu_have_cbs[idx], 1);
+ sgsne = snp->srcu_gp_seq_needed_exp;
+ if (srcu_invl_snp_seq(sgsne) || ULONG_CMP_LT(sgsne, gpseq))
+ WRITE_ONCE(snp->srcu_gp_seq_needed_exp, gpseq);
+ if (ss_state < SRCU_SIZE_BIG)
+ mask = ~0;
+ else
+ mask = snp->srcu_data_have_cbs[idx];
+ snp->srcu_data_have_cbs[idx] = 0;
+ spin_unlock_irq_rcu_node(snp);
+ if (cbs)
+ srcu_schedule_cbs_snp(ssp, snp, mask, cbdelay);
+ }
}
+ /* Occasionally prevent srcu_data counter wrap. */
+ if (!(gpseq & counter_wrap_check))
+ for_each_possible_cpu(cpu) {
+ sdp = per_cpu_ptr(ssp->sda, cpu);
+ spin_lock_irqsave_rcu_node(sdp, flags);
+ if (ULONG_CMP_GE(gpseq, sdp->srcu_gp_seq_needed + 100))
+ sdp->srcu_gp_seq_needed = gpseq;
+ if (ULONG_CMP_GE(gpseq, sdp->srcu_gp_seq_needed_exp + 100))
+ sdp->srcu_gp_seq_needed_exp = gpseq;
+ spin_unlock_irqrestore_rcu_node(sdp, flags);
+ }
+
/* Callback initiation done, allow grace periods after next. */
mutex_unlock(&ssp->srcu_cb_mutex);
@@ -583,6 +784,14 @@ static void srcu_gp_end(struct srcu_struct *ssp)
} else {
spin_unlock_irq_rcu_node(ssp);
}
+
+ /* Transition to big if needed. */
+ if (ss_state != SRCU_SIZE_SMALL && ss_state != SRCU_SIZE_BIG) {
+ if (ss_state == SRCU_SIZE_ALLOC)
+ init_srcu_struct_nodes(ssp, GFP_KERNEL);
+ else
+ smp_store_release(&ssp->srcu_size_state, ss_state + 1);
+ }
}
/*
@@ -596,20 +805,24 @@ static void srcu_funnel_exp_start(struct srcu_struct *ssp, struct srcu_node *snp
unsigned long s)
{
unsigned long flags;
+ unsigned long sgsne;
- for (; snp != NULL; snp = snp->srcu_parent) {
- if (rcu_seq_done(&ssp->srcu_gp_seq, s) ||
- ULONG_CMP_GE(READ_ONCE(snp->srcu_gp_seq_needed_exp), s))
- return;
- spin_lock_irqsave_rcu_node(snp, flags);
- if (ULONG_CMP_GE(snp->srcu_gp_seq_needed_exp, s)) {
+ if (snp)
+ for (; snp != NULL; snp = snp->srcu_parent) {
+ sgsne = READ_ONCE(snp->srcu_gp_seq_needed_exp);
+ if (rcu_seq_done(&ssp->srcu_gp_seq, s) ||
+ (!srcu_invl_snp_seq(sgsne) && ULONG_CMP_GE(sgsne, s)))
+ return;
+ spin_lock_irqsave_rcu_node(snp, flags);
+ sgsne = snp->srcu_gp_seq_needed_exp;
+ if (!srcu_invl_snp_seq(sgsne) && ULONG_CMP_GE(sgsne, s)) {
+ spin_unlock_irqrestore_rcu_node(snp, flags);
+ return;
+ }
+ WRITE_ONCE(snp->srcu_gp_seq_needed_exp, s);
spin_unlock_irqrestore_rcu_node(snp, flags);
- return;
}
- WRITE_ONCE(snp->srcu_gp_seq_needed_exp, s);
- spin_unlock_irqrestore_rcu_node(snp, flags);
- }
- spin_lock_irqsave_rcu_node(ssp, flags);
+ spin_lock_irqsave_ssp_contention(ssp, &flags);
if (ULONG_CMP_LT(ssp->srcu_gp_seq_needed_exp, s))
WRITE_ONCE(ssp->srcu_gp_seq_needed_exp, s);
spin_unlock_irqrestore_rcu_node(ssp, flags);
@@ -630,39 +843,47 @@ static void srcu_funnel_gp_start(struct srcu_struct *ssp, struct srcu_data *sdp,
{
unsigned long flags;
int idx = rcu_seq_ctr(s) % ARRAY_SIZE(sdp->mynode->srcu_have_cbs);
- struct srcu_node *snp = sdp->mynode;
+ unsigned long sgsne;
+ struct srcu_node *snp;
+ struct srcu_node *snp_leaf;
unsigned long snp_seq;
- /* Each pass through the loop does one level of the srcu_node tree. */
- for (; snp != NULL; snp = snp->srcu_parent) {
- if (rcu_seq_done(&ssp->srcu_gp_seq, s) && snp != sdp->mynode)
- return; /* GP already done and CBs recorded. */
- spin_lock_irqsave_rcu_node(snp, flags);
- if (ULONG_CMP_GE(snp->srcu_have_cbs[idx], s)) {
+ /* Ensure that snp node tree is fully initialized before traversing it */
+ if (smp_load_acquire(&ssp->srcu_size_state) < SRCU_SIZE_WAIT_BARRIER)
+ snp_leaf = NULL;
+ else
+ snp_leaf = sdp->mynode;
+
+ if (snp_leaf)
+ /* Each pass through the loop does one level of the srcu_node tree. */
+ for (snp = snp_leaf; snp != NULL; snp = snp->srcu_parent) {
+ if (rcu_seq_done(&ssp->srcu_gp_seq, s) && snp != snp_leaf)
+ return; /* GP already done and CBs recorded. */
+ spin_lock_irqsave_rcu_node(snp, flags);
snp_seq = snp->srcu_have_cbs[idx];
- if (snp == sdp->mynode && snp_seq == s)
- snp->srcu_data_have_cbs[idx] |= sdp->grpmask;
- spin_unlock_irqrestore_rcu_node(snp, flags);
- if (snp == sdp->mynode && snp_seq != s) {
- srcu_schedule_cbs_sdp(sdp, do_norm
- ? SRCU_INTERVAL
- : 0);
+ if (!srcu_invl_snp_seq(snp_seq) && ULONG_CMP_GE(snp_seq, s)) {
+ if (snp == snp_leaf && snp_seq == s)
+ snp->srcu_data_have_cbs[idx] |= sdp->grpmask;
+ spin_unlock_irqrestore_rcu_node(snp, flags);
+ if (snp == snp_leaf && snp_seq != s) {
+ srcu_schedule_cbs_sdp(sdp, do_norm ? SRCU_INTERVAL : 0);
+ return;
+ }
+ if (!do_norm)
+ srcu_funnel_exp_start(ssp, snp, s);
return;
}
- if (!do_norm)
- srcu_funnel_exp_start(ssp, snp, s);
- return;
+ snp->srcu_have_cbs[idx] = s;
+ if (snp == snp_leaf)
+ snp->srcu_data_have_cbs[idx] |= sdp->grpmask;
+ sgsne = snp->srcu_gp_seq_needed_exp;
+ if (!do_norm && (srcu_invl_snp_seq(sgsne) || ULONG_CMP_LT(sgsne, s)))
+ WRITE_ONCE(snp->srcu_gp_seq_needed_exp, s);
+ spin_unlock_irqrestore_rcu_node(snp, flags);
}
- snp->srcu_have_cbs[idx] = s;
- if (snp == sdp->mynode)
- snp->srcu_data_have_cbs[idx] |= sdp->grpmask;
- if (!do_norm && ULONG_CMP_LT(snp->srcu_gp_seq_needed_exp, s))
- WRITE_ONCE(snp->srcu_gp_seq_needed_exp, s);
- spin_unlock_irqrestore_rcu_node(snp, flags);
- }
/* Top of tree, must ensure the grace period will be started. */
- spin_lock_irqsave_rcu_node(ssp, flags);
+ spin_lock_irqsave_ssp_contention(ssp, &flags);
if (ULONG_CMP_LT(ssp->srcu_gp_seq_needed, s)) {
/*
* Record need for grace period s. Pair with load
@@ -678,9 +899,15 @@ static void srcu_funnel_gp_start(struct srcu_struct *ssp, struct srcu_data *sdp,
rcu_seq_state(ssp->srcu_gp_seq) == SRCU_STATE_IDLE) {
WARN_ON_ONCE(ULONG_CMP_GE(ssp->srcu_gp_seq, ssp->srcu_gp_seq_needed));
srcu_gp_start(ssp);
+
+ // And how can that list_add() in the "else" clause
+ // possibly be safe for concurrent execution? Well,
+ // it isn't. And it does not have to be. After all, it
+ // can only be executed during early boot when there is only
+ // the one boot CPU running with interrupts still disabled.
if (likely(srcu_init_done))
queue_delayed_work(rcu_gp_wq, &ssp->work,
- srcu_get_delay(ssp));
+ !!srcu_get_delay(ssp));
else if (list_empty(&ssp->work.work.entry))
list_add(&ssp->work.work.entry, &srcu_boot_list);
}
@@ -814,11 +1041,17 @@ static unsigned long srcu_gp_start_if_needed(struct srcu_struct *ssp,
bool needgp = false;
unsigned long s;
struct srcu_data *sdp;
+ struct srcu_node *sdp_mynode;
+ int ss_state;
check_init_srcu_struct(ssp);
idx = srcu_read_lock(ssp);
- sdp = raw_cpu_ptr(ssp->sda);
- spin_lock_irqsave_rcu_node(sdp, flags);
+ ss_state = smp_load_acquire(&ssp->srcu_size_state);
+ if (ss_state < SRCU_SIZE_WAIT_CALL)
+ sdp = per_cpu_ptr(ssp->sda, 0);
+ else
+ sdp = raw_cpu_ptr(ssp->sda);
+ spin_lock_irqsave_sdp_contention(sdp, &flags);
if (rhp)
rcu_segcblist_enqueue(&sdp->srcu_cblist, rhp);
rcu_segcblist_advance(&sdp->srcu_cblist,
@@ -834,10 +1067,17 @@ static unsigned long srcu_gp_start_if_needed(struct srcu_struct *ssp,
needexp = true;
}
spin_unlock_irqrestore_rcu_node(sdp, flags);
+
+ /* Ensure that snp node tree is fully initialized before traversing it */
+ if (ss_state < SRCU_SIZE_WAIT_BARRIER)
+ sdp_mynode = NULL;
+ else
+ sdp_mynode = sdp->mynode;
+
if (needgp)
srcu_funnel_gp_start(ssp, sdp, s, do_norm);
else if (needexp)
- srcu_funnel_exp_start(ssp, sdp->mynode, s);
+ srcu_funnel_exp_start(ssp, sdp_mynode, s);
srcu_read_unlock(ssp, idx);
return s;
}
@@ -1097,6 +1337,28 @@ static void srcu_barrier_cb(struct rcu_head *rhp)
complete(&ssp->srcu_barrier_completion);
}
+/*
+ * Enqueue an srcu_barrier() callback on the specified srcu_data
+ * structure's ->cblist. but only if that ->cblist already has at least one
+ * callback enqueued. Note that if a CPU already has callbacks enqueue,
+ * it must have already registered the need for a future grace period,
+ * so all we need do is enqueue a callback that will use the same grace
+ * period as the last callback already in the queue.
+ */
+static void srcu_barrier_one_cpu(struct srcu_struct *ssp, struct srcu_data *sdp)
+{
+ spin_lock_irq_rcu_node(sdp);
+ atomic_inc(&ssp->srcu_barrier_cpu_cnt);
+ sdp->srcu_barrier_head.func = srcu_barrier_cb;
+ debug_rcu_head_queue(&sdp->srcu_barrier_head);
+ if (!rcu_segcblist_entrain(&sdp->srcu_cblist,
+ &sdp->srcu_barrier_head)) {
+ debug_rcu_head_unqueue(&sdp->srcu_barrier_head);
+ atomic_dec(&ssp->srcu_barrier_cpu_cnt);
+ }
+ spin_unlock_irq_rcu_node(sdp);
+}
+
/**
* srcu_barrier - Wait until all in-flight call_srcu() callbacks complete.
* @ssp: srcu_struct on which to wait for in-flight callbacks.
@@ -1104,7 +1366,7 @@ static void srcu_barrier_cb(struct rcu_head *rhp)
void srcu_barrier(struct srcu_struct *ssp)
{
int cpu;
- struct srcu_data *sdp;
+ int idx;
unsigned long s = rcu_seq_snap(&ssp->srcu_barrier_seq);
check_init_srcu_struct(ssp);
@@ -1120,27 +1382,13 @@ void srcu_barrier(struct srcu_struct *ssp)
/* Initial count prevents reaching zero until all CBs are posted. */
atomic_set(&ssp->srcu_barrier_cpu_cnt, 1);
- /*
- * Each pass through this loop enqueues a callback, but only
- * on CPUs already having callbacks enqueued. Note that if
- * a CPU already has callbacks enqueue, it must have already
- * registered the need for a future grace period, so all we
- * need do is enqueue a callback that will use the same
- * grace period as the last callback already in the queue.
- */
- for_each_possible_cpu(cpu) {
- sdp = per_cpu_ptr(ssp->sda, cpu);
- spin_lock_irq_rcu_node(sdp);
- atomic_inc(&ssp->srcu_barrier_cpu_cnt);
- sdp->srcu_barrier_head.func = srcu_barrier_cb;
- debug_rcu_head_queue(&sdp->srcu_barrier_head);
- if (!rcu_segcblist_entrain(&sdp->srcu_cblist,
- &sdp->srcu_barrier_head)) {
- debug_rcu_head_unqueue(&sdp->srcu_barrier_head);
- atomic_dec(&ssp->srcu_barrier_cpu_cnt);
- }
- spin_unlock_irq_rcu_node(sdp);
- }
+ idx = srcu_read_lock(ssp);
+ if (smp_load_acquire(&ssp->srcu_size_state) < SRCU_SIZE_WAIT_BARRIER)
+ srcu_barrier_one_cpu(ssp, per_cpu_ptr(ssp->sda, 0));
+ else
+ for_each_possible_cpu(cpu)
+ srcu_barrier_one_cpu(ssp, per_cpu_ptr(ssp->sda, cpu));
+ srcu_read_unlock(ssp, idx);
/* Remove the initial count, at which point reaching zero can happen. */
if (atomic_dec_and_test(&ssp->srcu_barrier_cpu_cnt))
@@ -1214,6 +1462,7 @@ static void srcu_advance_state(struct srcu_struct *ssp)
srcu_flip(ssp);
spin_lock_irq_rcu_node(ssp);
rcu_seq_set_state(&ssp->srcu_gp_seq, SRCU_STATE_SCAN2);
+ ssp->srcu_n_exp_nodelay = 0;
spin_unlock_irq_rcu_node(ssp);
}
@@ -1228,6 +1477,7 @@ static void srcu_advance_state(struct srcu_struct *ssp)
mutex_unlock(&ssp->srcu_gp_mutex);
return; /* readers present, retry later. */
}
+ ssp->srcu_n_exp_nodelay = 0;
srcu_gp_end(ssp); /* Releases ->srcu_gp_mutex. */
}
}
@@ -1318,12 +1568,28 @@ static void srcu_reschedule(struct srcu_struct *ssp, unsigned long delay)
*/
static void process_srcu(struct work_struct *work)
{
+ unsigned long curdelay;
+ unsigned long j;
struct srcu_struct *ssp;
ssp = container_of(work, struct srcu_struct, work.work);
srcu_advance_state(ssp);
- srcu_reschedule(ssp, srcu_get_delay(ssp));
+ curdelay = srcu_get_delay(ssp);
+ if (curdelay) {
+ WRITE_ONCE(ssp->reschedule_count, 0);
+ } else {
+ j = jiffies;
+ if (READ_ONCE(ssp->reschedule_jiffies) == j) {
+ WRITE_ONCE(ssp->reschedule_count, READ_ONCE(ssp->reschedule_count) + 1);
+ if (READ_ONCE(ssp->reschedule_count) > SRCU_MAX_NODELAY)
+ curdelay = 1;
+ } else {
+ WRITE_ONCE(ssp->reschedule_count, 1);
+ WRITE_ONCE(ssp->reschedule_jiffies, j);
+ }
+ }
+ srcu_reschedule(ssp, curdelay);
}
void srcutorture_get_gp_data(enum rcutorture_type test_type,
@@ -1337,43 +1603,69 @@ void srcutorture_get_gp_data(enum rcutorture_type test_type,
}
EXPORT_SYMBOL_GPL(srcutorture_get_gp_data);
+static const char * const srcu_size_state_name[] = {
+ "SRCU_SIZE_SMALL",
+ "SRCU_SIZE_ALLOC",
+ "SRCU_SIZE_WAIT_BARRIER",
+ "SRCU_SIZE_WAIT_CALL",
+ "SRCU_SIZE_WAIT_CBS1",
+ "SRCU_SIZE_WAIT_CBS2",
+ "SRCU_SIZE_WAIT_CBS3",
+ "SRCU_SIZE_WAIT_CBS4",
+ "SRCU_SIZE_BIG",
+ "SRCU_SIZE_???",
+};
+
void srcu_torture_stats_print(struct srcu_struct *ssp, char *tt, char *tf)
{
int cpu;
int idx;
unsigned long s0 = 0, s1 = 0;
+ int ss_state = READ_ONCE(ssp->srcu_size_state);
+ int ss_state_idx = ss_state;
idx = ssp->srcu_idx & 0x1;
- pr_alert("%s%s Tree SRCU g%ld per-CPU(idx=%d):",
- tt, tf, rcu_seq_current(&ssp->srcu_gp_seq), idx);
- for_each_possible_cpu(cpu) {
- unsigned long l0, l1;
- unsigned long u0, u1;
- long c0, c1;
- struct srcu_data *sdp;
-
- sdp = per_cpu_ptr(ssp->sda, cpu);
- u0 = data_race(sdp->srcu_unlock_count[!idx]);
- u1 = data_race(sdp->srcu_unlock_count[idx]);
-
- /*
- * Make sure that a lock is always counted if the corresponding
- * unlock is counted.
- */
- smp_rmb();
-
- l0 = data_race(sdp->srcu_lock_count[!idx]);
- l1 = data_race(sdp->srcu_lock_count[idx]);
-
- c0 = l0 - u0;
- c1 = l1 - u1;
- pr_cont(" %d(%ld,%ld %c)",
- cpu, c0, c1,
- "C."[rcu_segcblist_empty(&sdp->srcu_cblist)]);
- s0 += c0;
- s1 += c1;
+ if (ss_state < 0 || ss_state >= ARRAY_SIZE(srcu_size_state_name))
+ ss_state_idx = ARRAY_SIZE(srcu_size_state_name) - 1;
+ pr_alert("%s%s Tree SRCU g%ld state %d (%s)",
+ tt, tf, rcu_seq_current(&ssp->srcu_gp_seq), ss_state,
+ srcu_size_state_name[ss_state_idx]);
+ if (!ssp->sda) {
+ // Called after cleanup_srcu_struct(), perhaps.
+ pr_cont(" No per-CPU srcu_data structures (->sda == NULL).\n");
+ } else {
+ pr_cont(" per-CPU(idx=%d):", idx);
+ for_each_possible_cpu(cpu) {
+ unsigned long l0, l1;
+ unsigned long u0, u1;
+ long c0, c1;
+ struct srcu_data *sdp;
+
+ sdp = per_cpu_ptr(ssp->sda, cpu);
+ u0 = data_race(sdp->srcu_unlock_count[!idx]);
+ u1 = data_race(sdp->srcu_unlock_count[idx]);
+
+ /*
+ * Make sure that a lock is always counted if the corresponding
+ * unlock is counted.
+ */
+ smp_rmb();
+
+ l0 = data_race(sdp->srcu_lock_count[!idx]);
+ l1 = data_race(sdp->srcu_lock_count[idx]);
+
+ c0 = l0 - u0;
+ c1 = l1 - u1;
+ pr_cont(" %d(%ld,%ld %c)",
+ cpu, c0, c1,
+ "C."[rcu_segcblist_empty(&sdp->srcu_cblist)]);
+ s0 += c0;
+ s1 += c1;
+ }
+ pr_cont(" T(%ld,%ld)\n", s0, s1);
}
- pr_cont(" T(%ld,%ld)\n", s0, s1);
+ if (SRCU_SIZING_IS_TORTURE())
+ srcu_transition_to_big(ssp);
}
EXPORT_SYMBOL_GPL(srcu_torture_stats_print);
@@ -1390,6 +1682,17 @@ void __init srcu_init(void)
{
struct srcu_struct *ssp;
+ /* Decide on srcu_struct-size strategy. */
+ if (SRCU_SIZING_IS(SRCU_SIZING_AUTO)) {
+ if (nr_cpu_ids >= big_cpu_lim) {
+ convert_to_big = SRCU_SIZING_INIT; // Don't bother waiting for contention.
+ pr_info("%s: Setting srcu_struct sizes to big.\n", __func__);
+ } else {
+ convert_to_big = SRCU_SIZING_NONE | SRCU_SIZING_CONTEND;
+ pr_info("%s: Setting srcu_struct sizes based on contention.\n", __func__);
+ }
+ }
+
/*
* Once that is set, call_srcu() can follow the normal path and
* queue delayed work. This must follow RCU workqueues creation
@@ -1400,6 +1703,8 @@ void __init srcu_init(void)
ssp = list_first_entry(&srcu_boot_list, struct srcu_struct,
work.work.entry);
list_del_init(&ssp->work.work.entry);
+ if (SRCU_SIZING_IS(SRCU_SIZING_INIT) && ssp->srcu_size_state == SRCU_SIZE_SMALL)
+ ssp->srcu_size_state = SRCU_SIZE_ALLOC;
queue_work(rcu_gp_wq, &ssp->work.work);
}
}
diff --git a/kernel/rcu/sync.c b/kernel/rcu/sync.c
index 33d896d85902..5cefc702158f 100644
--- a/kernel/rcu/sync.c
+++ b/kernel/rcu/sync.c
@@ -111,7 +111,7 @@ static void rcu_sync_func(struct rcu_head *rhp)
* a slowpath during the update. After this function returns, all
* subsequent calls to rcu_sync_is_idle() will return false, which
* tells readers to stay off their fastpaths. A later call to
- * rcu_sync_exit() re-enables reader slowpaths.
+ * rcu_sync_exit() re-enables reader fastpaths.
*
* When called in isolation, rcu_sync_enter() must wait for a grace
* period, however, closely spaced calls to rcu_sync_enter() can
diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h
index 99cf3a13954c..3925e32159b5 100644
--- a/kernel/rcu/tasks.h
+++ b/kernel/rcu/tasks.h
@@ -46,7 +46,7 @@ struct rcu_tasks_percpu {
/**
* struct rcu_tasks - Definition for a Tasks-RCU-like mechanism.
- * @cbs_wq: Wait queue allowing new callback to get kthread's attention.
+ * @cbs_wait: RCU wait allowing a new callback to get kthread's attention.
* @cbs_gbl_lock: Lock protecting callback list.
* @kthread_ptr: This flavor's grace-period/callback-invocation kthread.
* @gp_func: This flavor's grace-period-wait function.
@@ -77,7 +77,7 @@ struct rcu_tasks_percpu {
* @kname: This flavor's kthread name.
*/
struct rcu_tasks {
- struct wait_queue_head cbs_wq;
+ struct rcuwait cbs_wait;
raw_spinlock_t cbs_gbl_lock;
int gp_state;
int gp_sleep;
@@ -113,11 +113,11 @@ static void call_rcu_tasks_iw_wakeup(struct irq_work *iwp);
#define DEFINE_RCU_TASKS(rt_name, gp, call, n) \
static DEFINE_PER_CPU(struct rcu_tasks_percpu, rt_name ## __percpu) = { \
.lock = __RAW_SPIN_LOCK_UNLOCKED(rt_name ## __percpu.cbs_pcpu_lock), \
- .rtp_irq_work = IRQ_WORK_INIT(call_rcu_tasks_iw_wakeup), \
+ .rtp_irq_work = IRQ_WORK_INIT_HARD(call_rcu_tasks_iw_wakeup), \
}; \
static struct rcu_tasks rt_name = \
{ \
- .cbs_wq = __WAIT_QUEUE_HEAD_INITIALIZER(rt_name.cbs_wq), \
+ .cbs_wait = __RCUWAIT_INITIALIZER(rt_name.wait), \
.cbs_gbl_lock = __RAW_SPIN_LOCK_UNLOCKED(rt_name.cbs_gbl_lock), \
.gp_func = gp, \
.call_func = call, \
@@ -143,6 +143,11 @@ module_param(rcu_task_ipi_delay, int, 0644);
#define RCU_TASK_STALL_TIMEOUT (HZ * 60 * 10)
static int rcu_task_stall_timeout __read_mostly = RCU_TASK_STALL_TIMEOUT;
module_param(rcu_task_stall_timeout, int, 0644);
+#define RCU_TASK_STALL_INFO (HZ * 10)
+static int rcu_task_stall_info __read_mostly = RCU_TASK_STALL_INFO;
+module_param(rcu_task_stall_info, int, 0644);
+static int rcu_task_stall_info_mult __read_mostly = 3;
+module_param(rcu_task_stall_info_mult, int, 0444);
static int rcu_task_enqueue_lim __read_mostly = -1;
module_param(rcu_task_enqueue_lim, int, 0444);
@@ -261,14 +266,16 @@ static void call_rcu_tasks_iw_wakeup(struct irq_work *iwp)
struct rcu_tasks_percpu *rtpcp = container_of(iwp, struct rcu_tasks_percpu, rtp_irq_work);
rtp = rtpcp->rtpp;
- wake_up(&rtp->cbs_wq);
+ rcuwait_wake_up(&rtp->cbs_wait);
}
// Enqueue a callback for the specified flavor of Tasks RCU.
static void call_rcu_tasks_generic(struct rcu_head *rhp, rcu_callback_t func,
struct rcu_tasks *rtp)
{
+ int chosen_cpu;
unsigned long flags;
+ int ideal_cpu;
unsigned long j;
bool needadjust = false;
bool needwake;
@@ -278,8 +285,9 @@ static void call_rcu_tasks_generic(struct rcu_head *rhp, rcu_callback_t func,
rhp->func = func;
local_irq_save(flags);
rcu_read_lock();
- rtpcp = per_cpu_ptr(rtp->rtpcpu,
- smp_processor_id() >> READ_ONCE(rtp->percpu_enqueue_shift));
+ ideal_cpu = smp_processor_id() >> READ_ONCE(rtp->percpu_enqueue_shift);
+ chosen_cpu = cpumask_next(ideal_cpu - 1, cpu_possible_mask);
+ rtpcp = per_cpu_ptr(rtp->rtpcpu, chosen_cpu);
if (!raw_spin_trylock_rcu_node(rtpcp)) { // irqs already disabled.
raw_spin_lock_rcu_node(rtpcp); // irqs already disabled.
j = jiffies;
@@ -460,7 +468,7 @@ static void rcu_tasks_invoke_cbs(struct rcu_tasks *rtp, struct rcu_tasks_percpu
}
}
- if (rcu_segcblist_empty(&rtpcp->cblist))
+ if (rcu_segcblist_empty(&rtpcp->cblist) || !cpu_possible(cpu))
return;
raw_spin_lock_irqsave_rcu_node(rtpcp, flags);
rcu_segcblist_advance(&rtpcp->cblist, rcu_seq_current(&rtp->tasks_gp_seq));
@@ -509,7 +517,9 @@ static int __noreturn rcu_tasks_kthread(void *arg)
set_tasks_gp_state(rtp, RTGS_WAIT_CBS);
/* If there were none, wait a bit and start over. */
- wait_event_idle(rtp->cbs_wq, (needgpcb = rcu_tasks_need_gpcb(rtp)));
+ rcuwait_wait_event(&rtp->cbs_wait,
+ (needgpcb = rcu_tasks_need_gpcb(rtp)),
+ TASK_IDLE);
if (needgpcb & 0x2) {
// Wait for one grace period.
@@ -548,8 +558,15 @@ static void __init rcu_spawn_tasks_kthread_generic(struct rcu_tasks *rtp)
static void __init rcu_tasks_bootup_oddness(void)
{
#if defined(CONFIG_TASKS_RCU) || defined(CONFIG_TASKS_TRACE_RCU)
+ int rtsimc;
+
if (rcu_task_stall_timeout != RCU_TASK_STALL_TIMEOUT)
pr_info("\tTasks-RCU CPU stall warnings timeout set to %d (rcu_task_stall_timeout).\n", rcu_task_stall_timeout);
+ rtsimc = clamp(rcu_task_stall_info_mult, 1, 10);
+ if (rtsimc != rcu_task_stall_info_mult) {
+ pr_info("\tTasks-RCU CPU stall info multiplier clamped to %d (rcu_task_stall_info_mult).\n", rtsimc);
+ rcu_task_stall_info_mult = rtsimc;
+ }
#endif /* #ifdef CONFIG_TASKS_RCU */
#ifdef CONFIG_TASKS_RCU
pr_info("\tTrampoline variant of Tasks RCU enabled.\n");
@@ -568,7 +585,17 @@ static void __init rcu_tasks_bootup_oddness(void)
/* Dump out rcutorture-relevant state common to all RCU-tasks flavors. */
static void show_rcu_tasks_generic_gp_kthread(struct rcu_tasks *rtp, char *s)
{
- struct rcu_tasks_percpu *rtpcp = per_cpu_ptr(rtp->rtpcpu, 0); // for_each...
+ int cpu;
+ bool havecbs = false;
+
+ for_each_possible_cpu(cpu) {
+ struct rcu_tasks_percpu *rtpcp = per_cpu_ptr(rtp->rtpcpu, cpu);
+
+ if (!data_race(rcu_segcblist_empty(&rtpcp->cblist))) {
+ havecbs = true;
+ break;
+ }
+ }
pr_info("%s: %s(%d) since %lu g:%lu i:%lu/%lu %c%c %s\n",
rtp->kname,
tasks_gp_state_getname(rtp), data_race(rtp->gp_state),
@@ -576,7 +603,7 @@ static void show_rcu_tasks_generic_gp_kthread(struct rcu_tasks *rtp, char *s)
data_race(rcu_seq_current(&rtp->tasks_gp_seq)),
data_race(rtp->n_ipis_fails), data_race(rtp->n_ipis),
".k"[!!data_race(rtp->kthread_ptr)],
- ".C"[!data_race(rcu_segcblist_empty(&rtpcp->cblist))],
+ ".C"[havecbs],
s);
}
#endif // #ifndef CONFIG_TINY_RCU
@@ -592,10 +619,15 @@ static void exit_tasks_rcu_finish_trace(struct task_struct *t);
/* Wait for one RCU-tasks grace period. */
static void rcu_tasks_wait_gp(struct rcu_tasks *rtp)
{
- struct task_struct *g, *t;
- unsigned long lastreport;
- LIST_HEAD(holdouts);
+ struct task_struct *g;
int fract;
+ LIST_HEAD(holdouts);
+ unsigned long j;
+ unsigned long lastinfo;
+ unsigned long lastreport;
+ bool reported = false;
+ int rtsi;
+ struct task_struct *t;
set_tasks_gp_state(rtp, RTGS_PRE_WAIT_GP);
rtp->pregp_func();
@@ -621,30 +653,50 @@ static void rcu_tasks_wait_gp(struct rcu_tasks *rtp)
* is empty, we are done.
*/
lastreport = jiffies;
+ lastinfo = lastreport;
+ rtsi = READ_ONCE(rcu_task_stall_info);
// Start off with initial wait and slowly back off to 1 HZ wait.
fract = rtp->init_fract;
while (!list_empty(&holdouts)) {
+ ktime_t exp;
bool firstreport;
bool needreport;
int rtst;
- /* Slowly back off waiting for holdouts */
+ // Slowly back off waiting for holdouts
set_tasks_gp_state(rtp, RTGS_WAIT_SCAN_HOLDOUTS);
- schedule_timeout_idle(fract);
+ if (!IS_ENABLED(CONFIG_PREEMPT_RT)) {
+ schedule_timeout_idle(fract);
+ } else {
+ exp = jiffies_to_nsecs(fract);
+ __set_current_state(TASK_IDLE);
+ schedule_hrtimeout_range(&exp, jiffies_to_nsecs(HZ / 2), HRTIMER_MODE_REL_HARD);
+ }
if (fract < HZ)
fract++;
rtst = READ_ONCE(rcu_task_stall_timeout);
needreport = rtst > 0 && time_after(jiffies, lastreport + rtst);
- if (needreport)
+ if (needreport) {
lastreport = jiffies;
+ reported = true;
+ }
firstreport = true;
WARN_ON(signal_pending(current));
set_tasks_gp_state(rtp, RTGS_SCAN_HOLDOUTS);
rtp->holdouts_func(&holdouts, needreport, &firstreport);
+
+ // Print pre-stall informational messages if needed.
+ j = jiffies;
+ if (rtsi > 0 && !reported && time_after(j, lastinfo + rtsi)) {
+ lastinfo = j;
+ rtsi = rtsi * rcu_task_stall_info_mult;
+ pr_info("%s: %s grace period %lu is %lu jiffies old.\n",
+ __func__, rtp->kname, rtp->tasks_gp_seq, j - rtp->gp_start);
+ }
}
set_tasks_gp_state(rtp, RTGS_POST_GP);
@@ -950,6 +1002,9 @@ static void rcu_tasks_be_rude(struct work_struct *work)
// Wait for one rude RCU-tasks grace period.
static void rcu_tasks_rude_wait_gp(struct rcu_tasks *rtp)
{
+ if (num_online_cpus() <= 1)
+ return; // Fastpath for only one CPU.
+
rtp->n_ipis += cpumask_weight(cpu_online_mask);
schedule_on_each_cpu(rcu_tasks_be_rude);
}
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index a4b8189455d5..c25ba442044a 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -1679,6 +1679,8 @@ static bool __note_gp_changes(struct rcu_node *rnp, struct rcu_data *rdp)
rdp->gp_seq = rnp->gp_seq; /* Remember new grace-period state. */
if (ULONG_CMP_LT(rdp->gp_seq_needed, rnp->gp_seq_needed) || rdp->gpwrap)
WRITE_ONCE(rdp->gp_seq_needed, rnp->gp_seq_needed);
+ if (IS_ENABLED(CONFIG_PROVE_RCU) && READ_ONCE(rdp->gpwrap))
+ WRITE_ONCE(rdp->last_sched_clock, jiffies);
WRITE_ONCE(rdp->gpwrap, false);
rcu_gpnum_ovf(rnp, rdp);
return ret;
@@ -1705,11 +1707,37 @@ static void note_gp_changes(struct rcu_data *rdp)
rcu_gp_kthread_wake();
}
+static atomic_t *rcu_gp_slow_suppress;
+
+/* Register a counter to suppress debugging grace-period delays. */
+void rcu_gp_slow_register(atomic_t *rgssp)
+{
+ WARN_ON_ONCE(rcu_gp_slow_suppress);
+
+ WRITE_ONCE(rcu_gp_slow_suppress, rgssp);
+}
+EXPORT_SYMBOL_GPL(rcu_gp_slow_register);
+
+/* Unregister a counter, with NULL for not caring which. */
+void rcu_gp_slow_unregister(atomic_t *rgssp)
+{
+ WARN_ON_ONCE(rgssp && rgssp != rcu_gp_slow_suppress);
+
+ WRITE_ONCE(rcu_gp_slow_suppress, NULL);
+}
+EXPORT_SYMBOL_GPL(rcu_gp_slow_unregister);
+
+static bool rcu_gp_slow_is_suppressed(void)
+{
+ atomic_t *rgssp = READ_ONCE(rcu_gp_slow_suppress);
+
+ return rgssp && atomic_read(rgssp);
+}
+
static void rcu_gp_slow(int delay)
{
- if (delay > 0 &&
- !(rcu_seq_ctr(rcu_state.gp_seq) %
- (rcu_num_nodes * PER_RCU_NODE_PERIOD * delay)))
+ if (!rcu_gp_slow_is_suppressed() && delay > 0 &&
+ !(rcu_seq_ctr(rcu_state.gp_seq) % (rcu_num_nodes * PER_RCU_NODE_PERIOD * delay)))
schedule_timeout_idle(delay);
}
@@ -2096,14 +2124,29 @@ static noinline void rcu_gp_cleanup(void)
/* Advance CBs to reduce false positives below. */
offloaded = rcu_rdp_is_offloaded(rdp);
if ((offloaded || !rcu_accelerate_cbs(rnp, rdp)) && needgp) {
+
+ // We get here if a grace period was needed (“needgp”)
+ // and the above call to rcu_accelerate_cbs() did not set
+ // the RCU_GP_FLAG_INIT bit in ->gp_state (which records
+ // the need for another grace period).  The purpose
+ // of the “offloaded” check is to avoid invoking
+ // rcu_accelerate_cbs() on an offloaded CPU because we do not
+ // hold the ->nocb_lock needed to safely access an offloaded
+ // ->cblist.  We do not want to acquire that lock because
+ // it can be heavily contended during callback floods.
+
WRITE_ONCE(rcu_state.gp_flags, RCU_GP_FLAG_INIT);
WRITE_ONCE(rcu_state.gp_req_activity, jiffies);
- trace_rcu_grace_period(rcu_state.name,
- rcu_state.gp_seq,
- TPS("newreq"));
+ trace_rcu_grace_period(rcu_state.name, rcu_state.gp_seq, TPS("newreq"));
} else {
- WRITE_ONCE(rcu_state.gp_flags,
- rcu_state.gp_flags & RCU_GP_FLAG_INIT);
+
+ // We get here either if there is no need for an
+ // additional grace period or if rcu_accelerate_cbs() has
+ // already set the RCU_GP_FLAG_INIT bit in ->gp_flags. 
+ // So all we need to do is to clear all of the other
+ // ->gp_flags bits.
+
+ WRITE_ONCE(rcu_state.gp_flags, rcu_state.gp_flags & RCU_GP_FLAG_INIT);
}
raw_spin_unlock_irq_rcu_node(rnp);
@@ -2609,6 +2652,13 @@ static void rcu_do_batch(struct rcu_data *rdp)
*/
void rcu_sched_clock_irq(int user)
{
+ unsigned long j;
+
+ if (IS_ENABLED(CONFIG_PROVE_RCU)) {
+ j = jiffies;
+ WARN_ON_ONCE(time_before(j, __this_cpu_read(rcu_data.last_sched_clock)));
+ __this_cpu_write(rcu_data.last_sched_clock, j);
+ }
trace_rcu_utilization(TPS("Start scheduler-tick"));
lockdep_assert_irqs_disabled();
raw_cpu_inc(rcu_data.ticks_this_gp);
@@ -2624,6 +2674,8 @@ void rcu_sched_clock_irq(int user)
rcu_flavor_sched_clock_irq(user);
if (rcu_pending(user))
invoke_rcu_core();
+ if (user)
+ rcu_tasks_classic_qs(current, false);
lockdep_assert_irqs_disabled();
trace_rcu_utilization(TPS("End scheduler-tick"));
@@ -3717,7 +3769,9 @@ static int rcu_blocking_is_gp(void)
{
int ret;
- if (IS_ENABLED(CONFIG_PREEMPTION))
+ // Invoking preempt_model_*() too early gets a splat.
+ if (rcu_scheduler_active == RCU_SCHEDULER_INACTIVE ||
+ preempt_model_full() || preempt_model_rt())
return rcu_scheduler_active == RCU_SCHEDULER_INACTIVE;
might_sleep(); /* Check for RCU read-side critical section. */
preempt_disable();
@@ -4179,6 +4233,7 @@ rcu_boot_init_percpu_data(int cpu)
rdp->rcu_ofl_gp_flags = RCU_GP_CLEANED;
rdp->rcu_onl_gp_seq = rcu_state.gp_seq;
rdp->rcu_onl_gp_flags = RCU_GP_CLEANED;
+ rdp->last_sched_clock = jiffies;
rdp->cpu = cpu;
rcu_boot_init_nocb_percpu_data(rdp);
}
@@ -4471,6 +4526,51 @@ static int rcu_pm_notify(struct notifier_block *self,
return NOTIFY_OK;
}
+#ifdef CONFIG_RCU_EXP_KTHREAD
+struct kthread_worker *rcu_exp_gp_kworker;
+struct kthread_worker *rcu_exp_par_gp_kworker;
+
+static void __init rcu_start_exp_gp_kworkers(void)
+{
+ const char *par_gp_kworker_name = "rcu_exp_par_gp_kthread_worker";
+ const char *gp_kworker_name = "rcu_exp_gp_kthread_worker";
+ struct sched_param param = { .sched_priority = kthread_prio };
+
+ rcu_exp_gp_kworker = kthread_create_worker(0, gp_kworker_name);
+ if (IS_ERR_OR_NULL(rcu_exp_gp_kworker)) {
+ pr_err("Failed to create %s!\n", gp_kworker_name);
+ return;
+ }
+
+ rcu_exp_par_gp_kworker = kthread_create_worker(0, par_gp_kworker_name);
+ if (IS_ERR_OR_NULL(rcu_exp_par_gp_kworker)) {
+ pr_err("Failed to create %s!\n", par_gp_kworker_name);
+ kthread_destroy_worker(rcu_exp_gp_kworker);
+ return;
+ }
+
+ sched_setscheduler_nocheck(rcu_exp_gp_kworker->task, SCHED_FIFO, &param);
+ sched_setscheduler_nocheck(rcu_exp_par_gp_kworker->task, SCHED_FIFO,
+ &param);
+}
+
+static inline void rcu_alloc_par_gp_wq(void)
+{
+}
+#else /* !CONFIG_RCU_EXP_KTHREAD */
+struct workqueue_struct *rcu_par_gp_wq;
+
+static void __init rcu_start_exp_gp_kworkers(void)
+{
+}
+
+static inline void rcu_alloc_par_gp_wq(void)
+{
+ rcu_par_gp_wq = alloc_workqueue("rcu_par_gp", WQ_MEM_RECLAIM, 0);
+ WARN_ON(!rcu_par_gp_wq);
+}
+#endif /* CONFIG_RCU_EXP_KTHREAD */
+
/*
* Spawn the kthreads that handle RCU's grace periods.
*/
@@ -4480,6 +4580,7 @@ static int __init rcu_spawn_gp_kthread(void)
struct rcu_node *rnp;
struct sched_param sp;
struct task_struct *t;
+ struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
rcu_scheduler_fully_active = 1;
t = kthread_create(rcu_gp_kthread, NULL, "%s", rcu_state.name);
@@ -4497,9 +4598,17 @@ static int __init rcu_spawn_gp_kthread(void)
smp_store_release(&rcu_state.gp_kthread, t); /* ^^^ */
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
wake_up_process(t);
- rcu_spawn_nocb_kthreads();
- rcu_spawn_boost_kthreads();
+ /* This is a pre-SMP initcall, we expect a single CPU */
+ WARN_ON(num_online_cpus() > 1);
+ /*
+ * Those kthreads couldn't be created on rcu_init() -> rcutree_prepare_cpu()
+ * due to rcu_scheduler_fully_active.
+ */
+ rcu_spawn_cpu_nocb_kthread(smp_processor_id());
+ rcu_spawn_one_boost_kthread(rdp->mynode);
rcu_spawn_core_kthreads();
+ /* Create kthread worker for expedited GPs */
+ rcu_start_exp_gp_kworkers();
return 0;
}
early_initcall(rcu_spawn_gp_kthread);
@@ -4745,7 +4854,6 @@ static void __init rcu_dump_rcu_node_tree(void)
}
struct workqueue_struct *rcu_gp_wq;
-struct workqueue_struct *rcu_par_gp_wq;
static void __init kfree_rcu_batch_init(void)
{
@@ -4782,7 +4890,7 @@ static void __init kfree_rcu_batch_init(void)
void __init rcu_init(void)
{
- int cpu;
+ int cpu = smp_processor_id();
rcu_early_boot_tests();
@@ -4802,17 +4910,15 @@ void __init rcu_init(void)
* or the scheduler are operational.
*/
pm_notifier(rcu_pm_notify, 0);
- for_each_online_cpu(cpu) {
- rcutree_prepare_cpu(cpu);
- rcu_cpu_starting(cpu);
- rcutree_online_cpu(cpu);
- }
+ WARN_ON(num_online_cpus() > 1); // Only one CPU this early in boot.
+ rcutree_prepare_cpu(cpu);
+ rcu_cpu_starting(cpu);
+ rcutree_online_cpu(cpu);
/* Create workqueue for Tree SRCU and for expedited GPs. */
rcu_gp_wq = alloc_workqueue("rcu_gp", WQ_MEM_RECLAIM, 0);
WARN_ON(!rcu_gp_wq);
- rcu_par_gp_wq = alloc_workqueue("rcu_par_gp", WQ_MEM_RECLAIM, 0);
- WARN_ON(!rcu_par_gp_wq);
+ rcu_alloc_par_gp_wq();
/* Fill in default value for rcutree.qovld boot parameter. */
/* -After- the rcu_node ->lock fields are initialized! */
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index 926673ebe355..2ccf5845957d 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -10,6 +10,7 @@
*/
#include <linux/cache.h>
+#include <linux/kthread.h>
#include <linux/spinlock.h>
#include <linux/rtmutex.h>
#include <linux/threads.h>
@@ -23,7 +24,11 @@
/* Communicate arguments to a workqueue handler. */
struct rcu_exp_work {
unsigned long rew_s;
+#ifdef CONFIG_RCU_EXP_KTHREAD
+ struct kthread_work rew_work;
+#else
struct work_struct rew_work;
+#endif /* CONFIG_RCU_EXP_KTHREAD */
};
/* RCU's kthread states for tracing. */
@@ -254,6 +259,7 @@ struct rcu_data {
unsigned long rcu_onl_gp_seq; /* ->gp_seq at last online. */
short rcu_onl_gp_flags; /* ->gp_flags at last online. */
unsigned long last_fqs_resched; /* Time of last rcu_resched(). */
+ unsigned long last_sched_clock; /* Jiffies of last rcu_sched_clock_irq(). */
int cpu;
};
@@ -364,6 +370,7 @@ struct rcu_state {
arch_spinlock_t ofl_lock ____cacheline_internodealigned_in_smp;
/* Synchronize offline with */
/* GP pre-initialization. */
+ int nocb_is_setup; /* nocb is setup from boot */
};
/* Values for rcu_state structure's gp_flags field. */
@@ -421,7 +428,6 @@ static void rcu_preempt_boost_start_gp(struct rcu_node *rnp);
static bool rcu_is_callbacks_kthread(void);
static void rcu_cpu_kthread_setup(unsigned int cpu);
static void rcu_spawn_one_boost_kthread(struct rcu_node *rnp);
-static void __init rcu_spawn_boost_kthreads(void);
static bool rcu_preempt_has_tasks(struct rcu_node *rnp);
static bool rcu_preempt_need_deferred_qs(struct task_struct *t);
static void rcu_preempt_deferred_qs(struct task_struct *t);
@@ -439,7 +445,6 @@ static int rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp, int level);
static bool do_nocb_deferred_wakeup(struct rcu_data *rdp);
static void rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp);
static void rcu_spawn_cpu_nocb_kthread(int cpu);
-static void __init rcu_spawn_nocb_kthreads(void);
static void show_rcu_nocb_state(struct rcu_data *rdp);
static void rcu_nocb_lock(struct rcu_data *rdp);
static void rcu_nocb_unlock(struct rcu_data *rdp);
diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h
index 60197ea24ceb..0f70f62039a9 100644
--- a/kernel/rcu/tree_exp.h
+++ b/kernel/rcu/tree_exp.h
@@ -334,15 +334,13 @@ fastpath:
* Select the CPUs within the specified rcu_node that the upcoming
* expedited grace period needs to wait for.
*/
-static void sync_rcu_exp_select_node_cpus(struct work_struct *wp)
+static void __sync_rcu_exp_select_node_cpus(struct rcu_exp_work *rewp)
{
int cpu;
unsigned long flags;
unsigned long mask_ofl_test;
unsigned long mask_ofl_ipi;
int ret;
- struct rcu_exp_work *rewp =
- container_of(wp, struct rcu_exp_work, rew_work);
struct rcu_node *rnp = container_of(rewp, struct rcu_node, rew);
raw_spin_lock_irqsave_rcu_node(rnp, flags);
@@ -417,13 +415,119 @@ retry_ipi:
rcu_report_exp_cpu_mult(rnp, mask_ofl_test, false);
}
+static void rcu_exp_sel_wait_wake(unsigned long s);
+
+#ifdef CONFIG_RCU_EXP_KTHREAD
+static void sync_rcu_exp_select_node_cpus(struct kthread_work *wp)
+{
+ struct rcu_exp_work *rewp =
+ container_of(wp, struct rcu_exp_work, rew_work);
+
+ __sync_rcu_exp_select_node_cpus(rewp);
+}
+
+static inline bool rcu_gp_par_worker_started(void)
+{
+ return !!READ_ONCE(rcu_exp_par_gp_kworker);
+}
+
+static inline void sync_rcu_exp_select_cpus_queue_work(struct rcu_node *rnp)
+{
+ kthread_init_work(&rnp->rew.rew_work, sync_rcu_exp_select_node_cpus);
+ /*
+ * Use rcu_exp_par_gp_kworker, because flushing a work item from
+ * another work item on the same kthread worker can result in
+ * deadlock.
+ */
+ kthread_queue_work(rcu_exp_par_gp_kworker, &rnp->rew.rew_work);
+}
+
+static inline void sync_rcu_exp_select_cpus_flush_work(struct rcu_node *rnp)
+{
+ kthread_flush_work(&rnp->rew.rew_work);
+}
+
+/*
+ * Work-queue handler to drive an expedited grace period forward.
+ */
+static void wait_rcu_exp_gp(struct kthread_work *wp)
+{
+ struct rcu_exp_work *rewp;
+
+ rewp = container_of(wp, struct rcu_exp_work, rew_work);
+ rcu_exp_sel_wait_wake(rewp->rew_s);
+}
+
+static inline void synchronize_rcu_expedited_queue_work(struct rcu_exp_work *rew)
+{
+ kthread_init_work(&rew->rew_work, wait_rcu_exp_gp);
+ kthread_queue_work(rcu_exp_gp_kworker, &rew->rew_work);
+}
+
+static inline void synchronize_rcu_expedited_destroy_work(struct rcu_exp_work *rew)
+{
+}
+#else /* !CONFIG_RCU_EXP_KTHREAD */
+static void sync_rcu_exp_select_node_cpus(struct work_struct *wp)
+{
+ struct rcu_exp_work *rewp =
+ container_of(wp, struct rcu_exp_work, rew_work);
+
+ __sync_rcu_exp_select_node_cpus(rewp);
+}
+
+static inline bool rcu_gp_par_worker_started(void)
+{
+ return !!READ_ONCE(rcu_par_gp_wq);
+}
+
+static inline void sync_rcu_exp_select_cpus_queue_work(struct rcu_node *rnp)
+{
+ int cpu = find_next_bit(&rnp->ffmask, BITS_PER_LONG, -1);
+
+ INIT_WORK(&rnp->rew.rew_work, sync_rcu_exp_select_node_cpus);
+ /* If all offline, queue the work on an unbound CPU. */
+ if (unlikely(cpu > rnp->grphi - rnp->grplo))
+ cpu = WORK_CPU_UNBOUND;
+ else
+ cpu += rnp->grplo;
+ queue_work_on(cpu, rcu_par_gp_wq, &rnp->rew.rew_work);
+}
+
+static inline void sync_rcu_exp_select_cpus_flush_work(struct rcu_node *rnp)
+{
+ flush_work(&rnp->rew.rew_work);
+}
+
+/*
+ * Work-queue handler to drive an expedited grace period forward.
+ */
+static void wait_rcu_exp_gp(struct work_struct *wp)
+{
+ struct rcu_exp_work *rewp;
+
+ rewp = container_of(wp, struct rcu_exp_work, rew_work);
+ rcu_exp_sel_wait_wake(rewp->rew_s);
+}
+
+static inline void synchronize_rcu_expedited_queue_work(struct rcu_exp_work *rew)
+{
+ INIT_WORK_ONSTACK(&rew->rew_work, wait_rcu_exp_gp);
+ queue_work(rcu_gp_wq, &rew->rew_work);
+}
+
+static inline void synchronize_rcu_expedited_destroy_work(struct rcu_exp_work *rew)
+{
+ destroy_work_on_stack(&rew->rew_work);
+}
+#endif /* CONFIG_RCU_EXP_KTHREAD */
+
/*
* Select the nodes that the upcoming expedited grace period needs
* to wait for.
*/
static void sync_rcu_exp_select_cpus(void)
{
- int cpu;
struct rcu_node *rnp;
trace_rcu_exp_grace_period(rcu_state.name, rcu_exp_gp_seq_endval(), TPS("reset"));
@@ -435,28 +539,21 @@ static void sync_rcu_exp_select_cpus(void)
rnp->exp_need_flush = false;
if (!READ_ONCE(rnp->expmask))
continue; /* Avoid early boot non-existent wq. */
- if (!READ_ONCE(rcu_par_gp_wq) ||
+ if (!rcu_gp_par_worker_started() ||
rcu_scheduler_active != RCU_SCHEDULER_RUNNING ||
rcu_is_last_leaf_node(rnp)) {
- /* No workqueues yet or last leaf, do direct call. */
+ /* No worker started yet or last leaf, do direct call. */
sync_rcu_exp_select_node_cpus(&rnp->rew.rew_work);
continue;
}
- INIT_WORK(&rnp->rew.rew_work, sync_rcu_exp_select_node_cpus);
- cpu = find_next_bit(&rnp->ffmask, BITS_PER_LONG, -1);
- /* If all offline, queue the work on an unbound CPU. */
- if (unlikely(cpu > rnp->grphi - rnp->grplo))
- cpu = WORK_CPU_UNBOUND;
- else
- cpu += rnp->grplo;
- queue_work_on(cpu, rcu_par_gp_wq, &rnp->rew.rew_work);
+ sync_rcu_exp_select_cpus_queue_work(rnp);
rnp->exp_need_flush = true;
}
- /* Wait for workqueue jobs (if any) to complete. */
+ /* Wait for jobs (if any) to complete. */
rcu_for_each_leaf_node(rnp)
if (rnp->exp_need_flush)
- flush_work(&rnp->rew.rew_work);
+ sync_rcu_exp_select_cpus_flush_work(rnp);
}
/*
@@ -496,7 +593,7 @@ static void synchronize_rcu_expedited_wait(void)
struct rcu_node *rnp_root = rcu_get_root();
trace_rcu_exp_grace_period(rcu_state.name, rcu_exp_gp_seq_endval(), TPS("startwait"));
- jiffies_stall = rcu_jiffies_till_stall_check();
+ jiffies_stall = rcu_exp_jiffies_till_stall_check();
jiffies_start = jiffies;
if (tick_nohz_full_enabled() && rcu_inkernel_boot_has_ended()) {
if (synchronize_rcu_expedited_wait_once(1))
@@ -571,7 +668,7 @@ static void synchronize_rcu_expedited_wait(void)
dump_cpu_task(cpu);
}
}
- jiffies_stall = 3 * rcu_jiffies_till_stall_check() + 3;
+ jiffies_stall = 3 * rcu_exp_jiffies_till_stall_check() + 3;
}
}
@@ -622,17 +719,6 @@ static void rcu_exp_sel_wait_wake(unsigned long s)
rcu_exp_wait_wake(s);
}
-/*
- * Work-queue handler to drive an expedited grace period forward.
- */
-static void wait_rcu_exp_gp(struct work_struct *wp)
-{
- struct rcu_exp_work *rewp;
-
- rewp = container_of(wp, struct rcu_exp_work, rew_work);
- rcu_exp_sel_wait_wake(rewp->rew_s);
-}
-
#ifdef CONFIG_PREEMPT_RCU
/*
@@ -848,20 +934,19 @@ void synchronize_rcu_expedited(void)
} else {
/* Marshall arguments & schedule the expedited grace period. */
rew.rew_s = s;
- INIT_WORK_ONSTACK(&rew.rew_work, wait_rcu_exp_gp);
- queue_work(rcu_gp_wq, &rew.rew_work);
+ synchronize_rcu_expedited_queue_work(&rew);
}
/* Wait for expedited grace period to complete. */
rnp = rcu_get_root();
wait_event(rnp->exp_wq[rcu_seq_ctr(s) & 0x3],
sync_exp_work_done(s));
- smp_mb(); /* Workqueue actions happen before return. */
+ smp_mb(); /* Work actions happen before return. */
/* Let the next expedited grace period start. */
mutex_unlock(&rcu_state.exp_mutex);
if (likely(!boottime))
- destroy_work_on_stack(&rew.rew_work);
+ synchronize_rcu_expedited_destroy_work(&rew);
}
EXPORT_SYMBOL_GPL(synchronize_rcu_expedited);
diff --git a/kernel/rcu/tree_nocb.h b/kernel/rcu/tree_nocb.h
index 636d0546a4e9..46694e13398a 100644
--- a/kernel/rcu/tree_nocb.h
+++ b/kernel/rcu/tree_nocb.h
@@ -60,9 +60,6 @@ static inline bool rcu_current_is_nocb_kthread(struct rcu_data *rdp)
* Parse the boot-time rcu_nocb_mask CPU list from the kernel parameters.
* If the list is invalid, a warning is emitted and all CPUs are offloaded.
*/
-
-static bool rcu_nocb_is_setup;
-
static int __init rcu_nocb_setup(char *str)
{
alloc_bootmem_cpumask_var(&rcu_nocb_mask);
@@ -72,7 +69,7 @@ static int __init rcu_nocb_setup(char *str)
cpumask_setall(rcu_nocb_mask);
}
}
- rcu_nocb_is_setup = true;
+ rcu_state.nocb_is_setup = true;
return 1;
}
__setup("rcu_nocbs", rcu_nocb_setup);
@@ -215,14 +212,6 @@ static void rcu_init_one_nocb(struct rcu_node *rnp)
init_swait_queue_head(&rnp->nocb_gp_wq[1]);
}
-/* Is the specified CPU a no-CBs CPU? */
-bool rcu_is_nocb_cpu(int cpu)
-{
- if (cpumask_available(rcu_nocb_mask))
- return cpumask_test_cpu(cpu, rcu_nocb_mask);
- return false;
-}
-
static bool __wake_nocb_gp(struct rcu_data *rdp_gp,
struct rcu_data *rdp,
bool force, unsigned long flags)
@@ -1180,10 +1169,10 @@ void __init rcu_init_nohz(void)
return;
}
}
- rcu_nocb_is_setup = true;
+ rcu_state.nocb_is_setup = true;
}
- if (!rcu_nocb_is_setup)
+ if (!rcu_state.nocb_is_setup)
return;
#if defined(CONFIG_NO_HZ_FULL)
@@ -1241,7 +1230,7 @@ static void rcu_spawn_cpu_nocb_kthread(int cpu)
struct task_struct *t;
struct sched_param sp;
- if (!rcu_scheduler_fully_active || !rcu_nocb_is_setup)
+ if (!rcu_scheduler_fully_active || !rcu_state.nocb_is_setup)
return;
/* If there already is an rcuo kthread, then nothing to do. */
@@ -1277,22 +1266,6 @@ static void rcu_spawn_cpu_nocb_kthread(int cpu)
WRITE_ONCE(rdp->nocb_gp_kthread, rdp_gp->nocb_gp_kthread);
}
-/*
- * Once the scheduler is running, spawn rcuo kthreads for all online
- * no-CBs CPUs. This assumes that the early_initcall()s happen before
- * non-boot CPUs come online -- if this changes, we will need to add
- * some mutual exclusion.
- */
-static void __init rcu_spawn_nocb_kthreads(void)
-{
- int cpu;
-
- if (rcu_nocb_is_setup) {
- for_each_online_cpu(cpu)
- rcu_spawn_cpu_nocb_kthread(cpu);
- }
-}
-
/* How many CB CPU IDs per GP kthread? Default of -1 for sqrt(nr_cpu_ids). */
static int rcu_nocb_gp_stride = -1;
module_param(rcu_nocb_gp_stride, int, 0444);
@@ -1549,10 +1522,6 @@ static void rcu_spawn_cpu_nocb_kthread(int cpu)
{
}
-static void __init rcu_spawn_nocb_kthreads(void)
-{
-}
-
static void show_rcu_nocb_state(struct rcu_data *rdp)
{
}
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 8360d86db1c0..c8ba0fe17267 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -486,6 +486,7 @@ rcu_preempt_deferred_qs_irqrestore(struct task_struct *t, unsigned long flags)
t->rcu_read_unlock_special.s = 0;
if (special.b.need_qs) {
if (IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD)) {
+ rdp->cpu_no_qs.b.norm = false;
rcu_report_qs_rdp(rdp);
udelay(rcu_unlock_delay);
} else {
@@ -660,7 +661,13 @@ static void rcu_read_unlock_special(struct task_struct *t)
expboost && !rdp->defer_qs_iw_pending && cpu_online(rdp->cpu)) {
// Get scheduler to re-evaluate and call hooks.
// If !IRQ_WORK, FQS scan will eventually IPI.
- init_irq_work(&rdp->defer_qs_iw, rcu_preempt_deferred_qs_handler);
+ if (IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD) &&
+ IS_ENABLED(CONFIG_PREEMPT_RT))
+ rdp->defer_qs_iw = IRQ_WORK_INIT_HARD(
+ rcu_preempt_deferred_qs_handler);
+ else
+ init_irq_work(&rdp->defer_qs_iw,
+ rcu_preempt_deferred_qs_handler);
rdp->defer_qs_iw_pending = true;
irq_work_queue_on(&rdp->defer_qs_iw, rdp->cpu);
}
@@ -1124,7 +1131,8 @@ static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags)
__releases(rnp->lock)
{
raw_lockdep_assert_held_rcu_node(rnp);
- if (!rcu_preempt_blocked_readers_cgp(rnp) && rnp->exp_tasks == NULL) {
+ if (!rnp->boost_kthread_task ||
+ (!rcu_preempt_blocked_readers_cgp(rnp) && !rnp->exp_tasks)) {
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
return;
}
@@ -1226,18 +1234,6 @@ static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu)
free_cpumask_var(cm);
}
-/*
- * Spawn boost kthreads -- called as soon as the scheduler is running.
- */
-static void __init rcu_spawn_boost_kthreads(void)
-{
- struct rcu_node *rnp;
-
- rcu_for_each_leaf_node(rnp)
- if (rcu_rnp_online_cpus(rnp))
- rcu_spawn_one_boost_kthread(rnp);
-}
-
#else /* #ifdef CONFIG_RCU_BOOST */
static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags)
@@ -1263,10 +1259,6 @@ static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu)
{
}
-static void __init rcu_spawn_boost_kthreads(void)
-{
-}
-
#endif /* #else #ifdef CONFIG_RCU_BOOST */
/*
diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h
index 0c5d8516516a..a001e1e7a992 100644
--- a/kernel/rcu/tree_stall.h
+++ b/kernel/rcu/tree_stall.h
@@ -25,6 +25,34 @@ int sysctl_max_rcu_stall_to_panic __read_mostly;
#define RCU_STALL_MIGHT_DIV 8
#define RCU_STALL_MIGHT_MIN (2 * HZ)
+int rcu_exp_jiffies_till_stall_check(void)
+{
+ int cpu_stall_timeout = READ_ONCE(rcu_exp_cpu_stall_timeout);
+ int exp_stall_delay_delta = 0;
+ int till_stall_check;
+
+ // Zero says to use rcu_cpu_stall_timeout, but in milliseconds.
+ if (!cpu_stall_timeout)
+ cpu_stall_timeout = jiffies_to_msecs(rcu_jiffies_till_stall_check());
+
+ // Limit check must be consistent with the Kconfig limits for
+ // CONFIG_RCU_EXP_CPU_STALL_TIMEOUT, so check the allowed range.
+ // The minimum clamped value is "2UL", because at least one full
+ // tick has to be guaranteed.
+ till_stall_check = clamp(msecs_to_jiffies(cpu_stall_timeout), 2UL, 21UL * HZ);
+
+ if (cpu_stall_timeout && jiffies_to_msecs(till_stall_check) != cpu_stall_timeout)
+ WRITE_ONCE(rcu_exp_cpu_stall_timeout, jiffies_to_msecs(till_stall_check));
+
+#ifdef CONFIG_PROVE_RCU
+ /* Add extra ~25% out of till_stall_check. */
+ exp_stall_delay_delta = ((till_stall_check * 25) / 100) + 1;
+#endif
+
+ return till_stall_check + exp_stall_delay_delta;
+}
+EXPORT_SYMBOL_GPL(rcu_exp_jiffies_till_stall_check);
+
/* Limit-check stall timeouts specified at boottime and runtime. */
int rcu_jiffies_till_stall_check(void)
{
@@ -565,9 +593,9 @@ static void print_other_cpu_stall(unsigned long gp_seq, unsigned long gps)
for_each_possible_cpu(cpu)
totqlen += rcu_get_n_cbs_cpu(cpu);
- pr_cont("\t(detected by %d, t=%ld jiffies, g=%ld, q=%lu)\n",
+ pr_cont("\t(detected by %d, t=%ld jiffies, g=%ld, q=%lu ncpus=%d)\n",
smp_processor_id(), (long)(jiffies - gps),
- (long)rcu_seq_current(&rcu_state.gp_seq), totqlen);
+ (long)rcu_seq_current(&rcu_state.gp_seq), totqlen, rcu_state.n_online_cpus);
if (ndetected) {
rcu_dump_cpu_stacks();
@@ -626,9 +654,9 @@ static void print_cpu_stall(unsigned long gps)
raw_spin_unlock_irqrestore_rcu_node(rdp->mynode, flags);
for_each_possible_cpu(cpu)
totqlen += rcu_get_n_cbs_cpu(cpu);
- pr_cont("\t(t=%lu jiffies g=%ld q=%lu)\n",
+ pr_cont("\t(t=%lu jiffies g=%ld q=%lu ncpus=%d)\n",
jiffies - gps,
- (long)rcu_seq_current(&rcu_state.gp_seq), totqlen);
+ (long)rcu_seq_current(&rcu_state.gp_seq), totqlen, rcu_state.n_online_cpus);
rcu_check_gp_kthread_expired_fqs_timer();
rcu_check_gp_kthread_starvation();
diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c
index 180ff9c41fa8..fc7fef575606 100644
--- a/kernel/rcu/update.c
+++ b/kernel/rcu/update.c
@@ -506,6 +506,8 @@ EXPORT_SYMBOL_GPL(rcu_cpu_stall_suppress);
module_param(rcu_cpu_stall_suppress, int, 0644);
int rcu_cpu_stall_timeout __read_mostly = CONFIG_RCU_CPU_STALL_TIMEOUT;
module_param(rcu_cpu_stall_timeout, int, 0644);
+int rcu_exp_cpu_stall_timeout __read_mostly = CONFIG_RCU_EXP_CPU_STALL_TIMEOUT;
+module_param(rcu_exp_cpu_stall_timeout, int, 0644);
#endif /* #ifdef CONFIG_RCU_STALL_COMMON */
// Suppress boot-time RCU CPU stall warnings and rcutorture writer stall
diff --git a/kernel/scftorture.c b/kernel/scftorture.c
index dcb0410950e4..5d113aa59e77 100644
--- a/kernel/scftorture.c
+++ b/kernel/scftorture.c
@@ -267,9 +267,10 @@ static void scf_handler(void *scfc_in)
}
this_cpu_inc(scf_invoked_count);
if (longwait <= 0) {
- if (!(r & 0xffc0))
+ if (!(r & 0xffc0)) {
udelay(r & 0x3f);
- goto out;
+ goto out;
+ }
}
if (r & 0xfff)
goto out;
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 51efaabac3e4..2a05096559a2 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -6382,7 +6382,7 @@ static void __sched notrace __schedule(unsigned int sched_mode)
migrate_disable_switch(rq, prev);
psi_sched_switch(prev, next, !task_on_rq_queued(prev));
- trace_sched_switch(sched_mode & SM_MASK_PREEMPT, prev_state, prev, next);
+ trace_sched_switch(sched_mode & SM_MASK_PREEMPT, prev, next, prev_state);
/* Also unlocks the rq: */
rq = context_switch(rq, prev, next, &rf);
@@ -8415,6 +8415,18 @@ static void __init preempt_dynamic_init(void)
}
}
+#define PREEMPT_MODEL_ACCESSOR(mode) \
+ bool preempt_model_##mode(void) \
+ { \
+ WARN_ON_ONCE(preempt_dynamic_mode == preempt_dynamic_undefined); \
+ return preempt_dynamic_mode == preempt_dynamic_##mode; \
+ } \
+ EXPORT_SYMBOL_GPL(preempt_model_##mode)
+
+PREEMPT_MODEL_ACCESSOR(none);
+PREEMPT_MODEL_ACCESSOR(voluntary);
+PREEMPT_MODEL_ACCESSOR(full);
+
#else /* !CONFIG_PREEMPT_DYNAMIC */
static inline void preempt_dynamic_init(void) { }
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index d4bd299d67ab..a68482d66535 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3829,11 +3829,11 @@ static void attach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *s
se->avg.runnable_sum = se->avg.runnable_avg * divider;
- se->avg.load_sum = divider;
- if (se_weight(se)) {
- se->avg.load_sum =
- div_u64(se->avg.load_avg * se->avg.load_sum, se_weight(se));
- }
+ se->avg.load_sum = se->avg.load_avg * divider;
+ if (se_weight(se) < se->avg.load_sum)
+ se->avg.load_sum = div_u64(se->avg.load_sum, se_weight(se));
+ else
+ se->avg.load_sum = 1;
enqueue_load_avg(cfs_rq, se);
cfs_rq->avg.util_avg += se->avg.util_avg;
diff --git a/kernel/smp.c b/kernel/smp.c
index 65a630f62363..df9393aeae28 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -183,7 +183,9 @@ static DEFINE_PER_CPU(smp_call_func_t, cur_csd_func);
static DEFINE_PER_CPU(void *, cur_csd_info);
static DEFINE_PER_CPU(struct cfd_seq_local, cfd_seq_local);
-#define CSD_LOCK_TIMEOUT (5ULL * NSEC_PER_SEC)
+static ulong csd_lock_timeout = 5000; /* CSD lock timeout in milliseconds. */
+module_param(csd_lock_timeout, ulong, 0444);
+
static atomic_t csd_bug_count = ATOMIC_INIT(0);
static u64 cfd_seq;
@@ -329,6 +331,7 @@ static bool csd_lock_wait_toolong(struct __call_single_data *csd, u64 ts0, u64 *
u64 ts2, ts_delta;
call_single_data_t *cpu_cur_csd;
unsigned int flags = READ_ONCE(csd->node.u_flags);
+ unsigned long long csd_lock_timeout_ns = csd_lock_timeout * NSEC_PER_MSEC;
if (!(flags & CSD_FLAG_LOCK)) {
if (!unlikely(*bug_id))
@@ -341,7 +344,7 @@ static bool csd_lock_wait_toolong(struct __call_single_data *csd, u64 ts0, u64 *
ts2 = sched_clock();
ts_delta = ts2 - *ts1;
- if (likely(ts_delta <= CSD_LOCK_TIMEOUT))
+ if (likely(ts_delta <= csd_lock_timeout_ns || csd_lock_timeout_ns == 0))
return false;
firsttime = !*bug_id;
diff --git a/kernel/smpboot.c b/kernel/smpboot.c
index f6bc0bc8a2aa..b9f54544e749 100644
--- a/kernel/smpboot.c
+++ b/kernel/smpboot.c
@@ -392,6 +392,13 @@ int cpu_check_up_prepare(int cpu)
*/
return -EAGAIN;
+ case CPU_UP_PREPARE:
+ /*
+ * Timeout while waiting for the CPU to show up. Allow to try
+ * again later.
+ */
+ return 0;
+
default:
/* Should not happen. Famous last words. */
diff --git a/kernel/task_work.c b/kernel/task_work.c
index c59e1a49bc40..dff75bcde151 100644
--- a/kernel/task_work.c
+++ b/kernel/task_work.c
@@ -12,12 +12,22 @@ static struct callback_head work_exited; /* all we need is ->next == NULL */
* @notify: how to notify the targeted task
*
* Queue @work for task_work_run() below and notify the @task if @notify
- * is @TWA_RESUME or @TWA_SIGNAL. @TWA_SIGNAL works like signals, in that the
- * it will interrupt the targeted task and run the task_work. @TWA_RESUME
- * work is run only when the task exits the kernel and returns to user mode,
- * or before entering guest mode. Fails if the @task is exiting/exited and thus
- * it can't process this @work. Otherwise @work->func() will be called when the
- * @task goes through one of the aforementioned transitions, or exits.
+ * is @TWA_RESUME, @TWA_SIGNAL, or @TWA_SIGNAL_NO_IPI.
+ *
+ * @TWA_SIGNAL works like signals, in that the it will interrupt the targeted
+ * task and run the task_work, regardless of whether the task is currently
+ * running in the kernel or userspace.
+ * @TWA_SIGNAL_NO_IPI works like @TWA_SIGNAL, except it doesn't send a
+ * reschedule IPI to force the targeted task to reschedule and run task_work.
+ * This can be advantageous if there's no strict requirement that the
+ * task_work be run as soon as possible, just whenever the task enters the
+ * kernel anyway.
+ * @TWA_RESUME work is run only when the task exits the kernel and returns to
+ * user mode, or before entering guest mode.
+ *
+ * Fails if the @task is exiting/exited and thus it can't process this @work.
+ * Otherwise @work->func() will be called when the @task goes through one of
+ * the aforementioned transitions, or exits.
*
* If the targeted task is exiting, then an error is returned and the work item
* is not queued. It's up to the caller to arrange for an alternative mechanism
@@ -53,6 +63,9 @@ int task_work_add(struct task_struct *task, struct callback_head *work,
case TWA_SIGNAL:
set_notify_signal(task);
break;
+ case TWA_SIGNAL_NO_IPI:
+ __set_notify_signal(task);
+ break;
default:
WARN_ON_ONCE(1);
break;
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index dcdcb85121e4..3b1398fbddaf 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -482,7 +482,7 @@ static __always_inline u64 __ktime_get_fast_ns(struct tk_fast *tkf)
* of the following timestamps. Callers need to be aware of that and
* deal with it.
*/
-u64 ktime_get_mono_fast_ns(void)
+u64 notrace ktime_get_mono_fast_ns(void)
{
return __ktime_get_fast_ns(&tk_fast_mono);
}
@@ -494,7 +494,7 @@ EXPORT_SYMBOL_GPL(ktime_get_mono_fast_ns);
* Contrary to ktime_get_mono_fast_ns() this is always correct because the
* conversion factor is not affected by NTP/PTP correction.
*/
-u64 ktime_get_raw_fast_ns(void)
+u64 notrace ktime_get_raw_fast_ns(void)
{
return __ktime_get_fast_ns(&tk_fast_raw);
}
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 2c43e327a619..bf5da6c4e999 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -144,6 +144,7 @@ config TRACING
select BINARY_PRINTF
select EVENT_TRACING
select TRACE_CLOCK
+ select TASKS_RCU if PREEMPTION
config GENERIC_TRACER
bool
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 4d5629196d01..10a32b0f2deb 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -145,13 +145,14 @@ static void trace_note_time(struct blk_trace *bt)
local_irq_restore(flags);
}
-void __trace_note_message(struct blk_trace *bt, struct blkcg *blkcg,
- const char *fmt, ...)
+void __blk_trace_note_message(struct blk_trace *bt,
+ struct cgroup_subsys_state *css, const char *fmt, ...)
{
int n;
va_list args;
unsigned long flags;
char *buf;
+ u64 cgid = 0;
if (unlikely(bt->trace_state != Blktrace_running &&
!blk_tracer_enabled))
@@ -170,17 +171,16 @@ void __trace_note_message(struct blk_trace *bt, struct blkcg *blkcg,
n = vscnprintf(buf, BLK_TN_MAX_MSG, fmt, args);
va_end(args);
- if (!(blk_tracer_flags.val & TRACE_BLK_OPT_CGROUP))
- blkcg = NULL;
#ifdef CONFIG_BLK_CGROUP
- trace_note(bt, current->pid, BLK_TN_MESSAGE, buf, n,
- blkcg ? cgroup_id(blkcg->css.cgroup) : 1);
-#else
- trace_note(bt, current->pid, BLK_TN_MESSAGE, buf, n, 0);
+ if (css && (blk_tracer_flags.val & TRACE_BLK_OPT_CGROUP))
+ cgid = cgroup_id(css->cgroup);
+ else
+ cgid = 1;
#endif
+ trace_note(bt, current->pid, BLK_TN_MESSAGE, buf, n, cgid);
local_irq_restore(flags);
}
-EXPORT_SYMBOL_GPL(__trace_note_message);
+EXPORT_SYMBOL_GPL(__blk_trace_note_message);
static int act_log_check(struct blk_trace *bt, u32 what, sector_t sector,
pid_t pid)
@@ -411,7 +411,7 @@ static ssize_t blk_msg_write(struct file *filp, const char __user *buffer,
return PTR_ERR(msg);
bt = filp->private_data;
- __trace_note_message(bt, NULL, "%s", msg);
+ __blk_trace_note_message(bt, NULL, "%s", msg);
kfree(msg);
return count;
@@ -783,6 +783,7 @@ void blk_trace_shutdown(struct request_queue *q)
#ifdef CONFIG_BLK_CGROUP
static u64 blk_trace_bio_get_cgid(struct request_queue *q, struct bio *bio)
{
+ struct cgroup_subsys_state *blkcg_css;
struct blk_trace *bt;
/* We don't use the 'bt' value here except as an optimization... */
@@ -790,9 +791,10 @@ static u64 blk_trace_bio_get_cgid(struct request_queue *q, struct bio *bio)
if (!bt || !(blk_tracer_flags.val & TRACE_BLK_OPT_CGROUP))
return 0;
- if (!bio->bi_blkg)
+ blkcg_css = bio_blkcg_css(bio);
+ if (!blkcg_css)
return 0;
- return cgroup_id(bio_blkcg(bio)->css.cgroup);
+ return cgroup_id(blkcg_css->cgroup);
}
#else
static u64 blk_trace_bio_get_cgid(struct request_queue *q, struct bio *bio)
diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c
index 8f4fb328133a..a7e84c8543cb 100644
--- a/kernel/trace/fgraph.c
+++ b/kernel/trace/fgraph.c
@@ -404,9 +404,9 @@ free:
static void
ftrace_graph_probe_sched_switch(void *ignore, bool preempt,
- unsigned int prev_state,
struct task_struct *prev,
- struct task_struct *next)
+ struct task_struct *next,
+ unsigned int prev_state)
{
unsigned long long timestamp;
int index;
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 4f1d2f5e7263..af899b058c8d 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -7420,9 +7420,9 @@ ftrace_func_t ftrace_ops_get_func(struct ftrace_ops *ops)
static void
ftrace_filter_pid_sched_switch_probe(void *data, bool preempt,
- unsigned int prev_state,
struct task_struct *prev,
- struct task_struct *next)
+ struct task_struct *next,
+ unsigned int prev_state)
{
struct trace_array *tr = data;
struct trace_pid_list *pid_list;
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index e11e167b7809..f97de82d1342 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -773,9 +773,9 @@ void trace_event_follow_fork(struct trace_array *tr, bool enable)
static void
event_filter_pid_sched_switch_probe_pre(void *data, bool preempt,
- unsigned int prev_state,
struct task_struct *prev,
- struct task_struct *next)
+ struct task_struct *next,
+ unsigned int prev_state)
{
struct trace_array *tr = data;
struct trace_pid_list *no_pid_list;
@@ -799,9 +799,9 @@ event_filter_pid_sched_switch_probe_pre(void *data, bool preempt,
static void
event_filter_pid_sched_switch_probe_post(void *data, bool preempt,
- unsigned int prev_state,
struct task_struct *prev,
- struct task_struct *next)
+ struct task_struct *next,
+ unsigned int prev_state)
{
struct trace_array *tr = data;
struct trace_pid_list *no_pid_list;
diff --git a/kernel/trace/trace_osnoise.c b/kernel/trace/trace_osnoise.c
index e9ae1f33a7f0..afb92e2f0aea 100644
--- a/kernel/trace/trace_osnoise.c
+++ b/kernel/trace/trace_osnoise.c
@@ -1168,9 +1168,9 @@ thread_exit(struct osnoise_variables *osn_var, struct task_struct *t)
*/
static void
trace_sched_switch_callback(void *data, bool preempt,
- unsigned int prev_state,
struct task_struct *p,
- struct task_struct *n)
+ struct task_struct *n,
+ unsigned int prev_state)
{
struct osnoise_variables *osn_var = this_cpu_osn_var();
diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c
index 45796d8bd4b2..c9ffdcfe622e 100644
--- a/kernel/trace/trace_sched_switch.c
+++ b/kernel/trace/trace_sched_switch.c
@@ -22,8 +22,8 @@ static DEFINE_MUTEX(sched_register_mutex);
static void
probe_sched_switch(void *ignore, bool preempt,
- unsigned int prev_state,
- struct task_struct *prev, struct task_struct *next)
+ struct task_struct *prev, struct task_struct *next,
+ unsigned int prev_state)
{
int flags;
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index 46429f9a96fa..330aee1c1a49 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -426,8 +426,8 @@ tracing_sched_wakeup_trace(struct trace_array *tr,
static void notrace
probe_wakeup_sched_switch(void *ignore, bool preempt,
- unsigned int prev_state,
- struct task_struct *prev, struct task_struct *next)
+ struct task_struct *prev, struct task_struct *next,
+ unsigned int prev_state)
{
struct trace_array_cpu *data;
u64 T0, T1, delta;
diff --git a/lib/debugobjects.c b/lib/debugobjects.c
index 6946f8e204e3..337d797a7141 100644
--- a/lib/debugobjects.c
+++ b/lib/debugobjects.c
@@ -1,11 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Generic infrastructure for lifetime debugging of objects.
*
- * Started by Thomas Gleixner
- *
* Copyright (C) 2008, Thomas Gleixner <tglx@linutronix.de>
- *
- * For licencing details see kernel-base/COPYING
*/
#define pr_fmt(fmt) "ODEBUG: " fmt
diff --git a/lib/dim/net_dim.c b/lib/dim/net_dim.c
index 06811d866775..53f6b9c6e936 100644
--- a/lib/dim/net_dim.c
+++ b/lib/dim/net_dim.c
@@ -12,41 +12,41 @@
* Each profile size must be of NET_DIM_PARAMS_NUM_PROFILES
*/
#define NET_DIM_PARAMS_NUM_PROFILES 5
-#define NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE 256
-#define NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE 128
+#define NET_DIM_DEFAULT_RX_CQ_PKTS_FROM_EQE 256
+#define NET_DIM_DEFAULT_TX_CQ_PKTS_FROM_EQE 128
#define NET_DIM_DEF_PROFILE_CQE 1
#define NET_DIM_DEF_PROFILE_EQE 1
#define NET_DIM_RX_EQE_PROFILES { \
- {1, NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \
- {8, NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \
- {64, NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \
- {128, NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \
- {256, NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \
+ {.usec = 1, .pkts = NET_DIM_DEFAULT_RX_CQ_PKTS_FROM_EQE,}, \
+ {.usec = 8, .pkts = NET_DIM_DEFAULT_RX_CQ_PKTS_FROM_EQE,}, \
+ {.usec = 64, .pkts = NET_DIM_DEFAULT_RX_CQ_PKTS_FROM_EQE,}, \
+ {.usec = 128, .pkts = NET_DIM_DEFAULT_RX_CQ_PKTS_FROM_EQE,}, \
+ {.usec = 256, .pkts = NET_DIM_DEFAULT_RX_CQ_PKTS_FROM_EQE,} \
}
#define NET_DIM_RX_CQE_PROFILES { \
- {2, 256}, \
- {8, 128}, \
- {16, 64}, \
- {32, 64}, \
- {64, 64} \
+ {.usec = 2, .pkts = 256,}, \
+ {.usec = 8, .pkts = 128,}, \
+ {.usec = 16, .pkts = 64,}, \
+ {.usec = 32, .pkts = 64,}, \
+ {.usec = 64, .pkts = 64,} \
}
#define NET_DIM_TX_EQE_PROFILES { \
- {1, NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE}, \
- {8, NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE}, \
- {32, NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE}, \
- {64, NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE}, \
- {128, NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE} \
+ {.usec = 1, .pkts = NET_DIM_DEFAULT_TX_CQ_PKTS_FROM_EQE,}, \
+ {.usec = 8, .pkts = NET_DIM_DEFAULT_TX_CQ_PKTS_FROM_EQE,}, \
+ {.usec = 32, .pkts = NET_DIM_DEFAULT_TX_CQ_PKTS_FROM_EQE,}, \
+ {.usec = 64, .pkts = NET_DIM_DEFAULT_TX_CQ_PKTS_FROM_EQE,}, \
+ {.usec = 128, .pkts = NET_DIM_DEFAULT_TX_CQ_PKTS_FROM_EQE,} \
}
#define NET_DIM_TX_CQE_PROFILES { \
- {5, 128}, \
- {8, 64}, \
- {16, 32}, \
- {32, 32}, \
- {64, 32} \
+ {.usec = 5, .pkts = 128,}, \
+ {.usec = 8, .pkts = 64,}, \
+ {.usec = 16, .pkts = 32,}, \
+ {.usec = 32, .pkts = 32,}, \
+ {.usec = 64, .pkts = 32,} \
}
static const struct dim_cq_moder
diff --git a/lib/hexdump.c b/lib/hexdump.c
index 9301578f98e8..06833d404398 100644
--- a/lib/hexdump.c
+++ b/lib/hexdump.c
@@ -22,15 +22,33 @@ EXPORT_SYMBOL(hex_asc_upper);
*
* hex_to_bin() converts one hex digit to its actual value or -1 in case of bad
* input.
+ *
+ * This function is used to load cryptographic keys, so it is coded in such a
+ * way that there are no conditions or memory accesses that depend on data.
+ *
+ * Explanation of the logic:
+ * (ch - '9' - 1) is negative if ch <= '9'
+ * ('0' - 1 - ch) is negative if ch >= '0'
+ * we "and" these two values, so the result is negative if ch is in the range
+ * '0' ... '9'
+ * we are only interested in the sign, so we do a shift ">> 8"; note that right
+ * shift of a negative value is implementation-defined, so we cast the
+ * value to (unsigned) before the shift --- we have 0xffffff if ch is in
+ * the range '0' ... '9', 0 otherwise
+ * we "and" this value with (ch - '0' + 1) --- we have a value 1 ... 10 if ch is
+ * in the range '0' ... '9', 0 otherwise
+ * we add this value to -1 --- we have a value 0 ... 9 if ch is in the range '0'
+ * ... '9', -1 otherwise
+ * the next line is similar to the previous one, but we need to decode both
+ * uppercase and lowercase letters, so we use (ch & 0xdf), which converts
+ * lowercase to uppercase
*/
-int hex_to_bin(char ch)
+int hex_to_bin(unsigned char ch)
{
- if ((ch >= '0') && (ch <= '9'))
- return ch - '0';
- ch = tolower(ch);
- if ((ch >= 'a') && (ch <= 'f'))
- return ch - 'a' + 10;
- return -1;
+ unsigned char cu = ch & 0xdf;
+ return -1 +
+ ((ch - '0' + 1) & (unsigned)((ch - '9' - 1) & ('0' - 1 - ch)) >> 8) +
+ ((cu - 'A' + 11) & (unsigned)((cu - 'F' - 1) & ('A' - 1 - cu)) >> 8);
}
EXPORT_SYMBOL(hex_to_bin);
@@ -45,10 +63,13 @@ EXPORT_SYMBOL(hex_to_bin);
int hex2bin(u8 *dst, const char *src, size_t count)
{
while (count--) {
- int hi = hex_to_bin(*src++);
- int lo = hex_to_bin(*src++);
+ int hi, lo;
- if ((hi < 0) || (lo < 0))
+ hi = hex_to_bin(*src++);
+ if (unlikely(hi < 0))
+ return -EINVAL;
+ lo = hex_to_bin(*src++);
+ if (unlikely(lo < 0))
return -EINVAL;
*dst++ = (hi << 4) | lo;
diff --git a/lib/irq_poll.c b/lib/irq_poll.c
index 2f17b488d58e..2d5329a42105 100644
--- a/lib/irq_poll.c
+++ b/lib/irq_poll.c
@@ -188,14 +188,18 @@ EXPORT_SYMBOL(irq_poll_init);
static int irq_poll_cpu_dead(unsigned int cpu)
{
/*
- * If a CPU goes away, splice its entries to the current CPU
- * and trigger a run of the softirq
+ * If a CPU goes away, splice its entries to the current CPU and
+ * set the POLL softirq bit. The local_bh_disable()/enable() pair
+ * ensures that it is handled. Otherwise the current CPU could
+ * reach idle with the POLL softirq pending.
*/
+ local_bh_disable();
local_irq_disable();
list_splice_init(&per_cpu(blk_cpu_iopoll, cpu),
this_cpu_ptr(&blk_cpu_iopoll));
__raise_softirq_irqoff(IRQ_POLL_SOFTIRQ);
local_irq_enable();
+ local_bh_enable();
return 0;
}
diff --git a/lib/percpu-refcount.c b/lib/percpu-refcount.c
index af9302141bcf..e5c5315da274 100644
--- a/lib/percpu-refcount.c
+++ b/lib/percpu-refcount.c
@@ -76,6 +76,7 @@ int percpu_ref_init(struct percpu_ref *ref, percpu_ref_func_t *release,
data = kzalloc(sizeof(*ref->data), gfp);
if (!data) {
free_percpu((void __percpu *)ref->percpu_count_ptr);
+ ref->percpu_count_ptr = 0;
return -ENOMEM;
}
diff --git a/lib/strncpy_from_user.c b/lib/strncpy_from_user.c
index 08fc72d3ed16..6432b8c3e431 100644
--- a/lib/strncpy_from_user.c
+++ b/lib/strncpy_from_user.c
@@ -25,7 +25,7 @@
* hit it), 'max' is the address space maximum (and we return
* -EFAULT if we hit it).
*/
-static inline long do_strncpy_from_user(char *dst, const char __user *src,
+static __always_inline long do_strncpy_from_user(char *dst, const char __user *src,
unsigned long count, unsigned long max)
{
const struct word_at_a_time constants = WORD_AT_A_TIME_CONSTANTS;
diff --git a/lib/strnlen_user.c b/lib/strnlen_user.c
index bffa0ebf9f8b..feeb935a2299 100644
--- a/lib/strnlen_user.c
+++ b/lib/strnlen_user.c
@@ -20,7 +20,7 @@
* if it fits in a aligned 'long'. The caller needs to check
* the return value against "> max".
*/
-static inline long do_strnlen_user(const char __user *src, unsigned long count, unsigned long max)
+static __always_inline long do_strnlen_user(const char __user *src, unsigned long count, unsigned long max)
{
const struct word_at_a_time constants = WORD_AT_A_TIME_CONSTANTS;
unsigned long align, res = 0;
diff --git a/lib/xarray.c b/lib/xarray.c
index 4acc88ea7c21..54e646e8e6ee 100644
--- a/lib/xarray.c
+++ b/lib/xarray.c
@@ -207,6 +207,8 @@ static void *xas_descend(struct xa_state *xas, struct xa_node *node)
if (xa_is_sibling(entry)) {
offset = xa_to_sibling(entry);
entry = xa_entry(xas->xa, node, offset);
+ if (node->shift && xa_is_node(entry))
+ entry = XA_RETRY_ENTRY;
}
xas->xa_offset = offset;
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 7176af65b103..ff60bd7d74e0 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0-only
+#include <linux/blkdev.h>
#include <linux/wait.h>
#include <linux/rbtree.h>
#include <linux/kthread.h>
@@ -390,7 +391,6 @@ static void cgwb_release_workfn(struct work_struct *work)
{
struct bdi_writeback *wb = container_of(work, struct bdi_writeback,
release_work);
- struct blkcg *blkcg = css_to_blkcg(wb->blkcg_css);
struct backing_dev_info *bdi = wb->bdi;
mutex_lock(&wb->bdi->cgwb_release_mutex);
@@ -401,7 +401,7 @@ static void cgwb_release_workfn(struct work_struct *work)
mutex_unlock(&wb->bdi->cgwb_release_mutex);
/* triggers blkg destruction if no online users left */
- blkcg_unpin_online(blkcg);
+ blkcg_unpin_online(wb->blkcg_css);
fprop_local_destroy_percpu(&wb->memcg_completions);
@@ -446,7 +446,6 @@ static int cgwb_create(struct backing_dev_info *bdi,
{
struct mem_cgroup *memcg;
struct cgroup_subsys_state *blkcg_css;
- struct blkcg *blkcg;
struct list_head *memcg_cgwb_list, *blkcg_cgwb_list;
struct bdi_writeback *wb;
unsigned long flags;
@@ -454,9 +453,8 @@ static int cgwb_create(struct backing_dev_info *bdi,
memcg = mem_cgroup_from_css(memcg_css);
blkcg_css = cgroup_get_e_css(memcg_css->cgroup, &io_cgrp_subsys);
- blkcg = css_to_blkcg(blkcg_css);
memcg_cgwb_list = &memcg->cgwb_list;
- blkcg_cgwb_list = &blkcg->cgwb_list;
+ blkcg_cgwb_list = blkcg_get_cgwb_list(blkcg_css);
/* look up again under lock and discard on blkcg mismatch */
spin_lock_irqsave(&cgwb_lock, flags);
@@ -511,7 +509,7 @@ static int cgwb_create(struct backing_dev_info *bdi,
list_add_tail_rcu(&wb->bdi_node, &bdi->wb_list);
list_add(&wb->memcg_node, memcg_cgwb_list);
list_add(&wb->blkcg_node, blkcg_cgwb_list);
- blkcg_pin_online(blkcg);
+ blkcg_pin_online(blkcg_css);
css_get(memcg_css);
css_get(blkcg_css);
}
@@ -724,18 +722,19 @@ void wb_memcg_offline(struct mem_cgroup *memcg)
/**
* wb_blkcg_offline - kill all wb's associated with a blkcg being offlined
- * @blkcg: blkcg being offlined
+ * @css: blkcg being offlined
*
* Also prevents creation of any new wb's associated with @blkcg.
*/
-void wb_blkcg_offline(struct blkcg *blkcg)
+void wb_blkcg_offline(struct cgroup_subsys_state *css)
{
struct bdi_writeback *wb, *next;
+ struct list_head *list = blkcg_get_cgwb_list(css);
spin_lock_irq(&cgwb_lock);
- list_for_each_entry_safe(wb, next, &blkcg->cgwb_list, blkcg_node)
+ list_for_each_entry_safe(wb, next, list, blkcg_node)
cgwb_kill(wb);
- blkcg->cgwb_list.next = NULL; /* prevent new wb's */
+ list->next = NULL; /* prevent new wb's */
spin_unlock_irq(&cgwb_lock);
}
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index c468fee595ff..910a138e9859 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2495,11 +2495,16 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
struct address_space *mapping = NULL;
int extra_pins, ret;
pgoff_t end;
+ bool is_hzp;
- VM_BUG_ON_PAGE(is_huge_zero_page(head), head);
VM_BUG_ON_PAGE(!PageLocked(head), head);
VM_BUG_ON_PAGE(!PageCompound(head), head);
+ is_hzp = is_huge_zero_page(head);
+ VM_WARN_ON_ONCE_PAGE(is_hzp, head);
+ if (is_hzp)
+ return -EBUSY;
+
if (PageWriteback(head))
return -EBUSY;
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index f8ca7cca3c1a..3fc721789743 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -6785,6 +6785,16 @@ int get_hwpoison_huge_page(struct page *page, bool *hugetlb)
return ret;
}
+int get_huge_page_for_hwpoison(unsigned long pfn, int flags)
+{
+ int ret;
+
+ spin_lock_irq(&hugetlb_lock);
+ ret = __get_huge_page_for_hwpoison(pfn, flags);
+ spin_unlock_irq(&hugetlb_lock);
+ return ret;
+}
+
void putback_active_hugepage(struct page *page)
{
spin_lock_irq(&hugetlb_lock);
diff --git a/mm/kasan/quarantine.c b/mm/kasan/quarantine.c
index 08291ed33e93..0a9def8ce5e8 100644
--- a/mm/kasan/quarantine.c
+++ b/mm/kasan/quarantine.c
@@ -315,6 +315,13 @@ static void per_cpu_remove_cache(void *arg)
struct qlist_head *q;
q = this_cpu_ptr(&cpu_quarantine);
+ /*
+ * Ensure the ordering between the writing to q->offline and
+ * per_cpu_remove_cache. Prevent cpu_quarantine from being corrupted
+ * by interrupt.
+ */
+ if (READ_ONCE(q->offline))
+ return;
qlist_move_cache(q, &to_free, cache);
qlist_free_all(&to_free, cache);
}
diff --git a/mm/kfence/core.c b/mm/kfence/core.c
index 9b2b5f56f4ae..11a954763be9 100644
--- a/mm/kfence/core.c
+++ b/mm/kfence/core.c
@@ -621,6 +621,16 @@ static bool __init kfence_init_pool_early(void)
* fails for the first page, and therefore expect addr==__kfence_pool in
* most failure cases.
*/
+ for (char *p = (char *)addr; p < __kfence_pool + KFENCE_POOL_SIZE; p += PAGE_SIZE) {
+ struct slab *slab = virt_to_slab(p);
+
+ if (!slab)
+ continue;
+#ifdef CONFIG_MEMCG
+ slab->memcg_data = 0;
+#endif
+ __folio_clear_slab(slab_folio(slab));
+ }
memblock_free_late(__pa(addr), KFENCE_POOL_SIZE - (addr - (unsigned long)__kfence_pool));
__kfence_pool = NULL;
return false;
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 725f76723220..598fece89e2b 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -587,6 +587,9 @@ static DECLARE_DEFERRABLE_WORK(stats_flush_dwork, flush_memcg_stats_dwork);
static DEFINE_SPINLOCK(stats_flush_lock);
static DEFINE_PER_CPU(unsigned int, stats_updates);
static atomic_t stats_flush_threshold = ATOMIC_INIT(0);
+static u64 flush_next_time;
+
+#define FLUSH_TIME (2UL*HZ)
/*
* Accessors to ensure that preemption is disabled on PREEMPT_RT because it can
@@ -637,6 +640,7 @@ static void __mem_cgroup_flush_stats(void)
if (!spin_trylock_irqsave(&stats_flush_lock, flag))
return;
+ flush_next_time = jiffies_64 + 2*FLUSH_TIME;
cgroup_rstat_flush_irqsafe(root_mem_cgroup->css.cgroup);
atomic_set(&stats_flush_threshold, 0);
spin_unlock_irqrestore(&stats_flush_lock, flag);
@@ -648,10 +652,16 @@ void mem_cgroup_flush_stats(void)
__mem_cgroup_flush_stats();
}
+void mem_cgroup_flush_stats_delayed(void)
+{
+ if (time_after64(jiffies_64, flush_next_time))
+ mem_cgroup_flush_stats();
+}
+
static void flush_memcg_stats_dwork(struct work_struct *w)
{
__mem_cgroup_flush_stats();
- queue_delayed_work(system_unbound_wq, &stats_flush_dwork, 2UL*HZ);
+ queue_delayed_work(system_unbound_wq, &stats_flush_dwork, FLUSH_TIME);
}
/**
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index dcb6bb9cf731..d4a4adcca01f 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -1274,7 +1274,7 @@ try_again:
}
out:
if (ret == -EIO)
- dump_page(p, "hwpoison: unhandlable page");
+ pr_err("Memory failure: %#lx: unhandlable page.\n", page_to_pfn(p));
return ret;
}
@@ -1498,50 +1498,113 @@ static int try_to_split_thp_page(struct page *page, const char *msg)
return 0;
}
-static int memory_failure_hugetlb(unsigned long pfn, int flags)
+/*
+ * Called from hugetlb code with hugetlb_lock held.
+ *
+ * Return values:
+ * 0 - free hugepage
+ * 1 - in-use hugepage
+ * 2 - not a hugepage
+ * -EBUSY - the hugepage is busy (try to retry)
+ * -EHWPOISON - the hugepage is already hwpoisoned
+ */
+int __get_huge_page_for_hwpoison(unsigned long pfn, int flags)
+{
+ struct page *page = pfn_to_page(pfn);
+ struct page *head = compound_head(page);
+ int ret = 2; /* fallback to normal page handling */
+ bool count_increased = false;
+
+ if (!PageHeadHuge(head))
+ goto out;
+
+ if (flags & MF_COUNT_INCREASED) {
+ ret = 1;
+ count_increased = true;
+ } else if (HPageFreed(head) || HPageMigratable(head)) {
+ ret = get_page_unless_zero(head);
+ if (ret)
+ count_increased = true;
+ } else {
+ ret = -EBUSY;
+ goto out;
+ }
+
+ if (TestSetPageHWPoison(head)) {
+ ret = -EHWPOISON;
+ goto out;
+ }
+
+ return ret;
+out:
+ if (count_increased)
+ put_page(head);
+ return ret;
+}
+
+#ifdef CONFIG_HUGETLB_PAGE
+/*
+ * Taking refcount of hugetlb pages needs extra care about race conditions
+ * with basic operations like hugepage allocation/free/demotion.
+ * So some of prechecks for hwpoison (pinning, and testing/setting
+ * PageHWPoison) should be done in single hugetlb_lock range.
+ */
+static int try_memory_failure_hugetlb(unsigned long pfn, int flags, int *hugetlb)
{
- struct page *p = pfn_to_page(pfn);
- struct page *head = compound_head(p);
int res;
+ struct page *p = pfn_to_page(pfn);
+ struct page *head;
unsigned long page_flags;
+ bool retry = true;
- if (TestSetPageHWPoison(head)) {
- pr_err("Memory failure: %#lx: already hardware poisoned\n",
- pfn);
- res = -EHWPOISON;
- if (flags & MF_ACTION_REQUIRED)
+ *hugetlb = 1;
+retry:
+ res = get_huge_page_for_hwpoison(pfn, flags);
+ if (res == 2) { /* fallback to normal page handling */
+ *hugetlb = 0;
+ return 0;
+ } else if (res == -EHWPOISON) {
+ pr_err("Memory failure: %#lx: already hardware poisoned\n", pfn);
+ if (flags & MF_ACTION_REQUIRED) {
+ head = compound_head(p);
res = kill_accessing_process(current, page_to_pfn(head), flags);
+ }
return res;
+ } else if (res == -EBUSY) {
+ if (retry) {
+ retry = false;
+ goto retry;
+ }
+ action_result(pfn, MF_MSG_UNKNOWN, MF_IGNORED);
+ return res;
+ }
+
+ head = compound_head(p);
+ lock_page(head);
+
+ if (hwpoison_filter(p)) {
+ ClearPageHWPoison(head);
+ res = -EOPNOTSUPP;
+ goto out;
}
num_poisoned_pages_inc();
- if (!(flags & MF_COUNT_INCREASED)) {
- res = get_hwpoison_page(p, flags);
- if (!res) {
- lock_page(head);
- if (hwpoison_filter(p)) {
- if (TestClearPageHWPoison(head))
- num_poisoned_pages_dec();
- unlock_page(head);
- return -EOPNOTSUPP;
- }
- unlock_page(head);
- res = MF_FAILED;
- if (__page_handle_poison(p)) {
- page_ref_inc(p);
- res = MF_RECOVERED;
- }
- action_result(pfn, MF_MSG_FREE_HUGE, res);
- return res == MF_RECOVERED ? 0 : -EBUSY;
- } else if (res < 0) {
- action_result(pfn, MF_MSG_UNKNOWN, MF_IGNORED);
- return -EBUSY;
+ /*
+ * Handling free hugepage. The possible race with hugepage allocation
+ * or demotion can be prevented by PageHWPoison flag.
+ */
+ if (res == 0) {
+ unlock_page(head);
+ res = MF_FAILED;
+ if (__page_handle_poison(p)) {
+ page_ref_inc(p);
+ res = MF_RECOVERED;
}
+ action_result(pfn, MF_MSG_FREE_HUGE, res);
+ return res == MF_RECOVERED ? 0 : -EBUSY;
}
- lock_page(head);
-
/*
* The page could have changed compound pages due to race window.
* If this happens just bail out.
@@ -1554,14 +1617,6 @@ static int memory_failure_hugetlb(unsigned long pfn, int flags)
page_flags = head->flags;
- if (hwpoison_filter(p)) {
- if (TestClearPageHWPoison(head))
- num_poisoned_pages_dec();
- put_page(p);
- res = -EOPNOTSUPP;
- goto out;
- }
-
/*
* TODO: hwpoison for pud-sized hugetlb doesn't work right now, so
* simply disable it. In order to make it work properly, we need
@@ -1588,6 +1643,12 @@ out:
unlock_page(head);
return res;
}
+#else
+static inline int try_memory_failure_hugetlb(unsigned long pfn, int flags, int *hugetlb)
+{
+ return 0;
+}
+#endif
static int memory_failure_dev_pagemap(unsigned long pfn, int flags,
struct dev_pagemap *pgmap)
@@ -1712,6 +1773,7 @@ int memory_failure(unsigned long pfn, int flags)
int res = 0;
unsigned long page_flags;
bool retry = true;
+ int hugetlb = 0;
if (!sysctl_memory_failure_recovery)
panic("Memory failure on page %lx", pfn);
@@ -1739,10 +1801,9 @@ int memory_failure(unsigned long pfn, int flags)
}
try_again:
- if (PageHuge(p)) {
- res = memory_failure_hugetlb(pfn, flags);
+ res = try_memory_failure_hugetlb(pfn, flags, &hugetlb);
+ if (hugetlb)
goto unlock_mutex;
- }
if (TestSetPageHWPoison(p)) {
pr_err("Memory failure: %#lx: already hardware poisoned\n",
diff --git a/mm/mmap.c b/mm/mmap.c
index 3aa839f81e63..313b57d55a63 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -2117,14 +2117,6 @@ unsigned long vm_unmapped_area(struct vm_unmapped_area_info *info)
return addr;
}
-#ifndef arch_get_mmap_end
-#define arch_get_mmap_end(addr) (TASK_SIZE)
-#endif
-
-#ifndef arch_get_mmap_base
-#define arch_get_mmap_base(addr, base) (base)
-#endif
-
/* Get an address range which is currently unmapped.
* For shmat() with addr=0.
*
diff --git a/mm/mmu_notifier.c b/mm/mmu_notifier.c
index 459d195d2ff6..f45ff1b7626a 100644
--- a/mm/mmu_notifier.c
+++ b/mm/mmu_notifier.c
@@ -1036,6 +1036,18 @@ int mmu_interval_notifier_insert_locked(
}
EXPORT_SYMBOL_GPL(mmu_interval_notifier_insert_locked);
+static bool
+mmu_interval_seq_released(struct mmu_notifier_subscriptions *subscriptions,
+ unsigned long seq)
+{
+ bool ret;
+
+ spin_lock(&subscriptions->lock);
+ ret = subscriptions->invalidate_seq != seq;
+ spin_unlock(&subscriptions->lock);
+ return ret;
+}
+
/**
* mmu_interval_notifier_remove - Remove a interval notifier
* @interval_sub: Interval subscription to unregister
@@ -1083,7 +1095,7 @@ void mmu_interval_notifier_remove(struct mmu_interval_notifier *interval_sub)
lock_map_release(&__mmu_notifier_invalidate_range_start_map);
if (seq)
wait_event(subscriptions->wq,
- READ_ONCE(subscriptions->invalidate_seq) != seq);
+ mmu_interval_seq_released(subscriptions, seq));
/* pairs with mmgrab in mmu_interval_notifier_insert() */
mmdrop(mm);
diff --git a/mm/mremap.c b/mm/mremap.c
index 303d3290b938..0b93fac76851 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -947,7 +947,7 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len,
return -EINTR;
vma = vma_lookup(mm, addr);
if (!vma) {
- ret = EFAULT;
+ ret = -EFAULT;
goto out;
}
diff --git a/mm/nommu.c b/mm/nommu.c
index 55a9e48a7a02..9d7afc2d959e 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -226,6 +226,8 @@ void *vmalloc(unsigned long size)
}
EXPORT_SYMBOL(vmalloc);
+void *vmalloc_huge(unsigned long size, gfp_t gfp_mask) __weak __alias(__vmalloc);
+
/*
* vzalloc - allocate virtually contiguous memory with zero fill
*
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 7ec38194f8e1..49d7df39b02d 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -632,7 +632,7 @@ done:
*/
set_bit(MMF_OOM_SKIP, &mm->flags);
- /* Drop a reference taken by wake_oom_reaper */
+ /* Drop a reference taken by queue_oom_reaper */
put_task_struct(tsk);
}
@@ -644,12 +644,12 @@ static int oom_reaper(void *unused)
struct task_struct *tsk = NULL;
wait_event_freezable(oom_reaper_wait, oom_reaper_list != NULL);
- spin_lock(&oom_reaper_lock);
+ spin_lock_irq(&oom_reaper_lock);
if (oom_reaper_list != NULL) {
tsk = oom_reaper_list;
oom_reaper_list = tsk->oom_reaper_list;
}
- spin_unlock(&oom_reaper_lock);
+ spin_unlock_irq(&oom_reaper_lock);
if (tsk)
oom_reap_task(tsk);
@@ -658,22 +658,48 @@ static int oom_reaper(void *unused)
return 0;
}
-static void wake_oom_reaper(struct task_struct *tsk)
+static void wake_oom_reaper(struct timer_list *timer)
{
- /* mm is already queued? */
- if (test_and_set_bit(MMF_OOM_REAP_QUEUED, &tsk->signal->oom_mm->flags))
- return;
+ struct task_struct *tsk = container_of(timer, struct task_struct,
+ oom_reaper_timer);
+ struct mm_struct *mm = tsk->signal->oom_mm;
+ unsigned long flags;
- get_task_struct(tsk);
+ /* The victim managed to terminate on its own - see exit_mmap */
+ if (test_bit(MMF_OOM_SKIP, &mm->flags)) {
+ put_task_struct(tsk);
+ return;
+ }
- spin_lock(&oom_reaper_lock);
+ spin_lock_irqsave(&oom_reaper_lock, flags);
tsk->oom_reaper_list = oom_reaper_list;
oom_reaper_list = tsk;
- spin_unlock(&oom_reaper_lock);
+ spin_unlock_irqrestore(&oom_reaper_lock, flags);
trace_wake_reaper(tsk->pid);
wake_up(&oom_reaper_wait);
}
+/*
+ * Give the OOM victim time to exit naturally before invoking the oom_reaping.
+ * The timers timeout is arbitrary... the longer it is, the longer the worst
+ * case scenario for the OOM can take. If it is too small, the oom_reaper can
+ * get in the way and release resources needed by the process exit path.
+ * e.g. The futex robust list can sit in Anon|Private memory that gets reaped
+ * before the exit path is able to wake the futex waiters.
+ */
+#define OOM_REAPER_DELAY (2*HZ)
+static void queue_oom_reaper(struct task_struct *tsk)
+{
+ /* mm is already queued? */
+ if (test_and_set_bit(MMF_OOM_REAP_QUEUED, &tsk->signal->oom_mm->flags))
+ return;
+
+ get_task_struct(tsk);
+ timer_setup(&tsk->oom_reaper_timer, wake_oom_reaper, 0);
+ tsk->oom_reaper_timer.expires = jiffies + OOM_REAPER_DELAY;
+ add_timer(&tsk->oom_reaper_timer);
+}
+
static int __init oom_init(void)
{
oom_reaper_th = kthread_run(oom_reaper, NULL, "oom_reaper");
@@ -681,7 +707,7 @@ static int __init oom_init(void)
}
subsys_initcall(oom_init)
#else
-static inline void wake_oom_reaper(struct task_struct *tsk)
+static inline void queue_oom_reaper(struct task_struct *tsk)
{
}
#endif /* CONFIG_MMU */
@@ -932,7 +958,7 @@ static void __oom_kill_process(struct task_struct *victim, const char *message)
rcu_read_unlock();
if (can_oom_reap)
- wake_oom_reaper(victim);
+ queue_oom_reaper(victim);
mmdrop(mm);
put_task_struct(victim);
@@ -968,7 +994,7 @@ static void oom_kill_process(struct oom_control *oc, const char *message)
task_lock(victim);
if (task_will_free_mem(victim)) {
mark_oom_victim(victim);
- wake_oom_reaper(victim);
+ queue_oom_reaper(victim);
task_unlock(victim);
put_task_struct(victim);
return;
@@ -1067,7 +1093,7 @@ bool out_of_memory(struct oom_control *oc)
*/
if (task_will_free_mem(current)) {
mark_oom_victim(current);
- wake_oom_reaper(current);
+ queue_oom_reaper(current);
return true;
}
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 33ca8cab21e6..0e42038382c1 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -8919,7 +8919,7 @@ void *__init alloc_large_system_hash(const char *tablename,
table = memblock_alloc_raw(size,
SMP_CACHE_BYTES);
} else if (get_order(size) >= MAX_ORDER || hashdist) {
- table = __vmalloc(size, gfp_flags);
+ table = vmalloc_huge(size, gfp_flags);
virt = true;
if (table)
huge = is_vm_area_hugepages(table);
diff --git a/mm/page_io.c b/mm/page_io.c
index 89fbf3cae30f..3fbdab6a940e 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -360,7 +360,6 @@ int swap_readpage(struct page *page, bool synchronous)
* attempt to access it in the page fault retry time check.
*/
if (synchronous) {
- bio->bi_opf |= REQ_POLLED;
get_task_struct(current);
bio->bi_private = current;
}
@@ -372,8 +371,7 @@ int swap_readpage(struct page *page, bool synchronous)
if (!READ_ONCE(bio->bi_private))
break;
- if (!bio_poll(bio, NULL, 0))
- blk_io_schedule();
+ blk_io_schedule();
}
__set_current_state(TASK_RUNNING);
bio_put(bio);
diff --git a/mm/readahead.c b/mm/readahead.c
index 8e3775829513..26bf74a6b2fe 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -113,6 +113,7 @@
* ->readpage() which may be less efficient.
*/
+#include <linux/blkdev.h>
#include <linux/kernel.h>
#include <linux/dax.h>
#include <linux/gfp.h>
@@ -474,7 +475,8 @@ static inline int ra_alloc_folio(struct readahead_control *ractl, pgoff_t index,
if (!folio)
return -ENOMEM;
- if (mark - index < (1UL << order))
+ mark = round_up(mark, 1UL << order);
+ if (index == mark)
folio_set_readahead(folio);
err = filemap_add_folio(ractl->mapping, folio, index, gfp);
if (err)
@@ -555,8 +557,9 @@ static void ondemand_readahead(struct readahead_control *ractl,
struct file_ra_state *ra = ractl->ra;
unsigned long max_pages = ra->ra_pages;
unsigned long add_pages;
- unsigned long index = readahead_index(ractl);
- pgoff_t prev_index;
+ pgoff_t index = readahead_index(ractl);
+ pgoff_t expected, prev_index;
+ unsigned int order = folio ? folio_order(folio) : 0;
/*
* If the request exceeds the readahead window, allow the read to
@@ -575,8 +578,9 @@ static void ondemand_readahead(struct readahead_control *ractl,
* It's the expected callback index, assume sequential access.
* Ramp up sizes, and push forward the readahead window.
*/
- if ((index == (ra->start + ra->size - ra->async_size) ||
- index == (ra->start + ra->size))) {
+ expected = round_up(ra->start + ra->size - ra->async_size,
+ 1UL << order);
+ if (index == expected || index == (ra->start + ra->size)) {
ra->start += ra->size;
ra->size = get_next_ra_size(ra, max_pages);
ra->async_size = ra->size;
@@ -662,7 +666,7 @@ readit:
}
ractl->_index = ra->start;
- page_cache_ra_order(ractl, ra, folio ? folio_order(folio) : 0);
+ page_cache_ra_order(ractl, ra, order);
}
void page_cache_sync_ra(struct readahead_control *ractl,
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 63c61f8b2611..981a6e85c88e 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -6,6 +6,7 @@
* Swap reorganised 29.12.95, Stephen Tweedie
*/
+#include <linux/blkdev.h>
#include <linux/mm.h>
#include <linux/sched/mm.h>
#include <linux/sched/task.h>
@@ -179,7 +180,7 @@ static int discard_swap(struct swap_info_struct *si)
nr_blocks = ((sector_t)se->nr_pages - 1) << (PAGE_SHIFT - 9);
if (nr_blocks) {
err = blkdev_issue_discard(si->bdev, start_block,
- nr_blocks, GFP_KERNEL, 0);
+ nr_blocks, GFP_KERNEL);
if (err)
return err;
cond_resched();
@@ -190,7 +191,7 @@ static int discard_swap(struct swap_info_struct *si)
nr_blocks = (sector_t)se->nr_pages << (PAGE_SHIFT - 9);
err = blkdev_issue_discard(si->bdev, start_block,
- nr_blocks, GFP_KERNEL, 0);
+ nr_blocks, GFP_KERNEL);
if (err)
break;
@@ -254,7 +255,7 @@ static void discard_swap_cluster(struct swap_info_struct *si,
start_block <<= PAGE_SHIFT - 9;
nr_blocks <<= PAGE_SHIFT - 9;
if (blkdev_issue_discard(si->bdev, start_block,
- nr_blocks, GFP_NOIO, 0))
+ nr_blocks, GFP_NOIO))
break;
se = next_se(se);
@@ -2466,7 +2467,7 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
if (p->flags & SWP_CONTINUED)
free_swap_count_continuations(p);
- if (!p->bdev || !blk_queue_nonrot(bdev_get_queue(p->bdev)))
+ if (!p->bdev || !bdev_nonrot(p->bdev))
atomic_dec(&nr_rotate_swap);
mutex_lock(&swapon_mutex);
@@ -2761,7 +2762,7 @@ static int claim_swapfile(struct swap_info_struct *p, struct inode *inode)
* write only restriction. Hence zoned block devices are not
* suitable for swapping. Disallow them here.
*/
- if (blk_queue_is_zoned(p->bdev->bd_disk->queue))
+ if (bdev_is_zoned(p->bdev))
return -EINVAL;
p->flags |= SWP_BLKDEV;
} else if (S_ISREG(inode->i_mode)) {
@@ -2957,20 +2958,6 @@ static int setup_swap_map_and_extents(struct swap_info_struct *p,
return nr_extents;
}
-/*
- * Helper to sys_swapon determining if a given swap
- * backing device queue supports DISCARD operations.
- */
-static bool swap_discardable(struct swap_info_struct *si)
-{
- struct request_queue *q = bdev_get_queue(si->bdev);
-
- if (!blk_queue_discard(q))
- return false;
-
- return true;
-}
-
SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
{
struct swap_info_struct *p;
@@ -3065,13 +3052,13 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
goto bad_swap_unlock_inode;
}
- if (p->bdev && blk_queue_stable_writes(p->bdev->bd_disk->queue))
+ if (p->bdev && bdev_stable_writes(p->bdev))
p->flags |= SWP_STABLE_WRITES;
if (p->bdev && p->bdev->bd_disk->fops->rw_page)
p->flags |= SWP_SYNCHRONOUS_IO;
- if (p->bdev && blk_queue_nonrot(bdev_get_queue(p->bdev))) {
+ if (p->bdev && bdev_nonrot(p->bdev)) {
int cpu;
unsigned long ci, nr_cluster;
@@ -3132,7 +3119,8 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
sizeof(long),
GFP_KERNEL);
- if (p->bdev && (swap_flags & SWAP_FLAG_DISCARD) && swap_discardable(p)) {
+ if ((swap_flags & SWAP_FLAG_DISCARD) &&
+ p->bdev && bdev_max_discard_sectors(p->bdev)) {
/*
* When discard is enabled for swap with no particular
* policy flagged, we set all swap discard flags here in
diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
index 0cb8e5ef1713..e9bb6db002aa 100644
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -72,12 +72,15 @@ int mfill_atomic_install_pte(struct mm_struct *dst_mm, pmd_t *dst_pmd,
_dst_pte = pte_mkdirty(_dst_pte);
if (page_in_cache && !vm_shared)
writable = false;
- if (writable) {
- if (wp_copy)
- _dst_pte = pte_mkuffd_wp(_dst_pte);
- else
- _dst_pte = pte_mkwrite(_dst_pte);
- }
+
+ /*
+ * Always mark a PTE as write-protected when needed, regardless of
+ * VM_WRITE, which the user might change.
+ */
+ if (wp_copy)
+ _dst_pte = pte_mkuffd_wp(_dst_pte);
+ else if (writable)
+ _dst_pte = pte_mkwrite(_dst_pte);
dst_pte = pte_offset_map_lock(dst_mm, dst_pmd, dst_addr, &ptl);
diff --git a/mm/util.c b/mm/util.c
index 54e5e761a9a9..3492a9e81aa3 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -592,8 +592,15 @@ void *kvmalloc_node(size_t size, gfp_t flags, int node)
return NULL;
}
- return __vmalloc_node(size, 1, flags, node,
- __builtin_return_address(0));
+ /*
+ * kvmalloc() can always use VM_ALLOW_HUGE_VMAP,
+ * since the callers already cannot assume anything
+ * about the resulting pointer, and cannot play
+ * protection games.
+ */
+ return __vmalloc_node_range(size, 1, VMALLOC_START, VMALLOC_END,
+ flags, PAGE_KERNEL, VM_ALLOW_HUGE_VMAP,
+ node, __builtin_return_address(0));
}
EXPORT_SYMBOL(kvmalloc_node);
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 0b17498a34f1..cadfbb5155ea 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -2653,15 +2653,18 @@ static void __vunmap(const void *addr, int deallocate_pages)
vm_remove_mappings(area, deallocate_pages);
if (deallocate_pages) {
- unsigned int page_order = vm_area_page_order(area);
- int i, step = 1U << page_order;
+ int i;
- for (i = 0; i < area->nr_pages; i += step) {
+ for (i = 0; i < area->nr_pages; i++) {
struct page *page = area->pages[i];
BUG_ON(!page);
- mod_memcg_page_state(page, MEMCG_VMALLOC, -step);
- __free_pages(page, page_order);
+ mod_memcg_page_state(page, MEMCG_VMALLOC, -1);
+ /*
+ * High-order allocs for huge vmallocs are split, so
+ * can be freed as an array of order-0 allocations
+ */
+ __free_pages(page, 0);
cond_resched();
}
atomic_long_sub(area->nr_pages, &nr_vmalloc_pages);
@@ -2914,12 +2917,7 @@ vm_area_alloc_pages(gfp_t gfp, int nid,
if (nr != nr_pages_request)
break;
}
- } else
- /*
- * Compound pages required for remap_vmalloc_page if
- * high-order pages.
- */
- gfp |= __GFP_COMP;
+ }
/* High-order pages or fallback path if "bulk" fails. */
@@ -2933,6 +2931,15 @@ vm_area_alloc_pages(gfp_t gfp, int nid,
page = alloc_pages_node(nid, gfp, order);
if (unlikely(!page))
break;
+ /*
+ * Higher order allocations must be able to be treated as
+ * indepdenent small pages by callers (as they can with
+ * small-page vmallocs). Some drivers do their own refcounting
+ * on vmalloc_to_page() pages, some use page->mapping,
+ * page->lru, etc.
+ */
+ if (order)
+ split_page(page, order);
/*
* Careful, we allocate and map page-order pages, but
@@ -2992,11 +2999,10 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
atomic_long_add(area->nr_pages, &nr_vmalloc_pages);
if (gfp_mask & __GFP_ACCOUNT) {
- int i, step = 1U << page_order;
+ int i;
- for (i = 0; i < area->nr_pages; i += step)
- mod_memcg_page_state(area->pages[i], MEMCG_VMALLOC,
- step);
+ for (i = 0; i < area->nr_pages; i++)
+ mod_memcg_page_state(area->pages[i], MEMCG_VMALLOC, 1);
}
/*
@@ -3095,7 +3101,7 @@ void *__vmalloc_node_range(unsigned long size, unsigned long align,
return NULL;
}
- if (vmap_allow_huge && !(vm_flags & VM_NO_HUGE_VMAP)) {
+ if (vmap_allow_huge && (vm_flags & VM_ALLOW_HUGE_VMAP)) {
unsigned long size_per_node;
/*
@@ -3262,21 +3268,24 @@ void *vmalloc(unsigned long size)
EXPORT_SYMBOL(vmalloc);
/**
- * vmalloc_no_huge - allocate virtually contiguous memory using small pages
- * @size: allocation size
+ * vmalloc_huge - allocate virtually contiguous memory, allow huge pages
+ * @size: allocation size
+ * @gfp_mask: flags for the page level allocator
*
- * Allocate enough non-huge pages to cover @size from the page level
+ * Allocate enough pages to cover @size from the page level
* allocator and map them into contiguous kernel virtual space.
+ * If @size is greater than or equal to PMD_SIZE, allow using
+ * huge pages for the memory
*
* Return: pointer to the allocated memory or %NULL on error
*/
-void *vmalloc_no_huge(unsigned long size)
+void *vmalloc_huge(unsigned long size, gfp_t gfp_mask)
{
return __vmalloc_node_range(size, 1, VMALLOC_START, VMALLOC_END,
- GFP_KERNEL, PAGE_KERNEL, VM_NO_HUGE_VMAP,
+ gfp_mask, PAGE_KERNEL, VM_ALLOW_HUGE_VMAP,
NUMA_NO_NODE, __builtin_return_address(0));
}
-EXPORT_SYMBOL(vmalloc_no_huge);
+EXPORT_SYMBOL_GPL(vmalloc_huge);
/**
* vzalloc - allocate virtually contiguous memory with zero fill
diff --git a/mm/workingset.c b/mm/workingset.c
index 8a3828acc0bf..592569a8974c 100644
--- a/mm/workingset.c
+++ b/mm/workingset.c
@@ -355,7 +355,7 @@ void workingset_refault(struct folio *folio, void *shadow)
mod_lruvec_state(lruvec, WORKINGSET_REFAULT_BASE + file, nr);
- mem_cgroup_flush_stats();
+ mem_cgroup_flush_stats_delayed();
/*
* Compare the distance to the existing workingset size. We
* don't activate pages that couldn't stay resident even if
diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c
index 0899a729a23f..c120c7c6d25f 100644
--- a/net/batman-adv/fragmentation.c
+++ b/net/batman-adv/fragmentation.c
@@ -475,6 +475,17 @@ int batadv_frag_send_packet(struct sk_buff *skb,
goto free_skb;
}
+ /* GRO might have added fragments to the fragment list instead of
+ * frags[]. But this is not handled by skb_split and must be
+ * linearized to avoid incorrect length information after all
+ * batman-adv fragments were created and submitted to the
+ * hard-interface
+ */
+ if (skb_has_frag_list(skb) && __skb_linearize(skb)) {
+ ret = -ENOMEM;
+ goto free_skb;
+ }
+
/* Create one header to be copied to all fragments */
frag_header.packet_type = BATADV_UNICAST_FRAG;
frag_header.version = BATADV_COMPAT_VERSION;
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index 84312c836549..fe803bee419a 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -670,7 +670,7 @@ static void le_conn_timeout(struct work_struct *work)
/* Disable LE Advertising */
le_disable_advertising(hdev);
hci_dev_lock(hdev);
- hci_le_conn_failed(conn, HCI_ERROR_ADVERTISING_TIMEOUT);
+ hci_conn_failed(conn, HCI_ERROR_ADVERTISING_TIMEOUT);
hci_dev_unlock(hdev);
return;
}
@@ -873,7 +873,7 @@ struct hci_dev *hci_get_route(bdaddr_t *dst, bdaddr_t *src, uint8_t src_type)
EXPORT_SYMBOL(hci_get_route);
/* This function requires the caller holds hdev->lock */
-void hci_le_conn_failed(struct hci_conn *conn, u8 status)
+static void hci_le_conn_failed(struct hci_conn *conn, u8 status)
{
struct hci_dev *hdev = conn->hdev;
struct hci_conn_params *params;
@@ -886,8 +886,6 @@ void hci_le_conn_failed(struct hci_conn *conn, u8 status)
params->conn = NULL;
}
- conn->state = BT_CLOSED;
-
/* If the status indicates successful cancellation of
* the attempt (i.e. Unknown Connection Id) there's no point of
* notifying failure since we'll go back to keep trying to
@@ -899,10 +897,6 @@ void hci_le_conn_failed(struct hci_conn *conn, u8 status)
mgmt_connect_failed(hdev, &conn->dst, conn->type,
conn->dst_type, status);
- hci_connect_cfm(conn, status);
-
- hci_conn_del(conn);
-
/* Since we may have temporarily stopped the background scanning in
* favor of connection establishment, we should restart it.
*/
@@ -914,6 +908,28 @@ void hci_le_conn_failed(struct hci_conn *conn, u8 status)
hci_enable_advertising(hdev);
}
+/* This function requires the caller holds hdev->lock */
+void hci_conn_failed(struct hci_conn *conn, u8 status)
+{
+ struct hci_dev *hdev = conn->hdev;
+
+ bt_dev_dbg(hdev, "status 0x%2.2x", status);
+
+ switch (conn->type) {
+ case LE_LINK:
+ hci_le_conn_failed(conn, status);
+ break;
+ case ACL_LINK:
+ mgmt_connect_failed(hdev, &conn->dst, conn->type,
+ conn->dst_type, status);
+ break;
+ }
+
+ conn->state = BT_CLOSED;
+ hci_connect_cfm(conn, status);
+ hci_conn_del(conn);
+}
+
static void create_le_conn_complete(struct hci_dev *hdev, void *data, int err)
{
struct hci_conn *conn = data;
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index b4782a6c1025..45c2dd2e1590 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -2555,10 +2555,10 @@ int hci_register_dev(struct hci_dev *hdev)
*/
switch (hdev->dev_type) {
case HCI_PRIMARY:
- id = ida_simple_get(&hci_index_ida, 0, 0, GFP_KERNEL);
+ id = ida_simple_get(&hci_index_ida, 0, HCI_MAX_ID, GFP_KERNEL);
break;
case HCI_AMP:
- id = ida_simple_get(&hci_index_ida, 1, 0, GFP_KERNEL);
+ id = ida_simple_get(&hci_index_ida, 1, HCI_MAX_ID, GFP_KERNEL);
break;
default:
return -EINVAL;
@@ -2567,7 +2567,7 @@ int hci_register_dev(struct hci_dev *hdev)
if (id < 0)
return id;
- sprintf(hdev->name, "hci%d", id);
+ snprintf(hdev->name, sizeof(hdev->name), "hci%d", id);
hdev->id = id;
BT_DBG("%p name %s bus %d", hdev, hdev->name, hdev->bus);
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index abaabfae19cc..66451661283c 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -2834,7 +2834,7 @@ static void hci_cs_le_create_conn(struct hci_dev *hdev, u8 status)
bt_dev_dbg(hdev, "status 0x%2.2x", status);
/* All connection failure handling is taken care of by the
- * hci_le_conn_failed function which is triggered by the HCI
+ * hci_conn_failed function which is triggered by the HCI
* request completion callbacks used for connecting.
*/
if (status)
@@ -2859,7 +2859,7 @@ static void hci_cs_le_ext_create_conn(struct hci_dev *hdev, u8 status)
bt_dev_dbg(hdev, "status 0x%2.2x", status);
/* All connection failure handling is taken care of by the
- * hci_le_conn_failed function which is triggered by the HCI
+ * hci_conn_failed function which is triggered by the HCI
* request completion callbacks used for connecting.
*/
if (status)
@@ -3067,18 +3067,20 @@ static void hci_conn_complete_evt(struct hci_dev *hdev, void *data,
{
struct hci_ev_conn_complete *ev = data;
struct hci_conn *conn;
+ u8 status = ev->status;
- if (__le16_to_cpu(ev->handle) > HCI_CONN_HANDLE_MAX) {
- bt_dev_err(hdev, "Ignoring HCI_Connection_Complete for invalid handle");
- return;
- }
-
- bt_dev_dbg(hdev, "status 0x%2.2x", ev->status);
+ bt_dev_dbg(hdev, "status 0x%2.2x", status);
hci_dev_lock(hdev);
conn = hci_conn_hash_lookup_ba(hdev, ev->link_type, &ev->bdaddr);
if (!conn) {
+ /* In case of error status and there is no connection pending
+ * just unlock as there is nothing to cleanup.
+ */
+ if (ev->status)
+ goto unlock;
+
/* Connection may not exist if auto-connected. Check the bredr
* allowlist to see if this device is allowed to auto connect.
* If link is an ACL type, create a connection class
@@ -3122,8 +3124,14 @@ static void hci_conn_complete_evt(struct hci_dev *hdev, void *data,
goto unlock;
}
- if (!ev->status) {
+ if (!status) {
conn->handle = __le16_to_cpu(ev->handle);
+ if (conn->handle > HCI_CONN_HANDLE_MAX) {
+ bt_dev_err(hdev, "Invalid handle: 0x%4.4x > 0x%4.4x",
+ conn->handle, HCI_CONN_HANDLE_MAX);
+ status = HCI_ERROR_INVALID_PARAMETERS;
+ goto done;
+ }
if (conn->type == ACL_LINK) {
conn->state = BT_CONFIG;
@@ -3164,19 +3172,14 @@ static void hci_conn_complete_evt(struct hci_dev *hdev, void *data,
hci_send_cmd(hdev, HCI_OP_CHANGE_CONN_PTYPE, sizeof(cp),
&cp);
}
- } else {
- conn->state = BT_CLOSED;
- if (conn->type == ACL_LINK)
- mgmt_connect_failed(hdev, &conn->dst, conn->type,
- conn->dst_type, ev->status);
}
if (conn->type == ACL_LINK)
hci_sco_setup(conn, ev->status);
- if (ev->status) {
- hci_connect_cfm(conn, ev->status);
- hci_conn_del(conn);
+done:
+ if (status) {
+ hci_conn_failed(conn, status);
} else if (ev->link_type == SCO_LINK) {
switch (conn->setting & SCO_AIRMODE_MASK) {
case SCO_AIRMODE_CVSD:
@@ -3185,7 +3188,7 @@ static void hci_conn_complete_evt(struct hci_dev *hdev, void *data,
break;
}
- hci_connect_cfm(conn, ev->status);
+ hci_connect_cfm(conn, status);
}
unlock:
@@ -4676,6 +4679,7 @@ static void hci_sync_conn_complete_evt(struct hci_dev *hdev, void *data,
{
struct hci_ev_sync_conn_complete *ev = data;
struct hci_conn *conn;
+ u8 status = ev->status;
switch (ev->link_type) {
case SCO_LINK:
@@ -4690,12 +4694,7 @@ static void hci_sync_conn_complete_evt(struct hci_dev *hdev, void *data,
return;
}
- if (__le16_to_cpu(ev->handle) > HCI_CONN_HANDLE_MAX) {
- bt_dev_err(hdev, "Ignoring HCI_Sync_Conn_Complete for invalid handle");
- return;
- }
-
- bt_dev_dbg(hdev, "status 0x%2.2x", ev->status);
+ bt_dev_dbg(hdev, "status 0x%2.2x", status);
hci_dev_lock(hdev);
@@ -4729,9 +4728,17 @@ static void hci_sync_conn_complete_evt(struct hci_dev *hdev, void *data,
goto unlock;
}
- switch (ev->status) {
+ switch (status) {
case 0x00:
conn->handle = __le16_to_cpu(ev->handle);
+ if (conn->handle > HCI_CONN_HANDLE_MAX) {
+ bt_dev_err(hdev, "Invalid handle: 0x%4.4x > 0x%4.4x",
+ conn->handle, HCI_CONN_HANDLE_MAX);
+ status = HCI_ERROR_INVALID_PARAMETERS;
+ conn->state = BT_CLOSED;
+ break;
+ }
+
conn->state = BT_CONNECTED;
conn->type = ev->link_type;
@@ -4775,8 +4782,8 @@ static void hci_sync_conn_complete_evt(struct hci_dev *hdev, void *data,
}
}
- hci_connect_cfm(conn, ev->status);
- if (ev->status)
+ hci_connect_cfm(conn, status);
+ if (status)
hci_conn_del(conn);
unlock:
@@ -5527,11 +5534,6 @@ static void le_conn_complete_evt(struct hci_dev *hdev, u8 status,
struct smp_irk *irk;
u8 addr_type;
- if (handle > HCI_CONN_HANDLE_MAX) {
- bt_dev_err(hdev, "Ignoring HCI_LE_Connection_Complete for invalid handle");
- return;
- }
-
hci_dev_lock(hdev);
/* All controllers implicitly stop advertising in the event of a
@@ -5541,6 +5543,12 @@ static void le_conn_complete_evt(struct hci_dev *hdev, u8 status,
conn = hci_lookup_le_connect(hdev);
if (!conn) {
+ /* In case of error status and there is no connection pending
+ * just unlock as there is nothing to cleanup.
+ */
+ if (status)
+ goto unlock;
+
conn = hci_conn_add(hdev, LE_LINK, bdaddr, role);
if (!conn) {
bt_dev_err(hdev, "no memory for new connection");
@@ -5603,8 +5611,14 @@ static void le_conn_complete_evt(struct hci_dev *hdev, u8 status,
conn->dst_type = ev_bdaddr_type(hdev, conn->dst_type, NULL);
+ if (handle > HCI_CONN_HANDLE_MAX) {
+ bt_dev_err(hdev, "Invalid handle: 0x%4.4x > 0x%4.4x", handle,
+ HCI_CONN_HANDLE_MAX);
+ status = HCI_ERROR_INVALID_PARAMETERS;
+ }
+
if (status) {
- hci_le_conn_failed(conn, status);
+ hci_conn_failed(conn, status);
goto unlock;
}
diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c
index 8f4c5698913d..13600bf120b0 100644
--- a/net/bluetooth/hci_sync.c
+++ b/net/bluetooth/hci_sync.c
@@ -4408,12 +4408,21 @@ static int hci_reject_conn_sync(struct hci_dev *hdev, struct hci_conn *conn,
static int hci_abort_conn_sync(struct hci_dev *hdev, struct hci_conn *conn,
u8 reason)
{
+ int err;
+
switch (conn->state) {
case BT_CONNECTED:
case BT_CONFIG:
return hci_disconnect_sync(hdev, conn, reason);
case BT_CONNECT:
- return hci_connect_cancel_sync(hdev, conn);
+ err = hci_connect_cancel_sync(hdev, conn);
+ /* Cleanup hci_conn object if it cannot be cancelled as it
+ * likelly means the controller and host stack are out of sync.
+ */
+ if (err)
+ hci_conn_failed(conn, err);
+
+ return err;
case BT_CONNECT2:
return hci_reject_conn_sync(hdev, conn, reason);
default:
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index e7b9c2636d10..af709c182674 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -108,6 +108,7 @@ struct xdp_test_data {
struct page_pool *pp;
struct xdp_frame **frames;
struct sk_buff **skbs;
+ struct xdp_mem_info mem;
u32 batch_size;
u32 frame_cnt;
};
@@ -147,7 +148,6 @@ static void xdp_test_run_init_page(struct page *page, void *arg)
static int xdp_test_run_setup(struct xdp_test_data *xdp, struct xdp_buff *orig_ctx)
{
- struct xdp_mem_info mem = {};
struct page_pool *pp;
int err = -ENOMEM;
struct page_pool_params pp_params = {
@@ -174,7 +174,7 @@ static int xdp_test_run_setup(struct xdp_test_data *xdp, struct xdp_buff *orig_c
}
/* will copy 'mem.id' into pp->xdp_mem_id */
- err = xdp_reg_mem_model(&mem, MEM_TYPE_PAGE_POOL, pp);
+ err = xdp_reg_mem_model(&xdp->mem, MEM_TYPE_PAGE_POOL, pp);
if (err)
goto err_mmodel;
@@ -202,6 +202,7 @@ err_skbs:
static void xdp_test_run_teardown(struct xdp_test_data *xdp)
{
+ xdp_unreg_mem_model(&xdp->mem);
page_pool_destroy(xdp->pp);
kfree(xdp->frames);
kfree(xdp->skbs);
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index 196417859c4a..68b3e850bcb9 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -39,6 +39,13 @@ static int br_pass_frame_up(struct sk_buff *skb)
dev_sw_netstats_rx_add(brdev, skb->len);
vg = br_vlan_group_rcu(br);
+
+ /* Reset the offload_fwd_mark because there could be a stacked
+ * bridge above, and it should not think this bridge it doing
+ * that bridge's work forwarding out its ports.
+ */
+ br_switchdev_frame_unmark(skb);
+
/* Bridge is just like any other port. Make sure the
* packet is allowed except in promisc mode when someone
* may be running packet capture.
diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c
index 8cc44c367231..18affda2b522 100644
--- a/net/bridge/br_switchdev.c
+++ b/net/bridge/br_switchdev.c
@@ -353,6 +353,8 @@ static int br_switchdev_vlan_attr_replay(struct net_device *br_dev,
attr.orig_dev = br_dev;
vg = br_vlan_group(br);
+ if (!vg)
+ return 0;
list_for_each_entry(v, &vg->vlan_list, vlist) {
if (v->msti) {
diff --git a/net/can/isotp.c b/net/can/isotp.c
index bafb0fb5f0e0..1e7c6a460ef9 100644
--- a/net/can/isotp.c
+++ b/net/can/isotp.c
@@ -906,6 +906,7 @@ static int isotp_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
struct canfd_frame *cf;
int ae = (so->opt.flags & CAN_ISOTP_EXTEND_ADDR) ? 1 : 0;
int wait_tx_done = (so->opt.flags & CAN_ISOTP_WAIT_TX_DONE) ? 1 : 0;
+ s64 hrtimer_sec = 0;
int off;
int err;
@@ -1004,7 +1005,9 @@ static int isotp_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
isotp_create_fframe(cf, so, ae);
/* start timeout for FC */
- hrtimer_start(&so->txtimer, ktime_set(1, 0), HRTIMER_MODE_REL_SOFT);
+ hrtimer_sec = 1;
+ hrtimer_start(&so->txtimer, ktime_set(hrtimer_sec, 0),
+ HRTIMER_MODE_REL_SOFT);
}
/* send the first or only CAN frame */
@@ -1017,6 +1020,11 @@ static int isotp_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
if (err) {
pr_notice_once("can-isotp: %s: can_send_ret %pe\n",
__func__, ERR_PTR(err));
+
+ /* no transmission -> no timeout monitoring */
+ if (hrtimer_sec)
+ hrtimer_cancel(&so->txtimer);
+
goto err_out_drop;
}
@@ -1181,6 +1189,11 @@ static int isotp_bind(struct socket *sock, struct sockaddr *uaddr, int len)
lock_sock(sk);
+ if (so->bound) {
+ err = -EINVAL;
+ goto out;
+ }
+
/* do not register frame reception for functional addressing */
if (so->opt.flags & CAN_ISOTP_SF_BROADCAST)
do_rx_reg = 0;
@@ -1191,10 +1204,6 @@ static int isotp_bind(struct socket *sock, struct sockaddr *uaddr, int len)
goto out;
}
- if (so->bound && addr->can_ifindex == so->ifindex &&
- rx_id == so->rxid && tx_id == so->txid)
- goto out;
-
dev = dev_get_by_index(net, addr->can_ifindex);
if (!dev) {
err = -ENODEV;
@@ -1229,22 +1238,6 @@ static int isotp_bind(struct socket *sock, struct sockaddr *uaddr, int len)
dev_put(dev);
- if (so->bound && do_rx_reg) {
- /* unregister old filter */
- if (so->ifindex) {
- dev = dev_get_by_index(net, so->ifindex);
- if (dev) {
- can_rx_unregister(net, dev, so->rxid,
- SINGLE_MASK(so->rxid),
- isotp_rcv, sk);
- can_rx_unregister(net, dev, so->txid,
- SINGLE_MASK(so->txid),
- isotp_rcv_echo, sk);
- dev_put(dev);
- }
- }
- }
-
/* switch to new settings */
so->ifindex = ifindex;
so->rxid = rx_id;
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 1c5815530e0d..9d82bb42e958 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -537,43 +537,6 @@ static void request_init(struct ceph_osd_request *req)
target_init(&req->r_t);
}
-/*
- * This is ugly, but it allows us to reuse linger registration and ping
- * requests, keeping the structure of the code around send_linger{_ping}()
- * reasonable. Setting up a min_nr=2 mempool for each linger request
- * and dealing with copying ops (this blasts req only, watch op remains
- * intact) isn't any better.
- */
-static void request_reinit(struct ceph_osd_request *req)
-{
- struct ceph_osd_client *osdc = req->r_osdc;
- bool mempool = req->r_mempool;
- unsigned int num_ops = req->r_num_ops;
- u64 snapid = req->r_snapid;
- struct ceph_snap_context *snapc = req->r_snapc;
- bool linger = req->r_linger;
- struct ceph_msg *request_msg = req->r_request;
- struct ceph_msg *reply_msg = req->r_reply;
-
- dout("%s req %p\n", __func__, req);
- WARN_ON(kref_read(&req->r_kref) != 1);
- request_release_checks(req);
-
- WARN_ON(kref_read(&request_msg->kref) != 1);
- WARN_ON(kref_read(&reply_msg->kref) != 1);
- target_destroy(&req->r_t);
-
- request_init(req);
- req->r_osdc = osdc;
- req->r_mempool = mempool;
- req->r_num_ops = num_ops;
- req->r_snapid = snapid;
- req->r_snapc = snapc;
- req->r_linger = linger;
- req->r_request = request_msg;
- req->r_reply = reply_msg;
-}
-
struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc,
struct ceph_snap_context *snapc,
unsigned int num_ops,
@@ -918,14 +881,30 @@ EXPORT_SYMBOL(osd_req_op_xattr_init);
* @watch_opcode: CEPH_OSD_WATCH_OP_*
*/
static void osd_req_op_watch_init(struct ceph_osd_request *req, int which,
- u64 cookie, u8 watch_opcode)
+ u8 watch_opcode, u64 cookie, u32 gen)
{
struct ceph_osd_req_op *op;
op = osd_req_op_init(req, which, CEPH_OSD_OP_WATCH, 0);
op->watch.cookie = cookie;
op->watch.op = watch_opcode;
- op->watch.gen = 0;
+ op->watch.gen = gen;
+}
+
+/*
+ * prot_ver, timeout and notify payload (may be empty) should already be
+ * encoded in @request_pl
+ */
+static void osd_req_op_notify_init(struct ceph_osd_request *req, int which,
+ u64 cookie, struct ceph_pagelist *request_pl)
+{
+ struct ceph_osd_req_op *op;
+
+ op = osd_req_op_init(req, which, CEPH_OSD_OP_NOTIFY, 0);
+ op->notify.cookie = cookie;
+
+ ceph_osd_data_pagelist_init(&op->notify.request_data, request_pl);
+ op->indata_len = request_pl->length;
}
/*
@@ -2385,7 +2364,11 @@ again:
if (ceph_test_opt(osdc->client, ABORT_ON_FULL)) {
err = -ENOSPC;
} else {
- pr_warn_ratelimited("FULL or reached pool quota\n");
+ if (ceph_osdmap_flag(osdc, CEPH_OSDMAP_FULL))
+ pr_warn_ratelimited("cluster is full (osdmap FULL)\n");
+ else
+ pr_warn_ratelimited("pool %lld is full or reached quota\n",
+ req->r_t.base_oloc.pool);
req->r_t.paused = true;
maybe_request_map(osdc);
}
@@ -2727,10 +2710,13 @@ static void linger_release(struct kref *kref)
WARN_ON(!list_empty(&lreq->pending_lworks));
WARN_ON(lreq->osd);
- if (lreq->reg_req)
- ceph_osdc_put_request(lreq->reg_req);
- if (lreq->ping_req)
- ceph_osdc_put_request(lreq->ping_req);
+ if (lreq->request_pl)
+ ceph_pagelist_release(lreq->request_pl);
+ if (lreq->notify_id_pages)
+ ceph_release_page_vector(lreq->notify_id_pages, 1);
+
+ ceph_osdc_put_request(lreq->reg_req);
+ ceph_osdc_put_request(lreq->ping_req);
target_destroy(&lreq->t);
kfree(lreq);
}
@@ -2999,6 +2985,12 @@ static void linger_commit_cb(struct ceph_osd_request *req)
struct ceph_osd_linger_request *lreq = req->r_priv;
mutex_lock(&lreq->lock);
+ if (req != lreq->reg_req) {
+ dout("%s lreq %p linger_id %llu unknown req (%p != %p)\n",
+ __func__, lreq, lreq->linger_id, req, lreq->reg_req);
+ goto out;
+ }
+
dout("%s lreq %p linger_id %llu result %d\n", __func__, lreq,
lreq->linger_id, req->r_result);
linger_reg_commit_complete(lreq, req->r_result);
@@ -3022,6 +3014,7 @@ static void linger_commit_cb(struct ceph_osd_request *req)
}
}
+out:
mutex_unlock(&lreq->lock);
linger_put(lreq);
}
@@ -3044,6 +3037,12 @@ static void linger_reconnect_cb(struct ceph_osd_request *req)
struct ceph_osd_linger_request *lreq = req->r_priv;
mutex_lock(&lreq->lock);
+ if (req != lreq->reg_req) {
+ dout("%s lreq %p linger_id %llu unknown req (%p != %p)\n",
+ __func__, lreq, lreq->linger_id, req, lreq->reg_req);
+ goto out;
+ }
+
dout("%s lreq %p linger_id %llu result %d last_error %d\n", __func__,
lreq, lreq->linger_id, req->r_result, lreq->last_error);
if (req->r_result < 0) {
@@ -3053,46 +3052,64 @@ static void linger_reconnect_cb(struct ceph_osd_request *req)
}
}
+out:
mutex_unlock(&lreq->lock);
linger_put(lreq);
}
static void send_linger(struct ceph_osd_linger_request *lreq)
{
- struct ceph_osd_request *req = lreq->reg_req;
- struct ceph_osd_req_op *op = &req->r_ops[0];
+ struct ceph_osd_client *osdc = lreq->osdc;
+ struct ceph_osd_request *req;
+ int ret;
- verify_osdc_wrlocked(req->r_osdc);
+ verify_osdc_wrlocked(osdc);
+ mutex_lock(&lreq->lock);
dout("%s lreq %p linger_id %llu\n", __func__, lreq, lreq->linger_id);
- if (req->r_osd)
- cancel_linger_request(req);
+ if (lreq->reg_req) {
+ if (lreq->reg_req->r_osd)
+ cancel_linger_request(lreq->reg_req);
+ ceph_osdc_put_request(lreq->reg_req);
+ }
+
+ req = ceph_osdc_alloc_request(osdc, NULL, 1, true, GFP_NOIO);
+ BUG_ON(!req);
- request_reinit(req);
target_copy(&req->r_t, &lreq->t);
req->r_mtime = lreq->mtime;
- mutex_lock(&lreq->lock);
if (lreq->is_watch && lreq->committed) {
- WARN_ON(op->op != CEPH_OSD_OP_WATCH ||
- op->watch.cookie != lreq->linger_id);
- op->watch.op = CEPH_OSD_WATCH_OP_RECONNECT;
- op->watch.gen = ++lreq->register_gen;
+ osd_req_op_watch_init(req, 0, CEPH_OSD_WATCH_OP_RECONNECT,
+ lreq->linger_id, ++lreq->register_gen);
dout("lreq %p reconnect register_gen %u\n", lreq,
- op->watch.gen);
+ req->r_ops[0].watch.gen);
req->r_callback = linger_reconnect_cb;
} else {
- if (!lreq->is_watch)
+ if (lreq->is_watch) {
+ osd_req_op_watch_init(req, 0, CEPH_OSD_WATCH_OP_WATCH,
+ lreq->linger_id, 0);
+ } else {
lreq->notify_id = 0;
- else
- WARN_ON(op->watch.op != CEPH_OSD_WATCH_OP_WATCH);
+
+ refcount_inc(&lreq->request_pl->refcnt);
+ osd_req_op_notify_init(req, 0, lreq->linger_id,
+ lreq->request_pl);
+ ceph_osd_data_pages_init(
+ osd_req_op_data(req, 0, notify, response_data),
+ lreq->notify_id_pages, PAGE_SIZE, 0, false, false);
+ }
dout("lreq %p register\n", lreq);
req->r_callback = linger_commit_cb;
}
- mutex_unlock(&lreq->lock);
+
+ ret = ceph_osdc_alloc_messages(req, GFP_NOIO);
+ BUG_ON(ret);
req->r_priv = linger_get(lreq);
req->r_linger = true;
+ lreq->reg_req = req;
+ mutex_unlock(&lreq->lock);
submit_request(req, true);
}
@@ -3102,6 +3119,12 @@ static void linger_ping_cb(struct ceph_osd_request *req)
struct ceph_osd_linger_request *lreq = req->r_priv;
mutex_lock(&lreq->lock);
+ if (req != lreq->ping_req) {
+ dout("%s lreq %p linger_id %llu unknown req (%p != %p)\n",
+ __func__, lreq, lreq->linger_id, req, lreq->ping_req);
+ goto out;
+ }
+
dout("%s lreq %p linger_id %llu result %d ping_sent %lu last_error %d\n",
__func__, lreq, lreq->linger_id, req->r_result, lreq->ping_sent,
lreq->last_error);
@@ -3117,6 +3140,7 @@ static void linger_ping_cb(struct ceph_osd_request *req)
lreq->register_gen, req->r_ops[0].watch.gen);
}
+out:
mutex_unlock(&lreq->lock);
linger_put(lreq);
}
@@ -3124,8 +3148,8 @@ static void linger_ping_cb(struct ceph_osd_request *req)
static void send_linger_ping(struct ceph_osd_linger_request *lreq)
{
struct ceph_osd_client *osdc = lreq->osdc;
- struct ceph_osd_request *req = lreq->ping_req;
- struct ceph_osd_req_op *op = &req->r_ops[0];
+ struct ceph_osd_request *req;
+ int ret;
if (ceph_osdmap_flag(osdc, CEPH_OSDMAP_PAUSERD)) {
dout("%s PAUSERD\n", __func__);
@@ -3137,19 +3161,26 @@ static void send_linger_ping(struct ceph_osd_linger_request *lreq)
__func__, lreq, lreq->linger_id, lreq->ping_sent,
lreq->register_gen);
- if (req->r_osd)
- cancel_linger_request(req);
+ if (lreq->ping_req) {
+ if (lreq->ping_req->r_osd)
+ cancel_linger_request(lreq->ping_req);
+ ceph_osdc_put_request(lreq->ping_req);
+ }
- request_reinit(req);
- target_copy(&req->r_t, &lreq->t);
+ req = ceph_osdc_alloc_request(osdc, NULL, 1, true, GFP_NOIO);
+ BUG_ON(!req);
- WARN_ON(op->op != CEPH_OSD_OP_WATCH ||
- op->watch.cookie != lreq->linger_id ||
- op->watch.op != CEPH_OSD_WATCH_OP_PING);
- op->watch.gen = lreq->register_gen;
+ target_copy(&req->r_t, &lreq->t);
+ osd_req_op_watch_init(req, 0, CEPH_OSD_WATCH_OP_PING, lreq->linger_id,
+ lreq->register_gen);
req->r_callback = linger_ping_cb;
+
+ ret = ceph_osdc_alloc_messages(req, GFP_NOIO);
+ BUG_ON(ret);
+
req->r_priv = linger_get(lreq);
req->r_linger = true;
+ lreq->ping_req = req;
ceph_osdc_get_request(req);
account_request(req);
@@ -3165,12 +3196,6 @@ static void linger_submit(struct ceph_osd_linger_request *lreq)
down_write(&osdc->lock);
linger_register(lreq);
- if (lreq->is_watch) {
- lreq->reg_req->r_ops[0].watch.cookie = lreq->linger_id;
- lreq->ping_req->r_ops[0].watch.cookie = lreq->linger_id;
- } else {
- lreq->reg_req->r_ops[0].notify.cookie = lreq->linger_id;
- }
calc_target(osdc, &lreq->t, false);
osd = lookup_create_osd(osdc, lreq->t.osd, true);
@@ -3202,9 +3227,9 @@ static void cancel_linger_map_check(struct ceph_osd_linger_request *lreq)
*/
static void __linger_cancel(struct ceph_osd_linger_request *lreq)
{
- if (lreq->is_watch && lreq->ping_req->r_osd)
+ if (lreq->ping_req && lreq->ping_req->r_osd)
cancel_linger_request(lreq->ping_req);
- if (lreq->reg_req->r_osd)
+ if (lreq->reg_req && lreq->reg_req->r_osd)
cancel_linger_request(lreq->reg_req);
cancel_linger_map_check(lreq);
unlink_linger(lreq->osd, lreq);
@@ -4566,8 +4591,13 @@ int ceph_osdc_start_request(struct ceph_osd_client *osdc,
EXPORT_SYMBOL(ceph_osdc_start_request);
/*
- * Unregister a registered request. The request is not completed:
- * ->r_result isn't set and __complete_request() isn't called.
+ * Unregister request. If @req was registered, it isn't completed:
+ * r_result isn't set and __complete_request() isn't invoked.
+ *
+ * If @req wasn't registered, this call may have raced with
+ * handle_reply(), in which case r_result would already be set and
+ * __complete_request() would be getting invoked, possibly even
+ * concurrently with this call.
*/
void ceph_osdc_cancel_request(struct ceph_osd_request *req)
{
@@ -4653,43 +4683,6 @@ again:
}
EXPORT_SYMBOL(ceph_osdc_sync);
-static struct ceph_osd_request *
-alloc_linger_request(struct ceph_osd_linger_request *lreq)
-{
- struct ceph_osd_request *req;
-
- req = ceph_osdc_alloc_request(lreq->osdc, NULL, 1, false, GFP_NOIO);
- if (!req)
- return NULL;
-
- ceph_oid_copy(&req->r_base_oid, &lreq->t.base_oid);
- ceph_oloc_copy(&req->r_base_oloc, &lreq->t.base_oloc);
- return req;
-}
-
-static struct ceph_osd_request *
-alloc_watch_request(struct ceph_osd_linger_request *lreq, u8 watch_opcode)
-{
- struct ceph_osd_request *req;
-
- req = alloc_linger_request(lreq);
- if (!req)
- return NULL;
-
- /*
- * Pass 0 for cookie because we don't know it yet, it will be
- * filled in by linger_submit().
- */
- osd_req_op_watch_init(req, 0, 0, watch_opcode);
-
- if (ceph_osdc_alloc_messages(req, GFP_NOIO)) {
- ceph_osdc_put_request(req);
- return NULL;
- }
-
- return req;
-}
-
/*
* Returns a handle, caller owns a ref.
*/
@@ -4719,18 +4712,6 @@ ceph_osdc_watch(struct ceph_osd_client *osdc,
lreq->t.flags = CEPH_OSD_FLAG_WRITE;
ktime_get_real_ts64(&lreq->mtime);
- lreq->reg_req = alloc_watch_request(lreq, CEPH_OSD_WATCH_OP_WATCH);
- if (!lreq->reg_req) {
- ret = -ENOMEM;
- goto err_put_lreq;
- }
-
- lreq->ping_req = alloc_watch_request(lreq, CEPH_OSD_WATCH_OP_PING);
- if (!lreq->ping_req) {
- ret = -ENOMEM;
- goto err_put_lreq;
- }
-
linger_submit(lreq);
ret = linger_reg_commit_wait(lreq);
if (ret) {
@@ -4768,8 +4749,8 @@ int ceph_osdc_unwatch(struct ceph_osd_client *osdc,
ceph_oloc_copy(&req->r_base_oloc, &lreq->t.base_oloc);
req->r_flags = CEPH_OSD_FLAG_WRITE;
ktime_get_real_ts64(&req->r_mtime);
- osd_req_op_watch_init(req, 0, lreq->linger_id,
- CEPH_OSD_WATCH_OP_UNWATCH);
+ osd_req_op_watch_init(req, 0, CEPH_OSD_WATCH_OP_UNWATCH,
+ lreq->linger_id, 0);
ret = ceph_osdc_alloc_messages(req, GFP_NOIO);
if (ret)
@@ -4855,35 +4836,6 @@ out_put_req:
}
EXPORT_SYMBOL(ceph_osdc_notify_ack);
-static int osd_req_op_notify_init(struct ceph_osd_request *req, int which,
- u64 cookie, u32 prot_ver, u32 timeout,
- void *payload, u32 payload_len)
-{
- struct ceph_osd_req_op *op;
- struct ceph_pagelist *pl;
- int ret;
-
- op = osd_req_op_init(req, which, CEPH_OSD_OP_NOTIFY, 0);
- op->notify.cookie = cookie;
-
- pl = ceph_pagelist_alloc(GFP_NOIO);
- if (!pl)
- return -ENOMEM;
-
- ret = ceph_pagelist_encode_32(pl, 1); /* prot_ver */
- ret |= ceph_pagelist_encode_32(pl, timeout);
- ret |= ceph_pagelist_encode_32(pl, payload_len);
- ret |= ceph_pagelist_append(pl, payload, payload_len);
- if (ret) {
- ceph_pagelist_release(pl);
- return -ENOMEM;
- }
-
- ceph_osd_data_pagelist_init(&op->notify.request_data, pl);
- op->indata_len = pl->length;
- return 0;
-}
-
/*
* @timeout: in seconds
*
@@ -4902,7 +4854,6 @@ int ceph_osdc_notify(struct ceph_osd_client *osdc,
size_t *preply_len)
{
struct ceph_osd_linger_request *lreq;
- struct page **pages;
int ret;
WARN_ON(!timeout);
@@ -4915,41 +4866,35 @@ int ceph_osdc_notify(struct ceph_osd_client *osdc,
if (!lreq)
return -ENOMEM;
- lreq->preply_pages = preply_pages;
- lreq->preply_len = preply_len;
-
- ceph_oid_copy(&lreq->t.base_oid, oid);
- ceph_oloc_copy(&lreq->t.base_oloc, oloc);
- lreq->t.flags = CEPH_OSD_FLAG_READ;
-
- lreq->reg_req = alloc_linger_request(lreq);
- if (!lreq->reg_req) {
+ lreq->request_pl = ceph_pagelist_alloc(GFP_NOIO);
+ if (!lreq->request_pl) {
ret = -ENOMEM;
goto out_put_lreq;
}
- /*
- * Pass 0 for cookie because we don't know it yet, it will be
- * filled in by linger_submit().
- */
- ret = osd_req_op_notify_init(lreq->reg_req, 0, 0, 1, timeout,
- payload, payload_len);
- if (ret)
+ ret = ceph_pagelist_encode_32(lreq->request_pl, 1); /* prot_ver */
+ ret |= ceph_pagelist_encode_32(lreq->request_pl, timeout);
+ ret |= ceph_pagelist_encode_32(lreq->request_pl, payload_len);
+ ret |= ceph_pagelist_append(lreq->request_pl, payload, payload_len);
+ if (ret) {
+ ret = -ENOMEM;
goto out_put_lreq;
+ }
/* for notify_id */
- pages = ceph_alloc_page_vector(1, GFP_NOIO);
- if (IS_ERR(pages)) {
- ret = PTR_ERR(pages);
+ lreq->notify_id_pages = ceph_alloc_page_vector(1, GFP_NOIO);
+ if (IS_ERR(lreq->notify_id_pages)) {
+ ret = PTR_ERR(lreq->notify_id_pages);
+ lreq->notify_id_pages = NULL;
goto out_put_lreq;
}
- ceph_osd_data_pages_init(osd_req_op_data(lreq->reg_req, 0, notify,
- response_data),
- pages, PAGE_SIZE, 0, false, true);
- ret = ceph_osdc_alloc_messages(lreq->reg_req, GFP_NOIO);
- if (ret)
- goto out_put_lreq;
+ lreq->preply_pages = preply_pages;
+ lreq->preply_len = preply_len;
+
+ ceph_oid_copy(&lreq->t.base_oid, oid);
+ ceph_oloc_copy(&lreq->t.base_oloc, oloc);
+ lreq->t.flags = CEPH_OSD_FLAG_READ;
linger_submit(lreq);
ret = linger_reg_commit_wait(lreq);
diff --git a/net/core/dev.c b/net/core/dev.c
index 8c6c08446556..2771fd22dc6a 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -681,11 +681,11 @@ int dev_fill_forward_path(const struct net_device *dev, const u8 *daddr,
const struct net_device *last_dev;
struct net_device_path_ctx ctx = {
.dev = dev,
- .daddr = daddr,
};
struct net_device_path *path;
int ret = 0;
+ memcpy(ctx.daddr, daddr, sizeof(ctx.daddr));
stack->num_paths = 0;
while (ctx.dev && ctx.dev->netdev_ops->ndo_fill_forward_path) {
last_dev = ctx.dev;
@@ -10304,7 +10304,7 @@ void netdev_stats_to_stats64(struct rtnl_link_stats64 *stats64,
}
EXPORT_SYMBOL(netdev_stats_to_stats64);
-struct net_device_core_stats *netdev_core_stats_alloc(struct net_device *dev)
+struct net_device_core_stats __percpu *netdev_core_stats_alloc(struct net_device *dev)
{
struct net_device_core_stats __percpu *p;
@@ -10315,11 +10315,7 @@ struct net_device_core_stats *netdev_core_stats_alloc(struct net_device *dev)
free_percpu(p);
/* This READ_ONCE() pairs with the cmpxchg() above */
- p = READ_ONCE(dev->core_stats);
- if (!p)
- return NULL;
-
- return this_cpu_ptr(p);
+ return READ_ONCE(dev->core_stats);
}
EXPORT_SYMBOL(netdev_core_stats_alloc);
@@ -10356,9 +10352,9 @@ struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev,
for_each_possible_cpu(i) {
core_stats = per_cpu_ptr(p, i);
- storage->rx_dropped += local_read(&core_stats->rx_dropped);
- storage->tx_dropped += local_read(&core_stats->tx_dropped);
- storage->rx_nohandler += local_read(&core_stats->rx_nohandler);
+ storage->rx_dropped += READ_ONCE(core_stats->rx_dropped);
+ storage->tx_dropped += READ_ONCE(core_stats->tx_dropped);
+ storage->rx_nohandler += READ_ONCE(core_stats->rx_nohandler);
}
}
return storage;
diff --git a/net/core/lwt_bpf.c b/net/core/lwt_bpf.c
index 349480ef68a5..8b6b5e72b217 100644
--- a/net/core/lwt_bpf.c
+++ b/net/core/lwt_bpf.c
@@ -159,10 +159,8 @@ static int bpf_output(struct net *net, struct sock *sk, struct sk_buff *skb)
return dst->lwtstate->orig_output(net, sk, skb);
}
-static int xmit_check_hhlen(struct sk_buff *skb)
+static int xmit_check_hhlen(struct sk_buff *skb, int hh_len)
{
- int hh_len = skb_dst(skb)->dev->hard_header_len;
-
if (skb_headroom(skb) < hh_len) {
int nhead = HH_DATA_ALIGN(hh_len - skb_headroom(skb));
@@ -274,6 +272,7 @@ static int bpf_xmit(struct sk_buff *skb)
bpf = bpf_lwt_lwtunnel(dst->lwtstate);
if (bpf->xmit.prog) {
+ int hh_len = dst->dev->hard_header_len;
__be16 proto = skb->protocol;
int ret;
@@ -291,7 +290,7 @@ static int bpf_xmit(struct sk_buff *skb)
/* If the header was expanded, headroom might be too
* small for L2 header to come, expand as needed.
*/
- ret = xmit_check_hhlen(skb);
+ ret = xmit_check_hhlen(skb, hh_len);
if (unlikely(ret))
return ret;
diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c
index 9b8443774449..5f85e01d4093 100644
--- a/net/core/secure_seq.c
+++ b/net/core/secure_seq.c
@@ -22,6 +22,8 @@
static siphash_aligned_key_t net_secret;
static siphash_aligned_key_t ts_secret;
+#define EPHEMERAL_PORT_SHUFFLE_PERIOD (10 * HZ)
+
static __always_inline void net_secret_init(void)
{
net_get_random_once(&net_secret, sizeof(net_secret));
@@ -94,17 +96,19 @@ u32 secure_tcpv6_seq(const __be32 *saddr, const __be32 *daddr,
}
EXPORT_SYMBOL(secure_tcpv6_seq);
-u32 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr,
+u64 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr,
__be16 dport)
{
const struct {
struct in6_addr saddr;
struct in6_addr daddr;
+ unsigned int timeseed;
__be16 dport;
} __aligned(SIPHASH_ALIGNMENT) combined = {
.saddr = *(struct in6_addr *)saddr,
.daddr = *(struct in6_addr *)daddr,
- .dport = dport
+ .timeseed = jiffies / EPHEMERAL_PORT_SHUFFLE_PERIOD,
+ .dport = dport,
};
net_secret_init();
return siphash(&combined, offsetofend(typeof(combined), dport),
@@ -142,11 +146,13 @@ u32 secure_tcp_seq(__be32 saddr, __be32 daddr,
}
EXPORT_SYMBOL_GPL(secure_tcp_seq);
-u32 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport)
+u64 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport)
{
net_secret_init();
- return siphash_3u32((__force u32)saddr, (__force u32)daddr,
- (__force u16)dport, &net_secret);
+ return siphash_4u32((__force u32)saddr, (__force u32)daddr,
+ (__force u16)dport,
+ jiffies / EPHEMERAL_PORT_SHUFFLE_PERIOD,
+ &net_secret);
}
EXPORT_SYMBOL_GPL(secure_ipv4_port_ephemeral);
#endif
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 30b523fa4ad2..c90c74de90d5 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -3897,7 +3897,7 @@ struct sk_buff *skb_segment_list(struct sk_buff *skb,
unsigned int delta_len = 0;
struct sk_buff *tail = NULL;
struct sk_buff *nskb, *tmp;
- int err;
+ int len_diff, err;
skb_push(skb, -skb_network_offset(skb) + offset);
@@ -3937,9 +3937,11 @@ struct sk_buff *skb_segment_list(struct sk_buff *skb,
skb_push(nskb, -skb_network_offset(nskb) + offset);
skb_release_head_state(nskb);
+ len_diff = skb_network_header_len(nskb) - skb_network_header_len(skb);
__copy_skb_header(nskb, skb);
skb_headers_offset_update(nskb, skb_headroom(nskb) - skb_headroom(skb));
+ nskb->transport_header += len_diff;
skb_copy_from_linear_data_offset(skb, -tnl_hlen,
nskb->data - tnl_hlen,
offset + tnl_hlen);
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index ae662567a6cb..0ea29270d7e5 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -1030,9 +1030,15 @@ static void __net_exit dccp_v4_exit_net(struct net *net)
inet_ctl_sock_destroy(pn->v4_ctl_sk);
}
+static void __net_exit dccp_v4_exit_batch(struct list_head *net_exit_list)
+{
+ inet_twsk_purge(&dccp_hashinfo, AF_INET);
+}
+
static struct pernet_operations dccp_v4_ops = {
.init = dccp_v4_init_net,
.exit = dccp_v4_exit_net,
+ .exit_batch = dccp_v4_exit_batch,
.id = &dccp_v4_pernet_id,
.size = sizeof(struct dccp_v4_pernet),
};
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index eab3bd1ee9a0..fa663518fa0e 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -1115,9 +1115,15 @@ static void __net_exit dccp_v6_exit_net(struct net *net)
inet_ctl_sock_destroy(pn->v6_ctl_sk);
}
+static void __net_exit dccp_v6_exit_batch(struct list_head *net_exit_list)
+{
+ inet_twsk_purge(&dccp_hashinfo, AF_INET6);
+}
+
static struct pernet_operations dccp_v6_ops = {
.init = dccp_v6_init_net,
.exit = dccp_v6_exit_net,
+ .exit_batch = dccp_v6_exit_batch,
.id = &dccp_v6_pernet_id,
.size = sizeof(struct dccp_v6_pernet),
};
diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c
index 0ee7d4c0c955..a09ba642b5e7 100644
--- a/net/decnet/dn_dev.c
+++ b/net/decnet/dn_dev.c
@@ -854,7 +854,7 @@ static void dn_send_endnode_hello(struct net_device *dev, struct dn_ifaddr *ifa)
memcpy(msg->neighbor, dn_hiord, ETH_ALEN);
if (dn_db->router) {
- struct dn_neigh *dn = (struct dn_neigh *)dn_db->router;
+ struct dn_neigh *dn = container_of(dn_db->router, struct dn_neigh, n);
dn_dn2eth(msg->neighbor, dn->addr);
}
@@ -902,7 +902,7 @@ static void dn_send_router_hello(struct net_device *dev, struct dn_ifaddr *ifa)
{
int n;
struct dn_dev *dn_db = rcu_dereference_raw(dev->dn_ptr);
- struct dn_neigh *dn = (struct dn_neigh *)dn_db->router;
+ struct dn_neigh *dn = container_of(dn_db->router, struct dn_neigh, n);
struct sk_buff *skb;
size_t size;
unsigned char *ptr;
diff --git a/net/decnet/dn_neigh.c b/net/decnet/dn_neigh.c
index 94b306f6d551..fbd98ac853ea 100644
--- a/net/decnet/dn_neigh.c
+++ b/net/decnet/dn_neigh.c
@@ -426,7 +426,8 @@ int dn_neigh_router_hello(struct net *net, struct sock *sk, struct sk_buff *skb)
if (!dn_db->router) {
dn_db->router = neigh_clone(neigh);
} else {
- if (msg->priority > ((struct dn_neigh *)dn_db->router)->priority)
+ if (msg->priority > container_of(dn_db->router,
+ struct dn_neigh, n)->priority)
neigh_release(xchg(&dn_db->router, neigh_clone(neigh)));
}
}
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index 7e85f2a1ae25..d1d78a463a06 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -1120,7 +1120,7 @@ source_ok:
/* Ok then, we assume its directly connected and move on */
select_source:
if (neigh)
- gateway = ((struct dn_neigh *)neigh)->addr;
+ gateway = container_of(neigh, struct dn_neigh, n)->addr;
if (gateway == 0)
gateway = fld.daddr;
if (fld.saddr == 0) {
@@ -1429,7 +1429,7 @@ static int dn_route_input_slow(struct sk_buff *skb)
/* Use the default router if there is one */
neigh = neigh_clone(dn_db->router);
if (neigh) {
- gateway = ((struct dn_neigh *)neigh)->addr;
+ gateway = container_of(neigh, struct dn_neigh, n)->addr;
goto make_route;
}
diff --git a/net/dsa/port.c b/net/dsa/port.c
index 32d472a82241..bdccb613285d 100644
--- a/net/dsa/port.c
+++ b/net/dsa/port.c
@@ -451,6 +451,7 @@ out_rollback_unoffload:
switchdev_bridge_port_unoffload(brport_dev, dp,
&dsa_slave_switchdev_notifier,
&dsa_slave_switchdev_blocking_notifier);
+ dsa_flush_workqueue();
out_rollback_unbridge:
dsa_broadcast(DSA_NOTIFIER_BRIDGE_LEAVE, &info);
out_rollback:
@@ -1620,8 +1621,10 @@ int dsa_port_link_register_of(struct dsa_port *dp)
if (ds->ops->phylink_mac_link_down)
ds->ops->phylink_mac_link_down(ds, port,
MLO_AN_FIXED, PHY_INTERFACE_MODE_NA);
+ of_node_put(phy_np);
return dsa_port_phylink_register(dp);
}
+ of_node_put(phy_np);
return 0;
}
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 41c69a6e7854..8022d50584db 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -285,7 +285,7 @@ static void dsa_port_manage_cpu_flood(struct dsa_port *dp)
if (other_dp->slave->flags & IFF_ALLMULTI)
flags.val |= BR_MCAST_FLOOD;
if (other_dp->slave->flags & IFF_PROMISC)
- flags.val |= BR_FLOOD;
+ flags.val |= BR_FLOOD | BR_MCAST_FLOOD;
}
err = dsa_port_pre_bridge_flags(dp, flags, NULL);
diff --git a/net/dsa/tag_hellcreek.c b/net/dsa/tag_hellcreek.c
index f64b805303cd..eb204ad36eee 100644
--- a/net/dsa/tag_hellcreek.c
+++ b/net/dsa/tag_hellcreek.c
@@ -21,6 +21,14 @@ static struct sk_buff *hellcreek_xmit(struct sk_buff *skb,
struct dsa_port *dp = dsa_slave_to_port(dev);
u8 *tag;
+ /* Calculate checksums (if required) before adding the trailer tag to
+ * avoid including it in calculations. That would lead to wrong
+ * checksums after the switch strips the tag.
+ */
+ if (skb->ip_summed == CHECKSUM_PARTIAL &&
+ skb_checksum_help(skb))
+ return NULL;
+
/* Tag encoding */
tag = skb_put(skb, HELLCREEK_TAG_LEN);
*tag = BIT(dp->index);
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index 70e6c87fbe3d..d747166bb291 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -446,7 +446,6 @@ int esp_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *
struct page *page;
struct sk_buff *trailer;
int tailen = esp->tailen;
- unsigned int allocsz;
/* this is non-NULL only with TCP/UDP Encapsulation */
if (x->encap) {
@@ -456,8 +455,8 @@ int esp_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *
return err;
}
- allocsz = ALIGN(skb->data_len + tailen, L1_CACHE_BYTES);
- if (allocsz > ESP_SKB_FRAG_MAXSIZE)
+ if (ALIGN(tailen, L1_CACHE_BYTES) > PAGE_SIZE ||
+ ALIGN(skb->data_len, L1_CACHE_BYTES) > PAGE_SIZE)
goto cow;
if (!skb_cloned(skb)) {
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 2ad3c7b42d6d..1d9e6d5e9a76 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -2403,9 +2403,10 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct
/* decrease mem now to avoid the memleak warning */
atomic_sub(struct_size(psl, sl_addr, psl->sl_max),
&sk->sk_omem_alloc);
- kfree_rcu(psl, rcu);
}
rcu_assign_pointer(pmc->sflist, newpsl);
+ if (psl)
+ kfree_rcu(psl, rcu);
psl = newpsl;
}
rv = 1; /* > 0 for insert logic below if sl_count is 0 */
@@ -2507,11 +2508,13 @@ int ip_mc_msfilter(struct sock *sk, struct ip_msfilter *msf, int ifindex)
/* decrease mem now to avoid the memleak warning */
atomic_sub(struct_size(psl, sl_addr, psl->sl_max),
&sk->sk_omem_alloc);
- kfree_rcu(psl, rcu);
- } else
+ } else {
(void) ip_mc_del_src(in_dev, &msf->imsf_multiaddr, pmc->sfmode,
0, NULL, 0);
+ }
rcu_assign_pointer(pmc->sflist, newpsl);
+ if (psl)
+ kfree_rcu(psl, rcu);
pmc->sfmode = msf->imsf_fmode;
err = 0;
done:
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 17440840a791..a5d57fa679ca 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -504,7 +504,7 @@ not_unique:
return -EADDRNOTAVAIL;
}
-static u32 inet_sk_port_offset(const struct sock *sk)
+static u64 inet_sk_port_offset(const struct sock *sk)
{
const struct inet_sock *inet = inet_sk(sk);
@@ -726,15 +726,17 @@ EXPORT_SYMBOL_GPL(inet_unhash);
* Note that we use 32bit integers (vs RFC 'short integers')
* because 2^16 is not a multiple of num_ephemeral and this
* property might be used by clever attacker.
- * RFC claims using TABLE_LENGTH=10 buckets gives an improvement,
- * we use 256 instead to really give more isolation and
- * privacy, this only consumes 1 KB of kernel memory.
+ * RFC claims using TABLE_LENGTH=10 buckets gives an improvement, though
+ * attacks were since demonstrated, thus we use 65536 instead to really
+ * give more isolation and privacy, at the expense of 256kB of kernel
+ * memory.
*/
-#define INET_TABLE_PERTURB_SHIFT 8
-static u32 table_perturb[1 << INET_TABLE_PERTURB_SHIFT];
+#define INET_TABLE_PERTURB_SHIFT 16
+#define INET_TABLE_PERTURB_SIZE (1 << INET_TABLE_PERTURB_SHIFT)
+static u32 *table_perturb;
int __inet_hash_connect(struct inet_timewait_death_row *death_row,
- struct sock *sk, u32 port_offset,
+ struct sock *sk, u64 port_offset,
int (*check_established)(struct inet_timewait_death_row *,
struct sock *, __u16, struct inet_timewait_sock **))
{
@@ -774,10 +776,13 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
if (likely(remaining > 1))
remaining &= ~1U;
- net_get_random_once(table_perturb, sizeof(table_perturb));
- index = hash_32(port_offset, INET_TABLE_PERTURB_SHIFT);
+ net_get_random_once(table_perturb,
+ INET_TABLE_PERTURB_SIZE * sizeof(*table_perturb));
+ index = port_offset & (INET_TABLE_PERTURB_SIZE - 1);
+
+ offset = READ_ONCE(table_perturb[index]) + (port_offset >> 32);
+ offset %= remaining;
- offset = (READ_ONCE(table_perturb[index]) + port_offset) % remaining;
/* In first pass we try ports of @low parity.
* inet_csk_get_port() does the opposite choice.
*/
@@ -831,11 +836,12 @@ next_port:
return -EADDRNOTAVAIL;
ok:
- /* If our first attempt found a candidate, skip next candidate
- * in 1/16 of cases to add some noise.
+ /* Here we want to add a little bit of randomness to the next source
+ * port that will be chosen. We use a max() with a random here so that
+ * on low contention the randomness is maximal and on high contention
+ * it may be inexistent.
*/
- if (!i && !(prandom_u32() % 16))
- i = 2;
+ i = max_t(int, i, (prandom_u32() & 7) * 2);
WRITE_ONCE(table_perturb[index], READ_ONCE(table_perturb[index]) + i + 2);
/* Head lock still held and bh's disabled */
@@ -859,7 +865,7 @@ ok:
int inet_hash_connect(struct inet_timewait_death_row *death_row,
struct sock *sk)
{
- u32 port_offset = 0;
+ u64 port_offset = 0;
if (!inet_sk(sk)->inet_num)
port_offset = inet_sk_port_offset(sk);
@@ -909,6 +915,12 @@ void __init inet_hashinfo2_init(struct inet_hashinfo *h, const char *name,
low_limit,
high_limit);
init_hashinfo_lhash2(h);
+
+ /* this one is used for source ports of outgoing connections */
+ table_perturb = kmalloc_array(INET_TABLE_PERTURB_SIZE,
+ sizeof(*table_perturb), GFP_KERNEL);
+ if (!table_perturb)
+ panic("TCP: failed to alloc table_perturb");
}
int inet_hashinfo2_init_mod(struct inet_hashinfo *h)
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c
index 9e0bbd026560..0ec501845cb3 100644
--- a/net/ipv4/inet_timewait_sock.c
+++ b/net/ipv4/inet_timewait_sock.c
@@ -52,7 +52,8 @@ static void inet_twsk_kill(struct inet_timewait_sock *tw)
spin_unlock(lock);
/* Disassociate with bind bucket. */
- bhead = &hashinfo->bhash[tw->tw_bslot];
+ bhead = &hashinfo->bhash[inet_bhashfn(twsk_net(tw), tw->tw_num,
+ hashinfo->bhash_size)];
spin_lock(&bhead->lock);
inet_twsk_bind_unhash(tw, hashinfo);
@@ -111,12 +112,8 @@ void inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk,
Note, that any socket with inet->num != 0 MUST be bound in
binding cache, even if it is closed.
*/
- /* Cache inet_bhashfn(), because 'struct net' might be no longer
- * available later in inet_twsk_kill().
- */
- tw->tw_bslot = inet_bhashfn(twsk_net(tw), inet->inet_num,
- hashinfo->bhash_size);
- bhead = &hashinfo->bhash[tw->tw_bslot];
+ bhead = &hashinfo->bhash[inet_bhashfn(twsk_net(tw), inet->inet_num,
+ hashinfo->bhash_size)];
spin_lock(&bhead->lock);
tw->tw_tb = icsk->icsk_bind_hash;
WARN_ON(!icsk->icsk_bind_hash);
@@ -257,3 +254,50 @@ void __inet_twsk_schedule(struct inet_timewait_sock *tw, int timeo, bool rearm)
}
}
EXPORT_SYMBOL_GPL(__inet_twsk_schedule);
+
+void inet_twsk_purge(struct inet_hashinfo *hashinfo, int family)
+{
+ struct inet_timewait_sock *tw;
+ struct sock *sk;
+ struct hlist_nulls_node *node;
+ unsigned int slot;
+
+ for (slot = 0; slot <= hashinfo->ehash_mask; slot++) {
+ struct inet_ehash_bucket *head = &hashinfo->ehash[slot];
+restart_rcu:
+ cond_resched();
+ rcu_read_lock();
+restart:
+ sk_nulls_for_each_rcu(sk, node, &head->chain) {
+ if (sk->sk_state != TCP_TIME_WAIT)
+ continue;
+ tw = inet_twsk(sk);
+ if ((tw->tw_family != family) ||
+ refcount_read(&twsk_net(tw)->ns.count))
+ continue;
+
+ if (unlikely(!refcount_inc_not_zero(&tw->tw_refcnt)))
+ continue;
+
+ if (unlikely((tw->tw_family != family) ||
+ refcount_read(&twsk_net(tw)->ns.count))) {
+ inet_twsk_put(tw);
+ goto restart;
+ }
+
+ rcu_read_unlock();
+ local_bh_disable();
+ inet_twsk_deschedule_put(tw);
+ local_bh_enable();
+ goto restart_rcu;
+ }
+ /* If the nulls value we got at the end of this lookup is
+ * not the expected one, we must restart lookup.
+ * We probably met an item that was moved to another chain.
+ */
+ if (get_nulls_value(node) != slot)
+ goto restart;
+ rcu_read_unlock();
+ }
+}
+EXPORT_SYMBOL_GPL(inet_twsk_purge);
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 99db2e41ed10..aacee9dd771b 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -459,14 +459,12 @@ static void __gre_xmit(struct sk_buff *skb, struct net_device *dev,
__be16 proto)
{
struct ip_tunnel *tunnel = netdev_priv(dev);
-
- if (tunnel->parms.o_flags & TUNNEL_SEQ)
- tunnel->o_seqno++;
+ __be16 flags = tunnel->parms.o_flags;
/* Push GRE header. */
gre_build_header(skb, tunnel->tun_hlen,
- tunnel->parms.o_flags, proto, tunnel->parms.o_key,
- htonl(tunnel->o_seqno));
+ flags, proto, tunnel->parms.o_key,
+ (flags & TUNNEL_SEQ) ? htonl(atomic_fetch_inc(&tunnel->o_seqno)) : 0);
ip_tunnel_xmit(skb, dev, tnl_params, tnl_params->protocol);
}
@@ -504,7 +502,7 @@ static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev,
(TUNNEL_CSUM | TUNNEL_KEY | TUNNEL_SEQ);
gre_build_header(skb, tunnel_hlen, flags, proto,
tunnel_id_to_key32(tun_info->key.tun_id),
- (flags & TUNNEL_SEQ) ? htonl(tunnel->o_seqno++) : 0);
+ (flags & TUNNEL_SEQ) ? htonl(atomic_fetch_inc(&tunnel->o_seqno)) : 0);
ip_md_tunnel_xmit(skb, dev, IPPROTO_GRE, tunnel_hlen);
@@ -581,7 +579,7 @@ static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev)
}
gre_build_header(skb, 8, TUNNEL_SEQ,
- proto, 0, htonl(tunnel->o_seqno++));
+ proto, 0, htonl(atomic_fetch_inc(&tunnel->o_seqno)));
ip_md_tunnel_xmit(skb, dev, IPPROTO_GRE, tunnel_hlen);
@@ -605,8 +603,8 @@ static int gre_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
key = &info->key;
ip_tunnel_init_flow(&fl4, IPPROTO_GRE, key->u.ipv4.dst, key->u.ipv4.src,
tunnel_id_to_key32(key->tun_id),
- key->tos & ~INET_ECN_MASK, 0, skb->mark,
- skb_get_hash(skb));
+ key->tos & ~INET_ECN_MASK, dev_net(dev), 0,
+ skb->mark, skb_get_hash(skb));
rt = ip_route_output_key(dev_net(dev), &fl4);
if (IS_ERR(rt))
return PTR_ERR(rt);
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index 5a473319d3a5..94017a8c3994 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -294,8 +294,8 @@ static int ip_tunnel_bind_dev(struct net_device *dev)
ip_tunnel_init_flow(&fl4, iph->protocol, iph->daddr,
iph->saddr, tunnel->parms.o_key,
- RT_TOS(iph->tos), tunnel->parms.link,
- tunnel->fwmark, 0);
+ RT_TOS(iph->tos), dev_net(dev),
+ tunnel->parms.link, tunnel->fwmark, 0);
rt = ip_route_output_key(tunnel->net, &fl4);
if (!IS_ERR(rt)) {
@@ -570,7 +570,7 @@ void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
}
ip_tunnel_init_flow(&fl4, proto, key->u.ipv4.dst, key->u.ipv4.src,
tunnel_id_to_key32(key->tun_id), RT_TOS(tos),
- 0, skb->mark, skb_get_hash(skb));
+ dev_net(dev), 0, skb->mark, skb_get_hash(skb));
if (tunnel->encap.type != TUNNEL_ENCAP_NONE)
goto tx_error;
@@ -726,7 +726,8 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
}
ip_tunnel_init_flow(&fl4, protocol, dst, tnl_params->saddr,
- tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link,
+ tunnel->parms.o_key, RT_TOS(tos),
+ dev_net(dev), tunnel->parms.link,
tunnel->fwmark, skb_get_hash(skb));
if (ip_tunnel_encap(skb, tunnel, &protocol, &fl4) < 0)
diff --git a/net/ipv4/netfilter/nf_flow_table_ipv4.c b/net/ipv4/netfilter/nf_flow_table_ipv4.c
deleted file mode 100644
index e69de29bb2d1..000000000000
--- a/net/ipv4/netfilter/nf_flow_table_ipv4.c
+++ /dev/null
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index 3ee947557b88..aa9a11b20d18 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -305,6 +305,7 @@ static int ping_check_bind_addr(struct sock *sk, struct inet_sock *isk,
struct net *net = sock_net(sk);
if (sk->sk_family == AF_INET) {
struct sockaddr_in *addr = (struct sockaddr_in *) uaddr;
+ u32 tb_id = RT_TABLE_LOCAL;
int chk_addr_ret;
if (addr_len < sizeof(*addr))
@@ -318,7 +319,8 @@ static int ping_check_bind_addr(struct sock *sk, struct inet_sock *isk,
pr_debug("ping_check_bind_addr(sk=%p,addr=%pI4,port=%d)\n",
sk, &addr->sin_addr.s_addr, ntohs(addr->sin_port));
- chk_addr_ret = inet_addr_type(net, addr->sin_addr.s_addr);
+ tb_id = l3mdev_fib_table_by_index(net, sk->sk_bound_dev_if) ? : tb_id;
+ chk_addr_ret = inet_addr_type_table(net, addr->sin_addr.s_addr, tb_id);
if (!inet_addr_valid_or_nonlocal(net, inet_sk(sk),
addr->sin_addr.s_addr,
@@ -355,6 +357,14 @@ static int ping_check_bind_addr(struct sock *sk, struct inet_sock *isk,
return -ENODEV;
}
}
+
+ if (!dev && sk->sk_bound_dev_if) {
+ dev = dev_get_by_index_rcu(net, sk->sk_bound_dev_if);
+ if (!dev) {
+ rcu_read_unlock();
+ return -ENODEV;
+ }
+ }
has_addr = pingv6_ops.ipv6_chk_addr(net, &addr->sin6_addr, dev,
scoped);
rcu_read_unlock();
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 98c6f3429593..ed01063d8f30 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1726,6 +1726,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
struct in_device *in_dev = __in_dev_get_rcu(dev);
unsigned int flags = RTCF_MULTICAST;
struct rtable *rth;
+ bool no_policy;
u32 itag = 0;
int err;
@@ -1736,8 +1737,12 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
if (our)
flags |= RTCF_LOCAL;
+ no_policy = IN_DEV_ORCONF(in_dev, NOPOLICY);
+ if (no_policy)
+ IPCB(skb)->flags |= IPSKB_NOPOLICY;
+
rth = rt_dst_alloc(dev_net(dev)->loopback_dev, flags, RTN_MULTICAST,
- IN_DEV_ORCONF(in_dev, NOPOLICY), false);
+ no_policy, false);
if (!rth)
return -ENOBUFS;
@@ -1753,6 +1758,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
#endif
RT_CACHE_STAT_INC(in_slow_mc);
+ skb_dst_drop(skb);
skb_dst_set(skb, &rth->dst);
return 0;
}
@@ -1795,7 +1801,7 @@ static int __mkroute_input(struct sk_buff *skb,
struct rtable *rth;
int err;
struct in_device *out_dev;
- bool do_cache;
+ bool do_cache, no_policy;
u32 itag = 0;
/* get a working reference to the output device */
@@ -1840,6 +1846,10 @@ static int __mkroute_input(struct sk_buff *skb,
}
}
+ no_policy = IN_DEV_ORCONF(in_dev, NOPOLICY);
+ if (no_policy)
+ IPCB(skb)->flags |= IPSKB_NOPOLICY;
+
fnhe = find_exception(nhc, daddr);
if (do_cache) {
if (fnhe)
@@ -1852,8 +1862,7 @@ static int __mkroute_input(struct sk_buff *skb,
}
}
- rth = rt_dst_alloc(out_dev->dev, 0, res->type,
- IN_DEV_ORCONF(in_dev, NOPOLICY),
+ rth = rt_dst_alloc(out_dev->dev, 0, res->type, no_policy,
IN_DEV_ORCONF(out_dev, NOXFRM));
if (!rth) {
err = -ENOBUFS;
@@ -2228,6 +2237,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
struct rtable *rth;
struct flowi4 fl4;
bool do_cache = true;
+ bool no_policy;
/* IP on this device is disabled. */
@@ -2346,6 +2356,10 @@ brd_input:
RT_CACHE_STAT_INC(in_brd);
local_input:
+ no_policy = IN_DEV_ORCONF(in_dev, NOPOLICY);
+ if (no_policy)
+ IPCB(skb)->flags |= IPSKB_NOPOLICY;
+
do_cache &= res->fi && !itag;
if (do_cache) {
struct fib_nh_common *nhc = FIB_RES_NHC(*res);
@@ -2360,7 +2374,7 @@ local_input:
rth = rt_dst_alloc(ip_rt_get_dev(net, res),
flags | RTCF_LOCAL, res->type,
- IN_DEV_ORCONF(in_dev, NOPOLICY), false);
+ no_policy, false);
if (!rth)
goto e_nobufs;
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 2cb3b852d148..f33c31dd7366 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -281,6 +281,7 @@ bool cookie_ecn_ok(const struct tcp_options_received *tcp_opt,
EXPORT_SYMBOL(cookie_ecn_ok);
struct request_sock *cookie_tcp_reqsk_alloc(const struct request_sock_ops *ops,
+ const struct tcp_request_sock_ops *af_ops,
struct sock *sk,
struct sk_buff *skb)
{
@@ -297,6 +298,10 @@ struct request_sock *cookie_tcp_reqsk_alloc(const struct request_sock_ops *ops,
return NULL;
treq = tcp_rsk(req);
+
+ /* treq->af_specific might be used to perform TCP_MD5 lookup */
+ treq->af_specific = af_ops;
+
treq->syn_tos = TCP_SKB_CB(skb)->ip_dsfield;
#if IS_ENABLED(CONFIG_MPTCP)
treq->is_mptcp = sk_is_mptcp(sk);
@@ -364,7 +369,8 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
goto out;
ret = NULL;
- req = cookie_tcp_reqsk_alloc(&tcp_request_sock_ops, sk, skb);
+ req = cookie_tcp_reqsk_alloc(&tcp_request_sock_ops,
+ &tcp_request_sock_ipv4_ops, sk, skb);
if (!req)
goto out;
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index cf18fbcbf123..bb7ef45408e1 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2335,8 +2335,10 @@ static int tcp_recvmsg_locked(struct sock *sk, struct msghdr *msg, size_t len,
if (sk->sk_state == TCP_LISTEN)
goto out;
- if (tp->recvmsg_inq)
+ if (tp->recvmsg_inq) {
*cmsg_flags = TCP_CMSG_INQ;
+ msg->msg_get_inq = 1;
+ }
timeo = sock_rcvtimeo(sk, nonblock);
/* Urgent data needs to be handled specially. */
@@ -2559,7 +2561,7 @@ recv_sndq:
int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
int flags, int *addr_len)
{
- int cmsg_flags = 0, ret, inq;
+ int cmsg_flags = 0, ret;
struct scm_timestamping_internal tss;
if (unlikely(flags & MSG_ERRQUEUE))
@@ -2576,12 +2578,14 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
release_sock(sk);
sk_defer_free_flush(sk);
- if (cmsg_flags && ret >= 0) {
+ if ((cmsg_flags || msg->msg_get_inq) && ret >= 0) {
if (cmsg_flags & TCP_CMSG_TS)
tcp_recv_timestamp(msg, sk, &tss);
- if (cmsg_flags & TCP_CMSG_INQ) {
- inq = tcp_inq_hint(sk);
- put_cmsg(msg, SOL_TCP, TCP_CM_INQ, sizeof(inq), &inq);
+ if (msg->msg_get_inq) {
+ msg->msg_inq = tcp_inq_hint(sk);
+ if (cmsg_flags & TCP_CMSG_INQ)
+ put_cmsg(msg, SOL_TCP, TCP_CM_INQ,
+ sizeof(msg->msg_inq), &msg->msg_inq);
}
}
return ret;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 2088f93fa37b..60f99e9fb6d1 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3867,7 +3867,8 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
tcp_process_tlp_ack(sk, ack, flag);
if (tcp_ack_is_dubious(sk, flag)) {
- if (!(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP))) {
+ if (!(flag & (FLAG_SND_UNA_ADVANCED |
+ FLAG_NOT_DUP | FLAG_DSACKING_ACK))) {
num_dupack = 1;
/* Consider if pure acks were aggregated in tcp_add_backlog() */
if (!(flag & FLAG_DATA))
@@ -5454,7 +5455,17 @@ static void tcp_new_space(struct sock *sk)
INDIRECT_CALL_1(sk->sk_write_space, sk_stream_write_space, sk);
}
-static void tcp_check_space(struct sock *sk)
+/* Caller made space either from:
+ * 1) Freeing skbs in rtx queues (after tp->snd_una has advanced)
+ * 2) Sent skbs from output queue (and thus advancing tp->snd_nxt)
+ *
+ * We might be able to generate EPOLLOUT to the application if:
+ * 1) Space consumed in output/rtx queues is below sk->sk_sndbuf/2
+ * 2) notsent amount (tp->write_seq - tp->snd_nxt) became
+ * small enough that tcp_stream_memory_free() decides it
+ * is time to generate EPOLLOUT.
+ */
+void tcp_check_space(struct sock *sk)
{
/* pairs with tcp_poll() */
smp_mb();
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index f9cec624068d..457f5b5d5d4a 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -3173,6 +3173,8 @@ static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list)
{
struct net *net;
+ inet_twsk_purge(&tcp_hashinfo, AF_INET);
+
list_for_each_entry(net, net_exit_list, exit_list)
tcp_fastopen_ctx_destroy(net);
}
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 6366df7aaf2a..6854bb1fb32b 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -531,7 +531,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
newtp->tsoffset = treq->ts_off;
#ifdef CONFIG_TCP_MD5SIG
newtp->md5sig_info = NULL; /*XXX*/
- if (newtp->af_specific->md5_lookup(sk, newsk))
+ if (treq->af_specific->req_md5_lookup(sk, req_to_sk(req)))
newtp->tcp_header_len += TCPOLEN_MD5SIG_ALIGNED;
#endif
if (skb->len >= TCP_MSS_DEFAULT + newtp->tcp_header_len)
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 9ede847f4199..1ca2f28c9981 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -82,6 +82,7 @@ static void tcp_event_new_data_sent(struct sock *sk, struct sk_buff *skb)
NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPORIGDATASENT,
tcp_skb_pcount(skb));
+ tcp_check_space(sk);
}
/* SND.NXT, if window was not shrunk or the amount of shrunk was less than one
diff --git a/net/ipv4/tcp_rate.c b/net/ipv4/tcp_rate.c
index fbab921670cc..9a8e014d9b5b 100644
--- a/net/ipv4/tcp_rate.c
+++ b/net/ipv4/tcp_rate.c
@@ -74,27 +74,32 @@ void tcp_rate_skb_sent(struct sock *sk, struct sk_buff *skb)
*
* If an ACK (s)acks multiple skbs (e.g., stretched-acks), this function is
* called multiple times. We favor the information from the most recently
- * sent skb, i.e., the skb with the highest prior_delivered count.
+ * sent skb, i.e., the skb with the most recently sent time and the highest
+ * sequence.
*/
void tcp_rate_skb_delivered(struct sock *sk, struct sk_buff *skb,
struct rate_sample *rs)
{
struct tcp_sock *tp = tcp_sk(sk);
struct tcp_skb_cb *scb = TCP_SKB_CB(skb);
+ u64 tx_tstamp;
if (!scb->tx.delivered_mstamp)
return;
+ tx_tstamp = tcp_skb_timestamp_us(skb);
if (!rs->prior_delivered ||
- after(scb->tx.delivered, rs->prior_delivered)) {
+ tcp_skb_sent_after(tx_tstamp, tp->first_tx_mstamp,
+ scb->end_seq, rs->last_end_seq)) {
rs->prior_delivered_ce = scb->tx.delivered_ce;
rs->prior_delivered = scb->tx.delivered;
rs->prior_mstamp = scb->tx.delivered_mstamp;
rs->is_app_limited = scb->tx.is_app_limited;
rs->is_retrans = scb->sacked & TCPCB_RETRANS;
+ rs->last_end_seq = scb->end_seq;
/* Record send time of most recently ACKed packet: */
- tp->first_tx_mstamp = tcp_skb_timestamp_us(skb);
+ tp->first_tx_mstamp = tx_tstamp;
/* Find the duration of the "send phase" of this window: */
rs->interval_us = tcp_stamp_us_delta(tp->first_tx_mstamp,
scb->tx.first_tx_mstamp);
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index 55d604c9b3b3..f2120e92caf1 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -482,7 +482,6 @@ int esp6_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info
struct page *page;
struct sk_buff *trailer;
int tailen = esp->tailen;
- unsigned int allocsz;
if (x->encap) {
int err = esp6_output_encap(x, skb, esp);
@@ -491,8 +490,8 @@ int esp6_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info
return err;
}
- allocsz = ALIGN(skb->data_len + tailen, L1_CACHE_BYTES);
- if (allocsz > ESP_SKB_FRAG_MAXSIZE)
+ if (ALIGN(tailen, L1_CACHE_BYTES) > PAGE_SIZE ||
+ ALIGN(skb->data_len, L1_CACHE_BYTES) > PAGE_SIZE)
goto cow;
if (!skb_cloned(skb)) {
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index 4740afecf7c6..32ccac10bd62 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -308,7 +308,7 @@ not_unique:
return -EADDRNOTAVAIL;
}
-static u32 inet6_sk_port_offset(const struct sock *sk)
+static u64 inet6_sk_port_offset(const struct sock *sk)
{
const struct inet_sock *inet = inet_sk(sk);
@@ -320,7 +320,7 @@ static u32 inet6_sk_port_offset(const struct sock *sk)
int inet6_hash_connect(struct inet_timewait_death_row *death_row,
struct sock *sk)
{
- u32 port_offset = 0;
+ u64 port_offset = 0;
if (!inet_sk(sk)->inet_num)
port_offset = inet6_sk_port_offset(sk);
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 8753e9cec326..5136959b3dc5 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -724,6 +724,7 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb,
{
struct ip6_tnl *tunnel = netdev_priv(dev);
__be16 protocol;
+ __be16 flags;
if (dev->type == ARPHRD_ETHER)
IPCB(skb)->flags = 0;
@@ -733,16 +734,13 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb,
else
fl6->daddr = tunnel->parms.raddr;
- if (skb_cow_head(skb, dev->needed_headroom ?: tunnel->hlen))
- return -ENOMEM;
-
/* Push GRE header. */
protocol = (dev->type == ARPHRD_ETHER) ? htons(ETH_P_TEB) : proto;
if (tunnel->parms.collect_md) {
struct ip_tunnel_info *tun_info;
const struct ip_tunnel_key *key;
- __be16 flags;
+ int tun_hlen;
tun_info = skb_tunnel_info_txcheck(skb);
if (IS_ERR(tun_info) ||
@@ -760,21 +758,27 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb,
dsfield = key->tos;
flags = key->tun_flags &
(TUNNEL_CSUM | TUNNEL_KEY | TUNNEL_SEQ);
- tunnel->tun_hlen = gre_calc_hlen(flags);
+ tun_hlen = gre_calc_hlen(flags);
- gre_build_header(skb, tunnel->tun_hlen,
+ if (skb_cow_head(skb, dev->needed_headroom ?: tun_hlen + tunnel->encap_hlen))
+ return -ENOMEM;
+
+ gre_build_header(skb, tun_hlen,
flags, protocol,
tunnel_id_to_key32(tun_info->key.tun_id),
- (flags & TUNNEL_SEQ) ? htonl(tunnel->o_seqno++)
+ (flags & TUNNEL_SEQ) ? htonl(atomic_fetch_inc(&tunnel->o_seqno))
: 0);
} else {
- if (tunnel->parms.o_flags & TUNNEL_SEQ)
- tunnel->o_seqno++;
+ if (skb_cow_head(skb, dev->needed_headroom ?: tunnel->hlen))
+ return -ENOMEM;
+
+ flags = tunnel->parms.o_flags;
- gre_build_header(skb, tunnel->tun_hlen, tunnel->parms.o_flags,
+ gre_build_header(skb, tunnel->tun_hlen, flags,
protocol, tunnel->parms.o_key,
- htonl(tunnel->o_seqno));
+ (flags & TUNNEL_SEQ) ? htonl(atomic_fetch_inc(&tunnel->o_seqno))
+ : 0);
}
return ip6_tnl_xmit(skb, dev, dsfield, fl6, encap_limit, pmtu,
@@ -1052,7 +1056,7 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb,
/* Push GRE header. */
proto = (t->parms.erspan_ver == 1) ? htons(ETH_P_ERSPAN)
: htons(ETH_P_ERSPAN2);
- gre_build_header(skb, 8, TUNNEL_SEQ, proto, 0, htonl(t->o_seqno++));
+ gre_build_header(skb, 8, TUNNEL_SEQ, proto, 0, htonl(atomic_fetch_inc(&t->o_seqno)));
/* TooBig packet may have updated dst->dev's mtu */
if (!t->parms.collect_md && dst && dst_mtu(dst) > dst->dev->mtu)
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 909f937befd7..7f695c39d9a8 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -460,10 +460,10 @@ int ip6_mc_source(int add, int omode, struct sock *sk,
newpsl->sl_addr[i] = psl->sl_addr[i];
atomic_sub(struct_size(psl, sl_addr, psl->sl_max),
&sk->sk_omem_alloc);
- kfree_rcu(psl, rcu);
}
+ rcu_assign_pointer(pmc->sflist, newpsl);
+ kfree_rcu(psl, rcu);
psl = newpsl;
- rcu_assign_pointer(pmc->sflist, psl);
}
rv = 1; /* > 0 for insert logic below if sl_count is 0 */
for (i = 0; i < psl->sl_count; i++) {
@@ -565,12 +565,12 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf,
psl->sl_count, psl->sl_addr, 0);
atomic_sub(struct_size(psl, sl_addr, psl->sl_max),
&sk->sk_omem_alloc);
- kfree_rcu(psl, rcu);
} else {
ip6_mc_del_src(idev, group, pmc->sfmode, 0, NULL, 0);
}
- mutex_unlock(&idev->mc_lock);
rcu_assign_pointer(pmc->sflist, newpsl);
+ mutex_unlock(&idev->mc_lock);
+ kfree_rcu(psl, rcu);
pmc->sfmode = gsf->gf_fmode;
err = 0;
done:
diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c
index 1da332450d98..8ce60ab89015 100644
--- a/net/ipv6/netfilter.c
+++ b/net/ipv6/netfilter.c
@@ -24,14 +24,13 @@ int ip6_route_me_harder(struct net *net, struct sock *sk_partial, struct sk_buff
{
const struct ipv6hdr *iph = ipv6_hdr(skb);
struct sock *sk = sk_to_full_sk(sk_partial);
+ struct net_device *dev = skb_dst(skb)->dev;
struct flow_keys flkeys;
unsigned int hh_len;
struct dst_entry *dst;
int strict = (ipv6_addr_type(&iph->daddr) &
(IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL));
struct flowi6 fl6 = {
- .flowi6_oif = sk && sk->sk_bound_dev_if ? sk->sk_bound_dev_if :
- strict ? skb_dst(skb)->dev->ifindex : 0,
.flowi6_mark = skb->mark,
.flowi6_uid = sock_net_uid(net, sk),
.daddr = iph->daddr,
@@ -39,6 +38,13 @@ int ip6_route_me_harder(struct net *net, struct sock *sk_partial, struct sk_buff
};
int err;
+ if (sk && sk->sk_bound_dev_if)
+ fl6.flowi6_oif = sk->sk_bound_dev_if;
+ else if (strict)
+ fl6.flowi6_oif = dev->ifindex;
+ else
+ fl6.flowi6_oif = l3mdev_master_ifindex(dev);
+
fib6_rules_early_flow_dissect(net, skb, &fl6, &flkeys);
dst = ip6_route_output(net, sk, &fl6);
err = dst->error;
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 169e9df6d172..c4b6ce017d5e 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -3292,6 +3292,7 @@ static int ip6_dst_gc(struct dst_ops *ops)
int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
+ unsigned int val;
int entries;
entries = dst_entries_get_fast(ops);
@@ -3302,13 +3303,13 @@ static int ip6_dst_gc(struct dst_ops *ops)
entries <= rt_max_size)
goto out;
- net->ipv6.ip6_rt_gc_expire++;
- fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
+ fib6_run_gc(atomic_inc_return(&net->ipv6.ip6_rt_gc_expire), net, true);
entries = dst_entries_get_slow(ops);
if (entries < ops->gc_thresh)
- net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
+ atomic_set(&net->ipv6.ip6_rt_gc_expire, rt_gc_timeout >> 1);
out:
- net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
+ val = atomic_read(&net->ipv6.ip6_rt_gc_expire);
+ atomic_set(&net->ipv6.ip6_rt_gc_expire, val - (val >> rt_elasticity));
return entries > rt_max_size;
}
@@ -6509,7 +6510,7 @@ static int __net_init ip6_route_net_init(struct net *net)
net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
net->ipv6.sysctl.skip_notify_on_dev_down = 0;
- net->ipv6.ip6_rt_gc_expire = 30*HZ;
+ atomic_set(&net->ipv6.ip6_rt_gc_expire, 30*HZ);
ret = 0;
out:
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index d1b61d00368e..9cc123f000fb 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -170,7 +170,8 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
goto out;
ret = NULL;
- req = cookie_tcp_reqsk_alloc(&tcp6_request_sock_ops, sk, skb);
+ req = cookie_tcp_reqsk_alloc(&tcp6_request_sock_ops,
+ &tcp_request_sock_ipv6_ops, sk, skb);
if (!req)
goto out;
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 13678d3908fa..faaddaf43c90 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -2207,9 +2207,15 @@ static void __net_exit tcpv6_net_exit(struct net *net)
inet_ctl_sock_destroy(net->ipv6.tcp_sk);
}
+static void __net_exit tcpv6_net_exit_batch(struct list_head *net_exit_list)
+{
+ inet_twsk_purge(&tcp_hashinfo, AF_INET6);
+}
+
static struct pernet_operations tcpv6_net_ops = {
.init = tcpv6_net_init,
.exit = tcpv6_net_exit,
+ .exit_batch = tcpv6_net_exit_batch,
};
int __init tcpv6_init(void)
diff --git a/net/key/af_key.c b/net/key/af_key.c
index fd51db3be91c..339d95df19d3 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -2826,8 +2826,10 @@ static int pfkey_process(struct sock *sk, struct sk_buff *skb, const struct sadb
void *ext_hdrs[SADB_EXT_MAX];
int err;
- pfkey_broadcast(skb_clone(skb, GFP_KERNEL), GFP_KERNEL,
- BROADCAST_PROMISC_ONLY, NULL, sock_net(sk));
+ err = pfkey_broadcast(skb_clone(skb, GFP_KERNEL), GFP_KERNEL,
+ BROADCAST_PROMISC_ONLY, NULL, sock_net(sk));
+ if (err)
+ return err;
memset(ext_hdrs, 0, sizeof(ext_hdrs));
err = parse_exthdrs(skb, hdr, ext_hdrs);
@@ -2898,7 +2900,7 @@ static int count_ah_combs(const struct xfrm_tmpl *t)
break;
if (!aalg->pfkey_supported)
continue;
- if (aalg_tmpl_set(t, aalg))
+ if (aalg_tmpl_set(t, aalg) && aalg->available)
sz += sizeof(struct sadb_comb);
}
return sz + sizeof(struct sadb_prop);
@@ -2916,7 +2918,7 @@ static int count_esp_combs(const struct xfrm_tmpl *t)
if (!ealg->pfkey_supported)
continue;
- if (!(ealg_tmpl_set(t, ealg)))
+ if (!(ealg_tmpl_set(t, ealg) && ealg->available))
continue;
for (k = 1; ; k++) {
@@ -2927,7 +2929,7 @@ static int count_esp_combs(const struct xfrm_tmpl *t)
if (!aalg->pfkey_supported)
continue;
- if (aalg_tmpl_set(t, aalg))
+ if (aalg_tmpl_set(t, aalg) && aalg->available)
sz += sizeof(struct sadb_comb);
}
}
diff --git a/net/l3mdev/l3mdev.c b/net/l3mdev/l3mdev.c
index 4eb8892fb2ff..ca10916340b0 100644
--- a/net/l3mdev/l3mdev.c
+++ b/net/l3mdev/l3mdev.c
@@ -147,7 +147,7 @@ int l3mdev_master_upper_ifindex_by_index_rcu(struct net *net, int ifindex)
dev = dev_get_by_index_rcu(net, ifindex);
while (dev && !netif_is_l3_master(dev))
- dev = netdev_master_upper_dev_get(dev);
+ dev = netdev_master_upper_dev_get_rcu(dev);
return dev ? dev->ifindex : 0;
}
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 1b30c724ca8d..dc8aec1a5d3d 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -3657,6 +3657,12 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata,
cbss->transmitted_bss->bssid);
bss_conf->bssid_indicator = cbss->max_bssid_indicator;
bss_conf->bssid_index = cbss->bssid_index;
+ } else {
+ bss_conf->nontransmitted = false;
+ memset(bss_conf->transmitter_bssid, 0,
+ sizeof(bss_conf->transmitter_bssid));
+ bss_conf->bssid_indicator = 0;
+ bss_conf->bssid_index = 0;
}
/*
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index beb6b92eb780..88d797fa82ff 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -1405,8 +1405,7 @@ static void ieee80211_rx_reorder_ampdu(struct ieee80211_rx_data *rx,
goto dont_reorder;
/* not part of a BA session */
- if (ack_policy != IEEE80211_QOS_CTL_ACK_POLICY_BLOCKACK &&
- ack_policy != IEEE80211_QOS_CTL_ACK_POLICY_NORMAL)
+ if (ack_policy == IEEE80211_QOS_CTL_ACK_POLICY_NOACK)
goto dont_reorder;
/* new, potentially un-ordered, ampdu frame - process it */
diff --git a/net/mctp/device.c b/net/mctp/device.c
index f49be882e98e..99a3bda8852f 100644
--- a/net/mctp/device.c
+++ b/net/mctp/device.c
@@ -313,6 +313,7 @@ void mctp_dev_hold(struct mctp_dev *mdev)
void mctp_dev_put(struct mctp_dev *mdev)
{
if (mdev && refcount_dec_and_test(&mdev->refs)) {
+ kfree(mdev->addrs);
dev_put(mdev->dev);
kfree_rcu(mdev, rcu);
}
@@ -441,7 +442,6 @@ static void mctp_unregister(struct net_device *dev)
mctp_route_remove_dev(mdev);
mctp_neigh_remove_dev(mdev);
- kfree(mdev->addrs);
mctp_dev_put(mdev);
}
diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index 325383646f5c..b548cec86c9d 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -107,7 +107,7 @@ static void mptcp_parse_option(const struct sk_buff *skb,
ptr += 2;
}
if (opsize == TCPOLEN_MPTCP_MPC_ACK_DATA_CSUM) {
- mp_opt->csum = (__force __sum16)get_unaligned_be16(ptr);
+ mp_opt->csum = get_unaligned((__force __sum16 *)ptr);
mp_opt->suboptions |= OPTION_MPTCP_CSUMREQD;
ptr += 2;
}
@@ -221,7 +221,7 @@ static void mptcp_parse_option(const struct sk_buff *skb,
if (opsize == expected_opsize + TCPOLEN_MPTCP_DSS_CHECKSUM) {
mp_opt->suboptions |= OPTION_MPTCP_CSUMREQD;
- mp_opt->csum = (__force __sum16)get_unaligned_be16(ptr);
+ mp_opt->csum = get_unaligned((__force __sum16 *)ptr);
ptr += 2;
}
@@ -1240,7 +1240,7 @@ static void mptcp_set_rwin(const struct tcp_sock *tp)
WRITE_ONCE(msk->rcv_wnd_sent, ack_seq);
}
-u16 __mptcp_make_csum(u64 data_seq, u32 subflow_seq, u16 data_len, __wsum sum)
+__sum16 __mptcp_make_csum(u64 data_seq, u32 subflow_seq, u16 data_len, __wsum sum)
{
struct csum_pseudo_header header;
__wsum csum;
@@ -1256,15 +1256,25 @@ u16 __mptcp_make_csum(u64 data_seq, u32 subflow_seq, u16 data_len, __wsum sum)
header.csum = 0;
csum = csum_partial(&header, sizeof(header), sum);
- return (__force u16)csum_fold(csum);
+ return csum_fold(csum);
}
-static u16 mptcp_make_csum(const struct mptcp_ext *mpext)
+static __sum16 mptcp_make_csum(const struct mptcp_ext *mpext)
{
return __mptcp_make_csum(mpext->data_seq, mpext->subflow_seq, mpext->data_len,
~csum_unfold(mpext->csum));
}
+static void put_len_csum(u16 len, __sum16 csum, void *data)
+{
+ __sum16 *sumptr = data + 2;
+ __be16 *ptr = data;
+
+ put_unaligned_be16(len, ptr);
+
+ put_unaligned(csum, sumptr);
+}
+
void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp,
struct mptcp_out_options *opts)
{
@@ -1340,8 +1350,9 @@ void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp,
put_unaligned_be32(mpext->subflow_seq, ptr);
ptr += 1;
if (opts->csum_reqd) {
- put_unaligned_be32(mpext->data_len << 16 |
- mptcp_make_csum(mpext), ptr);
+ put_len_csum(mpext->data_len,
+ mptcp_make_csum(mpext),
+ ptr);
} else {
put_unaligned_be32(mpext->data_len << 16 |
TCPOPT_NOP << 8 | TCPOPT_NOP, ptr);
@@ -1392,11 +1403,12 @@ void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp,
goto mp_capable_done;
if (opts->csum_reqd) {
- put_unaligned_be32(opts->data_len << 16 |
- __mptcp_make_csum(opts->data_seq,
- opts->subflow_seq,
- opts->data_len,
- ~csum_unfold(opts->csum)), ptr);
+ put_len_csum(opts->data_len,
+ __mptcp_make_csum(opts->data_seq,
+ opts->subflow_seq,
+ opts->data_len,
+ ~csum_unfold(opts->csum)),
+ ptr);
} else {
put_unaligned_be32(opts->data_len << 16 |
TCPOPT_NOP << 8 | TCPOPT_NOP, ptr);
diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c
index 01809eef29b4..aa51b100e033 100644
--- a/net/mptcp/pm.c
+++ b/net/mptcp/pm.c
@@ -178,14 +178,13 @@ void mptcp_pm_subflow_check_next(struct mptcp_sock *msk, const struct sock *ssk,
struct mptcp_pm_data *pm = &msk->pm;
bool update_subflows;
- update_subflows = (ssk->sk_state == TCP_CLOSE) &&
- (subflow->request_join || subflow->mp_join);
+ update_subflows = subflow->request_join || subflow->mp_join;
if (!READ_ONCE(pm->work_pending) && !update_subflows)
return;
spin_lock_bh(&pm->lock);
if (update_subflows)
- pm->subflows--;
+ __mptcp_pm_close_subflow(msk);
/* Even if this subflow is not really established, tell the PM to try
* to pick the next ones, if possible.
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 3c1a3036550f..5655a63aa6a8 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -443,7 +443,8 @@ struct mptcp_subflow_context {
can_ack : 1, /* only after processing the remote a key */
disposable : 1, /* ctx can be free at ulp release time */
stale : 1, /* unable to snd/rcv data, do not use for xmit */
- local_id_valid : 1; /* local_id is correctly initialized */
+ local_id_valid : 1, /* local_id is correctly initialized */
+ valid_csum_seen : 1; /* at least one csum validated */
enum mptcp_data_avail data_avail;
u32 remote_nonce;
u64 thmac;
@@ -723,7 +724,7 @@ void mptcp_token_destroy(struct mptcp_sock *msk);
void mptcp_crypto_key_sha(u64 key, u32 *token, u64 *idsn);
void mptcp_crypto_hmac_sha(u64 key1, u64 key2, u8 *msg, int len, void *hmac);
-u16 __mptcp_make_csum(u64 data_seq, u32 subflow_seq, u16 data_len, __wsum sum);
+__sum16 __mptcp_make_csum(u64 data_seq, u32 subflow_seq, u16 data_len, __wsum sum);
void __init mptcp_pm_init(void);
void mptcp_pm_data_init(struct mptcp_sock *msk);
@@ -833,6 +834,20 @@ unsigned int mptcp_pm_get_add_addr_accept_max(const struct mptcp_sock *msk);
unsigned int mptcp_pm_get_subflows_max(const struct mptcp_sock *msk);
unsigned int mptcp_pm_get_local_addr_max(const struct mptcp_sock *msk);
+/* called under PM lock */
+static inline void __mptcp_pm_close_subflow(struct mptcp_sock *msk)
+{
+ if (--msk->pm.subflows < mptcp_pm_get_subflows_max(msk))
+ WRITE_ONCE(msk->pm.accept_subflow, true);
+}
+
+static inline void mptcp_pm_close_subflow(struct mptcp_sock *msk)
+{
+ spin_lock_bh(&msk->pm.lock);
+ __mptcp_pm_close_subflow(msk);
+ spin_unlock_bh(&msk->pm.lock);
+}
+
void mptcp_sockopt_sync(struct mptcp_sock *msk, struct sock *ssk);
void mptcp_sockopt_sync_locked(struct mptcp_sock *msk, struct sock *ssk);
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index aba260f547da..be76ada89d96 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -888,7 +888,7 @@ static enum mapping_status validate_data_csum(struct sock *ssk, struct sk_buff *
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
u32 offset, seq, delta;
- u16 csum;
+ __sum16 csum;
int len;
if (!csum_reqd)
@@ -955,11 +955,14 @@ static enum mapping_status validate_data_csum(struct sock *ssk, struct sk_buff *
subflow->map_data_csum);
if (unlikely(csum)) {
MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_DATACSUMERR);
- subflow->send_mp_fail = 1;
- MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_MPFAILTX);
+ if (subflow->mp_join || subflow->valid_csum_seen) {
+ subflow->send_mp_fail = 1;
+ MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_MPFAILTX);
+ }
return subflow->mp_join ? MAPPING_INVALID : MAPPING_DUMMY;
}
+ subflow->valid_csum_seen = 1;
return MAPPING_OK;
}
@@ -1141,6 +1144,18 @@ static void subflow_sched_work_if_closed(struct mptcp_sock *msk, struct sock *ss
}
}
+static bool subflow_can_fallback(struct mptcp_subflow_context *subflow)
+{
+ struct mptcp_sock *msk = mptcp_sk(subflow->conn);
+
+ if (subflow->mp_join)
+ return false;
+ else if (READ_ONCE(msk->csum_enabled))
+ return !subflow->valid_csum_seen;
+ else
+ return !subflow->fully_established;
+}
+
static bool subflow_check_data_avail(struct sock *ssk)
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
@@ -1218,7 +1233,7 @@ fallback:
return true;
}
- if (subflow->mp_join || subflow->fully_established) {
+ if (!subflow_can_fallback(subflow)) {
/* fatal protocol error, close the socket.
* subflow_error_report() will introduce the appropriate barriers
*/
@@ -1422,20 +1437,20 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
struct sockaddr_storage addr;
int remote_id = remote->id;
int local_id = loc->id;
+ int err = -ENOTCONN;
struct socket *sf;
struct sock *ssk;
u32 remote_token;
int addrlen;
int ifindex;
u8 flags;
- int err;
if (!mptcp_is_fully_established(sk))
- return -ENOTCONN;
+ goto err_out;
err = mptcp_subflow_create_socket(sk, &sf);
if (err)
- return err;
+ goto err_out;
ssk = sf->sk;
subflow = mptcp_subflow_ctx(ssk);
@@ -1492,6 +1507,12 @@ failed_unlink:
failed:
subflow->disposable = 1;
sock_release(sf);
+
+err_out:
+ /* we account subflows before the creation, and this failures will not
+ * be caught by sk_state_change()
+ */
+ mptcp_pm_close_subflow(msk);
return err;
}
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index 2c467c422dc6..fb67f1ca2495 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -1495,7 +1495,7 @@ int __init ip_vs_conn_init(void)
pr_info("Connection hash table configured "
"(size=%d, memory=%ldKbytes)\n",
ip_vs_conn_tab_size,
- (long)(ip_vs_conn_tab_size*sizeof(struct list_head))/1024);
+ (long)(ip_vs_conn_tab_size*sizeof(*ip_vs_conn_tab))/1024);
IP_VS_DBG(0, "Each connection entry needs %zd bytes at least\n",
sizeof(struct ip_vs_conn));
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index 8ec55cd72572..204a5cdff5b1 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -556,24 +556,14 @@ static bool tcp_in_window(struct nf_conn *ct,
}
}
- } else if (((state->state == TCP_CONNTRACK_SYN_SENT
- && dir == IP_CT_DIR_ORIGINAL)
- || (state->state == TCP_CONNTRACK_SYN_RECV
- && dir == IP_CT_DIR_REPLY))
- && after(end, sender->td_end)) {
+ } else if (tcph->syn &&
+ after(end, sender->td_end) &&
+ (state->state == TCP_CONNTRACK_SYN_SENT ||
+ state->state == TCP_CONNTRACK_SYN_RECV)) {
/*
* RFC 793: "if a TCP is reinitialized ... then it need
* not wait at all; it must only be sure to use sequence
* numbers larger than those recently used."
- */
- sender->td_end =
- sender->td_maxend = end;
- sender->td_maxwin = (win == 0 ? 1 : win);
-
- tcp_options(skb, dataoff, tcph, sender);
- } else if (tcph->syn && dir == IP_CT_DIR_REPLY &&
- state->state == TCP_CONNTRACK_SYN_SENT) {
- /* Retransmitted syn-ack, or syn (simultaneous open).
*
* Re-init state for this direction, just like for the first
* syn(-ack) reply, it might differ in seq, ack or tcp options.
@@ -581,7 +571,8 @@ static bool tcp_in_window(struct nf_conn *ct,
tcp_init_sender(sender, receiver,
skb, dataoff, tcph,
end, win);
- if (!tcph->ack)
+
+ if (dir == IP_CT_DIR_REPLY && !tcph->ack)
return true;
}
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index 3e1afd10a9b6..55aa55b252b2 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -823,7 +823,7 @@ static struct ctl_table nf_ct_sysctl_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
},
-#if IS_ENABLED(CONFIG_NFT_FLOW_OFFLOAD)
+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
[NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_OFFLOAD] = {
.procname = "nf_flowtable_udp_timeout",
.maxlen = sizeof(unsigned int),
diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
index 3db256da919b..f2def06d1070 100644
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -179,12 +179,11 @@ EXPORT_SYMBOL_GPL(flow_offload_route_init);
static void flow_offload_fixup_tcp(struct ip_ct_tcp *tcp)
{
- tcp->state = TCP_CONNTRACK_ESTABLISHED;
tcp->seen[0].td_maxwin = 0;
tcp->seen[1].td_maxwin = 0;
}
-static void flow_offload_fixup_ct_timeout(struct nf_conn *ct)
+static void flow_offload_fixup_ct(struct nf_conn *ct)
{
struct net *net = nf_ct_net(ct);
int l4num = nf_ct_protonum(ct);
@@ -193,7 +192,9 @@ static void flow_offload_fixup_ct_timeout(struct nf_conn *ct)
if (l4num == IPPROTO_TCP) {
struct nf_tcp_net *tn = nf_tcp_pernet(net);
- timeout = tn->timeouts[TCP_CONNTRACK_ESTABLISHED];
+ flow_offload_fixup_tcp(&ct->proto.tcp);
+
+ timeout = tn->timeouts[ct->proto.tcp.state];
timeout -= tn->offload_timeout;
} else if (l4num == IPPROTO_UDP) {
struct nf_udp_net *tn = nf_udp_pernet(net);
@@ -211,18 +212,6 @@ static void flow_offload_fixup_ct_timeout(struct nf_conn *ct)
WRITE_ONCE(ct->timeout, nfct_time_stamp + timeout);
}
-static void flow_offload_fixup_ct_state(struct nf_conn *ct)
-{
- if (nf_ct_protonum(ct) == IPPROTO_TCP)
- flow_offload_fixup_tcp(&ct->proto.tcp);
-}
-
-static void flow_offload_fixup_ct(struct nf_conn *ct)
-{
- flow_offload_fixup_ct_state(ct);
- flow_offload_fixup_ct_timeout(ct);
-}
-
static void flow_offload_route_release(struct flow_offload *flow)
{
nft_flow_dst_release(flow, FLOW_OFFLOAD_DIR_ORIGINAL);
@@ -335,8 +324,10 @@ void flow_offload_refresh(struct nf_flowtable *flow_table,
u32 timeout;
timeout = nf_flowtable_time_stamp + flow_offload_get_timeout(flow);
- if (READ_ONCE(flow->timeout) != timeout)
+ if (timeout - READ_ONCE(flow->timeout) > HZ)
WRITE_ONCE(flow->timeout, timeout);
+ else
+ return;
if (likely(!nf_flowtable_hw_offload(flow_table)))
return;
@@ -359,22 +350,14 @@ static void flow_offload_del(struct nf_flowtable *flow_table,
rhashtable_remove_fast(&flow_table->rhashtable,
&flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].node,
nf_flow_offload_rhash_params);
-
- clear_bit(IPS_OFFLOAD_BIT, &flow->ct->status);
-
- if (nf_flow_has_expired(flow))
- flow_offload_fixup_ct(flow->ct);
- else
- flow_offload_fixup_ct_timeout(flow->ct);
-
flow_offload_free(flow);
}
void flow_offload_teardown(struct flow_offload *flow)
{
+ clear_bit(IPS_OFFLOAD_BIT, &flow->ct->status);
set_bit(NF_FLOW_TEARDOWN, &flow->flags);
-
- flow_offload_fixup_ct_state(flow->ct);
+ flow_offload_fixup_ct(flow->ct);
}
EXPORT_SYMBOL_GPL(flow_offload_teardown);
@@ -438,33 +421,12 @@ nf_flow_table_iterate(struct nf_flowtable *flow_table,
return err;
}
-static bool flow_offload_stale_dst(struct flow_offload_tuple *tuple)
-{
- struct dst_entry *dst;
-
- if (tuple->xmit_type == FLOW_OFFLOAD_XMIT_NEIGH ||
- tuple->xmit_type == FLOW_OFFLOAD_XMIT_XFRM) {
- dst = tuple->dst_cache;
- if (!dst_check(dst, tuple->dst_cookie))
- return true;
- }
-
- return false;
-}
-
-static bool nf_flow_has_stale_dst(struct flow_offload *flow)
-{
- return flow_offload_stale_dst(&flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple) ||
- flow_offload_stale_dst(&flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple);
-}
-
static void nf_flow_offload_gc_step(struct nf_flowtable *flow_table,
struct flow_offload *flow, void *data)
{
if (nf_flow_has_expired(flow) ||
- nf_ct_is_dying(flow->ct) ||
- nf_flow_has_stale_dst(flow))
- set_bit(NF_FLOW_TEARDOWN, &flow->flags);
+ nf_ct_is_dying(flow->ct))
+ flow_offload_teardown(flow);
if (test_bit(NF_FLOW_TEARDOWN, &flow->flags)) {
if (test_bit(NF_FLOW_HW, &flow->flags)) {
diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c
index 32c0eb1b4821..b350fe9d00b0 100644
--- a/net/netfilter/nf_flow_table_ip.c
+++ b/net/netfilter/nf_flow_table_ip.c
@@ -248,6 +248,15 @@ static bool nf_flow_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
return true;
}
+static inline bool nf_flow_dst_check(struct flow_offload_tuple *tuple)
+{
+ if (tuple->xmit_type != FLOW_OFFLOAD_XMIT_NEIGH &&
+ tuple->xmit_type != FLOW_OFFLOAD_XMIT_XFRM)
+ return true;
+
+ return dst_check(tuple->dst_cache, tuple->dst_cookie);
+}
+
static unsigned int nf_flow_xmit_xfrm(struct sk_buff *skb,
const struct nf_hook_state *state,
struct dst_entry *dst)
@@ -367,6 +376,11 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
if (nf_flow_state_check(flow, iph->protocol, skb, thoff))
return NF_ACCEPT;
+ if (!nf_flow_dst_check(&tuplehash->tuple)) {
+ flow_offload_teardown(flow);
+ return NF_ACCEPT;
+ }
+
if (skb_try_make_writable(skb, thoff + hdrsize))
return NF_DROP;
@@ -624,6 +638,11 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
if (nf_flow_state_check(flow, ip6h->nexthdr, skb, thoff))
return NF_ACCEPT;
+ if (!nf_flow_dst_check(&tuplehash->tuple)) {
+ flow_offload_teardown(flow);
+ return NF_ACCEPT;
+ }
+
if (skb_try_make_writable(skb, thoff + hdrsize))
return NF_DROP;
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 16c3a39689f4..a096b9fbbbdf 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -8342,16 +8342,7 @@ EXPORT_SYMBOL_GPL(nf_tables_trans_destroy_flush_work);
static bool nft_expr_reduce(struct nft_regs_track *track,
const struct nft_expr *expr)
{
- if (!expr->ops->reduce) {
- pr_warn_once("missing reduce for expression %s ",
- expr->ops->type->name);
- return false;
- }
-
- if (nft_reduce_is_readonly(expr))
- return false;
-
- return expr->ops->reduce(track, expr);
+ return false;
}
static int nf_tables_commit_chain_prepare(struct net *net, struct nft_chain *chain)
diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c
index 900d48c810a1..6f0b07fe648d 100644
--- a/net/netfilter/nft_flow_offload.c
+++ b/net/netfilter/nft_flow_offload.c
@@ -36,6 +36,15 @@ static void nft_default_forward_path(struct nf_flow_route *route,
route->tuple[dir].xmit_type = nft_xmit_type(dst_cache);
}
+static bool nft_is_valid_ether_device(const struct net_device *dev)
+{
+ if (!dev || (dev->flags & IFF_LOOPBACK) || dev->type != ARPHRD_ETHER ||
+ dev->addr_len != ETH_ALEN || !is_valid_ether_addr(dev->dev_addr))
+ return false;
+
+ return true;
+}
+
static int nft_dev_fill_forward_path(const struct nf_flow_route *route,
const struct dst_entry *dst_cache,
const struct nf_conn *ct,
@@ -47,6 +56,9 @@ static int nft_dev_fill_forward_path(const struct nf_flow_route *route,
struct neighbour *n;
u8 nud_state;
+ if (!nft_is_valid_ether_device(dev))
+ goto out;
+
n = dst_neigh_lookup(dst_cache, daddr);
if (!n)
return -1;
@@ -60,6 +72,7 @@ static int nft_dev_fill_forward_path(const struct nf_flow_route *route,
if (!(nud_state & NUD_VALID))
return -1;
+out:
return dev_fill_forward_path(dev, ha, stack);
}
@@ -78,15 +91,6 @@ struct nft_forward_info {
enum flow_offload_xmit_type xmit_type;
};
-static bool nft_is_valid_ether_device(const struct net_device *dev)
-{
- if (!dev || (dev->flags & IFF_LOOPBACK) || dev->type != ARPHRD_ETHER ||
- dev->addr_len != ETH_ALEN || !is_valid_ether_addr(dev->dev_addr))
- return false;
-
- return true;
-}
-
static void nft_dev_path_info(const struct net_device_path_stack *stack,
struct nft_forward_info *info,
unsigned char *ha, struct nf_flowtable *flowtable)
@@ -119,7 +123,8 @@ static void nft_dev_path_info(const struct net_device_path_stack *stack,
info->indev = NULL;
break;
}
- info->outdev = path->dev;
+ if (!info->outdev)
+ info->outdev = path->dev;
info->encap[info->num_encaps].id = path->encap.id;
info->encap[info->num_encaps].proto = path->encap.proto;
info->num_encaps++;
@@ -293,7 +298,8 @@ static void nft_flow_offload_eval(const struct nft_expr *expr,
case IPPROTO_TCP:
tcph = skb_header_pointer(pkt->skb, nft_thoff(pkt),
sizeof(_tcph), &_tcph);
- if (unlikely(!tcph || tcph->fin || tcph->rst))
+ if (unlikely(!tcph || tcph->fin || tcph->rst ||
+ !nf_conntrack_tcp_established(ct)))
goto out;
break;
case IPPROTO_UDP:
diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c
index d600a566da32..7325bee7d144 100644
--- a/net/netfilter/nft_set_rbtree.c
+++ b/net/netfilter/nft_set_rbtree.c
@@ -349,7 +349,11 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set,
*ext = &rbe->ext;
return -EEXIST;
} else {
- p = &parent->rb_left;
+ overlap = false;
+ if (nft_rbtree_interval_end(rbe))
+ p = &parent->rb_left;
+ else
+ p = &parent->rb_right;
}
}
diff --git a/net/netfilter/nft_socket.c b/net/netfilter/nft_socket.c
index 6d9e8e0a3a7d..05ae5a338b6f 100644
--- a/net/netfilter/nft_socket.c
+++ b/net/netfilter/nft_socket.c
@@ -54,6 +54,32 @@ nft_sock_get_eval_cgroupv2(u32 *dest, struct sock *sk, const struct nft_pktinfo
}
#endif
+static struct sock *nft_socket_do_lookup(const struct nft_pktinfo *pkt)
+{
+ const struct net_device *indev = nft_in(pkt);
+ const struct sk_buff *skb = pkt->skb;
+ struct sock *sk = NULL;
+
+ if (!indev)
+ return NULL;
+
+ switch (nft_pf(pkt)) {
+ case NFPROTO_IPV4:
+ sk = nf_sk_lookup_slow_v4(nft_net(pkt), skb, indev);
+ break;
+#if IS_ENABLED(CONFIG_NF_TABLES_IPV6)
+ case NFPROTO_IPV6:
+ sk = nf_sk_lookup_slow_v6(nft_net(pkt), skb, indev);
+ break;
+#endif
+ default:
+ WARN_ON_ONCE(1);
+ break;
+ }
+
+ return sk;
+}
+
static void nft_socket_eval(const struct nft_expr *expr,
struct nft_regs *regs,
const struct nft_pktinfo *pkt)
@@ -67,20 +93,7 @@ static void nft_socket_eval(const struct nft_expr *expr,
sk = NULL;
if (!sk)
- switch(nft_pf(pkt)) {
- case NFPROTO_IPV4:
- sk = nf_sk_lookup_slow_v4(nft_net(pkt), skb, nft_in(pkt));
- break;
-#if IS_ENABLED(CONFIG_NF_TABLES_IPV6)
- case NFPROTO_IPV6:
- sk = nf_sk_lookup_slow_v6(nft_net(pkt), skb, nft_in(pkt));
- break;
-#endif
- default:
- WARN_ON_ONCE(1);
- regs->verdict.code = NFT_BREAK;
- return;
- }
+ sk = nft_socket_do_lookup(pkt);
if (!sk) {
regs->verdict.code = NFT_BREAK;
@@ -224,6 +237,16 @@ static bool nft_socket_reduce(struct nft_regs_track *track,
return nft_expr_reduce_bitwise(track, expr);
}
+static int nft_socket_validate(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nft_data **data)
+{
+ return nft_chain_validate_hooks(ctx->chain,
+ (1 << NF_INET_PRE_ROUTING) |
+ (1 << NF_INET_LOCAL_IN) |
+ (1 << NF_INET_LOCAL_OUT));
+}
+
static struct nft_expr_type nft_socket_type;
static const struct nft_expr_ops nft_socket_ops = {
.type = &nft_socket_type,
@@ -231,6 +254,7 @@ static const struct nft_expr_ops nft_socket_ops = {
.eval = nft_socket_eval,
.init = nft_socket_init,
.dump = nft_socket_dump,
+ .validate = nft_socket_validate,
.reduce = nft_socket_reduce,
};
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 47a876ccd288..73e9c0a9c187 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -1975,7 +1975,6 @@ static int netlink_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
copied = len;
}
- skb_reset_transport_header(data_skb);
err = skb_copy_datagram_msg(data_skb, 0, msg, copied);
if (msg->msg_name) {
@@ -2263,6 +2262,13 @@ static int netlink_dump(struct sock *sk)
* single netdev. The outcome is MSG_TRUNC error.
*/
skb_reserve(skb, skb_tailroom(skb) - alloc_size);
+
+ /* Make sure malicious BPF programs can not read unitialized memory
+ * from skb->head -> skb->data
+ */
+ skb_reset_network_header(skb);
+ skb_reset_mac_header(skb);
+
netlink_skb_set_owner_r(skb, sk);
if (nlk->dump_done_errno > 0) {
diff --git a/net/nfc/core.c b/net/nfc/core.c
index dc7a2404efdf..5b286e1e0a6f 100644
--- a/net/nfc/core.c
+++ b/net/nfc/core.c
@@ -38,7 +38,7 @@ int nfc_fw_download(struct nfc_dev *dev, const char *firmware_name)
device_lock(&dev->dev);
- if (!device_is_registered(&dev->dev)) {
+ if (dev->shutting_down) {
rc = -ENODEV;
goto error;
}
@@ -94,7 +94,7 @@ int nfc_dev_up(struct nfc_dev *dev)
device_lock(&dev->dev);
- if (!device_is_registered(&dev->dev)) {
+ if (dev->shutting_down) {
rc = -ENODEV;
goto error;
}
@@ -142,7 +142,7 @@ int nfc_dev_down(struct nfc_dev *dev)
device_lock(&dev->dev);
- if (!device_is_registered(&dev->dev)) {
+ if (dev->shutting_down) {
rc = -ENODEV;
goto error;
}
@@ -207,7 +207,7 @@ int nfc_start_poll(struct nfc_dev *dev, u32 im_protocols, u32 tm_protocols)
device_lock(&dev->dev);
- if (!device_is_registered(&dev->dev)) {
+ if (dev->shutting_down) {
rc = -ENODEV;
goto error;
}
@@ -246,7 +246,7 @@ int nfc_stop_poll(struct nfc_dev *dev)
device_lock(&dev->dev);
- if (!device_is_registered(&dev->dev)) {
+ if (dev->shutting_down) {
rc = -ENODEV;
goto error;
}
@@ -291,7 +291,7 @@ int nfc_dep_link_up(struct nfc_dev *dev, int target_index, u8 comm_mode)
device_lock(&dev->dev);
- if (!device_is_registered(&dev->dev)) {
+ if (dev->shutting_down) {
rc = -ENODEV;
goto error;
}
@@ -335,7 +335,7 @@ int nfc_dep_link_down(struct nfc_dev *dev)
device_lock(&dev->dev);
- if (!device_is_registered(&dev->dev)) {
+ if (dev->shutting_down) {
rc = -ENODEV;
goto error;
}
@@ -401,7 +401,7 @@ int nfc_activate_target(struct nfc_dev *dev, u32 target_idx, u32 protocol)
device_lock(&dev->dev);
- if (!device_is_registered(&dev->dev)) {
+ if (dev->shutting_down) {
rc = -ENODEV;
goto error;
}
@@ -448,7 +448,7 @@ int nfc_deactivate_target(struct nfc_dev *dev, u32 target_idx, u8 mode)
device_lock(&dev->dev);
- if (!device_is_registered(&dev->dev)) {
+ if (dev->shutting_down) {
rc = -ENODEV;
goto error;
}
@@ -495,7 +495,7 @@ int nfc_data_exchange(struct nfc_dev *dev, u32 target_idx, struct sk_buff *skb,
device_lock(&dev->dev);
- if (!device_is_registered(&dev->dev)) {
+ if (dev->shutting_down) {
rc = -ENODEV;
kfree_skb(skb);
goto error;
@@ -552,7 +552,7 @@ int nfc_enable_se(struct nfc_dev *dev, u32 se_idx)
device_lock(&dev->dev);
- if (!device_is_registered(&dev->dev)) {
+ if (dev->shutting_down) {
rc = -ENODEV;
goto error;
}
@@ -601,7 +601,7 @@ int nfc_disable_se(struct nfc_dev *dev, u32 se_idx)
device_lock(&dev->dev);
- if (!device_is_registered(&dev->dev)) {
+ if (dev->shutting_down) {
rc = -ENODEV;
goto error;
}
@@ -1134,6 +1134,7 @@ int nfc_register_device(struct nfc_dev *dev)
dev->rfkill = NULL;
}
}
+ dev->shutting_down = false;
device_unlock(&dev->dev);
rc = nfc_genl_device_added(dev);
@@ -1166,12 +1167,10 @@ void nfc_unregister_device(struct nfc_dev *dev)
rfkill_unregister(dev->rfkill);
rfkill_destroy(dev->rfkill);
}
+ dev->shutting_down = true;
device_unlock(&dev->dev);
if (dev->ops->check_presence) {
- device_lock(&dev->dev);
- dev->shutting_down = true;
- device_unlock(&dev->dev);
del_timer_sync(&dev->check_pres_timer);
cancel_work_sync(&dev->check_pres_work);
}
diff --git a/net/nfc/nci/data.c b/net/nfc/nci/data.c
index 6055dc9a82aa..aa5e712adf07 100644
--- a/net/nfc/nci/data.c
+++ b/net/nfc/nci/data.c
@@ -118,7 +118,7 @@ static int nci_queue_tx_data_frags(struct nci_dev *ndev,
skb_frag = nci_skb_alloc(ndev,
(NCI_DATA_HDR_SIZE + frag_len),
- GFP_KERNEL);
+ GFP_ATOMIC);
if (skb_frag == NULL) {
rc = -ENOMEM;
goto free_exit;
diff --git a/net/nfc/nci/hci.c b/net/nfc/nci/hci.c
index 19703a649b5a..78c4b6addf15 100644
--- a/net/nfc/nci/hci.c
+++ b/net/nfc/nci/hci.c
@@ -153,7 +153,7 @@ static int nci_hci_send_data(struct nci_dev *ndev, u8 pipe,
i = 0;
skb = nci_skb_alloc(ndev, conn_info->max_pkt_payload_len +
- NCI_DATA_HDR_SIZE, GFP_KERNEL);
+ NCI_DATA_HDR_SIZE, GFP_ATOMIC);
if (!skb)
return -ENOMEM;
@@ -184,7 +184,7 @@ static int nci_hci_send_data(struct nci_dev *ndev, u8 pipe,
if (i < data_len) {
skb = nci_skb_alloc(ndev,
conn_info->max_pkt_payload_len +
- NCI_DATA_HDR_SIZE, GFP_KERNEL);
+ NCI_DATA_HDR_SIZE, GFP_ATOMIC);
if (!skb)
return -ENOMEM;
diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c
index f184b0db79d4..7c62417ccfd7 100644
--- a/net/nfc/netlink.c
+++ b/net/nfc/netlink.c
@@ -1244,7 +1244,7 @@ int nfc_genl_fw_download_done(struct nfc_dev *dev, const char *firmware_name,
struct sk_buff *msg;
void *hdr;
- msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
if (!msg)
return -ENOMEM;
@@ -1260,7 +1260,7 @@ int nfc_genl_fw_download_done(struct nfc_dev *dev, const char *firmware_name,
genlmsg_end(msg, hdr);
- genlmsg_multicast(&nfc_genl_family, msg, 0, 0, GFP_KERNEL);
+ genlmsg_multicast(&nfc_genl_family, msg, 0, 0, GFP_ATOMIC);
return 0;
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index 7176156d3844..4c09cf8a0ab2 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -2465,7 +2465,7 @@ static struct nlattr *reserve_sfa_size(struct sw_flow_actions **sfa,
new_acts_size = max(next_offset + req_size, ksize(*sfa) * 2);
if (new_acts_size > MAX_ACTIONS_BUFSIZE) {
- if ((MAX_ACTIONS_BUFSIZE - next_offset) < req_size) {
+ if ((next_offset + req_size) > MAX_ACTIONS_BUFSIZE) {
OVS_NLERR(log, "Flow action size exceeds max %u",
MAX_ACTIONS_BUFSIZE);
return ERR_PTR(-EMSGSIZE);
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index c39c09899fd0..002d2b9c69dd 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -2858,8 +2858,9 @@ tpacket_error:
status = TP_STATUS_SEND_REQUEST;
err = po->xmit(skb);
- if (unlikely(err > 0)) {
- err = net_xmit_errno(err);
+ if (unlikely(err != 0)) {
+ if (err > 0)
+ err = net_xmit_errno(err);
if (err && __packet_get_status(po, ph) ==
TP_STATUS_AVAILABLE) {
/* skb was destructed already */
@@ -3060,8 +3061,12 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
skb->no_fcs = 1;
err = po->xmit(skb);
- if (err > 0 && (err = net_xmit_errno(err)) != 0)
- goto out_unlock;
+ if (unlikely(err != 0)) {
+ if (err > 0)
+ err = net_xmit_errno(err);
+ if (err)
+ goto out_unlock;
+ }
dev_put(dev);
diff --git a/net/rds/tcp.c b/net/rds/tcp.c
index 5327d130c4b5..73ee2771093d 100644
--- a/net/rds/tcp.c
+++ b/net/rds/tcp.c
@@ -487,14 +487,27 @@ struct rds_tcp_net {
/* All module specific customizations to the RDS-TCP socket should be done in
* rds_tcp_tune() and applied after socket creation.
*/
-void rds_tcp_tune(struct socket *sock)
+bool rds_tcp_tune(struct socket *sock)
{
struct sock *sk = sock->sk;
struct net *net = sock_net(sk);
- struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid);
+ struct rds_tcp_net *rtn;
tcp_sock_set_nodelay(sock->sk);
lock_sock(sk);
+ /* TCP timer functions might access net namespace even after
+ * a process which created this net namespace terminated.
+ */
+ if (!sk->sk_net_refcnt) {
+ if (!maybe_get_net(net)) {
+ release_sock(sk);
+ return false;
+ }
+ sk->sk_net_refcnt = 1;
+ netns_tracker_alloc(net, &sk->ns_tracker, GFP_KERNEL);
+ sock_inuse_add(net, 1);
+ }
+ rtn = net_generic(net, rds_tcp_netid);
if (rtn->sndbuf_size > 0) {
sk->sk_sndbuf = rtn->sndbuf_size;
sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
@@ -504,6 +517,7 @@ void rds_tcp_tune(struct socket *sock)
sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
}
release_sock(sk);
+ return true;
}
static void rds_tcp_accept_worker(struct work_struct *work)
diff --git a/net/rds/tcp.h b/net/rds/tcp.h
index dc8d745d6857..f8b5930d7b34 100644
--- a/net/rds/tcp.h
+++ b/net/rds/tcp.h
@@ -49,7 +49,7 @@ struct rds_tcp_statistics {
};
/* tcp.c */
-void rds_tcp_tune(struct socket *sock);
+bool rds_tcp_tune(struct socket *sock);
void rds_tcp_set_callbacks(struct socket *sock, struct rds_conn_path *cp);
void rds_tcp_reset_callbacks(struct socket *sock, struct rds_conn_path *cp);
void rds_tcp_restore_callbacks(struct socket *sock,
diff --git a/net/rds/tcp_connect.c b/net/rds/tcp_connect.c
index 5461d77fff4f..f0c477c5d1db 100644
--- a/net/rds/tcp_connect.c
+++ b/net/rds/tcp_connect.c
@@ -124,7 +124,10 @@ int rds_tcp_conn_path_connect(struct rds_conn_path *cp)
if (ret < 0)
goto out;
- rds_tcp_tune(sock);
+ if (!rds_tcp_tune(sock)) {
+ ret = -EINVAL;
+ goto out;
+ }
if (isv6) {
sin6.sin6_family = AF_INET6;
diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c
index 09cadd556d1e..7edf2e69d3fe 100644
--- a/net/rds/tcp_listen.c
+++ b/net/rds/tcp_listen.c
@@ -133,7 +133,10 @@ int rds_tcp_accept_one(struct socket *sock)
__module_get(new_sock->ops->owner);
rds_tcp_keepalive(new_sock);
- rds_tcp_tune(new_sock);
+ if (!rds_tcp_tune(new_sock)) {
+ ret = -EINVAL;
+ goto out;
+ }
inet = inet_sk(new_sock->sk);
diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c
index a4111408ffd0..6a1611b0e303 100644
--- a/net/rxrpc/local_object.c
+++ b/net/rxrpc/local_object.c
@@ -117,6 +117,7 @@ static int rxrpc_open_socket(struct rxrpc_local *local, struct net *net)
local, srx->transport_type, srx->transport.family);
udp_conf.family = srx->transport.family;
+ udp_conf.use_udp_checksums = true;
if (udp_conf.family == AF_INET) {
udp_conf.local_ip = srx->transport.sin.sin_addr;
udp_conf.local_udp_port = srx->transport.sin.sin_port;
@@ -124,6 +125,8 @@ static int rxrpc_open_socket(struct rxrpc_local *local, struct net *net)
} else {
udp_conf.local_ip6 = srx->transport.sin6.sin6_addr;
udp_conf.local_udp_port = srx->transport.sin6.sin6_port;
+ udp_conf.use_udp6_tx_checksums = true;
+ udp_conf.use_udp6_rx_checksums = true;
#endif
}
ret = udp_sock_create(net, &udp_conf, &local->socket);
diff --git a/net/rxrpc/net_ns.c b/net/rxrpc/net_ns.c
index f15d6942da45..cc7e30733feb 100644
--- a/net/rxrpc/net_ns.c
+++ b/net/rxrpc/net_ns.c
@@ -113,7 +113,9 @@ static __net_exit void rxrpc_exit_net(struct net *net)
struct rxrpc_net *rxnet = rxrpc_net(net);
rxnet->live = false;
+ del_timer_sync(&rxnet->peer_keepalive_timer);
cancel_work_sync(&rxnet->peer_keepalive_work);
+ /* Remove the timer again as the worker may have restarted it. */
del_timer_sync(&rxnet->peer_keepalive_timer);
rxrpc_destroy_all_calls(rxnet);
rxrpc_destroy_all_connections(rxnet);
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index 31fcd279c177..211c757bfc3c 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -149,7 +149,7 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
struct nlattr *pattr;
struct tcf_pedit *p;
int ret = 0, err;
- int ksize;
+ int i, ksize;
u32 index;
if (!nla) {
@@ -228,6 +228,22 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
p->tcfp_nkeys = parm->nkeys;
}
memcpy(p->tcfp_keys, parm->keys, ksize);
+ p->tcfp_off_max_hint = 0;
+ for (i = 0; i < p->tcfp_nkeys; ++i) {
+ u32 cur = p->tcfp_keys[i].off;
+
+ /* sanitize the shift value for any later use */
+ p->tcfp_keys[i].shift = min_t(size_t, BITS_PER_TYPE(int) - 1,
+ p->tcfp_keys[i].shift);
+
+ /* The AT option can read a single byte, we can bound the actual
+ * value with uchar max.
+ */
+ cur += (0xff & p->tcfp_keys[i].offmask) >> p->tcfp_keys[i].shift;
+
+ /* Each key touches 4 bytes starting from the computed offset */
+ p->tcfp_off_max_hint = max(p->tcfp_off_max_hint, cur + 4);
+ }
p->tcfp_flags = parm->flags;
goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
@@ -308,13 +324,18 @@ static int tcf_pedit_act(struct sk_buff *skb, const struct tc_action *a,
struct tcf_result *res)
{
struct tcf_pedit *p = to_pedit(a);
+ u32 max_offset;
int i;
- if (skb_unclone(skb, GFP_ATOMIC))
- return p->tcf_action;
-
spin_lock(&p->tcf_lock);
+ max_offset = (skb_transport_header_was_set(skb) ?
+ skb_transport_offset(skb) :
+ skb_network_offset(skb)) +
+ p->tcfp_off_max_hint;
+ if (skb_ensure_writable(skb, min(skb->len, max_offset)))
+ goto unlock;
+
tcf_lastuse_update(&p->tcf_tm);
if (p->tcfp_nkeys > 0) {
@@ -403,6 +424,7 @@ bad:
p->tcf_qstats.overlimits++;
done:
bstats_update(&p->tcf_bstats, skb);
+unlock:
spin_unlock(&p->tcf_lock);
return p->tcf_action;
}
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index cf5649292ee0..4d27300c287c 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -386,14 +386,19 @@ static int u32_init(struct tcf_proto *tp)
return 0;
}
-static int u32_destroy_key(struct tc_u_knode *n, bool free_pf)
+static void __u32_destroy_key(struct tc_u_knode *n)
{
struct tc_u_hnode *ht = rtnl_dereference(n->ht_down);
tcf_exts_destroy(&n->exts);
- tcf_exts_put_net(&n->exts);
if (ht && --ht->refcnt == 0)
kfree(ht);
+ kfree(n);
+}
+
+static void u32_destroy_key(struct tc_u_knode *n, bool free_pf)
+{
+ tcf_exts_put_net(&n->exts);
#ifdef CONFIG_CLS_U32_PERF
if (free_pf)
free_percpu(n->pf);
@@ -402,8 +407,7 @@ static int u32_destroy_key(struct tc_u_knode *n, bool free_pf)
if (free_pf)
free_percpu(n->pcpu_success);
#endif
- kfree(n);
- return 0;
+ __u32_destroy_key(n);
}
/* u32_delete_key_rcu should be called when free'ing a copied
@@ -811,10 +815,6 @@ static struct tc_u_knode *u32_init_knode(struct net *net, struct tcf_proto *tp,
new->flags = n->flags;
RCU_INIT_POINTER(new->ht_down, ht);
- /* bump reference count as long as we hold pointer to structure */
- if (ht)
- ht->refcnt++;
-
#ifdef CONFIG_CLS_U32_PERF
/* Statistics may be incremented by readers during update
* so we must keep them in tact. When the node is later destroyed
@@ -836,6 +836,10 @@ static struct tc_u_knode *u32_init_knode(struct net *net, struct tcf_proto *tp,
return NULL;
}
+ /* bump reference count as long as we hold pointer to structure */
+ if (ht)
+ ht->refcnt++;
+
return new;
}
@@ -900,13 +904,13 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
extack);
if (err) {
- u32_destroy_key(new, false);
+ __u32_destroy_key(new);
return err;
}
err = u32_replace_hw_knode(tp, new, flags, extack);
if (err) {
- u32_destroy_key(new, false);
+ __u32_destroy_key(new);
return err;
}
diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
index b3815b568e8e..463c4a58d2c3 100644
--- a/net/sctp/sm_sideeffect.c
+++ b/net/sctp/sm_sideeffect.c
@@ -458,6 +458,10 @@ void sctp_generate_reconf_event(struct timer_list *t)
goto out_unlock;
}
+ /* This happens when the response arrives after the timer is triggered. */
+ if (!asoc->strreset_chunk)
+ goto out_unlock;
+
error = sctp_do_sm(net, SCTP_EVENT_T_TIMEOUT,
SCTP_ST_TIMEOUT(SCTP_EVENT_TIMEOUT_RECONF),
asoc->state, asoc->ep, asoc,
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 14ddc40149e8..fce16b9d6e1a 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -243,11 +243,27 @@ struct proto smc_proto6 = {
};
EXPORT_SYMBOL_GPL(smc_proto6);
+static void smc_fback_restore_callbacks(struct smc_sock *smc)
+{
+ struct sock *clcsk = smc->clcsock->sk;
+
+ write_lock_bh(&clcsk->sk_callback_lock);
+ clcsk->sk_user_data = NULL;
+
+ smc_clcsock_restore_cb(&clcsk->sk_state_change, &smc->clcsk_state_change);
+ smc_clcsock_restore_cb(&clcsk->sk_data_ready, &smc->clcsk_data_ready);
+ smc_clcsock_restore_cb(&clcsk->sk_write_space, &smc->clcsk_write_space);
+ smc_clcsock_restore_cb(&clcsk->sk_error_report, &smc->clcsk_error_report);
+
+ write_unlock_bh(&clcsk->sk_callback_lock);
+}
+
static void smc_restore_fallback_changes(struct smc_sock *smc)
{
if (smc->clcsock->file) { /* non-accepted sockets have no file yet */
smc->clcsock->file->private_data = smc->sk.sk_socket;
smc->clcsock->file = NULL;
+ smc_fback_restore_callbacks(smc);
}
}
@@ -373,6 +389,7 @@ static struct sock *smc_sock_alloc(struct net *net, struct socket *sock,
sk->sk_prot->hash(sk);
sk_refcnt_debug_inc(sk);
mutex_init(&smc->clcsock_release_lock);
+ smc_init_saved_callbacks(smc);
return sk;
}
@@ -744,47 +761,73 @@ out:
static void smc_fback_state_change(struct sock *clcsk)
{
- struct smc_sock *smc =
- smc_clcsock_user_data(clcsk);
+ struct smc_sock *smc;
- if (!smc)
- return;
- smc_fback_forward_wakeup(smc, clcsk, smc->clcsk_state_change);
+ read_lock_bh(&clcsk->sk_callback_lock);
+ smc = smc_clcsock_user_data(clcsk);
+ if (smc)
+ smc_fback_forward_wakeup(smc, clcsk,
+ smc->clcsk_state_change);
+ read_unlock_bh(&clcsk->sk_callback_lock);
}
static void smc_fback_data_ready(struct sock *clcsk)
{
- struct smc_sock *smc =
- smc_clcsock_user_data(clcsk);
+ struct smc_sock *smc;
- if (!smc)
- return;
- smc_fback_forward_wakeup(smc, clcsk, smc->clcsk_data_ready);
+ read_lock_bh(&clcsk->sk_callback_lock);
+ smc = smc_clcsock_user_data(clcsk);
+ if (smc)
+ smc_fback_forward_wakeup(smc, clcsk,
+ smc->clcsk_data_ready);
+ read_unlock_bh(&clcsk->sk_callback_lock);
}
static void smc_fback_write_space(struct sock *clcsk)
{
- struct smc_sock *smc =
- smc_clcsock_user_data(clcsk);
+ struct smc_sock *smc;
- if (!smc)
- return;
- smc_fback_forward_wakeup(smc, clcsk, smc->clcsk_write_space);
+ read_lock_bh(&clcsk->sk_callback_lock);
+ smc = smc_clcsock_user_data(clcsk);
+ if (smc)
+ smc_fback_forward_wakeup(smc, clcsk,
+ smc->clcsk_write_space);
+ read_unlock_bh(&clcsk->sk_callback_lock);
}
static void smc_fback_error_report(struct sock *clcsk)
{
- struct smc_sock *smc =
- smc_clcsock_user_data(clcsk);
+ struct smc_sock *smc;
- if (!smc)
- return;
- smc_fback_forward_wakeup(smc, clcsk, smc->clcsk_error_report);
+ read_lock_bh(&clcsk->sk_callback_lock);
+ smc = smc_clcsock_user_data(clcsk);
+ if (smc)
+ smc_fback_forward_wakeup(smc, clcsk,
+ smc->clcsk_error_report);
+ read_unlock_bh(&clcsk->sk_callback_lock);
+}
+
+static void smc_fback_replace_callbacks(struct smc_sock *smc)
+{
+ struct sock *clcsk = smc->clcsock->sk;
+
+ write_lock_bh(&clcsk->sk_callback_lock);
+ clcsk->sk_user_data = (void *)((uintptr_t)smc | SK_USER_DATA_NOCOPY);
+
+ smc_clcsock_replace_cb(&clcsk->sk_state_change, smc_fback_state_change,
+ &smc->clcsk_state_change);
+ smc_clcsock_replace_cb(&clcsk->sk_data_ready, smc_fback_data_ready,
+ &smc->clcsk_data_ready);
+ smc_clcsock_replace_cb(&clcsk->sk_write_space, smc_fback_write_space,
+ &smc->clcsk_write_space);
+ smc_clcsock_replace_cb(&clcsk->sk_error_report, smc_fback_error_report,
+ &smc->clcsk_error_report);
+
+ write_unlock_bh(&clcsk->sk_callback_lock);
}
static int smc_switch_to_fallback(struct smc_sock *smc, int reason_code)
{
- struct sock *clcsk;
int rc = 0;
mutex_lock(&smc->clcsock_release_lock);
@@ -792,10 +835,7 @@ static int smc_switch_to_fallback(struct smc_sock *smc, int reason_code)
rc = -EBADF;
goto out;
}
- clcsk = smc->clcsock->sk;
- if (smc->use_fallback)
- goto out;
smc->use_fallback = true;
smc->fallback_rsn = reason_code;
smc_stat_fallback(smc);
@@ -810,18 +850,7 @@ static int smc_switch_to_fallback(struct smc_sock *smc, int reason_code)
* in smc sk->sk_wq and they should be woken up
* as clcsock's wait queue is woken up.
*/
- smc->clcsk_state_change = clcsk->sk_state_change;
- smc->clcsk_data_ready = clcsk->sk_data_ready;
- smc->clcsk_write_space = clcsk->sk_write_space;
- smc->clcsk_error_report = clcsk->sk_error_report;
-
- clcsk->sk_state_change = smc_fback_state_change;
- clcsk->sk_data_ready = smc_fback_data_ready;
- clcsk->sk_write_space = smc_fback_write_space;
- clcsk->sk_error_report = smc_fback_error_report;
-
- smc->clcsock->sk->sk_user_data =
- (void *)((uintptr_t)smc | SK_USER_DATA_NOCOPY);
+ smc_fback_replace_callbacks(smc);
}
out:
mutex_unlock(&smc->clcsock_release_lock);
@@ -1475,6 +1504,8 @@ static void smc_connect_work(struct work_struct *work)
smc->sk.sk_state = SMC_CLOSED;
if (rc == -EPIPE || rc == -EAGAIN)
smc->sk.sk_err = EPIPE;
+ else if (rc == -ECONNREFUSED)
+ smc->sk.sk_err = ECONNREFUSED;
else if (signal_pending(current))
smc->sk.sk_err = -sock_intr_errno(timeo);
sock_put(&smc->sk); /* passive closing */
@@ -1594,6 +1625,19 @@ static int smc_clcsock_accept(struct smc_sock *lsmc, struct smc_sock **new_smc)
* function; switch it back to the original sk_data_ready function
*/
new_clcsock->sk->sk_data_ready = lsmc->clcsk_data_ready;
+
+ /* if new clcsock has also inherited the fallback-specific callback
+ * functions, switch them back to the original ones.
+ */
+ if (lsmc->use_fallback) {
+ if (lsmc->clcsk_state_change)
+ new_clcsock->sk->sk_state_change = lsmc->clcsk_state_change;
+ if (lsmc->clcsk_write_space)
+ new_clcsock->sk->sk_write_space = lsmc->clcsk_write_space;
+ if (lsmc->clcsk_error_report)
+ new_clcsock->sk->sk_error_report = lsmc->clcsk_error_report;
+ }
+
(*new_smc)->clcsock = new_clcsock;
out:
return rc;
@@ -2353,17 +2397,20 @@ out:
static void smc_clcsock_data_ready(struct sock *listen_clcsock)
{
- struct smc_sock *lsmc =
- smc_clcsock_user_data(listen_clcsock);
+ struct smc_sock *lsmc;
+ read_lock_bh(&listen_clcsock->sk_callback_lock);
+ lsmc = smc_clcsock_user_data(listen_clcsock);
if (!lsmc)
- return;
+ goto out;
lsmc->clcsk_data_ready(listen_clcsock);
if (lsmc->sk.sk_state == SMC_LISTEN) {
sock_hold(&lsmc->sk); /* sock_put in smc_tcp_listen_work() */
if (!queue_work(smc_tcp_ls_wq, &lsmc->tcp_listen_work))
sock_put(&lsmc->sk);
}
+out:
+ read_unlock_bh(&listen_clcsock->sk_callback_lock);
}
static int smc_listen(struct socket *sock, int backlog)
@@ -2395,10 +2442,12 @@ static int smc_listen(struct socket *sock, int backlog)
/* save original sk_data_ready function and establish
* smc-specific sk_data_ready function
*/
- smc->clcsk_data_ready = smc->clcsock->sk->sk_data_ready;
- smc->clcsock->sk->sk_data_ready = smc_clcsock_data_ready;
+ write_lock_bh(&smc->clcsock->sk->sk_callback_lock);
smc->clcsock->sk->sk_user_data =
(void *)((uintptr_t)smc | SK_USER_DATA_NOCOPY);
+ smc_clcsock_replace_cb(&smc->clcsock->sk->sk_data_ready,
+ smc_clcsock_data_ready, &smc->clcsk_data_ready);
+ write_unlock_bh(&smc->clcsock->sk->sk_callback_lock);
/* save original ops */
smc->ori_af_ops = inet_csk(smc->clcsock->sk)->icsk_af_ops;
@@ -2413,7 +2462,11 @@ static int smc_listen(struct socket *sock, int backlog)
rc = kernel_listen(smc->clcsock, backlog);
if (rc) {
- smc->clcsock->sk->sk_data_ready = smc->clcsk_data_ready;
+ write_lock_bh(&smc->clcsock->sk->sk_callback_lock);
+ smc_clcsock_restore_cb(&smc->clcsock->sk->sk_data_ready,
+ &smc->clcsk_data_ready);
+ smc->clcsock->sk->sk_user_data = NULL;
+ write_unlock_bh(&smc->clcsock->sk->sk_callback_lock);
goto out;
}
sk->sk_max_ack_backlog = backlog;
@@ -2674,8 +2727,10 @@ static int smc_shutdown(struct socket *sock, int how)
if (smc->use_fallback) {
rc = kernel_sock_shutdown(smc->clcsock, how);
sk->sk_shutdown = smc->clcsock->sk->sk_shutdown;
- if (sk->sk_shutdown == SHUTDOWN_MASK)
+ if (sk->sk_shutdown == SHUTDOWN_MASK) {
sk->sk_state = SMC_CLOSED;
+ sock_put(sk);
+ }
goto out;
}
switch (how) {
diff --git a/net/smc/smc.h b/net/smc/smc.h
index ea0620529ebe..5ed765ea0c73 100644
--- a/net/smc/smc.h
+++ b/net/smc/smc.h
@@ -288,12 +288,41 @@ static inline struct smc_sock *smc_sk(const struct sock *sk)
return (struct smc_sock *)sk;
}
+static inline void smc_init_saved_callbacks(struct smc_sock *smc)
+{
+ smc->clcsk_state_change = NULL;
+ smc->clcsk_data_ready = NULL;
+ smc->clcsk_write_space = NULL;
+ smc->clcsk_error_report = NULL;
+}
+
static inline struct smc_sock *smc_clcsock_user_data(const struct sock *clcsk)
{
return (struct smc_sock *)
((uintptr_t)clcsk->sk_user_data & ~SK_USER_DATA_NOCOPY);
}
+/* save target_cb in saved_cb, and replace target_cb with new_cb */
+static inline void smc_clcsock_replace_cb(void (**target_cb)(struct sock *),
+ void (*new_cb)(struct sock *),
+ void (**saved_cb)(struct sock *))
+{
+ /* only save once */
+ if (!*saved_cb)
+ *saved_cb = *target_cb;
+ *target_cb = new_cb;
+}
+
+/* restore target_cb to saved_cb, and reset saved_cb to NULL */
+static inline void smc_clcsock_restore_cb(void (**target_cb)(struct sock *),
+ void (**saved_cb)(struct sock *))
+{
+ if (!*saved_cb)
+ return;
+ *target_cb = *saved_cb;
+ *saved_cb = NULL;
+}
+
extern struct workqueue_struct *smc_hs_wq; /* wq for handshake work */
extern struct workqueue_struct *smc_close_wq; /* wq for close work */
diff --git a/net/smc/smc_close.c b/net/smc/smc_close.c
index 676cb2333d3c..31db7438857c 100644
--- a/net/smc/smc_close.c
+++ b/net/smc/smc_close.c
@@ -214,8 +214,11 @@ again:
sk->sk_state = SMC_CLOSED;
sk->sk_state_change(sk); /* wake up accept */
if (smc->clcsock && smc->clcsock->sk) {
- smc->clcsock->sk->sk_data_ready = smc->clcsk_data_ready;
+ write_lock_bh(&smc->clcsock->sk->sk_callback_lock);
+ smc_clcsock_restore_cb(&smc->clcsock->sk->sk_data_ready,
+ &smc->clcsk_data_ready);
smc->clcsock->sk->sk_user_data = NULL;
+ write_unlock_bh(&smc->clcsock->sk->sk_callback_lock);
rc = kernel_sock_shutdown(smc->clcsock, SHUT_RDWR);
}
smc_close_cleanup_listen(sk);
diff --git a/net/smc/smc_rx.c b/net/smc/smc_rx.c
index 51e8eb2933ff..338b9ef806e8 100644
--- a/net/smc/smc_rx.c
+++ b/net/smc/smc_rx.c
@@ -355,12 +355,12 @@ int smc_rx_recvmsg(struct smc_sock *smc, struct msghdr *msg,
}
break;
}
+ if (!timeo)
+ return -EAGAIN;
if (signal_pending(current)) {
read_done = sock_intr_errno(timeo);
break;
}
- if (!timeo)
- return -EAGAIN;
}
if (!smc_rx_data_available(conn)) {
diff --git a/net/socket.c b/net/socket.c
index 6887840682bb..bb6a1a12fbde 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -504,7 +504,7 @@ static int sock_map_fd(struct socket *sock, int flags)
struct socket *sock_from_file(struct file *file)
{
if (file->f_op == &socket_file_ops)
- return file->private_data; /* set in sock_map_fd */
+ return file->private_data; /* set in sock_alloc_file */
return NULL;
}
@@ -1538,11 +1538,10 @@ int sock_create_kern(struct net *net, int family, int type, int protocol, struct
}
EXPORT_SYMBOL(sock_create_kern);
-int __sys_socket(int family, int type, int protocol)
+static struct socket *__sys_socket_create(int family, int type, int protocol)
{
- int retval;
struct socket *sock;
- int flags;
+ int retval;
/* Check the SOCK_* constants for consistency. */
BUILD_BUG_ON(SOCK_CLOEXEC != O_CLOEXEC);
@@ -1550,17 +1549,50 @@ int __sys_socket(int family, int type, int protocol)
BUILD_BUG_ON(SOCK_CLOEXEC & SOCK_TYPE_MASK);
BUILD_BUG_ON(SOCK_NONBLOCK & SOCK_TYPE_MASK);
- flags = type & ~SOCK_TYPE_MASK;
- if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
- return -EINVAL;
+ if ((type & ~SOCK_TYPE_MASK) & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
+ return ERR_PTR(-EINVAL);
type &= SOCK_TYPE_MASK;
+ retval = sock_create(family, type, protocol, &sock);
+ if (retval < 0)
+ return ERR_PTR(retval);
+
+ return sock;
+}
+
+struct file *__sys_socket_file(int family, int type, int protocol)
+{
+ struct socket *sock;
+ struct file *file;
+ int flags;
+
+ sock = __sys_socket_create(family, type, protocol);
+ if (IS_ERR(sock))
+ return ERR_CAST(sock);
+
+ flags = type & ~SOCK_TYPE_MASK;
if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
- retval = sock_create(family, type, protocol, &sock);
- if (retval < 0)
- return retval;
+ file = sock_alloc_file(sock, flags, NULL);
+ if (IS_ERR(file))
+ sock_release(sock);
+
+ return file;
+}
+
+int __sys_socket(int family, int type, int protocol)
+{
+ struct socket *sock;
+ int flags;
+
+ sock = __sys_socket_create(family, type, protocol);
+ if (IS_ERR(sock))
+ return PTR_ERR(sock);
+
+ flags = type & ~SOCK_TYPE_MASK;
+ if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
+ flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
return sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK));
}
diff --git a/net/sunrpc/auth_gss/gss_rpc_upcall.c b/net/sunrpc/auth_gss/gss_rpc_upcall.c
index 61c276bddaf2..f549e4c05def 100644
--- a/net/sunrpc/auth_gss/gss_rpc_upcall.c
+++ b/net/sunrpc/auth_gss/gss_rpc_upcall.c
@@ -98,6 +98,7 @@ static int gssp_rpc_create(struct net *net, struct rpc_clnt **_clnt)
* done without the correct namespace:
*/
.flags = RPC_CLNT_CREATE_NOPING |
+ RPC_CLNT_CREATE_CONNECTED |
RPC_CLNT_CREATE_NO_IDLE_TIMEOUT
};
struct rpc_clnt *clnt;
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index af0174d7ce5a..e2c6eca0271b 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -76,6 +76,7 @@ static int rpc_encode_header(struct rpc_task *task,
static int rpc_decode_header(struct rpc_task *task,
struct xdr_stream *xdr);
static int rpc_ping(struct rpc_clnt *clnt);
+static int rpc_ping_noreply(struct rpc_clnt *clnt);
static void rpc_check_timeout(struct rpc_task *task);
static void rpc_register_client(struct rpc_clnt *clnt)
@@ -483,6 +484,12 @@ static struct rpc_clnt *rpc_create_xprt(struct rpc_create_args *args,
rpc_shutdown_client(clnt);
return ERR_PTR(err);
}
+ } else if (args->flags & RPC_CLNT_CREATE_CONNECTED) {
+ int err = rpc_ping_noreply(clnt);
+ if (err != 0) {
+ rpc_shutdown_client(clnt);
+ return ERR_PTR(err);
+ }
}
clnt->cl_softrtry = 1;
@@ -1065,10 +1072,13 @@ rpc_task_get_next_xprt(struct rpc_clnt *clnt)
static
void rpc_task_set_transport(struct rpc_task *task, struct rpc_clnt *clnt)
{
- if (task->tk_xprt &&
- !(test_bit(XPRT_OFFLINE, &task->tk_xprt->state) &&
- (task->tk_flags & RPC_TASK_MOVEABLE)))
- return;
+ if (task->tk_xprt) {
+ if (!(test_bit(XPRT_OFFLINE, &task->tk_xprt->state) &&
+ (task->tk_flags & RPC_TASK_MOVEABLE)))
+ return;
+ xprt_release(task);
+ xprt_put(task->tk_xprt);
+ }
if (task->tk_flags & RPC_TASK_NO_ROUND_ROBIN)
task->tk_xprt = rpc_task_get_first_xprt(clnt);
else
@@ -2706,6 +2716,10 @@ static const struct rpc_procinfo rpcproc_null = {
.p_decode = rpcproc_decode_null,
};
+static const struct rpc_procinfo rpcproc_null_noreply = {
+ .p_encode = rpcproc_encode_null,
+};
+
static void
rpc_null_call_prepare(struct rpc_task *task, void *data)
{
@@ -2759,6 +2773,28 @@ static int rpc_ping(struct rpc_clnt *clnt)
return status;
}
+static int rpc_ping_noreply(struct rpc_clnt *clnt)
+{
+ struct rpc_message msg = {
+ .rpc_proc = &rpcproc_null_noreply,
+ };
+ struct rpc_task_setup task_setup_data = {
+ .rpc_client = clnt,
+ .rpc_message = &msg,
+ .callback_ops = &rpc_null_ops,
+ .flags = RPC_TASK_SOFT | RPC_TASK_SOFTCONN | RPC_TASK_NULLCREDS,
+ };
+ struct rpc_task *task;
+ int status;
+
+ task = rpc_run_task(&task_setup_data);
+ if (IS_ERR(task))
+ return PTR_ERR(task);
+ status = task->tk_status;
+ rpc_put_task(task);
+ return status;
+}
+
struct rpc_cb_add_xprt_calldata {
struct rpc_xprt_switch *xps;
struct rpc_xprt *xprt;
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 8ab64ea46870..650102a9c86a 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -1419,6 +1419,26 @@ static size_t xs_tcp_bc_maxpayload(struct rpc_xprt *xprt)
#endif /* CONFIG_SUNRPC_BACKCHANNEL */
/**
+ * xs_local_state_change - callback to handle AF_LOCAL socket state changes
+ * @sk: socket whose state has changed
+ *
+ */
+static void xs_local_state_change(struct sock *sk)
+{
+ struct rpc_xprt *xprt;
+ struct sock_xprt *transport;
+
+ if (!(xprt = xprt_from_sock(sk)))
+ return;
+ transport = container_of(xprt, struct sock_xprt, xprt);
+ if (sk->sk_shutdown & SHUTDOWN_MASK) {
+ clear_bit(XPRT_CONNECTED, &xprt->state);
+ /* Trigger the socket release */
+ xs_run_error_worker(transport, XPRT_SOCK_WAKE_DISCONNECT);
+ }
+}
+
+/**
* xs_tcp_state_change - callback to handle TCP socket state changes
* @sk: socket whose state has changed
*
@@ -1866,6 +1886,7 @@ static int xs_local_finish_connecting(struct rpc_xprt *xprt,
sk->sk_user_data = xprt;
sk->sk_data_ready = xs_data_ready;
sk->sk_write_space = xs_udp_write_space;
+ sk->sk_state_change = xs_local_state_change;
sk->sk_error_report = xs_error_report;
xprt_clear_connected(xprt);
@@ -1950,6 +1971,9 @@ static void xs_local_connect(struct rpc_xprt *xprt, struct rpc_task *task)
struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
int ret;
+ if (transport->file)
+ goto force_disconnect;
+
if (RPC_IS_ASYNC(task)) {
/*
* We want the AF_LOCAL connect to be resolved in the
@@ -1962,11 +1986,17 @@ static void xs_local_connect(struct rpc_xprt *xprt, struct rpc_task *task)
*/
task->tk_rpc_status = -ENOTCONN;
rpc_exit(task, -ENOTCONN);
- return;
+ goto out_wake;
}
ret = xs_local_setup_socket(transport);
if (ret && !RPC_IS_SOFTCONN(task))
msleep_interruptible(15000);
+ return;
+force_disconnect:
+ xprt_force_disconnect(xprt);
+out_wake:
+ xprt_clear_connecting(xprt);
+ xprt_wake_pending_tasks(xprt, -ENOTCONN);
}
#if IS_ENABLED(CONFIG_SUNRPC_SWAP)
@@ -2845,9 +2875,6 @@ static struct rpc_xprt *xs_setup_local(struct xprt_create *args)
}
xprt_set_bound(xprt);
xs_format_peer_addresses(xprt, "local", RPCBIND_NETID_LOCAL);
- ret = ERR_PTR(xs_local_setup_socket(transport));
- if (ret)
- goto out_err;
break;
default:
ret = ERR_PTR(-EAFNOSUPPORT);
diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c
index 12f7b56771d9..3919fe2c58c5 100644
--- a/net/tls/tls_device.c
+++ b/net/tls/tls_device.c
@@ -483,11 +483,13 @@ handle_error:
copy = min_t(size_t, size, (pfrag->size - pfrag->offset));
copy = min_t(size_t, copy, (max_open_record_len - record->len));
- rc = tls_device_copy_data(page_address(pfrag->page) +
- pfrag->offset, copy, msg_iter);
- if (rc)
- goto handle_error;
- tls_append_frag(record, pfrag, copy);
+ if (copy) {
+ rc = tls_device_copy_data(page_address(pfrag->page) +
+ pfrag->offset, copy, msg_iter);
+ if (rc)
+ goto handle_error;
+ tls_append_frag(record, pfrag, copy);
+ }
size -= copy;
if (!size) {
@@ -1345,7 +1347,10 @@ static int tls_device_down(struct net_device *netdev)
/* Device contexts for RX and TX will be freed in on sk_destruct
* by tls_device_free_ctx. rx_conf and tx_conf stay in TLS_HW.
+ * Now release the ref taken above.
*/
+ if (refcount_dec_and_test(&ctx->refcount))
+ tls_device_free_ctx(ctx);
}
up_write(&device_offload_lock);
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 21e808fcb676..1a3551b6d18b 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -3173,6 +3173,15 @@ int nl80211_parse_chandef(struct cfg80211_registered_device *rdev,
} else if (attrs[NL80211_ATTR_CHANNEL_WIDTH]) {
chandef->width =
nla_get_u32(attrs[NL80211_ATTR_CHANNEL_WIDTH]);
+ if (chandef->chan->band == NL80211_BAND_S1GHZ) {
+ /* User input error for channel width doesn't match channel */
+ if (chandef->width != ieee80211_s1g_channel_width(chandef->chan)) {
+ NL_SET_ERR_MSG_ATTR(extack,
+ attrs[NL80211_ATTR_CHANNEL_WIDTH],
+ "bad channel width");
+ return -EINVAL;
+ }
+ }
if (attrs[NL80211_ATTR_CENTER_FREQ1]) {
chandef->center_freq1 =
nla_get_u32(attrs[NL80211_ATTR_CENTER_FREQ1]);
@@ -11657,18 +11666,23 @@ static int nl80211_set_tx_bitrate_mask(struct sk_buff *skb,
struct cfg80211_bitrate_mask mask;
struct cfg80211_registered_device *rdev = info->user_ptr[0];
struct net_device *dev = info->user_ptr[1];
+ struct wireless_dev *wdev = dev->ieee80211_ptr;
int err;
if (!rdev->ops->set_bitrate_mask)
return -EOPNOTSUPP;
+ wdev_lock(wdev);
err = nl80211_parse_tx_bitrate_mask(info, info->attrs,
NL80211_ATTR_TX_RATES, &mask,
dev, true);
if (err)
- return err;
+ goto out;
- return rdev_set_bitrate_mask(rdev, dev, NULL, &mask);
+ err = rdev_set_bitrate_mask(rdev, dev, NULL, &mask);
+out:
+ wdev_unlock(wdev);
+ return err;
}
static int nl80211_register_mgmt(struct sk_buff *skb, struct genl_info *info)
diff --git a/net/wireless/scan.c b/net/wireless/scan.c
index 4a6d86432910..6d82bd9eaf8c 100644
--- a/net/wireless/scan.c
+++ b/net/wireless/scan.c
@@ -1829,7 +1829,7 @@ int cfg80211_get_ies_channel_number(const u8 *ie, size_t ielen,
if (tmp && tmp->datalen >= sizeof(struct ieee80211_s1g_oper_ie)) {
struct ieee80211_s1g_oper_ie *s1gop = (void *)tmp->data;
- return s1gop->primary_ch;
+ return s1gop->oper_ch;
}
} else {
tmp = cfg80211_find_elem(WLAN_EID_DS_PARAMS, ie, ielen);
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index 2c34caee0fd1..3a9348030e20 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -639,7 +639,7 @@ static int __xsk_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len
if (sk_can_busy_loop(sk))
sk_busy_loop(sk, 1); /* only support non-blocking sockets */
- if (xsk_no_wakeup(sk))
+ if (xs->zc && xsk_no_wakeup(sk))
return 0;
pool = xs->pool;
@@ -967,6 +967,19 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
xp_get_pool(umem_xs->pool);
xs->pool = umem_xs->pool;
+
+ /* If underlying shared umem was created without Tx
+ * ring, allocate Tx descs array that Tx batching API
+ * utilizes
+ */
+ if (xs->tx && !xs->pool->tx_descs) {
+ err = xp_alloc_tx_descs(xs->pool, xs);
+ if (err) {
+ xp_put_pool(xs->pool);
+ sockfd_put(sock);
+ goto out_unlock;
+ }
+ }
}
xdp_get_umem(umem_xs->umem);
diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c
index af040ffa14ff..87bdd71c7bb6 100644
--- a/net/xdp/xsk_buff_pool.c
+++ b/net/xdp/xsk_buff_pool.c
@@ -42,6 +42,16 @@ void xp_destroy(struct xsk_buff_pool *pool)
kvfree(pool);
}
+int xp_alloc_tx_descs(struct xsk_buff_pool *pool, struct xdp_sock *xs)
+{
+ pool->tx_descs = kvcalloc(xs->tx->nentries, sizeof(*pool->tx_descs),
+ GFP_KERNEL);
+ if (!pool->tx_descs)
+ return -ENOMEM;
+
+ return 0;
+}
+
struct xsk_buff_pool *xp_create_and_assign_umem(struct xdp_sock *xs,
struct xdp_umem *umem)
{
@@ -59,11 +69,9 @@ struct xsk_buff_pool *xp_create_and_assign_umem(struct xdp_sock *xs,
if (!pool->heads)
goto out;
- if (xs->tx) {
- pool->tx_descs = kcalloc(xs->tx->nentries, sizeof(*pool->tx_descs), GFP_KERNEL);
- if (!pool->tx_descs)
+ if (xs->tx)
+ if (xp_alloc_tx_descs(pool, xs))
goto out;
- }
pool->chunk_mask = ~((u64)umem->chunk_size - 1);
pool->addrs_cnt = umem->size;
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 19aa994f5d2c..f1876ea61fdc 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -2593,12 +2593,14 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy,
if (xfrm[i]->props.mode != XFRM_MODE_TRANSPORT) {
__u32 mark = 0;
+ int oif;
if (xfrm[i]->props.smark.v || xfrm[i]->props.smark.m)
mark = xfrm_smark_get(fl->flowi_mark, xfrm[i]);
family = xfrm[i]->props.family;
- dst = xfrm_dst_lookup(xfrm[i], tos, fl->flowi_oif,
+ oif = fl->flowi_oif ? : fl->flowi_l3mdev;
+ dst = xfrm_dst_lookup(xfrm[i], tos, oif,
&saddr, &daddr, family, mark);
err = PTR_ERR(dst);
if (IS_ERR(dst))
@@ -3742,7 +3744,7 @@ static int stale_bundle(struct dst_entry *dst)
void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev)
{
while ((dst = xfrm_dst_child(dst)) && dst->xfrm && dst->dev == dev) {
- dst->dev = dev_net(dev)->loopback_dev;
+ dst->dev = blackhole_netdev;
dev_hold(dst->dev);
dev_put(dev);
}
diff --git a/samples/trace_events/trace_custom_sched.h b/samples/trace_events/trace_custom_sched.h
index 9fdd8e7c2a45..951388334a3f 100644
--- a/samples/trace_events/trace_custom_sched.h
+++ b/samples/trace_events/trace_custom_sched.h
@@ -25,11 +25,11 @@ TRACE_CUSTOM_EVENT(sched_switch,
* that the custom event is using.
*/
TP_PROTO(bool preempt,
- unsigned int prev_state,
struct task_struct *prev,
- struct task_struct *next),
+ struct task_struct *next,
+ unsigned int prev_state),
- TP_ARGS(preempt, prev_state, prev, next),
+ TP_ARGS(preempt, prev, next, prev_state),
/*
* The next fields are where the customization happens.
diff --git a/scripts/Makefile.build b/scripts/Makefile.build
index 9717e6f6fb31..33c1ed581522 100644
--- a/scripts/Makefile.build
+++ b/scripts/Makefile.build
@@ -231,7 +231,7 @@ objtool_args = \
$(if $(part-of-module), --module) \
$(if $(CONFIG_X86_KERNEL_IBT), --lto --ibt) \
$(if $(CONFIG_FRAME_POINTER),, --no-fp) \
- $(if $(CONFIG_GCOV_KERNEL)$(CONFIG_LTO_CLANG), --no-unreachable)\
+ $(if $(CONFIG_GCOV_KERNEL), --no-unreachable) \
$(if $(CONFIG_RETPOLINE), --retpoline) \
$(if $(CONFIG_X86_SMAP), --uaccess) \
$(if $(CONFIG_FTRACE_MCOUNT_USE_OBJTOOL), --mcount) \
diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh
index 20f44504a644..9361a1ef02c9 100755
--- a/scripts/link-vmlinux.sh
+++ b/scripts/link-vmlinux.sh
@@ -140,7 +140,7 @@ objtool_link()
if ! is_enabled CONFIG_FRAME_POINTER; then
objtoolopt="${objtoolopt} --no-fp"
fi
- if is_enabled CONFIG_GCOV_KERNEL || is_enabled CONFIG_LTO_CLANG; then
+ if is_enabled CONFIG_GCOV_KERNEL; then
objtoolopt="${objtoolopt} --no-unreachable"
fi
if is_enabled CONFIG_RETPOLINE; then
diff --git a/security/selinux/ss/hashtab.c b/security/selinux/ss/hashtab.c
index 0ae4e4e57a40..3fb8f9026e9b 100644
--- a/security/selinux/ss/hashtab.c
+++ b/security/selinux/ss/hashtab.c
@@ -179,7 +179,8 @@ int hashtab_duplicate(struct hashtab *new, struct hashtab *orig,
kmem_cache_free(hashtab_node_cachep, cur);
}
}
- kmem_cache_free(hashtab_node_cachep, new);
+ kfree(new->htable);
+ memset(new, 0, sizeof(*new));
return -ENOMEM;
}
diff --git a/sound/firewire/fireworks/fireworks_hwdep.c b/sound/firewire/fireworks/fireworks_hwdep.c
index 626c0c34b0b6..3a53914277d3 100644
--- a/sound/firewire/fireworks/fireworks_hwdep.c
+++ b/sound/firewire/fireworks/fireworks_hwdep.c
@@ -34,6 +34,7 @@ hwdep_read_resp_buf(struct snd_efw *efw, char __user *buf, long remained,
type = SNDRV_FIREWIRE_EVENT_EFW_RESPONSE;
if (copy_to_user(buf, &type, sizeof(type)))
return -EFAULT;
+ count += sizeof(type);
remained -= sizeof(type);
buf += sizeof(type);
diff --git a/sound/hda/hdac_i915.c b/sound/hda/hdac_i915.c
index 48b8ed752b69..3f35972e1cf7 100644
--- a/sound/hda/hdac_i915.c
+++ b/sound/hda/hdac_i915.c
@@ -127,11 +127,10 @@ static int i915_gfx_present(struct pci_dev *hdac_pci)
display_dev = pci_get_class(class, display_dev);
if (display_dev && display_dev->vendor == PCI_VENDOR_ID_INTEL &&
- connectivity_check(display_dev, hdac_pci))
+ connectivity_check(display_dev, hdac_pci)) {
+ pci_dev_put(display_dev);
match = true;
-
- pci_dev_put(display_dev);
-
+ }
} while (!match && display_dev);
return match;
diff --git a/sound/hda/intel-dsp-config.c b/sound/hda/intel-dsp-config.c
index 8b0a16ba27d3..a8fe01764b25 100644
--- a/sound/hda/intel-dsp-config.c
+++ b/sound/hda/intel-dsp-config.c
@@ -424,6 +424,15 @@ static const struct config_entry config_table[] = {
.flags = FLAG_SOF | FLAG_SOF_ONLY_IF_DMIC_OR_SOUNDWIRE,
.device = 0x54c8,
},
+ /* RaptorLake-P */
+ {
+ .flags = FLAG_SOF | FLAG_SOF_ONLY_IF_DMIC_OR_SOUNDWIRE,
+ .device = 0x51ca,
+ },
+ {
+ .flags = FLAG_SOF | FLAG_SOF_ONLY_IF_DMIC_OR_SOUNDWIRE,
+ .device = 0x51cb,
+ },
#endif
};
diff --git a/sound/isa/wavefront/wavefront_synth.c b/sound/isa/wavefront/wavefront_synth.c
index 69cbc79fbb71..2aaaa6807174 100644
--- a/sound/isa/wavefront/wavefront_synth.c
+++ b/sound/isa/wavefront/wavefront_synth.c
@@ -1094,7 +1094,8 @@ wavefront_send_sample (snd_wavefront_t *dev,
if (dataptr < data_end) {
- __get_user (sample_short, dataptr);
+ if (get_user(sample_short, dataptr))
+ return -EFAULT;
dataptr += skip;
if (data_is_unsigned) { /* GUS ? */
diff --git a/sound/oss/dmasound/dmasound_core.c b/sound/oss/dmasound/dmasound_core.c
index 9c48f3a9e3d1..164335d3c200 100644
--- a/sound/oss/dmasound/dmasound_core.c
+++ b/sound/oss/dmasound/dmasound_core.c
@@ -1428,7 +1428,7 @@ void dmasound_deinit(void)
unregister_sound_dsp(sq_unit);
}
-static int dmasound_setup(char *str)
+static int __maybe_unused dmasound_setup(char *str)
{
int ints[6], size;
diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c
index 3e086eebf88d..31fe41795571 100644
--- a/sound/pci/hda/patch_hdmi.c
+++ b/sound/pci/hda/patch_hdmi.c
@@ -1395,7 +1395,7 @@ static int hdmi_find_pcm_slot(struct hdmi_spec *spec,
last_try:
/* the last try; check the empty slots in pins */
- for (i = 0; i < spec->num_nids; i++) {
+ for (i = 0; i < spec->pcm_used; i++) {
if (!test_bit(i, &spec->pcm_bitmap))
return i;
}
@@ -2325,7 +2325,9 @@ static int generic_hdmi_build_pcms(struct hda_codec *codec)
* dev_num is the device entry number in a pin
*/
- if (codec->mst_no_extra_pcms)
+ if (spec->dyn_pcm_no_legacy && codec->mst_no_extra_pcms)
+ pcm_num = spec->num_cvts;
+ else if (codec->mst_no_extra_pcms)
pcm_num = spec->num_nids;
else
pcm_num = spec->num_nids + spec->dev_num - 1;
@@ -4551,6 +4553,7 @@ HDA_CODEC_ENTRY(0x80862819, "DG2 HDMI", patch_i915_adlp_hdmi),
HDA_CODEC_ENTRY(0x8086281a, "Jasperlake HDMI", patch_i915_icl_hdmi),
HDA_CODEC_ENTRY(0x8086281b, "Elkhartlake HDMI", patch_i915_icl_hdmi),
HDA_CODEC_ENTRY(0x8086281c, "Alderlake-P HDMI", patch_i915_adlp_hdmi),
+HDA_CODEC_ENTRY(0x8086281f, "Raptorlake-P HDMI", patch_i915_adlp_hdmi),
HDA_CODEC_ENTRY(0x80862880, "CedarTrail HDMI", patch_generic_hdmi),
HDA_CODEC_ENTRY(0x80862882, "Valleyview2 HDMI", patch_i915_byt_hdmi),
HDA_CODEC_ENTRY(0x80862883, "Braswell HDMI", patch_i915_byt_hdmi),
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 62fbf3772b41..ad292df7d805 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -937,6 +937,9 @@ static int alc_init(struct hda_codec *codec)
return 0;
}
+#define alc_free snd_hda_gen_free
+
+#ifdef CONFIG_PM
static inline void alc_shutup(struct hda_codec *codec)
{
struct alc_spec *spec = codec->spec;
@@ -950,9 +953,6 @@ static inline void alc_shutup(struct hda_codec *codec)
alc_shutup_pins(codec);
}
-#define alc_free snd_hda_gen_free
-
-#ifdef CONFIG_PM
static void alc_power_eapd(struct hda_codec *codec)
{
alc_auto_setup_eapd(codec, false);
@@ -966,9 +966,7 @@ static int alc_suspend(struct hda_codec *codec)
spec->power_hook(codec);
return 0;
}
-#endif
-#ifdef CONFIG_PM
static int alc_resume(struct hda_codec *codec)
{
struct alc_spec *spec = codec->spec;
@@ -6780,6 +6778,41 @@ static void alc256_fixup_mic_no_presence_and_resume(struct hda_codec *codec,
}
}
+static void alc_fixup_dell4_mic_no_presence_quiet(struct hda_codec *codec,
+ const struct hda_fixup *fix,
+ int action)
+{
+ struct alc_spec *spec = codec->spec;
+ struct hda_input_mux *imux = &spec->gen.input_mux;
+ int i;
+
+ alc269_fixup_limit_int_mic_boost(codec, fix, action);
+
+ switch (action) {
+ case HDA_FIXUP_ACT_PRE_PROBE:
+ /**
+ * Set the vref of pin 0x19 (Headset Mic) and pin 0x1b (Headphone Mic)
+ * to Hi-Z to avoid pop noises at startup and when plugging and
+ * unplugging headphones.
+ */
+ snd_hda_codec_set_pin_target(codec, 0x19, PIN_VREFHIZ);
+ snd_hda_codec_set_pin_target(codec, 0x1b, PIN_VREFHIZ);
+ break;
+ case HDA_FIXUP_ACT_PROBE:
+ /**
+ * Make the internal mic (0x12) the default input source to
+ * prevent pop noises on cold boot.
+ */
+ for (i = 0; i < imux->num_items; i++) {
+ if (spec->gen.imux_pins[i] == 0x12) {
+ spec->gen.cur_mux[0] = i;
+ break;
+ }
+ }
+ break;
+ }
+}
+
enum {
ALC269_FIXUP_GPIO2,
ALC269_FIXUP_SONY_VAIO,
@@ -6821,6 +6854,7 @@ enum {
ALC269_FIXUP_DELL2_MIC_NO_PRESENCE,
ALC269_FIXUP_DELL3_MIC_NO_PRESENCE,
ALC269_FIXUP_DELL4_MIC_NO_PRESENCE,
+ ALC269_FIXUP_DELL4_MIC_NO_PRESENCE_QUIET,
ALC269_FIXUP_HEADSET_MODE,
ALC269_FIXUP_HEADSET_MODE_NO_HP_MIC,
ALC269_FIXUP_ASPIRE_HEADSET_MIC,
@@ -7006,11 +7040,13 @@ enum {
ALC285_FIXUP_LEGION_Y9000X_AUTOMUTE,
ALC287_FIXUP_LEGION_16ACHG6,
ALC287_FIXUP_CS35L41_I2C_2,
+ ALC287_FIXUP_CS35L41_I2C_2_HP_GPIO_LED,
ALC245_FIXUP_CS35L41_SPI_2,
ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED,
ALC245_FIXUP_CS35L41_SPI_4,
ALC245_FIXUP_CS35L41_SPI_4_HP_GPIO_LED,
ALC285_FIXUP_HP_SPEAKERS_MICMUTE_LED,
+ ALC295_FIXUP_FRAMEWORK_LAPTOP_MIC_NO_PRESENCE,
};
static const struct hda_fixup alc269_fixups[] = {
@@ -8768,6 +8804,14 @@ static const struct hda_fixup alc269_fixups[] = {
[ALC287_FIXUP_CS35L41_I2C_2] = {
.type = HDA_FIXUP_FUNC,
.v.func = cs35l41_fixup_i2c_two,
+ .chained = true,
+ .chain_id = ALC269_FIXUP_THINKPAD_ACPI,
+ },
+ [ALC287_FIXUP_CS35L41_I2C_2_HP_GPIO_LED] = {
+ .type = HDA_FIXUP_FUNC,
+ .v.func = cs35l41_fixup_i2c_two,
+ .chained = true,
+ .chain_id = ALC285_FIXUP_HP_MUTE_LED,
},
[ALC245_FIXUP_CS35L41_SPI_2] = {
.type = HDA_FIXUP_FUNC,
@@ -8799,6 +8843,21 @@ static const struct hda_fixup alc269_fixups[] = {
.chained = true,
.chain_id = ALC285_FIXUP_HP_MUTE_LED,
},
+ [ALC269_FIXUP_DELL4_MIC_NO_PRESENCE_QUIET] = {
+ .type = HDA_FIXUP_FUNC,
+ .v.func = alc_fixup_dell4_mic_no_presence_quiet,
+ .chained = true,
+ .chain_id = ALC269_FIXUP_DELL4_MIC_NO_PRESENCE,
+ },
+ [ALC295_FIXUP_FRAMEWORK_LAPTOP_MIC_NO_PRESENCE] = {
+ .type = HDA_FIXUP_PINS,
+ .v.pins = (const struct hda_pintbl[]) {
+ { 0x19, 0x02a1112c }, /* use as headset mic, without its own jack detect */
+ { }
+ },
+ .chained = true,
+ .chain_id = ALC269_FIXUP_HEADSET_MODE_NO_HP_MIC
+ },
};
static const struct snd_pci_quirk alc269_fixup_tbl[] = {
@@ -8889,6 +8948,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x1028, 0x09bf, "Dell Precision", ALC233_FIXUP_ASUS_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x1028, 0x0a2e, "Dell", ALC236_FIXUP_DELL_AIO_HEADSET_MIC),
SND_PCI_QUIRK(0x1028, 0x0a30, "Dell", ALC236_FIXUP_DELL_AIO_HEADSET_MIC),
+ SND_PCI_QUIRK(0x1028, 0x0a38, "Dell Latitude 7520", ALC269_FIXUP_DELL4_MIC_NO_PRESENCE_QUIET),
SND_PCI_QUIRK(0x1028, 0x0a58, "Dell", ALC255_FIXUP_DELL_HEADSET_MIC),
SND_PCI_QUIRK(0x1028, 0x0a61, "Dell XPS 15 9510", ALC289_FIXUP_DUAL_SPK),
SND_PCI_QUIRK(0x1028, 0x0a62, "Dell Precision 5560", ALC289_FIXUP_DUAL_SPK),
@@ -9016,21 +9076,22 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x103c, 0x8896, "HP EliteBook 855 G8 Notebook PC", ALC285_FIXUP_HP_MUTE_LED),
SND_PCI_QUIRK(0x103c, 0x8898, "HP EliteBook 845 G8 Notebook PC", ALC285_FIXUP_HP_LIMIT_INT_MIC_BOOST),
SND_PCI_QUIRK(0x103c, 0x88d0, "HP Pavilion 15-eh1xxx (mainboard 88D0)", ALC287_FIXUP_HP_GPIO_LED),
- SND_PCI_QUIRK(0x103c, 0x896e, "HP EliteBook x360 830 G9", ALC245_FIXUP_CS35L41_SPI_2),
- SND_PCI_QUIRK(0x103c, 0x8971, "HP EliteBook 830 G9", ALC245_FIXUP_CS35L41_SPI_2),
- SND_PCI_QUIRK(0x103c, 0x8972, "HP EliteBook 840 G9", ALC245_FIXUP_CS35L41_SPI_2),
- SND_PCI_QUIRK(0x103c, 0x8973, "HP EliteBook 860 G9", ALC245_FIXUP_CS35L41_SPI_2),
- SND_PCI_QUIRK(0x103c, 0x8974, "HP EliteBook 840 Aero G9", ALC245_FIXUP_CS35L41_SPI_2),
- SND_PCI_QUIRK(0x103c, 0x8975, "HP EliteBook x360 840 Aero G9", ALC245_FIXUP_CS35L41_SPI_2),
+ SND_PCI_QUIRK(0x103c, 0x896e, "HP EliteBook x360 830 G9", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
+ SND_PCI_QUIRK(0x103c, 0x8971, "HP EliteBook 830 G9", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
+ SND_PCI_QUIRK(0x103c, 0x8972, "HP EliteBook 840 G9", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
+ SND_PCI_QUIRK(0x103c, 0x8973, "HP EliteBook 860 G9", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
+ SND_PCI_QUIRK(0x103c, 0x8974, "HP EliteBook 840 Aero G9", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
+ SND_PCI_QUIRK(0x103c, 0x8975, "HP EliteBook x360 840 Aero G9", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x8981, "HP Elite Dragonfly G3", ALC245_FIXUP_CS35L41_SPI_4),
SND_PCI_QUIRK(0x103c, 0x898e, "HP EliteBook 835 G9", ALC287_FIXUP_CS35L41_I2C_2),
SND_PCI_QUIRK(0x103c, 0x898f, "HP EliteBook 835 G9", ALC287_FIXUP_CS35L41_I2C_2),
- SND_PCI_QUIRK(0x103c, 0x8991, "HP EliteBook 845 G9", ALC287_FIXUP_CS35L41_I2C_2),
+ SND_PCI_QUIRK(0x103c, 0x8991, "HP EliteBook 845 G9", ALC287_FIXUP_CS35L41_I2C_2_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x8992, "HP EliteBook 845 G9", ALC287_FIXUP_CS35L41_I2C_2),
- SND_PCI_QUIRK(0x103c, 0x8994, "HP EliteBook 855 G9", ALC287_FIXUP_CS35L41_I2C_2),
+ SND_PCI_QUIRK(0x103c, 0x8994, "HP EliteBook 855 G9", ALC287_FIXUP_CS35L41_I2C_2_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x8995, "HP EliteBook 855 G9", ALC287_FIXUP_CS35L41_I2C_2),
SND_PCI_QUIRK(0x103c, 0x89a4, "HP ProBook 440 G9", ALC236_FIXUP_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x89a6, "HP ProBook 450 G9", ALC236_FIXUP_HP_GPIO_LED),
+ SND_PCI_QUIRK(0x103c, 0x89aa, "HP EliteBook 630 G9", ALC236_FIXUP_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x89ac, "HP EliteBook 640 G9", ALC236_FIXUP_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x89ae, "HP EliteBook 650 G9", ALC236_FIXUP_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x89c3, "Zbook Studio G9", ALC245_FIXUP_CS35L41_SPI_4_HP_GPIO_LED),
@@ -9163,6 +9224,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x1558, 0x8562, "Clevo NH[57][0-9]RZ[Q]", ALC269_FIXUP_DMIC),
SND_PCI_QUIRK(0x1558, 0x8668, "Clevo NP50B[BE]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x1558, 0x866d, "Clevo NP5[05]PN[HJK]", ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
+ SND_PCI_QUIRK(0x1558, 0x867c, "Clevo NP7[01]PNP", ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x1558, 0x867d, "Clevo NP7[01]PN[HJK]", ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x1558, 0x8680, "Clevo NJ50LU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x1558, 0x8686, "Clevo NH50[CZ]U", ALC256_FIXUP_MIC_NO_PRESENCE_AND_RESUME),
@@ -9237,6 +9299,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x17aa, 0x3813, "Legion 7i 15IMHG05", ALC287_FIXUP_LEGION_15IMHG05_SPEAKERS),
SND_PCI_QUIRK(0x17aa, 0x3818, "Lenovo C940", ALC298_FIXUP_LENOVO_SPK_VOLUME),
SND_PCI_QUIRK(0x17aa, 0x3819, "Lenovo 13s Gen2 ITL", ALC287_FIXUP_13S_GEN2_SPEAKERS),
+ SND_PCI_QUIRK(0x17aa, 0x3820, "Yoga Duet 7 13ITL6", ALC287_FIXUP_YOGA7_14ITL_SPEAKERS),
SND_PCI_QUIRK(0x17aa, 0x3824, "Legion Y9000X 2020", ALC285_FIXUP_LEGION_Y9000X_SPEAKERS),
SND_PCI_QUIRK(0x17aa, 0x3827, "Ideapad S740", ALC285_FIXUP_IDEAPAD_S740_COEF),
SND_PCI_QUIRK(0x17aa, 0x3834, "Lenovo IdeaPad Slim 9i 14ITL5", ALC287_FIXUP_YOGA7_14ITL_SPEAKERS),
@@ -9279,6 +9342,14 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x1c06, 0x2013, "Lemote A1802", ALC269_FIXUP_LEMOTE_A1802),
SND_PCI_QUIRK(0x1c06, 0x2015, "Lemote A190X", ALC269_FIXUP_LEMOTE_A190X),
SND_PCI_QUIRK(0x1d05, 0x1132, "TongFang PHxTxX1", ALC256_FIXUP_SET_COEF_DEFAULTS),
+ SND_PCI_QUIRK(0x1d05, 0x1096, "TongFang GMxMRxx", ALC269_FIXUP_NO_SHUTUP),
+ SND_PCI_QUIRK(0x1d05, 0x1100, "TongFang GKxNRxx", ALC269_FIXUP_NO_SHUTUP),
+ SND_PCI_QUIRK(0x1d05, 0x1111, "TongFang GMxZGxx", ALC269_FIXUP_NO_SHUTUP),
+ SND_PCI_QUIRK(0x1d05, 0x1119, "TongFang GMxZGxx", ALC269_FIXUP_NO_SHUTUP),
+ SND_PCI_QUIRK(0x1d05, 0x1129, "TongFang GMxZGxx", ALC269_FIXUP_NO_SHUTUP),
+ SND_PCI_QUIRK(0x1d05, 0x1147, "TongFang GMxTGxx", ALC269_FIXUP_NO_SHUTUP),
+ SND_PCI_QUIRK(0x1d05, 0x115c, "TongFang GMxTGxx", ALC269_FIXUP_NO_SHUTUP),
+ SND_PCI_QUIRK(0x1d05, 0x121b, "TongFang GMxAGxx", ALC269_FIXUP_NO_SHUTUP),
SND_PCI_QUIRK(0x1d72, 0x1602, "RedmiBook", ALC255_FIXUP_XIAOMI_HEADSET_MIC),
SND_PCI_QUIRK(0x1d72, 0x1701, "XiaomiNotebook Pro", ALC298_FIXUP_DELL1_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x1d72, 0x1901, "RedmiBook 14", ALC256_FIXUP_ASUS_HEADSET_MIC),
@@ -9286,6 +9357,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x8086, 0x2074, "Intel NUC 8", ALC233_FIXUP_INTEL_NUC8_DMIC),
SND_PCI_QUIRK(0x8086, 0x2080, "Intel NUC 8 Rugged", ALC256_FIXUP_INTEL_NUC8_RUGGED),
SND_PCI_QUIRK(0x8086, 0x2081, "Intel NUC 10", ALC256_FIXUP_INTEL_NUC10),
+ SND_PCI_QUIRK(0xf111, 0x0001, "Framework Laptop", ALC295_FIXUP_FRAMEWORK_LAPTOP_MIC_NO_PRESENCE),
#if 0
/* Below is a quirk table taken from the old code.
diff --git a/sound/soc/atmel/mchp-pdmc.c b/sound/soc/atmel/mchp-pdmc.c
index 1a7802fbf23c..a3856c73e221 100644
--- a/sound/soc/atmel/mchp-pdmc.c
+++ b/sound/soc/atmel/mchp-pdmc.c
@@ -966,6 +966,7 @@ static int mchp_pdmc_process(struct snd_pcm_substream *substream,
static struct snd_dmaengine_pcm_config mchp_pdmc_config = {
.process = mchp_pdmc_process,
+ .prepare_slave_config = snd_dmaengine_pcm_prepare_slave_config,
};
static int mchp_pdmc_probe(struct platform_device *pdev)
diff --git a/sound/soc/atmel/sam9g20_wm8731.c b/sound/soc/atmel/sam9g20_wm8731.c
index 33e43013ff77..0d639a33ad96 100644
--- a/sound/soc/atmel/sam9g20_wm8731.c
+++ b/sound/soc/atmel/sam9g20_wm8731.c
@@ -46,35 +46,6 @@
*/
#undef ENABLE_MIC_INPUT
-static struct clk *mclk;
-
-static int at91sam9g20ek_set_bias_level(struct snd_soc_card *card,
- struct snd_soc_dapm_context *dapm,
- enum snd_soc_bias_level level)
-{
- static int mclk_on;
- int ret = 0;
-
- switch (level) {
- case SND_SOC_BIAS_ON:
- case SND_SOC_BIAS_PREPARE:
- if (!mclk_on)
- ret = clk_enable(mclk);
- if (ret == 0)
- mclk_on = 1;
- break;
-
- case SND_SOC_BIAS_OFF:
- case SND_SOC_BIAS_STANDBY:
- if (mclk_on)
- clk_disable(mclk);
- mclk_on = 0;
- break;
- }
-
- return ret;
-}
-
static const struct snd_soc_dapm_widget at91sam9g20ek_dapm_widgets[] = {
SND_SOC_DAPM_MIC("Int Mic", NULL),
SND_SOC_DAPM_SPK("Ext Spk", NULL),
@@ -135,7 +106,6 @@ static struct snd_soc_card snd_soc_at91sam9g20ek = {
.owner = THIS_MODULE,
.dai_link = &at91sam9g20ek_dai,
.num_links = 1,
- .set_bias_level = at91sam9g20ek_set_bias_level,
.dapm_widgets = at91sam9g20ek_dapm_widgets,
.num_dapm_widgets = ARRAY_SIZE(at91sam9g20ek_dapm_widgets),
@@ -148,7 +118,6 @@ static int at91sam9g20ek_audio_probe(struct platform_device *pdev)
{
struct device_node *np = pdev->dev.of_node;
struct device_node *codec_np, *cpu_np;
- struct clk *pllb;
struct snd_soc_card *card = &snd_soc_at91sam9g20ek;
int ret;
@@ -162,31 +131,6 @@ static int at91sam9g20ek_audio_probe(struct platform_device *pdev)
return -EINVAL;
}
- /*
- * Codec MCLK is supplied by PCK0 - set it up.
- */
- mclk = clk_get(NULL, "pck0");
- if (IS_ERR(mclk)) {
- dev_err(&pdev->dev, "Failed to get MCLK\n");
- ret = PTR_ERR(mclk);
- goto err;
- }
-
- pllb = clk_get(NULL, "pllb");
- if (IS_ERR(pllb)) {
- dev_err(&pdev->dev, "Failed to get PLLB\n");
- ret = PTR_ERR(pllb);
- goto err_mclk;
- }
- ret = clk_set_parent(mclk, pllb);
- clk_put(pllb);
- if (ret != 0) {
- dev_err(&pdev->dev, "Failed to set MCLK parent\n");
- goto err_mclk;
- }
-
- clk_set_rate(mclk, MCLK_RATE);
-
card->dev = &pdev->dev;
/* Parse device node info */
@@ -230,9 +174,6 @@ static int at91sam9g20ek_audio_probe(struct platform_device *pdev)
return ret;
-err_mclk:
- clk_put(mclk);
- mclk = NULL;
err:
atmel_ssc_put_audio(0);
return ret;
@@ -242,8 +183,6 @@ static int at91sam9g20ek_audio_remove(struct platform_device *pdev)
{
struct snd_soc_card *card = platform_get_drvdata(pdev);
- clk_disable(mclk);
- mclk = NULL;
snd_soc_unregister_card(card);
atmel_ssc_put_audio(0);
diff --git a/sound/soc/codecs/cs35l41-lib.c b/sound/soc/codecs/cs35l41-lib.c
index e5a56bcbb223..aa6823fbd1a4 100644
--- a/sound/soc/codecs/cs35l41-lib.c
+++ b/sound/soc/codecs/cs35l41-lib.c
@@ -822,8 +822,8 @@ int cs35l41_otp_unpack(struct device *dev, struct regmap *regmap)
word_offset = otp_map_match->word_offset;
for (i = 0; i < otp_map_match->num_elements; i++) {
- dev_dbg(dev, "bitoffset= %d, word_offset=%d, bit_sum mod 32=%d\n",
- bit_offset, word_offset, bit_sum % 32);
+ dev_dbg(dev, "bitoffset= %d, word_offset=%d, bit_sum mod 32=%d otp_map[i].size = %d\n",
+ bit_offset, word_offset, bit_sum % 32, otp_map[i].size);
if (bit_offset + otp_map[i].size - 1 >= 32) {
otp_val = (otp_mem[word_offset] &
GENMASK(31, bit_offset)) >> bit_offset;
@@ -831,12 +831,14 @@ int cs35l41_otp_unpack(struct device *dev, struct regmap *regmap)
GENMASK(bit_offset + otp_map[i].size - 33, 0)) <<
(32 - bit_offset);
bit_offset += otp_map[i].size - 32;
- } else {
+ } else if (bit_offset + otp_map[i].size - 1 >= 0) {
otp_val = (otp_mem[word_offset] &
GENMASK(bit_offset + otp_map[i].size - 1, bit_offset)
) >> bit_offset;
bit_offset += otp_map[i].size;
- }
+ } else /* both bit_offset and otp_map[i].size are 0 */
+ otp_val = 0;
+
bit_sum += otp_map[i].size;
if (bit_offset == 32) {
diff --git a/sound/soc/codecs/da7219.c b/sound/soc/codecs/da7219.c
index 13009d08b09a..c7493549a9a5 100644
--- a/sound/soc/codecs/da7219.c
+++ b/sound/soc/codecs/da7219.c
@@ -446,7 +446,7 @@ static int da7219_tonegen_freq_put(struct snd_kcontrol *kcontrol,
struct soc_mixer_control *mixer_ctrl =
(struct soc_mixer_control *) kcontrol->private_value;
unsigned int reg = mixer_ctrl->reg;
- __le16 val;
+ __le16 val_new, val_old;
int ret;
/*
@@ -454,13 +454,19 @@ static int da7219_tonegen_freq_put(struct snd_kcontrol *kcontrol,
* Therefore we need to convert to little endian here to align with
* HW registers.
*/
- val = cpu_to_le16(ucontrol->value.integer.value[0]);
+ val_new = cpu_to_le16(ucontrol->value.integer.value[0]);
mutex_lock(&da7219->ctrl_lock);
- ret = regmap_raw_write(da7219->regmap, reg, &val, sizeof(val));
+ ret = regmap_raw_read(da7219->regmap, reg, &val_old, sizeof(val_old));
+ if (ret == 0 && (val_old != val_new))
+ ret = regmap_raw_write(da7219->regmap, reg,
+ &val_new, sizeof(val_new));
mutex_unlock(&da7219->ctrl_lock);
- return ret;
+ if (ret < 0)
+ return ret;
+
+ return val_old != val_new;
}
diff --git a/sound/soc/codecs/lpass-rx-macro.c b/sound/soc/codecs/lpass-rx-macro.c
index 6884ae505e33..3143f9cd7277 100644
--- a/sound/soc/codecs/lpass-rx-macro.c
+++ b/sound/soc/codecs/lpass-rx-macro.c
@@ -3566,12 +3566,16 @@ static int rx_macro_probe(struct platform_device *pdev)
return PTR_ERR(rx->pds);
base = devm_platform_ioremap_resource(pdev, 0);
- if (IS_ERR(base))
- return PTR_ERR(base);
+ if (IS_ERR(base)) {
+ ret = PTR_ERR(base);
+ goto err;
+ }
rx->regmap = devm_regmap_init_mmio(dev, base, &rx_regmap_config);
- if (IS_ERR(rx->regmap))
- return PTR_ERR(rx->regmap);
+ if (IS_ERR(rx->regmap)) {
+ ret = PTR_ERR(rx->regmap);
+ goto err;
+ }
dev_set_drvdata(dev, rx);
@@ -3632,6 +3636,8 @@ err_mclk:
err_dcodec:
clk_disable_unprepare(rx->macro);
err:
+ lpass_macro_pds_exit(rx->pds);
+
return ret;
}
diff --git a/sound/soc/codecs/lpass-tx-macro.c b/sound/soc/codecs/lpass-tx-macro.c
index 714a411d5337..55503ba480bb 100644
--- a/sound/soc/codecs/lpass-tx-macro.c
+++ b/sound/soc/codecs/lpass-tx-macro.c
@@ -1828,8 +1828,10 @@ static int tx_macro_probe(struct platform_device *pdev)
return PTR_ERR(tx->pds);
base = devm_platform_ioremap_resource(pdev, 0);
- if (IS_ERR(base))
- return PTR_ERR(base);
+ if (IS_ERR(base)) {
+ ret = PTR_ERR(base);
+ goto err;
+ }
/* Update defaults for lpass sc7280 */
if (of_device_is_compatible(np, "qcom,sc7280-lpass-tx-macro")) {
@@ -1846,8 +1848,10 @@ static int tx_macro_probe(struct platform_device *pdev)
}
tx->regmap = devm_regmap_init_mmio(dev, base, &tx_regmap_config);
- if (IS_ERR(tx->regmap))
- return PTR_ERR(tx->regmap);
+ if (IS_ERR(tx->regmap)) {
+ ret = PTR_ERR(tx->regmap);
+ goto err;
+ }
dev_set_drvdata(dev, tx);
@@ -1907,6 +1911,8 @@ err_mclk:
err_dcodec:
clk_disable_unprepare(tx->macro);
err:
+ lpass_macro_pds_exit(tx->pds);
+
return ret;
}
diff --git a/sound/soc/codecs/lpass-va-macro.c b/sound/soc/codecs/lpass-va-macro.c
index f3cb596058e0..d18b56e60433 100644
--- a/sound/soc/codecs/lpass-va-macro.c
+++ b/sound/soc/codecs/lpass-va-macro.c
@@ -1434,8 +1434,10 @@ static int va_macro_probe(struct platform_device *pdev)
va->dmic_clk_div = VA_MACRO_CLK_DIV_2;
} else {
ret = va_macro_validate_dmic_sample_rate(sample_rate, va);
- if (!ret)
- return -EINVAL;
+ if (!ret) {
+ ret = -EINVAL;
+ goto err;
+ }
}
base = devm_platform_ioremap_resource(pdev, 0);
@@ -1492,6 +1494,8 @@ err_mclk:
err_dcodec:
clk_disable_unprepare(va->macro);
err:
+ lpass_macro_pds_exit(va->pds);
+
return ret;
}
diff --git a/sound/soc/codecs/max98090.c b/sound/soc/codecs/max98090.c
index b45ec35cd63c..62b41ca050a2 100644
--- a/sound/soc/codecs/max98090.c
+++ b/sound/soc/codecs/max98090.c
@@ -413,6 +413,9 @@ static int max98090_put_enab_tlv(struct snd_kcontrol *kcontrol,
val = (val >> mc->shift) & mask;
+ if (sel < 0 || sel > mc->max)
+ return -EINVAL;
+
*select = sel;
/* Setting a volume is only valid if it is already On */
@@ -427,7 +430,7 @@ static int max98090_put_enab_tlv(struct snd_kcontrol *kcontrol,
mask << mc->shift,
sel << mc->shift);
- return 0;
+ return *select != val;
}
static const char *max98090_perf_pwr_text[] =
diff --git a/sound/soc/codecs/msm8916-wcd-digital.c b/sound/soc/codecs/msm8916-wcd-digital.c
index 9ad7fc0baf07..20a07c92b2fc 100644
--- a/sound/soc/codecs/msm8916-wcd-digital.c
+++ b/sound/soc/codecs/msm8916-wcd-digital.c
@@ -1206,9 +1206,16 @@ static int msm8916_wcd_digital_probe(struct platform_device *pdev)
dev_set_drvdata(dev, priv);
- return devm_snd_soc_register_component(dev, &msm8916_wcd_digital,
+ ret = devm_snd_soc_register_component(dev, &msm8916_wcd_digital,
msm8916_wcd_digital_dai,
ARRAY_SIZE(msm8916_wcd_digital_dai));
+ if (ret)
+ goto err_mclk;
+
+ return 0;
+
+err_mclk:
+ clk_disable_unprepare(priv->mclk);
err_clk:
clk_disable_unprepare(priv->ahbclk);
return ret;
diff --git a/sound/soc/codecs/rk817_codec.c b/sound/soc/codecs/rk817_codec.c
index 8fffe378618d..cce6f4e7992f 100644
--- a/sound/soc/codecs/rk817_codec.c
+++ b/sound/soc/codecs/rk817_codec.c
@@ -489,7 +489,7 @@ static int rk817_platform_probe(struct platform_device *pdev)
rk817_codec_parse_dt_property(&pdev->dev, rk817_codec_data);
- rk817_codec_data->mclk = clk_get(pdev->dev.parent, "mclk");
+ rk817_codec_data->mclk = devm_clk_get(pdev->dev.parent, "mclk");
if (IS_ERR(rk817_codec_data->mclk)) {
dev_dbg(&pdev->dev, "Unable to get mclk\n");
ret = -ENXIO;
diff --git a/sound/soc/codecs/rt5682.c b/sound/soc/codecs/rt5682.c
index be68d573a490..2b6c6d6b9771 100644
--- a/sound/soc/codecs/rt5682.c
+++ b/sound/soc/codecs/rt5682.c
@@ -1100,6 +1100,15 @@ void rt5682_jack_detect_handler(struct work_struct *work)
return;
}
+ if (rt5682->is_sdw) {
+ if (pm_runtime_status_suspended(rt5682->slave->dev.parent)) {
+ dev_dbg(&rt5682->slave->dev,
+ "%s: parent device is pm_runtime_status_suspended, skipping jack detection\n",
+ __func__);
+ return;
+ }
+ }
+
dapm = snd_soc_component_get_dapm(rt5682->component);
snd_soc_dapm_mutex_lock(dapm);
@@ -2822,14 +2831,11 @@ static int rt5682_bclk_set_rate(struct clk_hw *hw, unsigned long rate,
for_each_component_dais(component, dai)
if (dai->id == RT5682_AIF1)
- break;
- if (!dai) {
- dev_err(rt5682->i2c_dev, "dai %d not found in component\n",
- RT5682_AIF1);
- return -ENODEV;
- }
+ return rt5682_set_bclk1_ratio(dai, factor);
- return rt5682_set_bclk1_ratio(dai, factor);
+ dev_err(rt5682->i2c_dev, "dai %d not found in component\n",
+ RT5682_AIF1);
+ return -ENODEV;
}
static const struct clk_ops rt5682_dai_clk_ops[RT5682_DAI_NUM_CLKS] = {
diff --git a/sound/soc/codecs/rt5682s.c b/sound/soc/codecs/rt5682s.c
index 1cba8ec7cedb..b55f3ac3a267 100644
--- a/sound/soc/codecs/rt5682s.c
+++ b/sound/soc/codecs/rt5682s.c
@@ -2687,14 +2687,11 @@ static int rt5682s_bclk_set_rate(struct clk_hw *hw, unsigned long rate,
for_each_component_dais(component, dai)
if (dai->id == RT5682S_AIF1)
- break;
- if (!dai) {
- dev_err(component->dev, "dai %d not found in component\n",
- RT5682S_AIF1);
- return -ENODEV;
- }
+ return rt5682s_set_bclk1_ratio(dai, factor);
- return rt5682s_set_bclk1_ratio(dai, factor);
+ dev_err(component->dev, "dai %d not found in component\n",
+ RT5682S_AIF1);
+ return -ENODEV;
}
static const struct clk_ops rt5682s_dai_clk_ops[RT5682S_DAI_NUM_CLKS] = {
diff --git a/sound/soc/codecs/rt711.c b/sound/soc/codecs/rt711.c
index 6770825d037a..ea25fd58d43a 100644
--- a/sound/soc/codecs/rt711.c
+++ b/sound/soc/codecs/rt711.c
@@ -245,6 +245,13 @@ static void rt711_jack_detect_handler(struct work_struct *work)
if (!rt711->component->card->instantiated)
return;
+ if (pm_runtime_status_suspended(rt711->slave->dev.parent)) {
+ dev_dbg(&rt711->slave->dev,
+ "%s: parent device is pm_runtime_status_suspended, skipping jack detection\n",
+ __func__);
+ return;
+ }
+
reg = RT711_VERB_GET_PIN_SENSE | RT711_HP_OUT;
ret = regmap_read(rt711->regmap, reg, &jack_status);
if (ret < 0)
diff --git a/sound/soc/codecs/rt9120.c b/sound/soc/codecs/rt9120.c
index 7aa1772a915f..6e0d7cf0c8c9 100644
--- a/sound/soc/codecs/rt9120.c
+++ b/sound/soc/codecs/rt9120.c
@@ -341,7 +341,6 @@ static int rt9120_get_reg_size(unsigned int reg)
{
switch (reg) {
case 0x00:
- case 0x09:
case 0x20 ... 0x27:
return 2;
case 0x30 ... 0x3D:
diff --git a/sound/soc/codecs/wcd934x.c b/sound/soc/codecs/wcd934x.c
index 1e75e93cf28f..6298ebe96e94 100644
--- a/sound/soc/codecs/wcd934x.c
+++ b/sound/soc/codecs/wcd934x.c
@@ -1274,29 +1274,7 @@ static int wcd934x_set_sido_input_src(struct wcd934x_codec *wcd, int sido_src)
if (sido_src == wcd->sido_input_src)
return 0;
- if (sido_src == SIDO_SOURCE_INTERNAL) {
- regmap_update_bits(wcd->regmap, WCD934X_ANA_BUCK_CTL,
- WCD934X_ANA_BUCK_HI_ACCU_EN_MASK, 0);
- usleep_range(100, 110);
- regmap_update_bits(wcd->regmap, WCD934X_ANA_BUCK_CTL,
- WCD934X_ANA_BUCK_HI_ACCU_PRE_ENX_MASK, 0x0);
- usleep_range(100, 110);
- regmap_update_bits(wcd->regmap, WCD934X_ANA_RCO,
- WCD934X_ANA_RCO_BG_EN_MASK, 0);
- usleep_range(100, 110);
- regmap_update_bits(wcd->regmap, WCD934X_ANA_BUCK_CTL,
- WCD934X_ANA_BUCK_PRE_EN1_MASK,
- WCD934X_ANA_BUCK_PRE_EN1_ENABLE);
- usleep_range(100, 110);
- regmap_update_bits(wcd->regmap, WCD934X_ANA_BUCK_CTL,
- WCD934X_ANA_BUCK_PRE_EN2_MASK,
- WCD934X_ANA_BUCK_PRE_EN2_ENABLE);
- usleep_range(100, 110);
- regmap_update_bits(wcd->regmap, WCD934X_ANA_BUCK_CTL,
- WCD934X_ANA_BUCK_HI_ACCU_EN_MASK,
- WCD934X_ANA_BUCK_HI_ACCU_ENABLE);
- usleep_range(100, 110);
- } else if (sido_src == SIDO_SOURCE_RCO_BG) {
+ if (sido_src == SIDO_SOURCE_RCO_BG) {
regmap_update_bits(wcd->regmap, WCD934X_ANA_RCO,
WCD934X_ANA_RCO_BG_EN_MASK,
WCD934X_ANA_RCO_BG_ENABLE);
@@ -1382,8 +1360,6 @@ static int wcd934x_disable_ana_bias_and_syclk(struct wcd934x_codec *wcd)
regmap_update_bits(wcd->regmap, WCD934X_CLK_SYS_MCLK_PRG,
WCD934X_EXT_CLK_BUF_EN_MASK |
WCD934X_MCLK_EN_MASK, 0x0);
- wcd934x_set_sido_input_src(wcd, SIDO_SOURCE_INTERNAL);
-
regmap_update_bits(wcd->regmap, WCD934X_ANA_BIAS,
WCD934X_ANA_BIAS_EN_MASK, 0);
regmap_update_bits(wcd->regmap, WCD934X_ANA_BIAS,
diff --git a/sound/soc/codecs/wm8731.c b/sound/soc/codecs/wm8731.c
index 5d4949c2ec9b..b14c6d104e6d 100644
--- a/sound/soc/codecs/wm8731.c
+++ b/sound/soc/codecs/wm8731.c
@@ -602,7 +602,7 @@ static int wm8731_hw_init(struct device *dev, struct wm8731_priv *wm8731)
ret = wm8731_reset(wm8731->regmap);
if (ret < 0) {
dev_err(dev, "Failed to issue reset: %d\n", ret);
- goto err_regulator_enable;
+ goto err;
}
/* Clear POWEROFF, keep everything else disabled */
@@ -619,10 +619,7 @@ static int wm8731_hw_init(struct device *dev, struct wm8731_priv *wm8731)
regcache_mark_dirty(wm8731->regmap);
-err_regulator_enable:
- /* Regulators will be enabled by bias management */
- regulator_bulk_disable(ARRAY_SIZE(wm8731->supplies), wm8731->supplies);
-
+err:
return ret;
}
@@ -760,21 +757,27 @@ static int wm8731_i2c_probe(struct i2c_client *i2c,
ret = PTR_ERR(wm8731->regmap);
dev_err(&i2c->dev, "Failed to allocate register map: %d\n",
ret);
- return ret;
+ goto err_regulator_enable;
}
ret = wm8731_hw_init(&i2c->dev, wm8731);
if (ret != 0)
- return ret;
+ goto err_regulator_enable;
ret = devm_snd_soc_register_component(&i2c->dev,
&soc_component_dev_wm8731, &wm8731_dai, 1);
if (ret != 0) {
dev_err(&i2c->dev, "Failed to register CODEC: %d\n", ret);
- return ret;
+ goto err_regulator_enable;
}
return 0;
+
+err_regulator_enable:
+ /* Regulators will be enabled by bias management */
+ regulator_bulk_disable(ARRAY_SIZE(wm8731->supplies), wm8731->supplies);
+
+ return ret;
}
static const struct i2c_device_id wm8731_i2c_id[] = {
diff --git a/sound/soc/codecs/wm8958-dsp2.c b/sound/soc/codecs/wm8958-dsp2.c
index e4018ba3b19a..7878c7a58ff1 100644
--- a/sound/soc/codecs/wm8958-dsp2.c
+++ b/sound/soc/codecs/wm8958-dsp2.c
@@ -530,7 +530,7 @@ static int wm8958_mbc_put(struct snd_kcontrol *kcontrol,
wm8958_dsp_apply(component, mbc, wm8994->mbc_ena[mbc]);
- return 0;
+ return 1;
}
#define WM8958_MBC_SWITCH(xname, xval) {\
@@ -656,7 +656,7 @@ static int wm8958_vss_put(struct snd_kcontrol *kcontrol,
wm8958_dsp_apply(component, vss, wm8994->vss_ena[vss]);
- return 0;
+ return 1;
}
@@ -730,7 +730,7 @@ static int wm8958_hpf_put(struct snd_kcontrol *kcontrol,
wm8958_dsp_apply(component, hpf % 3, ucontrol->value.integer.value[0]);
- return 0;
+ return 1;
}
#define WM8958_HPF_SWITCH(xname, xval) {\
@@ -824,7 +824,7 @@ static int wm8958_enh_eq_put(struct snd_kcontrol *kcontrol,
wm8958_dsp_apply(component, eq, ucontrol->value.integer.value[0]);
- return 0;
+ return 1;
}
#define WM8958_ENH_EQ_SWITCH(xname, xval) {\
diff --git a/sound/soc/fsl/fsl_sai.c b/sound/soc/fsl/fsl_sai.c
index 4650a6931a94..ffc24afb5a7a 100644
--- a/sound/soc/fsl/fsl_sai.c
+++ b/sound/soc/fsl/fsl_sai.c
@@ -372,7 +372,7 @@ static int fsl_sai_set_bclk(struct snd_soc_dai *dai, bool tx, u32 freq)
continue;
if (ratio == 1 && !support_1_1_ratio)
continue;
- else if (ratio & 1)
+ if ((ratio & 1) && ratio > 1)
continue;
diff = abs((long)clk_rate - ratio * freq);
diff --git a/sound/soc/generic/simple-card-utils.c b/sound/soc/generic/simple-card-utils.c
index 8e037835bc58..da0c27828ce6 100644
--- a/sound/soc/generic/simple-card-utils.c
+++ b/sound/soc/generic/simple-card-utils.c
@@ -322,7 +322,7 @@ void asoc_simple_shutdown(struct snd_pcm_substream *substream)
if (props->mclk_fs && !dai->clk_fixed && !snd_soc_dai_active(cpu_dai))
snd_soc_dai_set_sysclk(cpu_dai,
- 0, 0, SND_SOC_CLOCK_IN);
+ 0, 0, SND_SOC_CLOCK_OUT);
asoc_simple_clk_disable(dai);
}
@@ -364,13 +364,15 @@ static int asoc_simple_set_tdm(struct snd_soc_dai *dai,
struct snd_pcm_hw_params *params)
{
int sample_bits = params_width(params);
- int slot_width = simple_dai->slot_width;
- int slot_count = simple_dai->slots;
+ int slot_width, slot_count;
int i, ret;
if (!simple_dai || !simple_dai->tdm_width_map)
return 0;
+ slot_width = simple_dai->slot_width;
+ slot_count = simple_dai->slots;
+
if (slot_width == 0)
slot_width = sample_bits;
diff --git a/sound/soc/intel/boards/sof_es8336.c b/sound/soc/intel/boards/sof_es8336.c
index 5e0529aa4f1d..9d617831dd20 100644
--- a/sound/soc/intel/boards/sof_es8336.c
+++ b/sound/soc/intel/boards/sof_es8336.c
@@ -27,9 +27,11 @@
#define SOF_ES8336_SSP_CODEC(quirk) ((quirk) & GENMASK(3, 0))
#define SOF_ES8336_SSP_CODEC_MASK (GENMASK(3, 0))
-#define SOF_ES8336_TGL_GPIO_QUIRK BIT(4)
+#define SOF_ES8336_SPEAKERS_EN_GPIO1_QUIRK BIT(4)
#define SOF_ES8336_ENABLE_DMIC BIT(5)
#define SOF_ES8336_JD_INVERTED BIT(6)
+#define SOF_ES8336_HEADPHONE_GPIO BIT(7)
+#define SOC_ES8336_HEADSET_MIC1 BIT(8)
static unsigned long quirk;
@@ -39,7 +41,7 @@ MODULE_PARM_DESC(quirk, "Board-specific quirk override");
struct sof_es8336_private {
struct device *codec_dev;
- struct gpio_desc *gpio_pa;
+ struct gpio_desc *gpio_speakers, *gpio_headphone;
struct snd_soc_jack jack;
struct list_head hdmi_pcm_list;
bool speaker_en;
@@ -51,19 +53,31 @@ struct sof_hdmi_pcm {
int device;
};
-static const struct acpi_gpio_params pa_enable_gpio = { 0, 0, true };
-static const struct acpi_gpio_mapping acpi_es8336_gpios[] = {
- { "pa-enable-gpios", &pa_enable_gpio, 1 },
+static const struct acpi_gpio_params enable_gpio0 = { 0, 0, true };
+static const struct acpi_gpio_params enable_gpio1 = { 1, 0, true };
+
+static const struct acpi_gpio_mapping acpi_speakers_enable_gpio0[] = {
+ { "speakers-enable-gpios", &enable_gpio0, 1 },
{ }
};
-static const struct acpi_gpio_params quirk_pa_enable_gpio = { 1, 0, true };
-static const struct acpi_gpio_mapping quirk_acpi_es8336_gpios[] = {
- { "pa-enable-gpios", &quirk_pa_enable_gpio, 1 },
+static const struct acpi_gpio_mapping acpi_speakers_enable_gpio1[] = {
+ { "speakers-enable-gpios", &enable_gpio1, 1 },
+};
+
+static const struct acpi_gpio_mapping acpi_enable_both_gpios[] = {
+ { "speakers-enable-gpios", &enable_gpio0, 1 },
+ { "headphone-enable-gpios", &enable_gpio1, 1 },
{ }
};
-static const struct acpi_gpio_mapping *gpio_mapping = acpi_es8336_gpios;
+static const struct acpi_gpio_mapping acpi_enable_both_gpios_rev_order[] = {
+ { "speakers-enable-gpios", &enable_gpio1, 1 },
+ { "headphone-enable-gpios", &enable_gpio0, 1 },
+ { }
+};
+
+static const struct acpi_gpio_mapping *gpio_mapping = acpi_speakers_enable_gpio0;
static void log_quirks(struct device *dev)
{
@@ -71,10 +85,14 @@ static void log_quirks(struct device *dev)
dev_info(dev, "quirk SSP%ld\n", SOF_ES8336_SSP_CODEC(quirk));
if (quirk & SOF_ES8336_ENABLE_DMIC)
dev_info(dev, "quirk DMIC enabled\n");
- if (quirk & SOF_ES8336_TGL_GPIO_QUIRK)
- dev_info(dev, "quirk TGL GPIO enabled\n");
+ if (quirk & SOF_ES8336_SPEAKERS_EN_GPIO1_QUIRK)
+ dev_info(dev, "Speakers GPIO1 quirk enabled\n");
+ if (quirk & SOF_ES8336_HEADPHONE_GPIO)
+ dev_info(dev, "quirk headphone GPIO enabled\n");
if (quirk & SOF_ES8336_JD_INVERTED)
dev_info(dev, "quirk JD inverted enabled\n");
+ if (quirk & SOC_ES8336_HEADSET_MIC1)
+ dev_info(dev, "quirk headset at mic1 port enabled\n");
}
static int sof_es8316_speaker_power_event(struct snd_soc_dapm_widget *w,
@@ -83,12 +101,23 @@ static int sof_es8316_speaker_power_event(struct snd_soc_dapm_widget *w,
struct snd_soc_card *card = w->dapm->card;
struct sof_es8336_private *priv = snd_soc_card_get_drvdata(card);
+ if (priv->speaker_en == !SND_SOC_DAPM_EVENT_ON(event))
+ return 0;
+
+ priv->speaker_en = !SND_SOC_DAPM_EVENT_ON(event);
+
+ if (SND_SOC_DAPM_EVENT_ON(event))
+ msleep(70);
+
+ gpiod_set_value_cansleep(priv->gpio_speakers, priv->speaker_en);
+
+ if (!(quirk & SOF_ES8336_HEADPHONE_GPIO))
+ return 0;
+
if (SND_SOC_DAPM_EVENT_ON(event))
- priv->speaker_en = false;
- else
- priv->speaker_en = true;
+ msleep(70);
- gpiod_set_value_cansleep(priv->gpio_pa, priv->speaker_en);
+ gpiod_set_value_cansleep(priv->gpio_headphone, priv->speaker_en);
return 0;
}
@@ -114,18 +143,23 @@ static const struct snd_soc_dapm_route sof_es8316_audio_map[] = {
/*
* There is no separate speaker output instead the speakers are muxed to
- * the HP outputs. The mux is controlled by the "Speaker Power" supply.
+ * the HP outputs. The mux is controlled Speaker and/or headphone switch.
*/
{"Speaker", NULL, "HPOL"},
{"Speaker", NULL, "HPOR"},
{"Speaker", NULL, "Speaker Power"},
};
-static const struct snd_soc_dapm_route sof_es8316_intmic_in1_map[] = {
+static const struct snd_soc_dapm_route sof_es8316_headset_mic2_map[] = {
{"MIC1", NULL, "Internal Mic"},
{"MIC2", NULL, "Headset Mic"},
};
+static const struct snd_soc_dapm_route sof_es8316_headset_mic1_map[] = {
+ {"MIC2", NULL, "Internal Mic"},
+ {"MIC1", NULL, "Headset Mic"},
+};
+
static const struct snd_soc_dapm_route dmic_map[] = {
/* digital mics */
{"DMic", NULL, "SoC DMIC"},
@@ -199,8 +233,13 @@ static int sof_es8316_init(struct snd_soc_pcm_runtime *runtime)
card->dapm.idle_bias_off = true;
- custom_map = sof_es8316_intmic_in1_map;
- num_routes = ARRAY_SIZE(sof_es8316_intmic_in1_map);
+ if (quirk & SOC_ES8336_HEADSET_MIC1) {
+ custom_map = sof_es8316_headset_mic1_map;
+ num_routes = ARRAY_SIZE(sof_es8316_headset_mic1_map);
+ } else {
+ custom_map = sof_es8316_headset_mic2_map;
+ num_routes = ARRAY_SIZE(sof_es8316_headset_mic2_map);
+ }
ret = snd_soc_dapm_add_routes(&card->dapm, custom_map, num_routes);
if (ret)
@@ -233,8 +272,14 @@ static int sof_es8336_quirk_cb(const struct dmi_system_id *id)
{
quirk = (unsigned long)id->driver_data;
- if (quirk & SOF_ES8336_TGL_GPIO_QUIRK)
- gpio_mapping = quirk_acpi_es8336_gpios;
+ if (quirk & SOF_ES8336_HEADPHONE_GPIO) {
+ if (quirk & SOF_ES8336_SPEAKERS_EN_GPIO1_QUIRK)
+ gpio_mapping = acpi_enable_both_gpios;
+ else
+ gpio_mapping = acpi_enable_both_gpios_rev_order;
+ } else if (quirk & SOF_ES8336_SPEAKERS_EN_GPIO1_QUIRK) {
+ gpio_mapping = acpi_speakers_enable_gpio1;
+ }
return 1;
}
@@ -257,7 +302,16 @@ static const struct dmi_system_id sof_es8336_quirk_table[] = {
DMI_MATCH(DMI_SYS_VENDOR, "IP3 tech"),
DMI_MATCH(DMI_BOARD_NAME, "WN1"),
},
- .driver_data = (void *)(SOF_ES8336_TGL_GPIO_QUIRK)
+ .driver_data = (void *)(SOF_ES8336_SPEAKERS_EN_GPIO1_QUIRK)
+ },
+ {
+ .callback = sof_es8336_quirk_cb,
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "HUAWEI"),
+ DMI_MATCH(DMI_BOARD_NAME, "BOHB-WAX9-PCB-B2"),
+ },
+ .driver_data = (void *)(SOF_ES8336_HEADPHONE_GPIO |
+ SOC_ES8336_HEADSET_MIC1)
},
{}
};
@@ -585,10 +639,17 @@ static int sof_es8336_probe(struct platform_device *pdev)
if (ret)
dev_warn(codec_dev, "unable to add GPIO mapping table\n");
- priv->gpio_pa = gpiod_get_optional(codec_dev, "pa-enable", GPIOD_OUT_LOW);
- if (IS_ERR(priv->gpio_pa)) {
- ret = dev_err_probe(dev, PTR_ERR(priv->gpio_pa),
- "could not get pa-enable GPIO\n");
+ priv->gpio_speakers = gpiod_get_optional(codec_dev, "speakers-enable", GPIOD_OUT_LOW);
+ if (IS_ERR(priv->gpio_speakers)) {
+ ret = dev_err_probe(dev, PTR_ERR(priv->gpio_speakers),
+ "could not get speakers-enable GPIO\n");
+ goto err_put_codec;
+ }
+
+ priv->gpio_headphone = gpiod_get_optional(codec_dev, "headphone-enable", GPIOD_OUT_LOW);
+ if (IS_ERR(priv->gpio_headphone)) {
+ ret = dev_err_probe(dev, PTR_ERR(priv->gpio_headphone),
+ "could not get headphone-enable GPIO\n");
goto err_put_codec;
}
@@ -604,7 +665,7 @@ static int sof_es8336_probe(struct platform_device *pdev)
ret = devm_snd_soc_register_card(dev, card);
if (ret) {
- gpiod_put(priv->gpio_pa);
+ gpiod_put(priv->gpio_speakers);
dev_err(dev, "snd_soc_register_card failed: %d\n", ret);
goto err_put_codec;
}
@@ -622,7 +683,7 @@ static int sof_es8336_remove(struct platform_device *pdev)
struct snd_soc_card *card = platform_get_drvdata(pdev);
struct sof_es8336_private *priv = snd_soc_card_get_drvdata(card);
- gpiod_put(priv->gpio_pa);
+ gpiod_put(priv->gpio_speakers);
device_remove_software_node(priv->codec_dev);
put_device(priv->codec_dev);
diff --git a/sound/soc/intel/boards/sof_rt5682.c b/sound/soc/intel/boards/sof_rt5682.c
index ebec4d15edaa..7126fcb63d90 100644
--- a/sound/soc/intel/boards/sof_rt5682.c
+++ b/sound/soc/intel/boards/sof_rt5682.c
@@ -212,6 +212,19 @@ static const struct dmi_system_id sof_rt5682_quirk_table[] = {
SOF_SSP_BT_OFFLOAD_PRESENT),
},
+ {
+ .callback = sof_rt5682_quirk_cb,
+ .matches = {
+ DMI_MATCH(DMI_PRODUCT_FAMILY, "Google_Brya"),
+ DMI_MATCH(DMI_OEM_STRING, "AUDIO-MAX98360_ALC5682I_I2S_AMP_SSP2"),
+ },
+ .driver_data = (void *)(SOF_RT5682_MCLK_EN |
+ SOF_RT5682_SSP_CODEC(0) |
+ SOF_SPEAKER_AMP_PRESENT |
+ SOF_MAX98360A_SPEAKER_AMP_PRESENT |
+ SOF_RT5682_SSP_AMP(2) |
+ SOF_RT5682_NUM_HDMIDEV(4)),
+ },
{}
};
diff --git a/sound/soc/intel/common/soc-acpi-intel-tgl-match.c b/sound/soc/intel/common/soc-acpi-intel-tgl-match.c
index 6edc9b7108cd..ef19150e7b2e 100644
--- a/sound/soc/intel/common/soc-acpi-intel-tgl-match.c
+++ b/sound/soc/intel/common/soc-acpi-intel-tgl-match.c
@@ -132,13 +132,13 @@ static const struct snd_soc_acpi_adr_device mx8373_1_adr[] = {
{
.adr = 0x000123019F837300ull,
.num_endpoints = 1,
- .endpoints = &spk_l_endpoint,
+ .endpoints = &spk_r_endpoint,
.name_prefix = "Right"
},
{
.adr = 0x000127019F837300ull,
.num_endpoints = 1,
- .endpoints = &spk_r_endpoint,
+ .endpoints = &spk_l_endpoint,
.name_prefix = "Left"
}
};
diff --git a/sound/soc/meson/aiu-acodec-ctrl.c b/sound/soc/meson/aiu-acodec-ctrl.c
index 27a6d3259c50..3776b073a3db 100644
--- a/sound/soc/meson/aiu-acodec-ctrl.c
+++ b/sound/soc/meson/aiu-acodec-ctrl.c
@@ -58,7 +58,7 @@ static int aiu_acodec_ctrl_mux_put_enum(struct snd_kcontrol *kcontrol,
snd_soc_dapm_mux_update_power(dapm, kcontrol, mux, e, NULL);
- return 0;
+ return 1;
}
static SOC_ENUM_SINGLE_DECL(aiu_acodec_ctrl_mux_enum, AIU_ACODEC_CTRL,
@@ -193,6 +193,9 @@ static const struct snd_soc_component_driver aiu_acodec_ctrl_component = {
.of_xlate_dai_name = aiu_acodec_of_xlate_dai_name,
.endianness = 1,
.non_legacy_dai_naming = 1,
+#ifdef CONFIG_DEBUG_FS
+ .debugfs_prefix = "acodec",
+#endif
};
int aiu_acodec_ctrl_register_component(struct device *dev)
diff --git a/sound/soc/meson/aiu-codec-ctrl.c b/sound/soc/meson/aiu-codec-ctrl.c
index c3ea733fce91..286ac4983d40 100644
--- a/sound/soc/meson/aiu-codec-ctrl.c
+++ b/sound/soc/meson/aiu-codec-ctrl.c
@@ -57,7 +57,7 @@ static int aiu_codec_ctrl_mux_put_enum(struct snd_kcontrol *kcontrol,
snd_soc_dapm_mux_update_power(dapm, kcontrol, mux, e, NULL);
- return 0;
+ return 1;
}
static SOC_ENUM_SINGLE_DECL(aiu_hdmi_ctrl_mux_enum, AIU_HDMI_CLK_DATA_CTRL,
@@ -140,6 +140,9 @@ static const struct snd_soc_component_driver aiu_hdmi_ctrl_component = {
.of_xlate_dai_name = aiu_hdmi_of_xlate_dai_name,
.endianness = 1,
.non_legacy_dai_naming = 1,
+#ifdef CONFIG_DEBUG_FS
+ .debugfs_prefix = "hdmi",
+#endif
};
int aiu_hdmi_ctrl_register_component(struct device *dev)
diff --git a/sound/soc/meson/aiu.c b/sound/soc/meson/aiu.c
index d299a70db7e5..88e611e64d14 100644
--- a/sound/soc/meson/aiu.c
+++ b/sound/soc/meson/aiu.c
@@ -103,6 +103,9 @@ static const struct snd_soc_component_driver aiu_cpu_component = {
.pointer = aiu_fifo_pointer,
.probe = aiu_cpu_component_probe,
.remove = aiu_cpu_component_remove,
+#ifdef CONFIG_DEBUG_FS
+ .debugfs_prefix = "cpu",
+#endif
};
static struct snd_soc_dai_driver aiu_cpu_dai_drv[] = {
diff --git a/sound/soc/meson/axg-card.c b/sound/soc/meson/axg-card.c
index cbbaa55d92a6..2b77010c2c5c 100644
--- a/sound/soc/meson/axg-card.c
+++ b/sound/soc/meson/axg-card.c
@@ -320,7 +320,6 @@ static int axg_card_add_link(struct snd_soc_card *card, struct device_node *np,
dai_link->cpus = cpu;
dai_link->num_cpus = 1;
- dai_link->nonatomic = true;
ret = meson_card_parse_dai(card, np, &dai_link->cpus->of_node,
&dai_link->cpus->dai_name);
diff --git a/sound/soc/meson/axg-tdm-interface.c b/sound/soc/meson/axg-tdm-interface.c
index 0c31934a9630..e076ced30025 100644
--- a/sound/soc/meson/axg-tdm-interface.c
+++ b/sound/soc/meson/axg-tdm-interface.c
@@ -351,29 +351,13 @@ static int axg_tdm_iface_hw_free(struct snd_pcm_substream *substream,
return 0;
}
-static int axg_tdm_iface_trigger(struct snd_pcm_substream *substream,
- int cmd,
+static int axg_tdm_iface_prepare(struct snd_pcm_substream *substream,
struct snd_soc_dai *dai)
{
- struct axg_tdm_stream *ts =
- snd_soc_dai_get_dma_data(dai, substream);
-
- switch (cmd) {
- case SNDRV_PCM_TRIGGER_START:
- case SNDRV_PCM_TRIGGER_RESUME:
- case SNDRV_PCM_TRIGGER_PAUSE_RELEASE:
- axg_tdm_stream_start(ts);
- break;
- case SNDRV_PCM_TRIGGER_SUSPEND:
- case SNDRV_PCM_TRIGGER_PAUSE_PUSH:
- case SNDRV_PCM_TRIGGER_STOP:
- axg_tdm_stream_stop(ts);
- break;
- default:
- return -EINVAL;
- }
+ struct axg_tdm_stream *ts = snd_soc_dai_get_dma_data(dai, substream);
- return 0;
+ /* Force all attached formatters to update */
+ return axg_tdm_stream_reset(ts);
}
static int axg_tdm_iface_remove_dai(struct snd_soc_dai *dai)
@@ -413,8 +397,8 @@ static const struct snd_soc_dai_ops axg_tdm_iface_ops = {
.set_fmt = axg_tdm_iface_set_fmt,
.startup = axg_tdm_iface_startup,
.hw_params = axg_tdm_iface_hw_params,
+ .prepare = axg_tdm_iface_prepare,
.hw_free = axg_tdm_iface_hw_free,
- .trigger = axg_tdm_iface_trigger,
};
/* TDM Backend DAIs */
diff --git a/sound/soc/meson/g12a-tohdmitx.c b/sound/soc/meson/g12a-tohdmitx.c
index 9b2b59536ced..6c99052feafd 100644
--- a/sound/soc/meson/g12a-tohdmitx.c
+++ b/sound/soc/meson/g12a-tohdmitx.c
@@ -67,7 +67,7 @@ static int g12a_tohdmitx_i2s_mux_put_enum(struct snd_kcontrol *kcontrol,
snd_soc_dapm_mux_update_power(dapm, kcontrol, mux, e, NULL);
- return 0;
+ return 1;
}
static SOC_ENUM_SINGLE_DECL(g12a_tohdmitx_i2s_mux_enum, TOHDMITX_CTRL0,
diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c
index ce153ac2c3ab..8c7da82a62ca 100644
--- a/sound/soc/soc-core.c
+++ b/sound/soc/soc-core.c
@@ -2587,6 +2587,11 @@ int snd_soc_component_initialize(struct snd_soc_component *component,
component->dev = dev;
component->driver = driver;
+#ifdef CONFIG_DEBUG_FS
+ if (!component->debugfs_prefix)
+ component->debugfs_prefix = driver->debugfs_prefix;
+#endif
+
return 0;
}
EXPORT_SYMBOL_GPL(snd_soc_component_initialize);
diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c
index b435b5c4cfb7..ca917a849c42 100644
--- a/sound/soc/soc-dapm.c
+++ b/sound/soc/soc-dapm.c
@@ -1687,8 +1687,7 @@ static void dapm_seq_run(struct snd_soc_card *card,
switch (w->id) {
case snd_soc_dapm_pre:
if (!w->event)
- list_for_each_entry_safe_continue(w, n, list,
- power_list);
+ continue;
if (event == SND_SOC_DAPM_STREAM_START)
ret = w->event(w,
@@ -1700,8 +1699,7 @@ static void dapm_seq_run(struct snd_soc_card *card,
case snd_soc_dapm_post:
if (!w->event)
- list_for_each_entry_safe_continue(w, n, list,
- power_list);
+ continue;
if (event == SND_SOC_DAPM_STREAM_START)
ret = w->event(w,
diff --git a/sound/soc/soc-generic-dmaengine-pcm.c b/sound/soc/soc-generic-dmaengine-pcm.c
index 2ab2ddc1294d..285441d6aeed 100644
--- a/sound/soc/soc-generic-dmaengine-pcm.c
+++ b/sound/soc/soc-generic-dmaengine-pcm.c
@@ -86,10 +86,10 @@ static int dmaengine_pcm_hw_params(struct snd_soc_component *component,
memset(&slave_config, 0, sizeof(slave_config));
- if (pcm->config && pcm->config->prepare_slave_config)
- prepare_slave_config = pcm->config->prepare_slave_config;
- else
+ if (!pcm->config)
prepare_slave_config = snd_dmaengine_pcm_prepare_slave_config;
+ else
+ prepare_slave_config = pcm->config->prepare_slave_config;
if (prepare_slave_config) {
int ret = prepare_slave_config(substream, params, &slave_config);
diff --git a/sound/soc/soc-ops.c b/sound/soc/soc-ops.c
index a0ca58ba1627..e693070f51fe 100644
--- a/sound/soc/soc-ops.c
+++ b/sound/soc/soc-ops.c
@@ -461,7 +461,7 @@ int snd_soc_put_volsw_sx(struct snd_kcontrol *kcontrol,
ret = err;
}
}
- return err;
+ return ret;
}
EXPORT_SYMBOL_GPL(snd_soc_put_volsw_sx);
@@ -519,7 +519,15 @@ int snd_soc_put_volsw_range(struct snd_kcontrol *kcontrol,
unsigned int mask = (1 << fls(max)) - 1;
unsigned int invert = mc->invert;
unsigned int val, val_mask;
- int err, ret;
+ int err, ret, tmp;
+
+ tmp = ucontrol->value.integer.value[0];
+ if (tmp < 0)
+ return -EINVAL;
+ if (mc->platform_max && tmp > mc->platform_max)
+ return -EINVAL;
+ if (tmp > mc->max - mc->min + 1)
+ return -EINVAL;
if (invert)
val = (max - ucontrol->value.integer.value[0]) & mask;
@@ -534,6 +542,14 @@ int snd_soc_put_volsw_range(struct snd_kcontrol *kcontrol,
ret = err;
if (snd_soc_volsw_is_stereo(mc)) {
+ tmp = ucontrol->value.integer.value[1];
+ if (tmp < 0)
+ return -EINVAL;
+ if (mc->platform_max && tmp > mc->platform_max)
+ return -EINVAL;
+ if (tmp > mc->max - mc->min + 1)
+ return -EINVAL;
+
if (invert)
val = (max - ucontrol->value.integer.value[1]) & mask;
else
diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c
index 9a954680d492..11c9853e9e80 100644
--- a/sound/soc/soc-pcm.c
+++ b/sound/soc/soc-pcm.c
@@ -1214,7 +1214,7 @@ static int dpcm_be_connect(struct snd_soc_pcm_runtime *fe,
be_substream->pcm->nonatomic = 1;
}
- dpcm = kzalloc(sizeof(struct snd_soc_dpcm), GFP_ATOMIC);
+ dpcm = kzalloc(sizeof(struct snd_soc_dpcm), GFP_KERNEL);
if (!dpcm)
return -ENOMEM;
diff --git a/sound/soc/soc-topology.c b/sound/soc/soc-topology.c
index 72e50df7052c..3bb90a819650 100644
--- a/sound/soc/soc-topology.c
+++ b/sound/soc/soc-topology.c
@@ -1436,12 +1436,12 @@ static int soc_tplg_dapm_widget_create(struct soc_tplg *tplg,
template.num_kcontrols = le32_to_cpu(w->num_kcontrols);
kc = devm_kcalloc(tplg->dev, le32_to_cpu(w->num_kcontrols), sizeof(*kc), GFP_KERNEL);
if (!kc)
- goto err;
+ goto hdr_err;
kcontrol_type = devm_kcalloc(tplg->dev, le32_to_cpu(w->num_kcontrols), sizeof(unsigned int),
GFP_KERNEL);
if (!kcontrol_type)
- goto err;
+ goto hdr_err;
for (i = 0; i < le32_to_cpu(w->num_kcontrols); i++) {
control_hdr = (struct snd_soc_tplg_ctl_hdr *)tplg->pos;
diff --git a/sound/soc/sof/sof-pci-dev.c b/sound/soc/sof/sof-pci-dev.c
index 4c9596742844..7fa2649e56e5 100644
--- a/sound/soc/sof/sof-pci-dev.c
+++ b/sound/soc/sof/sof-pci-dev.c
@@ -83,7 +83,14 @@ static const struct dmi_system_id sof_tplg_table[] = {
},
.driver_data = "sof-adl-max98357a-rt5682-2way.tplg",
},
-
+ {
+ .callback = sof_tplg_cb,
+ .matches = {
+ DMI_MATCH(DMI_PRODUCT_FAMILY, "Google_Brya"),
+ DMI_MATCH(DMI_OEM_STRING, "AUDIO-MAX98360_ALC5682I_I2S_AMP_SSP2"),
+ },
+ .driver_data = "sof-adl-max98357a-rt5682.tplg",
+ },
{}
};
@@ -146,6 +153,11 @@ int sof_pci_probe(struct pci_dev *pci, const struct pci_device_id *pci_id)
dev_dbg(&pci->dev, "PCI DSP detected");
+ if (!desc) {
+ dev_err(dev, "error: no matching PCI descriptor\n");
+ return -ENODEV;
+ }
+
if (!desc->ops) {
dev_err(dev, "error: no matching PCI descriptor ops\n");
return -ENODEV;
diff --git a/sound/soc/sof/topology.c b/sound/soc/sof/topology.c
index 9b11e9795a7a..3e5b319b44c7 100644
--- a/sound/soc/sof/topology.c
+++ b/sound/soc/sof/topology.c
@@ -904,8 +904,10 @@ static int sof_control_load(struct snd_soc_component *scomp, int index,
return -ENOMEM;
scontrol->name = kstrdup(hdr->name, GFP_KERNEL);
- if (!scontrol->name)
+ if (!scontrol->name) {
+ kfree(scontrol);
return -ENOMEM;
+ }
scontrol->scomp = scomp;
scontrol->access = kc->access;
@@ -941,11 +943,13 @@ static int sof_control_load(struct snd_soc_component *scomp, int index,
default:
dev_warn(scomp->dev, "control type not supported %d:%d:%d\n",
hdr->ops.get, hdr->ops.put, hdr->ops.info);
+ kfree(scontrol->name);
kfree(scontrol);
return 0;
}
if (ret < 0) {
+ kfree(scontrol->name);
kfree(scontrol);
return ret;
}
@@ -1068,6 +1072,46 @@ static int sof_connect_dai_widget(struct snd_soc_component *scomp,
return 0;
}
+static void sof_disconnect_dai_widget(struct snd_soc_component *scomp,
+ struct snd_soc_dapm_widget *w)
+{
+ struct snd_soc_card *card = scomp->card;
+ struct snd_soc_pcm_runtime *rtd;
+ struct snd_soc_dai *cpu_dai;
+ int i;
+
+ if (!w->sname)
+ return;
+
+ list_for_each_entry(rtd, &card->rtd_list, list) {
+ /* does stream match DAI link ? */
+ if (!rtd->dai_link->stream_name ||
+ strcmp(w->sname, rtd->dai_link->stream_name))
+ continue;
+
+ switch (w->id) {
+ case snd_soc_dapm_dai_out:
+ for_each_rtd_cpu_dais(rtd, i, cpu_dai) {
+ if (cpu_dai->capture_widget == w) {
+ cpu_dai->capture_widget = NULL;
+ break;
+ }
+ }
+ break;
+ case snd_soc_dapm_dai_in:
+ for_each_rtd_cpu_dais(rtd, i, cpu_dai) {
+ if (cpu_dai->playback_widget == w) {
+ cpu_dai->playback_widget = NULL;
+ break;
+ }
+ }
+ break;
+ default:
+ break;
+ }
+ }
+}
+
/* bind PCM ID to host component ID */
static int spcm_bind(struct snd_soc_component *scomp, struct snd_sof_pcm *spcm,
int dir)
@@ -1353,6 +1397,9 @@ static int sof_widget_unload(struct snd_soc_component *scomp,
if (dai)
list_del(&dai->list);
+
+ sof_disconnect_dai_widget(scomp, widget);
+
break;
default:
break;
@@ -1380,6 +1427,7 @@ static int sof_widget_unload(struct snd_soc_component *scomp,
}
kfree(scontrol->ipc_control_data);
list_del(&scontrol->list);
+ kfree(scontrol->name);
kfree(scontrol);
}
diff --git a/sound/usb/midi.c b/sound/usb/midi.c
index 2c01649c70f6..7c6ca2b433a5 100644
--- a/sound/usb/midi.c
+++ b/sound/usb/midi.c
@@ -1194,6 +1194,7 @@ static void snd_usbmidi_output_drain(struct snd_rawmidi_substream *substream)
} while (drain_urbs && timeout);
finish_wait(&ep->drain_wait, &wait);
}
+ port->active = 0;
spin_unlock_irq(&ep->buffer_lock);
}
diff --git a/sound/usb/mixer_maps.c b/sound/usb/mixer_maps.c
index 64f5544d0a0a..7ef7a8abcc2b 100644
--- a/sound/usb/mixer_maps.c
+++ b/sound/usb/mixer_maps.c
@@ -599,6 +599,10 @@ static const struct usbmix_ctl_map usbmix_ctl_maps[] = {
.id = USB_ID(0x0db0, 0x419c),
.map = msi_mpg_x570s_carbon_max_wifi_alc4080_map,
},
+ { /* MSI MAG X570S Torpedo Max */
+ .id = USB_ID(0x0db0, 0xa073),
+ .map = msi_mpg_x570s_carbon_max_wifi_alc4080_map,
+ },
{ /* MSI TRX40 */
.id = USB_ID(0x0db0, 0x543d),
.map = trx40_mobo_map,
diff --git a/sound/usb/quirks-table.h b/sound/usb/quirks-table.h
index 0ea39565e623..40a5e3eb4ef2 100644
--- a/sound/usb/quirks-table.h
+++ b/sound/usb/quirks-table.h
@@ -3235,6 +3235,15 @@ YAMAHA_DEVICE(0x7010, "UB99"),
}
},
+/* Rane SL-1 */
+{
+ USB_DEVICE(0x13e5, 0x0001),
+ .driver_info = (unsigned long) & (const struct snd_usb_audio_quirk) {
+ .ifnum = QUIRK_ANY_INTERFACE,
+ .type = QUIRK_AUDIO_STANDARD_INTERFACE
+ }
+},
+
/* disabled due to regression for other devices;
* see https://bugzilla.kernel.org/show_bug.cgi?id=199905
*/
diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c
index ab9f3da49941..fbbe59054c3f 100644
--- a/sound/usb/quirks.c
+++ b/sound/usb/quirks.c
@@ -1822,6 +1822,8 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = {
QUIRK_FLAG_IGNORE_CTL_ERROR),
DEVICE_FLG(0x06f8, 0xd002, /* Hercules DJ Console (Macintosh Edition) */
QUIRK_FLAG_IGNORE_CTL_ERROR),
+ DEVICE_FLG(0x0711, 0x5800, /* MCT Trigger 5 USB-to-HDMI */
+ QUIRK_FLAG_GET_SAMPLE_RATE),
DEVICE_FLG(0x074d, 0x3553, /* Outlaw RR2150 (Micronas UAC3553B) */
QUIRK_FLAG_GET_SAMPLE_RATE),
DEVICE_FLG(0x08bb, 0x2702, /* LineX FM Transmitter */
diff --git a/tools/Makefile b/tools/Makefile
index db2f7b8ebed5..724134f0e56c 100644
--- a/tools/Makefile
+++ b/tools/Makefile
@@ -24,6 +24,7 @@ help:
@echo ' intel-speed-select - Intel Speed Select tool'
@echo ' kvm_stat - top-like utility for displaying kvm statistics'
@echo ' leds - LEDs tools'
+ @echo ' nolibc - nolibc headers testing and installation'
@echo ' objtool - an ELF object analysis tool'
@echo ' pci - PCI tools'
@echo ' perf - Linux performance measurement and analysis tool'
@@ -74,6 +75,9 @@ bpf/%: FORCE
libapi: FORCE
$(call descend,lib/api)
+nolibc_%: FORCE
+ $(call descend,include/nolibc,$(patsubst nolibc_%,%,$@))
+
# The perf build does not follow the descend function setup,
# invoking it via it's own make rule.
PERF_O = $(if $(O),$(O)/tools/perf,)
diff --git a/tools/build/Makefile.feature b/tools/build/Makefile.feature
index ae61f464043a..c6a48d0ef9ff 100644
--- a/tools/build/Makefile.feature
+++ b/tools/build/Makefile.feature
@@ -98,6 +98,7 @@ FEATURE_TESTS_EXTRA := \
llvm-version \
clang \
libbpf \
+ libbpf-btf__load_from_kernel_by_id \
libpfm4 \
libdebuginfod \
clang-bpf-co-re
diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile
index de66e1cc0734..cb4a2a4fa2e4 100644
--- a/tools/build/feature/Makefile
+++ b/tools/build/feature/Makefile
@@ -57,6 +57,7 @@ FILES= \
test-lzma.bin \
test-bpf.bin \
test-libbpf.bin \
+ test-libbpf-btf__load_from_kernel_by_id.bin \
test-get_cpuid.bin \
test-sdt.bin \
test-cxx.bin \
@@ -287,6 +288,9 @@ $(OUTPUT)test-bpf.bin:
$(OUTPUT)test-libbpf.bin:
$(BUILD) -lbpf
+$(OUTPUT)test-libbpf-btf__load_from_kernel_by_id.bin:
+ $(BUILD) -lbpf
+
$(OUTPUT)test-sdt.bin:
$(BUILD)
diff --git a/tools/build/feature/test-libbpf-btf__load_from_kernel_by_id.c b/tools/build/feature/test-libbpf-btf__load_from_kernel_by_id.c
new file mode 100644
index 000000000000..f7c084428735
--- /dev/null
+++ b/tools/build/feature/test-libbpf-btf__load_from_kernel_by_id.c
@@ -0,0 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <bpf/libbpf.h>
+
+int main(void)
+{
+ return btf__load_from_kernel_by_id(20151128, NULL);
+}
diff --git a/tools/include/linux/slab.h b/tools/include/linux/slab.h
index f41d8a0eb1a4..0616409513eb 100644
--- a/tools/include/linux/slab.h
+++ b/tools/include/linux/slab.h
@@ -28,7 +28,13 @@ static inline void *kzalloc(size_t size, gfp_t gfp)
return kmalloc(size, gfp | __GFP_ZERO);
}
-void *kmem_cache_alloc(struct kmem_cache *cachep, int flags);
+struct list_lru;
+
+void *kmem_cache_alloc_lru(struct kmem_cache *cachep, struct list_lru *, int flags);
+static inline void *kmem_cache_alloc(struct kmem_cache *cachep, int flags)
+{
+ return kmem_cache_alloc_lru(cachep, NULL, flags);
+}
void kmem_cache_free(struct kmem_cache *cachep, void *objp);
struct kmem_cache *kmem_cache_create(const char *name, unsigned int size,
diff --git a/tools/include/nolibc/Makefile b/tools/include/nolibc/Makefile
new file mode 100644
index 000000000000..7a16d917c185
--- /dev/null
+++ b/tools/include/nolibc/Makefile
@@ -0,0 +1,42 @@
+# SPDX-License-Identifier: GPL-2.0
+# Makefile for nolibc installation and tests
+include ../../scripts/Makefile.include
+
+# we're in ".../tools/include/nolibc"
+ifeq ($(srctree),)
+srctree := $(patsubst %/tools/include/,%,$(dir $(CURDIR)))
+endif
+
+nolibc_arch := $(patsubst arm64,aarch64,$(ARCH))
+arch_file := arch-$(nolibc_arch).h
+all_files := ctype.h errno.h nolibc.h signal.h std.h stdio.h stdlib.h string.h \
+ sys.h time.h types.h unistd.h
+
+# install all headers needed to support a bare-metal compiler
+all:
+
+# Note: when ARCH is "x86" we concatenate both x86_64 and i386
+headers:
+ $(Q)mkdir -p $(OUTPUT)sysroot
+ $(Q)mkdir -p $(OUTPUT)sysroot/include
+ $(Q)cp $(all_files) $(OUTPUT)sysroot/include/
+ $(Q)if [ "$(ARCH)" = "x86" ]; then \
+ sed -e \
+ 's,^#ifndef _NOLIBC_ARCH_X86_64_H,#if !defined(_NOLIBC_ARCH_X86_64_H) \&\& defined(__x86_64__),' \
+ arch-x86_64.h; \
+ sed -e \
+ 's,^#ifndef _NOLIBC_ARCH_I386_H,#if !defined(_NOLIBC_ARCH_I386_H) \&\& !defined(__x86_64__),' \
+ arch-i386.h; \
+ elif [ -e "$(arch_file)" ]; then \
+ cat $(arch_file); \
+ else \
+ echo "Fatal: architecture $(ARCH) not yet supported by nolibc." >&2; \
+ exit 1; \
+ fi > $(OUTPUT)sysroot/include/arch.h
+
+headers_standalone: headers
+ $(Q)$(MAKE) -C $(srctree) headers
+ $(Q)$(MAKE) -C $(srctree) headers_install INSTALL_HDR_PATH=$(OUTPUT)/sysroot
+
+clean:
+ $(call QUIET_CLEAN, nolibc) rm -rf "$(OUTPUT)sysroot"
diff --git a/tools/include/nolibc/arch-aarch64.h b/tools/include/nolibc/arch-aarch64.h
new file mode 100644
index 000000000000..f68baf8f395f
--- /dev/null
+++ b/tools/include/nolibc/arch-aarch64.h
@@ -0,0 +1,199 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * AARCH64 specific definitions for NOLIBC
+ * Copyright (C) 2017-2022 Willy Tarreau <w@1wt.eu>
+ */
+
+#ifndef _NOLIBC_ARCH_AARCH64_H
+#define _NOLIBC_ARCH_AARCH64_H
+
+/* O_* macros for fcntl/open are architecture-specific */
+#define O_RDONLY 0
+#define O_WRONLY 1
+#define O_RDWR 2
+#define O_CREAT 0x40
+#define O_EXCL 0x80
+#define O_NOCTTY 0x100
+#define O_TRUNC 0x200
+#define O_APPEND 0x400
+#define O_NONBLOCK 0x800
+#define O_DIRECTORY 0x4000
+
+/* The struct returned by the newfstatat() syscall. Differs slightly from the
+ * x86_64's stat one by field ordering, so be careful.
+ */
+struct sys_stat_struct {
+ unsigned long st_dev;
+ unsigned long st_ino;
+ unsigned int st_mode;
+ unsigned int st_nlink;
+ unsigned int st_uid;
+ unsigned int st_gid;
+
+ unsigned long st_rdev;
+ unsigned long __pad1;
+ long st_size;
+ int st_blksize;
+ int __pad2;
+
+ long st_blocks;
+ long st_atime;
+ unsigned long st_atime_nsec;
+ long st_mtime;
+
+ unsigned long st_mtime_nsec;
+ long st_ctime;
+ unsigned long st_ctime_nsec;
+ unsigned int __unused[2];
+};
+
+/* Syscalls for AARCH64 :
+ * - registers are 64-bit
+ * - stack is 16-byte aligned
+ * - syscall number is passed in x8
+ * - arguments are in x0, x1, x2, x3, x4, x5
+ * - the system call is performed by calling svc 0
+ * - syscall return comes in x0.
+ * - the arguments are cast to long and assigned into the target registers
+ * which are then simply passed as registers to the asm code, so that we
+ * don't have to experience issues with register constraints.
+ *
+ * On aarch64, select() is not implemented so we have to use pselect6().
+ */
+#define __ARCH_WANT_SYS_PSELECT6
+
+#define my_syscall0(num) \
+({ \
+ register long _num __asm__ ("x8") = (num); \
+ register long _arg1 __asm__ ("x0"); \
+ \
+ __asm__ volatile ( \
+ "svc #0\n" \
+ : "=r"(_arg1) \
+ : "r"(_num) \
+ : "memory", "cc" \
+ ); \
+ _arg1; \
+})
+
+#define my_syscall1(num, arg1) \
+({ \
+ register long _num __asm__ ("x8") = (num); \
+ register long _arg1 __asm__ ("x0") = (long)(arg1); \
+ \
+ __asm__ volatile ( \
+ "svc #0\n" \
+ : "=r"(_arg1) \
+ : "r"(_arg1), \
+ "r"(_num) \
+ : "memory", "cc" \
+ ); \
+ _arg1; \
+})
+
+#define my_syscall2(num, arg1, arg2) \
+({ \
+ register long _num __asm__ ("x8") = (num); \
+ register long _arg1 __asm__ ("x0") = (long)(arg1); \
+ register long _arg2 __asm__ ("x1") = (long)(arg2); \
+ \
+ __asm__ volatile ( \
+ "svc #0\n" \
+ : "=r"(_arg1) \
+ : "r"(_arg1), "r"(_arg2), \
+ "r"(_num) \
+ : "memory", "cc" \
+ ); \
+ _arg1; \
+})
+
+#define my_syscall3(num, arg1, arg2, arg3) \
+({ \
+ register long _num __asm__ ("x8") = (num); \
+ register long _arg1 __asm__ ("x0") = (long)(arg1); \
+ register long _arg2 __asm__ ("x1") = (long)(arg2); \
+ register long _arg3 __asm__ ("x2") = (long)(arg3); \
+ \
+ __asm__ volatile ( \
+ "svc #0\n" \
+ : "=r"(_arg1) \
+ : "r"(_arg1), "r"(_arg2), "r"(_arg3), \
+ "r"(_num) \
+ : "memory", "cc" \
+ ); \
+ _arg1; \
+})
+
+#define my_syscall4(num, arg1, arg2, arg3, arg4) \
+({ \
+ register long _num __asm__ ("x8") = (num); \
+ register long _arg1 __asm__ ("x0") = (long)(arg1); \
+ register long _arg2 __asm__ ("x1") = (long)(arg2); \
+ register long _arg3 __asm__ ("x2") = (long)(arg3); \
+ register long _arg4 __asm__ ("x3") = (long)(arg4); \
+ \
+ __asm__ volatile ( \
+ "svc #0\n" \
+ : "=r"(_arg1) \
+ : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), \
+ "r"(_num) \
+ : "memory", "cc" \
+ ); \
+ _arg1; \
+})
+
+#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \
+({ \
+ register long _num __asm__ ("x8") = (num); \
+ register long _arg1 __asm__ ("x0") = (long)(arg1); \
+ register long _arg2 __asm__ ("x1") = (long)(arg2); \
+ register long _arg3 __asm__ ("x2") = (long)(arg3); \
+ register long _arg4 __asm__ ("x3") = (long)(arg4); \
+ register long _arg5 __asm__ ("x4") = (long)(arg5); \
+ \
+ __asm__ volatile ( \
+ "svc #0\n" \
+ : "=r" (_arg1) \
+ : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \
+ "r"(_num) \
+ : "memory", "cc" \
+ ); \
+ _arg1; \
+})
+
+#define my_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6) \
+({ \
+ register long _num __asm__ ("x8") = (num); \
+ register long _arg1 __asm__ ("x0") = (long)(arg1); \
+ register long _arg2 __asm__ ("x1") = (long)(arg2); \
+ register long _arg3 __asm__ ("x2") = (long)(arg3); \
+ register long _arg4 __asm__ ("x3") = (long)(arg4); \
+ register long _arg5 __asm__ ("x4") = (long)(arg5); \
+ register long _arg6 __asm__ ("x5") = (long)(arg6); \
+ \
+ __asm__ volatile ( \
+ "svc #0\n" \
+ : "=r" (_arg1) \
+ : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \
+ "r"(_arg6), "r"(_num) \
+ : "memory", "cc" \
+ ); \
+ _arg1; \
+})
+
+/* startup code */
+__asm__ (".section .text\n"
+ ".weak _start\n"
+ "_start:\n"
+ "ldr x0, [sp]\n" // argc (x0) was in the stack
+ "add x1, sp, 8\n" // argv (x1) = sp
+ "lsl x2, x0, 3\n" // envp (x2) = 8*argc ...
+ "add x2, x2, 8\n" // + 8 (skip null)
+ "add x2, x2, x1\n" // + argv
+ "and sp, x1, -16\n" // sp must be 16-byte aligned in the callee
+ "bl main\n" // main() returns the status code, we'll exit with it.
+ "mov x8, 93\n" // NR_exit == 93
+ "svc #0\n"
+ "");
+
+#endif // _NOLIBC_ARCH_AARCH64_H
diff --git a/tools/include/nolibc/arch-arm.h b/tools/include/nolibc/arch-arm.h
new file mode 100644
index 000000000000..f31be8e967d6
--- /dev/null
+++ b/tools/include/nolibc/arch-arm.h
@@ -0,0 +1,204 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * ARM specific definitions for NOLIBC
+ * Copyright (C) 2017-2022 Willy Tarreau <w@1wt.eu>
+ */
+
+#ifndef _NOLIBC_ARCH_ARM_H
+#define _NOLIBC_ARCH_ARM_H
+
+/* O_* macros for fcntl/open are architecture-specific */
+#define O_RDONLY 0
+#define O_WRONLY 1
+#define O_RDWR 2
+#define O_CREAT 0x40
+#define O_EXCL 0x80
+#define O_NOCTTY 0x100
+#define O_TRUNC 0x200
+#define O_APPEND 0x400
+#define O_NONBLOCK 0x800
+#define O_DIRECTORY 0x4000
+
+/* The struct returned by the stat() syscall, 32-bit only, the syscall returns
+ * exactly 56 bytes (stops before the unused array). In big endian, the format
+ * differs as devices are returned as short only.
+ */
+struct sys_stat_struct {
+#if defined(__ARMEB__)
+ unsigned short st_dev;
+ unsigned short __pad1;
+#else
+ unsigned long st_dev;
+#endif
+ unsigned long st_ino;
+ unsigned short st_mode;
+ unsigned short st_nlink;
+ unsigned short st_uid;
+ unsigned short st_gid;
+
+#if defined(__ARMEB__)
+ unsigned short st_rdev;
+ unsigned short __pad2;
+#else
+ unsigned long st_rdev;
+#endif
+ unsigned long st_size;
+ unsigned long st_blksize;
+ unsigned long st_blocks;
+
+ unsigned long st_atime;
+ unsigned long st_atime_nsec;
+ unsigned long st_mtime;
+ unsigned long st_mtime_nsec;
+
+ unsigned long st_ctime;
+ unsigned long st_ctime_nsec;
+ unsigned long __unused[2];
+};
+
+/* Syscalls for ARM in ARM or Thumb modes :
+ * - registers are 32-bit
+ * - stack is 8-byte aligned
+ * ( http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.faqs/ka4127.html)
+ * - syscall number is passed in r7
+ * - arguments are in r0, r1, r2, r3, r4, r5
+ * - the system call is performed by calling svc #0
+ * - syscall return comes in r0.
+ * - only lr is clobbered.
+ * - the arguments are cast to long and assigned into the target registers
+ * which are then simply passed as registers to the asm code, so that we
+ * don't have to experience issues with register constraints.
+ * - the syscall number is always specified last in order to allow to force
+ * some registers before (gcc refuses a %-register at the last position).
+ *
+ * Also, ARM supports the old_select syscall if newselect is not available
+ */
+#define __ARCH_WANT_SYS_OLD_SELECT
+
+#define my_syscall0(num) \
+({ \
+ register long _num __asm__ ("r7") = (num); \
+ register long _arg1 __asm__ ("r0"); \
+ \
+ __asm__ volatile ( \
+ "svc #0\n" \
+ : "=r"(_arg1) \
+ : "r"(_num) \
+ : "memory", "cc", "lr" \
+ ); \
+ _arg1; \
+})
+
+#define my_syscall1(num, arg1) \
+({ \
+ register long _num __asm__ ("r7") = (num); \
+ register long _arg1 __asm__ ("r0") = (long)(arg1); \
+ \
+ __asm__ volatile ( \
+ "svc #0\n" \
+ : "=r"(_arg1) \
+ : "r"(_arg1), \
+ "r"(_num) \
+ : "memory", "cc", "lr" \
+ ); \
+ _arg1; \
+})
+
+#define my_syscall2(num, arg1, arg2) \
+({ \
+ register long _num __asm__ ("r7") = (num); \
+ register long _arg1 __asm__ ("r0") = (long)(arg1); \
+ register long _arg2 __asm__ ("r1") = (long)(arg2); \
+ \
+ __asm__ volatile ( \
+ "svc #0\n" \
+ : "=r"(_arg1) \
+ : "r"(_arg1), "r"(_arg2), \
+ "r"(_num) \
+ : "memory", "cc", "lr" \
+ ); \
+ _arg1; \
+})
+
+#define my_syscall3(num, arg1, arg2, arg3) \
+({ \
+ register long _num __asm__ ("r7") = (num); \
+ register long _arg1 __asm__ ("r0") = (long)(arg1); \
+ register long _arg2 __asm__ ("r1") = (long)(arg2); \
+ register long _arg3 __asm__ ("r2") = (long)(arg3); \
+ \
+ __asm__ volatile ( \
+ "svc #0\n" \
+ : "=r"(_arg1) \
+ : "r"(_arg1), "r"(_arg2), "r"(_arg3), \
+ "r"(_num) \
+ : "memory", "cc", "lr" \
+ ); \
+ _arg1; \
+})
+
+#define my_syscall4(num, arg1, arg2, arg3, arg4) \
+({ \
+ register long _num __asm__ ("r7") = (num); \
+ register long _arg1 __asm__ ("r0") = (long)(arg1); \
+ register long _arg2 __asm__ ("r1") = (long)(arg2); \
+ register long _arg3 __asm__ ("r2") = (long)(arg3); \
+ register long _arg4 __asm__ ("r3") = (long)(arg4); \
+ \
+ __asm__ volatile ( \
+ "svc #0\n" \
+ : "=r"(_arg1) \
+ : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), \
+ "r"(_num) \
+ : "memory", "cc", "lr" \
+ ); \
+ _arg1; \
+})
+
+#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \
+({ \
+ register long _num __asm__ ("r7") = (num); \
+ register long _arg1 __asm__ ("r0") = (long)(arg1); \
+ register long _arg2 __asm__ ("r1") = (long)(arg2); \
+ register long _arg3 __asm__ ("r2") = (long)(arg3); \
+ register long _arg4 __asm__ ("r3") = (long)(arg4); \
+ register long _arg5 __asm__ ("r4") = (long)(arg5); \
+ \
+ __asm__ volatile ( \
+ "svc #0\n" \
+ : "=r" (_arg1) \
+ : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \
+ "r"(_num) \
+ : "memory", "cc", "lr" \
+ ); \
+ _arg1; \
+})
+
+/* startup code */
+__asm__ (".section .text\n"
+ ".weak _start\n"
+ "_start:\n"
+#if defined(__THUMBEB__) || defined(__THUMBEL__)
+ /* We enter here in 32-bit mode but if some previous functions were in
+ * 16-bit mode, the assembler cannot know, so we need to tell it we're in
+ * 32-bit now, then switch to 16-bit (is there a better way to do it than
+ * adding 1 by hand ?) and tell the asm we're now in 16-bit mode so that
+ * it generates correct instructions. Note that we do not support thumb1.
+ */
+ ".code 32\n"
+ "add r0, pc, #1\n"
+ "bx r0\n"
+ ".code 16\n"
+#endif
+ "pop {%r0}\n" // argc was in the stack
+ "mov %r1, %sp\n" // argv = sp
+ "add %r2, %r1, %r0, lsl #2\n" // envp = argv + 4*argc ...
+ "add %r2, %r2, $4\n" // ... + 4
+ "and %r3, %r1, $-8\n" // AAPCS : sp must be 8-byte aligned in the
+ "mov %sp, %r3\n" // callee, an bl doesn't push (lr=pc)
+ "bl main\n" // main() returns the status code, we'll exit with it.
+ "movs r7, $1\n" // NR_exit == 1
+ "svc $0x00\n"
+ "");
+
+#endif // _NOLIBC_ARCH_ARM_H
diff --git a/tools/include/nolibc/arch-i386.h b/tools/include/nolibc/arch-i386.h
new file mode 100644
index 000000000000..d7e7212346e2
--- /dev/null
+++ b/tools/include/nolibc/arch-i386.h
@@ -0,0 +1,219 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * i386 specific definitions for NOLIBC
+ * Copyright (C) 2017-2022 Willy Tarreau <w@1wt.eu>
+ */
+
+#ifndef _NOLIBC_ARCH_I386_H
+#define _NOLIBC_ARCH_I386_H
+
+/* O_* macros for fcntl/open are architecture-specific */
+#define O_RDONLY 0
+#define O_WRONLY 1
+#define O_RDWR 2
+#define O_CREAT 0x40
+#define O_EXCL 0x80
+#define O_NOCTTY 0x100
+#define O_TRUNC 0x200
+#define O_APPEND 0x400
+#define O_NONBLOCK 0x800
+#define O_DIRECTORY 0x10000
+
+/* The struct returned by the stat() syscall, 32-bit only, the syscall returns
+ * exactly 56 bytes (stops before the unused array).
+ */
+struct sys_stat_struct {
+ unsigned long st_dev;
+ unsigned long st_ino;
+ unsigned short st_mode;
+ unsigned short st_nlink;
+ unsigned short st_uid;
+ unsigned short st_gid;
+
+ unsigned long st_rdev;
+ unsigned long st_size;
+ unsigned long st_blksize;
+ unsigned long st_blocks;
+
+ unsigned long st_atime;
+ unsigned long st_atime_nsec;
+ unsigned long st_mtime;
+ unsigned long st_mtime_nsec;
+
+ unsigned long st_ctime;
+ unsigned long st_ctime_nsec;
+ unsigned long __unused[2];
+};
+
+/* Syscalls for i386 :
+ * - mostly similar to x86_64
+ * - registers are 32-bit
+ * - syscall number is passed in eax
+ * - arguments are in ebx, ecx, edx, esi, edi, ebp respectively
+ * - all registers are preserved (except eax of course)
+ * - the system call is performed by calling int $0x80
+ * - syscall return comes in eax
+ * - the arguments are cast to long and assigned into the target registers
+ * which are then simply passed as registers to the asm code, so that we
+ * don't have to experience issues with register constraints.
+ * - the syscall number is always specified last in order to allow to force
+ * some registers before (gcc refuses a %-register at the last position).
+ *
+ * Also, i386 supports the old_select syscall if newselect is not available
+ */
+#define __ARCH_WANT_SYS_OLD_SELECT
+
+#define my_syscall0(num) \
+({ \
+ long _ret; \
+ register long _num __asm__ ("eax") = (num); \
+ \
+ __asm__ volatile ( \
+ "int $0x80\n" \
+ : "=a" (_ret) \
+ : "0"(_num) \
+ : "memory", "cc" \
+ ); \
+ _ret; \
+})
+
+#define my_syscall1(num, arg1) \
+({ \
+ long _ret; \
+ register long _num __asm__ ("eax") = (num); \
+ register long _arg1 __asm__ ("ebx") = (long)(arg1); \
+ \
+ __asm__ volatile ( \
+ "int $0x80\n" \
+ : "=a" (_ret) \
+ : "r"(_arg1), \
+ "0"(_num) \
+ : "memory", "cc" \
+ ); \
+ _ret; \
+})
+
+#define my_syscall2(num, arg1, arg2) \
+({ \
+ long _ret; \
+ register long _num __asm__ ("eax") = (num); \
+ register long _arg1 __asm__ ("ebx") = (long)(arg1); \
+ register long _arg2 __asm__ ("ecx") = (long)(arg2); \
+ \
+ __asm__ volatile ( \
+ "int $0x80\n" \
+ : "=a" (_ret) \
+ : "r"(_arg1), "r"(_arg2), \
+ "0"(_num) \
+ : "memory", "cc" \
+ ); \
+ _ret; \
+})
+
+#define my_syscall3(num, arg1, arg2, arg3) \
+({ \
+ long _ret; \
+ register long _num __asm__ ("eax") = (num); \
+ register long _arg1 __asm__ ("ebx") = (long)(arg1); \
+ register long _arg2 __asm__ ("ecx") = (long)(arg2); \
+ register long _arg3 __asm__ ("edx") = (long)(arg3); \
+ \
+ __asm__ volatile ( \
+ "int $0x80\n" \
+ : "=a" (_ret) \
+ : "r"(_arg1), "r"(_arg2), "r"(_arg3), \
+ "0"(_num) \
+ : "memory", "cc" \
+ ); \
+ _ret; \
+})
+
+#define my_syscall4(num, arg1, arg2, arg3, arg4) \
+({ \
+ long _ret; \
+ register long _num __asm__ ("eax") = (num); \
+ register long _arg1 __asm__ ("ebx") = (long)(arg1); \
+ register long _arg2 __asm__ ("ecx") = (long)(arg2); \
+ register long _arg3 __asm__ ("edx") = (long)(arg3); \
+ register long _arg4 __asm__ ("esi") = (long)(arg4); \
+ \
+ __asm__ volatile ( \
+ "int $0x80\n" \
+ : "=a" (_ret) \
+ : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), \
+ "0"(_num) \
+ : "memory", "cc" \
+ ); \
+ _ret; \
+})
+
+#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \
+({ \
+ long _ret; \
+ register long _num __asm__ ("eax") = (num); \
+ register long _arg1 __asm__ ("ebx") = (long)(arg1); \
+ register long _arg2 __asm__ ("ecx") = (long)(arg2); \
+ register long _arg3 __asm__ ("edx") = (long)(arg3); \
+ register long _arg4 __asm__ ("esi") = (long)(arg4); \
+ register long _arg5 __asm__ ("edi") = (long)(arg5); \
+ \
+ __asm__ volatile ( \
+ "int $0x80\n" \
+ : "=a" (_ret) \
+ : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \
+ "0"(_num) \
+ : "memory", "cc" \
+ ); \
+ _ret; \
+})
+
+#define my_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6) \
+({ \
+ long _eax = (long)(num); \
+ long _arg6 = (long)(arg6); /* Always in memory */ \
+ __asm__ volatile ( \
+ "pushl %[_arg6]\n\t" \
+ "pushl %%ebp\n\t" \
+ "movl 4(%%esp),%%ebp\n\t" \
+ "int $0x80\n\t" \
+ "popl %%ebp\n\t" \
+ "addl $4,%%esp\n\t" \
+ : "+a"(_eax) /* %eax */ \
+ : "b"(arg1), /* %ebx */ \
+ "c"(arg2), /* %ecx */ \
+ "d"(arg3), /* %edx */ \
+ "S"(arg4), /* %esi */ \
+ "D"(arg5), /* %edi */ \
+ [_arg6]"m"(_arg6) /* memory */ \
+ : "memory", "cc" \
+ ); \
+ _eax; \
+})
+
+/* startup code */
+/*
+ * i386 System V ABI mandates:
+ * 1) last pushed argument must be 16-byte aligned.
+ * 2) The deepest stack frame should be set to zero
+ *
+ */
+__asm__ (".section .text\n"
+ ".weak _start\n"
+ "_start:\n"
+ "pop %eax\n" // argc (first arg, %eax)
+ "mov %esp, %ebx\n" // argv[] (second arg, %ebx)
+ "lea 4(%ebx,%eax,4),%ecx\n" // then a NULL then envp (third arg, %ecx)
+ "xor %ebp, %ebp\n" // zero the stack frame
+ "and $-16, %esp\n" // x86 ABI : esp must be 16-byte aligned before
+ "sub $4, %esp\n" // the call instruction (args are aligned)
+ "push %ecx\n" // push all registers on the stack so that we
+ "push %ebx\n" // support both regparm and plain stack modes
+ "push %eax\n"
+ "call main\n" // main() returns the status code in %eax
+ "mov %eax, %ebx\n" // retrieve exit code (32-bit int)
+ "movl $1, %eax\n" // NR_exit == 1
+ "int $0x80\n" // exit now
+ "hlt\n" // ensure it does not
+ "");
+
+#endif // _NOLIBC_ARCH_I386_H
diff --git a/tools/include/nolibc/arch-mips.h b/tools/include/nolibc/arch-mips.h
new file mode 100644
index 000000000000..5fc5b8029bff
--- /dev/null
+++ b/tools/include/nolibc/arch-mips.h
@@ -0,0 +1,215 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * MIPS specific definitions for NOLIBC
+ * Copyright (C) 2017-2022 Willy Tarreau <w@1wt.eu>
+ */
+
+#ifndef _NOLIBC_ARCH_MIPS_H
+#define _NOLIBC_ARCH_MIPS_H
+
+/* O_* macros for fcntl/open are architecture-specific */
+#define O_RDONLY 0
+#define O_WRONLY 1
+#define O_RDWR 2
+#define O_APPEND 0x0008
+#define O_NONBLOCK 0x0080
+#define O_CREAT 0x0100
+#define O_TRUNC 0x0200
+#define O_EXCL 0x0400
+#define O_NOCTTY 0x0800
+#define O_DIRECTORY 0x10000
+
+/* The struct returned by the stat() syscall. 88 bytes are returned by the
+ * syscall.
+ */
+struct sys_stat_struct {
+ unsigned int st_dev;
+ long st_pad1[3];
+ unsigned long st_ino;
+ unsigned int st_mode;
+ unsigned int st_nlink;
+ unsigned int st_uid;
+ unsigned int st_gid;
+ unsigned int st_rdev;
+ long st_pad2[2];
+ long st_size;
+ long st_pad3;
+
+ long st_atime;
+ long st_atime_nsec;
+ long st_mtime;
+ long st_mtime_nsec;
+
+ long st_ctime;
+ long st_ctime_nsec;
+ long st_blksize;
+ long st_blocks;
+ long st_pad4[14];
+};
+
+/* Syscalls for MIPS ABI O32 :
+ * - WARNING! there's always a delayed slot!
+ * - WARNING again, the syntax is different, registers take a '$' and numbers
+ * do not.
+ * - registers are 32-bit
+ * - stack is 8-byte aligned
+ * - syscall number is passed in v0 (starts at 0xfa0).
+ * - arguments are in a0, a1, a2, a3, then the stack. The caller needs to
+ * leave some room in the stack for the callee to save a0..a3 if needed.
+ * - Many registers are clobbered, in fact only a0..a2 and s0..s8 are
+ * preserved. See: https://www.linux-mips.org/wiki/Syscall as well as
+ * scall32-o32.S in the kernel sources.
+ * - the system call is performed by calling "syscall"
+ * - syscall return comes in v0, and register a3 needs to be checked to know
+ * if an error occurred, in which case errno is in v0.
+ * - the arguments are cast to long and assigned into the target registers
+ * which are then simply passed as registers to the asm code, so that we
+ * don't have to experience issues with register constraints.
+ */
+
+#define my_syscall0(num) \
+({ \
+ register long _num __asm__ ("v0") = (num); \
+ register long _arg4 __asm__ ("a3"); \
+ \
+ __asm__ volatile ( \
+ "addiu $sp, $sp, -32\n" \
+ "syscall\n" \
+ "addiu $sp, $sp, 32\n" \
+ : "=r"(_num), "=r"(_arg4) \
+ : "r"(_num) \
+ : "memory", "cc", "at", "v1", "hi", "lo", \
+ "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9" \
+ ); \
+ _arg4 ? -_num : _num; \
+})
+
+#define my_syscall1(num, arg1) \
+({ \
+ register long _num __asm__ ("v0") = (num); \
+ register long _arg1 __asm__ ("a0") = (long)(arg1); \
+ register long _arg4 __asm__ ("a3"); \
+ \
+ __asm__ volatile ( \
+ "addiu $sp, $sp, -32\n" \
+ "syscall\n" \
+ "addiu $sp, $sp, 32\n" \
+ : "=r"(_num), "=r"(_arg4) \
+ : "0"(_num), \
+ "r"(_arg1) \
+ : "memory", "cc", "at", "v1", "hi", "lo", \
+ "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9" \
+ ); \
+ _arg4 ? -_num : _num; \
+})
+
+#define my_syscall2(num, arg1, arg2) \
+({ \
+ register long _num __asm__ ("v0") = (num); \
+ register long _arg1 __asm__ ("a0") = (long)(arg1); \
+ register long _arg2 __asm__ ("a1") = (long)(arg2); \
+ register long _arg4 __asm__ ("a3"); \
+ \
+ __asm__ volatile ( \
+ "addiu $sp, $sp, -32\n" \
+ "syscall\n" \
+ "addiu $sp, $sp, 32\n" \
+ : "=r"(_num), "=r"(_arg4) \
+ : "0"(_num), \
+ "r"(_arg1), "r"(_arg2) \
+ : "memory", "cc", "at", "v1", "hi", "lo", \
+ "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9" \
+ ); \
+ _arg4 ? -_num : _num; \
+})
+
+#define my_syscall3(num, arg1, arg2, arg3) \
+({ \
+ register long _num __asm__ ("v0") = (num); \
+ register long _arg1 __asm__ ("a0") = (long)(arg1); \
+ register long _arg2 __asm__ ("a1") = (long)(arg2); \
+ register long _arg3 __asm__ ("a2") = (long)(arg3); \
+ register long _arg4 __asm__ ("a3"); \
+ \
+ __asm__ volatile ( \
+ "addiu $sp, $sp, -32\n" \
+ "syscall\n" \
+ "addiu $sp, $sp, 32\n" \
+ : "=r"(_num), "=r"(_arg4) \
+ : "0"(_num), \
+ "r"(_arg1), "r"(_arg2), "r"(_arg3) \
+ : "memory", "cc", "at", "v1", "hi", "lo", \
+ "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9" \
+ ); \
+ _arg4 ? -_num : _num; \
+})
+
+#define my_syscall4(num, arg1, arg2, arg3, arg4) \
+({ \
+ register long _num __asm__ ("v0") = (num); \
+ register long _arg1 __asm__ ("a0") = (long)(arg1); \
+ register long _arg2 __asm__ ("a1") = (long)(arg2); \
+ register long _arg3 __asm__ ("a2") = (long)(arg3); \
+ register long _arg4 __asm__ ("a3") = (long)(arg4); \
+ \
+ __asm__ volatile ( \
+ "addiu $sp, $sp, -32\n" \
+ "syscall\n" \
+ "addiu $sp, $sp, 32\n" \
+ : "=r" (_num), "=r"(_arg4) \
+ : "0"(_num), \
+ "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4) \
+ : "memory", "cc", "at", "v1", "hi", "lo", \
+ "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9" \
+ ); \
+ _arg4 ? -_num : _num; \
+})
+
+#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \
+({ \
+ register long _num __asm__ ("v0") = (num); \
+ register long _arg1 __asm__ ("a0") = (long)(arg1); \
+ register long _arg2 __asm__ ("a1") = (long)(arg2); \
+ register long _arg3 __asm__ ("a2") = (long)(arg3); \
+ register long _arg4 __asm__ ("a3") = (long)(arg4); \
+ register long _arg5 = (long)(arg5); \
+ \
+ __asm__ volatile ( \
+ "addiu $sp, $sp, -32\n" \
+ "sw %7, 16($sp)\n" \
+ "syscall\n " \
+ "addiu $sp, $sp, 32\n" \
+ : "=r" (_num), "=r"(_arg4) \
+ : "0"(_num), \
+ "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5) \
+ : "memory", "cc", "at", "v1", "hi", "lo", \
+ "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9" \
+ ); \
+ _arg4 ? -_num : _num; \
+})
+
+/* startup code, note that it's called __start on MIPS */
+__asm__ (".section .text\n"
+ ".weak __start\n"
+ ".set nomips16\n"
+ ".set noreorder\n"
+ ".option pic0\n"
+ ".ent __start\n"
+ "__start:\n"
+ "lw $a0,($sp)\n" // argc was in the stack
+ "addiu $a1, $sp, 4\n" // argv = sp + 4
+ "sll $a2, $a0, 2\n" // a2 = argc * 4
+ "add $a2, $a2, $a1\n" // envp = argv + 4*argc ...
+ "addiu $a2, $a2, 4\n" // ... + 4
+ "li $t0, -8\n"
+ "and $sp, $sp, $t0\n" // sp must be 8-byte aligned
+ "addiu $sp,$sp,-16\n" // the callee expects to save a0..a3 there!
+ "jal main\n" // main() returns the status code, we'll exit with it.
+ "nop\n" // delayed slot
+ "move $a0, $v0\n" // retrieve 32-bit exit code from v0
+ "li $v0, 4001\n" // NR_exit == 4001
+ "syscall\n"
+ ".end __start\n"
+ "");
+
+#endif // _NOLIBC_ARCH_MIPS_H
diff --git a/tools/include/nolibc/arch-riscv.h b/tools/include/nolibc/arch-riscv.h
new file mode 100644
index 000000000000..95e2b7924925
--- /dev/null
+++ b/tools/include/nolibc/arch-riscv.h
@@ -0,0 +1,204 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * RISCV (32 and 64) specific definitions for NOLIBC
+ * Copyright (C) 2017-2022 Willy Tarreau <w@1wt.eu>
+ */
+
+#ifndef _NOLIBC_ARCH_RISCV_H
+#define _NOLIBC_ARCH_RISCV_H
+
+/* O_* macros for fcntl/open are architecture-specific */
+#define O_RDONLY 0
+#define O_WRONLY 1
+#define O_RDWR 2
+#define O_CREAT 0x100
+#define O_EXCL 0x200
+#define O_NOCTTY 0x400
+#define O_TRUNC 0x1000
+#define O_APPEND 0x2000
+#define O_NONBLOCK 0x4000
+#define O_DIRECTORY 0x200000
+
+struct sys_stat_struct {
+ unsigned long st_dev; /* Device. */
+ unsigned long st_ino; /* File serial number. */
+ unsigned int st_mode; /* File mode. */
+ unsigned int st_nlink; /* Link count. */
+ unsigned int st_uid; /* User ID of the file's owner. */
+ unsigned int st_gid; /* Group ID of the file's group. */
+ unsigned long st_rdev; /* Device number, if device. */
+ unsigned long __pad1;
+ long st_size; /* Size of file, in bytes. */
+ int st_blksize; /* Optimal block size for I/O. */
+ int __pad2;
+ long st_blocks; /* Number 512-byte blocks allocated. */
+ long st_atime; /* Time of last access. */
+ unsigned long st_atime_nsec;
+ long st_mtime; /* Time of last modification. */
+ unsigned long st_mtime_nsec;
+ long st_ctime; /* Time of last status change. */
+ unsigned long st_ctime_nsec;
+ unsigned int __unused4;
+ unsigned int __unused5;
+};
+
+#if __riscv_xlen == 64
+#define PTRLOG "3"
+#define SZREG "8"
+#elif __riscv_xlen == 32
+#define PTRLOG "2"
+#define SZREG "4"
+#endif
+
+/* Syscalls for RISCV :
+ * - stack is 16-byte aligned
+ * - syscall number is passed in a7
+ * - arguments are in a0, a1, a2, a3, a4, a5
+ * - the system call is performed by calling ecall
+ * - syscall return comes in a0
+ * - the arguments are cast to long and assigned into the target
+ * registers which are then simply passed as registers to the asm code,
+ * so that we don't have to experience issues with register constraints.
+ *
+ * On riscv, select() is not implemented so we have to use pselect6().
+ */
+#define __ARCH_WANT_SYS_PSELECT6
+
+#define my_syscall0(num) \
+({ \
+ register long _num __asm__ ("a7") = (num); \
+ register long _arg1 __asm__ ("a0"); \
+ \
+ __asm__ volatile ( \
+ "ecall\n\t" \
+ : "=r"(_arg1) \
+ : "r"(_num) \
+ : "memory", "cc" \
+ ); \
+ _arg1; \
+})
+
+#define my_syscall1(num, arg1) \
+({ \
+ register long _num __asm__ ("a7") = (num); \
+ register long _arg1 __asm__ ("a0") = (long)(arg1); \
+ \
+ __asm__ volatile ( \
+ "ecall\n" \
+ : "+r"(_arg1) \
+ : "r"(_num) \
+ : "memory", "cc" \
+ ); \
+ _arg1; \
+})
+
+#define my_syscall2(num, arg1, arg2) \
+({ \
+ register long _num __asm__ ("a7") = (num); \
+ register long _arg1 __asm__ ("a0") = (long)(arg1); \
+ register long _arg2 __asm__ ("a1") = (long)(arg2); \
+ \
+ __asm__ volatile ( \
+ "ecall\n" \
+ : "+r"(_arg1) \
+ : "r"(_arg2), \
+ "r"(_num) \
+ : "memory", "cc" \
+ ); \
+ _arg1; \
+})
+
+#define my_syscall3(num, arg1, arg2, arg3) \
+({ \
+ register long _num __asm__ ("a7") = (num); \
+ register long _arg1 __asm__ ("a0") = (long)(arg1); \
+ register long _arg2 __asm__ ("a1") = (long)(arg2); \
+ register long _arg3 __asm__ ("a2") = (long)(arg3); \
+ \
+ __asm__ volatile ( \
+ "ecall\n\t" \
+ : "+r"(_arg1) \
+ : "r"(_arg2), "r"(_arg3), \
+ "r"(_num) \
+ : "memory", "cc" \
+ ); \
+ _arg1; \
+})
+
+#define my_syscall4(num, arg1, arg2, arg3, arg4) \
+({ \
+ register long _num __asm__ ("a7") = (num); \
+ register long _arg1 __asm__ ("a0") = (long)(arg1); \
+ register long _arg2 __asm__ ("a1") = (long)(arg2); \
+ register long _arg3 __asm__ ("a2") = (long)(arg3); \
+ register long _arg4 __asm__ ("a3") = (long)(arg4); \
+ \
+ __asm__ volatile ( \
+ "ecall\n" \
+ : "+r"(_arg1) \
+ : "r"(_arg2), "r"(_arg3), "r"(_arg4), \
+ "r"(_num) \
+ : "memory", "cc" \
+ ); \
+ _arg1; \
+})
+
+#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \
+({ \
+ register long _num __asm__ ("a7") = (num); \
+ register long _arg1 __asm__ ("a0") = (long)(arg1); \
+ register long _arg2 __asm__ ("a1") = (long)(arg2); \
+ register long _arg3 __asm__ ("a2") = (long)(arg3); \
+ register long _arg4 __asm__ ("a3") = (long)(arg4); \
+ register long _arg5 __asm__ ("a4") = (long)(arg5); \
+ \
+ __asm__ volatile ( \
+ "ecall\n" \
+ : "+r"(_arg1) \
+ : "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \
+ "r"(_num) \
+ : "memory", "cc" \
+ ); \
+ _arg1; \
+})
+
+#define my_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6) \
+({ \
+ register long _num __asm__ ("a7") = (num); \
+ register long _arg1 __asm__ ("a0") = (long)(arg1); \
+ register long _arg2 __asm__ ("a1") = (long)(arg2); \
+ register long _arg3 __asm__ ("a2") = (long)(arg3); \
+ register long _arg4 __asm__ ("a3") = (long)(arg4); \
+ register long _arg5 __asm__ ("a4") = (long)(arg5); \
+ register long _arg6 __asm__ ("a5") = (long)(arg6); \
+ \
+ __asm__ volatile ( \
+ "ecall\n" \
+ : "+r"(_arg1) \
+ : "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), "r"(_arg6), \
+ "r"(_num) \
+ : "memory", "cc" \
+ ); \
+ _arg1; \
+})
+
+/* startup code */
+__asm__ (".section .text\n"
+ ".weak _start\n"
+ "_start:\n"
+ ".option push\n"
+ ".option norelax\n"
+ "lla gp, __global_pointer$\n"
+ ".option pop\n"
+ "ld a0, 0(sp)\n" // argc (a0) was in the stack
+ "add a1, sp, "SZREG"\n" // argv (a1) = sp
+ "slli a2, a0, "PTRLOG"\n" // envp (a2) = SZREG*argc ...
+ "add a2, a2, "SZREG"\n" // + SZREG (skip null)
+ "add a2,a2,a1\n" // + argv
+ "andi sp,a1,-16\n" // sp must be 16-byte aligned
+ "call main\n" // main() returns the status code, we'll exit with it.
+ "li a7, 93\n" // NR_exit == 93
+ "ecall\n"
+ "");
+
+#endif // _NOLIBC_ARCH_RISCV_H
diff --git a/tools/include/nolibc/arch-x86_64.h b/tools/include/nolibc/arch-x86_64.h
new file mode 100644
index 000000000000..0e1e9eb8545d
--- /dev/null
+++ b/tools/include/nolibc/arch-x86_64.h
@@ -0,0 +1,215 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * x86_64 specific definitions for NOLIBC
+ * Copyright (C) 2017-2022 Willy Tarreau <w@1wt.eu>
+ */
+
+#ifndef _NOLIBC_ARCH_X86_64_H
+#define _NOLIBC_ARCH_X86_64_H
+
+/* O_* macros for fcntl/open are architecture-specific */
+#define O_RDONLY 0
+#define O_WRONLY 1
+#define O_RDWR 2
+#define O_CREAT 0x40
+#define O_EXCL 0x80
+#define O_NOCTTY 0x100
+#define O_TRUNC 0x200
+#define O_APPEND 0x400
+#define O_NONBLOCK 0x800
+#define O_DIRECTORY 0x10000
+
+/* The struct returned by the stat() syscall, equivalent to stat64(). The
+ * syscall returns 116 bytes and stops in the middle of __unused.
+ */
+struct sys_stat_struct {
+ unsigned long st_dev;
+ unsigned long st_ino;
+ unsigned long st_nlink;
+ unsigned int st_mode;
+ unsigned int st_uid;
+
+ unsigned int st_gid;
+ unsigned int __pad0;
+ unsigned long st_rdev;
+ long st_size;
+ long st_blksize;
+
+ long st_blocks;
+ unsigned long st_atime;
+ unsigned long st_atime_nsec;
+ unsigned long st_mtime;
+
+ unsigned long st_mtime_nsec;
+ unsigned long st_ctime;
+ unsigned long st_ctime_nsec;
+ long __unused[3];
+};
+
+/* Syscalls for x86_64 :
+ * - registers are 64-bit
+ * - syscall number is passed in rax
+ * - arguments are in rdi, rsi, rdx, r10, r8, r9 respectively
+ * - the system call is performed by calling the syscall instruction
+ * - syscall return comes in rax
+ * - rcx and r11 are clobbered, others are preserved.
+ * - the arguments are cast to long and assigned into the target registers
+ * which are then simply passed as registers to the asm code, so that we
+ * don't have to experience issues with register constraints.
+ * - the syscall number is always specified last in order to allow to force
+ * some registers before (gcc refuses a %-register at the last position).
+ * - see also x86-64 ABI section A.2 AMD64 Linux Kernel Conventions, A.2.1
+ * Calling Conventions.
+ *
+ * Link x86-64 ABI: https://gitlab.com/x86-psABIs/x86-64-ABI/-/wikis/home
+ *
+ */
+
+#define my_syscall0(num) \
+({ \
+ long _ret; \
+ register long _num __asm__ ("rax") = (num); \
+ \
+ __asm__ volatile ( \
+ "syscall\n" \
+ : "=a"(_ret) \
+ : "0"(_num) \
+ : "rcx", "r11", "memory", "cc" \
+ ); \
+ _ret; \
+})
+
+#define my_syscall1(num, arg1) \
+({ \
+ long _ret; \
+ register long _num __asm__ ("rax") = (num); \
+ register long _arg1 __asm__ ("rdi") = (long)(arg1); \
+ \
+ __asm__ volatile ( \
+ "syscall\n" \
+ : "=a"(_ret) \
+ : "r"(_arg1), \
+ "0"(_num) \
+ : "rcx", "r11", "memory", "cc" \
+ ); \
+ _ret; \
+})
+
+#define my_syscall2(num, arg1, arg2) \
+({ \
+ long _ret; \
+ register long _num __asm__ ("rax") = (num); \
+ register long _arg1 __asm__ ("rdi") = (long)(arg1); \
+ register long _arg2 __asm__ ("rsi") = (long)(arg2); \
+ \
+ __asm__ volatile ( \
+ "syscall\n" \
+ : "=a"(_ret) \
+ : "r"(_arg1), "r"(_arg2), \
+ "0"(_num) \
+ : "rcx", "r11", "memory", "cc" \
+ ); \
+ _ret; \
+})
+
+#define my_syscall3(num, arg1, arg2, arg3) \
+({ \
+ long _ret; \
+ register long _num __asm__ ("rax") = (num); \
+ register long _arg1 __asm__ ("rdi") = (long)(arg1); \
+ register long _arg2 __asm__ ("rsi") = (long)(arg2); \
+ register long _arg3 __asm__ ("rdx") = (long)(arg3); \
+ \
+ __asm__ volatile ( \
+ "syscall\n" \
+ : "=a"(_ret) \
+ : "r"(_arg1), "r"(_arg2), "r"(_arg3), \
+ "0"(_num) \
+ : "rcx", "r11", "memory", "cc" \
+ ); \
+ _ret; \
+})
+
+#define my_syscall4(num, arg1, arg2, arg3, arg4) \
+({ \
+ long _ret; \
+ register long _num __asm__ ("rax") = (num); \
+ register long _arg1 __asm__ ("rdi") = (long)(arg1); \
+ register long _arg2 __asm__ ("rsi") = (long)(arg2); \
+ register long _arg3 __asm__ ("rdx") = (long)(arg3); \
+ register long _arg4 __asm__ ("r10") = (long)(arg4); \
+ \
+ __asm__ volatile ( \
+ "syscall\n" \
+ : "=a"(_ret) \
+ : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), \
+ "0"(_num) \
+ : "rcx", "r11", "memory", "cc" \
+ ); \
+ _ret; \
+})
+
+#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \
+({ \
+ long _ret; \
+ register long _num __asm__ ("rax") = (num); \
+ register long _arg1 __asm__ ("rdi") = (long)(arg1); \
+ register long _arg2 __asm__ ("rsi") = (long)(arg2); \
+ register long _arg3 __asm__ ("rdx") = (long)(arg3); \
+ register long _arg4 __asm__ ("r10") = (long)(arg4); \
+ register long _arg5 __asm__ ("r8") = (long)(arg5); \
+ \
+ __asm__ volatile ( \
+ "syscall\n" \
+ : "=a"(_ret) \
+ : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \
+ "0"(_num) \
+ : "rcx", "r11", "memory", "cc" \
+ ); \
+ _ret; \
+})
+
+#define my_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6) \
+({ \
+ long _ret; \
+ register long _num __asm__ ("rax") = (num); \
+ register long _arg1 __asm__ ("rdi") = (long)(arg1); \
+ register long _arg2 __asm__ ("rsi") = (long)(arg2); \
+ register long _arg3 __asm__ ("rdx") = (long)(arg3); \
+ register long _arg4 __asm__ ("r10") = (long)(arg4); \
+ register long _arg5 __asm__ ("r8") = (long)(arg5); \
+ register long _arg6 __asm__ ("r9") = (long)(arg6); \
+ \
+ __asm__ volatile ( \
+ "syscall\n" \
+ : "=a"(_ret) \
+ : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \
+ "r"(_arg6), "0"(_num) \
+ : "rcx", "r11", "memory", "cc" \
+ ); \
+ _ret; \
+})
+
+/* startup code */
+/*
+ * x86-64 System V ABI mandates:
+ * 1) %rsp must be 16-byte aligned right before the function call.
+ * 2) The deepest stack frame should be zero (the %rbp).
+ *
+ */
+__asm__ (".section .text\n"
+ ".weak _start\n"
+ "_start:\n"
+ "pop %rdi\n" // argc (first arg, %rdi)
+ "mov %rsp, %rsi\n" // argv[] (second arg, %rsi)
+ "lea 8(%rsi,%rdi,8),%rdx\n" // then a NULL then envp (third arg, %rdx)
+ "xor %ebp, %ebp\n" // zero the stack frame
+ "and $-16, %rsp\n" // x86 ABI : esp must be 16-byte aligned before call
+ "call main\n" // main() returns the status code, we'll exit with it.
+ "mov %eax, %edi\n" // retrieve exit code (32 bit)
+ "mov $60, %eax\n" // NR_exit == 60
+ "syscall\n" // really exit
+ "hlt\n" // ensure it does not return
+ "");
+
+#endif // _NOLIBC_ARCH_X86_64_H
diff --git a/tools/include/nolibc/arch.h b/tools/include/nolibc/arch.h
new file mode 100644
index 000000000000..4c6992321b0d
--- /dev/null
+++ b/tools/include/nolibc/arch.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * Copyright (C) 2017-2022 Willy Tarreau <w@1wt.eu>
+ */
+
+/* Below comes the architecture-specific code. For each architecture, we have
+ * the syscall declarations and the _start code definition. This is the only
+ * global part. On all architectures the kernel puts everything in the stack
+ * before jumping to _start just above us, without any return address (_start
+ * is not a function but an entry pint). So at the stack pointer we find argc.
+ * Then argv[] begins, and ends at the first NULL. Then we have envp which
+ * starts and ends with a NULL as well. So envp=argv+argc+1.
+ */
+
+#ifndef _NOLIBC_ARCH_H
+#define _NOLIBC_ARCH_H
+
+#if defined(__x86_64__)
+#include "arch-x86_64.h"
+#elif defined(__i386__) || defined(__i486__) || defined(__i586__) || defined(__i686__)
+#include "arch-i386.h"
+#elif defined(__ARM_EABI__)
+#include "arch-arm.h"
+#elif defined(__aarch64__)
+#include "arch-aarch64.h"
+#elif defined(__mips__) && defined(_ABIO32)
+#include "arch-mips.h"
+#elif defined(__riscv)
+#include "arch-riscv.h"
+#endif
+
+#endif /* _NOLIBC_ARCH_H */
diff --git a/tools/include/nolibc/ctype.h b/tools/include/nolibc/ctype.h
new file mode 100644
index 000000000000..e3000b2992d7
--- /dev/null
+++ b/tools/include/nolibc/ctype.h
@@ -0,0 +1,99 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * ctype function definitions for NOLIBC
+ * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu>
+ */
+
+#ifndef _NOLIBC_CTYPE_H
+#define _NOLIBC_CTYPE_H
+
+#include "std.h"
+
+/*
+ * As much as possible, please keep functions alphabetically sorted.
+ */
+
+static __attribute__((unused))
+int isascii(int c)
+{
+ /* 0x00..0x7f */
+ return (unsigned int)c <= 0x7f;
+}
+
+static __attribute__((unused))
+int isblank(int c)
+{
+ return c == '\t' || c == ' ';
+}
+
+static __attribute__((unused))
+int iscntrl(int c)
+{
+ /* 0x00..0x1f, 0x7f */
+ return (unsigned int)c < 0x20 || c == 0x7f;
+}
+
+static __attribute__((unused))
+int isdigit(int c)
+{
+ return (unsigned int)(c - '0') < 10;
+}
+
+static __attribute__((unused))
+int isgraph(int c)
+{
+ /* 0x21..0x7e */
+ return (unsigned int)(c - 0x21) < 0x5e;
+}
+
+static __attribute__((unused))
+int islower(int c)
+{
+ return (unsigned int)(c - 'a') < 26;
+}
+
+static __attribute__((unused))
+int isprint(int c)
+{
+ /* 0x20..0x7e */
+ return (unsigned int)(c - 0x20) < 0x5f;
+}
+
+static __attribute__((unused))
+int isspace(int c)
+{
+ /* \t is 0x9, \n is 0xA, \v is 0xB, \f is 0xC, \r is 0xD */
+ return ((unsigned int)c == ' ') || (unsigned int)(c - 0x09) < 5;
+}
+
+static __attribute__((unused))
+int isupper(int c)
+{
+ return (unsigned int)(c - 'A') < 26;
+}
+
+static __attribute__((unused))
+int isxdigit(int c)
+{
+ return isdigit(c) || (unsigned int)(c - 'A') < 6 || (unsigned int)(c - 'a') < 6;
+}
+
+static __attribute__((unused))
+int isalpha(int c)
+{
+ return islower(c) || isupper(c);
+}
+
+static __attribute__((unused))
+int isalnum(int c)
+{
+ return isalpha(c) || isdigit(c);
+}
+
+static __attribute__((unused))
+int ispunct(int c)
+{
+ return isgraph(c) && !isalnum(c);
+}
+
+#endif /* _NOLIBC_CTYPE_H */
diff --git a/tools/include/nolibc/errno.h b/tools/include/nolibc/errno.h
new file mode 100644
index 000000000000..06893d6dfb7a
--- /dev/null
+++ b/tools/include/nolibc/errno.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * Minimal errno definitions for NOLIBC
+ * Copyright (C) 2017-2022 Willy Tarreau <w@1wt.eu>
+ */
+
+#ifndef _NOLIBC_ERRNO_H
+#define _NOLIBC_ERRNO_H
+
+#include <asm/errno.h>
+
+/* this way it will be removed if unused */
+static int errno;
+
+#ifndef NOLIBC_IGNORE_ERRNO
+#define SET_ERRNO(v) do { errno = (v); } while (0)
+#else
+#define SET_ERRNO(v) do { } while (0)
+#endif
+
+
+/* errno codes all ensure that they will not conflict with a valid pointer
+ * because they all correspond to the highest addressable memory page.
+ */
+#define MAX_ERRNO 4095
+
+#endif /* _NOLIBC_ERRNO_H */
diff --git a/tools/include/nolibc/nolibc.h b/tools/include/nolibc/nolibc.h
index c1c285fe494a..b2bc48d3cfe4 100644
--- a/tools/include/nolibc/nolibc.h
+++ b/tools/include/nolibc/nolibc.h
@@ -57,22 +57,32 @@
* having to specify anything.
*
* Finally some very common libc-level functions are provided. It is the case
- * for a few functions usually found in string.h, ctype.h, or stdlib.h. Nothing
- * is currently provided regarding stdio emulation.
+ * for a few functions usually found in string.h, ctype.h, or stdlib.h.
*
- * The macro NOLIBC is always defined, so that it is possible for a program to
- * check this macro to know if it is being built against and decide to disable
- * some features or simply not to include some standard libc files.
- *
- * Ideally this file should be split in multiple files for easier long term
- * maintenance, but provided as a single file as it is now, it's quite
- * convenient to use. Maybe some variations involving a set of includes at the
- * top could work.
+ * The nolibc.h file is only a convenient entry point which includes all other
+ * files. It also defines the NOLIBC macro, so that it is possible for a
+ * program to check this macro to know if it is being built against and decide
+ * to disable some features or simply not to include some standard libc files.
*
* A simple static executable may be built this way :
* $ gcc -fno-asynchronous-unwind-tables -fno-ident -s -Os -nostdlib \
* -static -include nolibc.h -o hello hello.c -lgcc
*
+ * Simple programs meant to be reasonably portable to various libc and using
+ * only a few common includes, may also be built by simply making the include
+ * path point to the nolibc directory:
+ * $ gcc -fno-asynchronous-unwind-tables -fno-ident -s -Os -nostdlib \
+ * -I../nolibc -o hello hello.c -lgcc
+ *
+ * The available standard (but limited) include files are:
+ * ctype.h, errno.h, signal.h, stdio.h, stdlib.h, string.h, time.h
+ *
+ * In addition, the following ones are expected to be provided by the compiler:
+ * float.h, stdarg.h, stddef.h
+ *
+ * The following ones which are part to the C standard are not provided:
+ * assert.h, locale.h, math.h, setjmp.h, limits.h
+ *
* A very useful calling convention table may be found here :
* http://man7.org/linux/man-pages/man2/syscall.2.html
*
@@ -80,2502 +90,22 @@
* https://w3challs.com/syscalls/
*
*/
+#ifndef _NOLIBC_H
+#define _NOLIBC_H
-#include <asm/unistd.h>
-#include <asm/ioctls.h>
-#include <asm/errno.h>
-#include <linux/fs.h>
-#include <linux/loop.h>
-#include <linux/time.h>
+#include "std.h"
+#include "arch.h"
+#include "types.h"
+#include "sys.h"
+#include "ctype.h"
+#include "signal.h"
+#include "stdio.h"
+#include "stdlib.h"
+#include "string.h"
+#include "time.h"
+#include "unistd.h"
+/* Used by programs to avoid std includes */
#define NOLIBC
-/* this way it will be removed if unused */
-static int errno;
-
-#ifndef NOLIBC_IGNORE_ERRNO
-#define SET_ERRNO(v) do { errno = (v); } while (0)
-#else
-#define SET_ERRNO(v) do { } while (0)
-#endif
-
-/* errno codes all ensure that they will not conflict with a valid pointer
- * because they all correspond to the highest addressable memory page.
- */
-#define MAX_ERRNO 4095
-
-/* Declare a few quite common macros and types that usually are in stdlib.h,
- * stdint.h, ctype.h, unistd.h and a few other common locations.
- */
-
-#define NULL ((void *)0)
-
-/* stdint types */
-typedef unsigned char uint8_t;
-typedef signed char int8_t;
-typedef unsigned short uint16_t;
-typedef signed short int16_t;
-typedef unsigned int uint32_t;
-typedef signed int int32_t;
-typedef unsigned long long uint64_t;
-typedef signed long long int64_t;
-typedef unsigned long size_t;
-typedef signed long ssize_t;
-typedef unsigned long uintptr_t;
-typedef signed long intptr_t;
-typedef signed long ptrdiff_t;
-
-/* for stat() */
-typedef unsigned int dev_t;
-typedef unsigned long ino_t;
-typedef unsigned int mode_t;
-typedef signed int pid_t;
-typedef unsigned int uid_t;
-typedef unsigned int gid_t;
-typedef unsigned long nlink_t;
-typedef signed long off_t;
-typedef signed long blksize_t;
-typedef signed long blkcnt_t;
-typedef signed long time_t;
-
-/* for poll() */
-struct pollfd {
- int fd;
- short int events;
- short int revents;
-};
-
-/* for getdents64() */
-struct linux_dirent64 {
- uint64_t d_ino;
- int64_t d_off;
- unsigned short d_reclen;
- unsigned char d_type;
- char d_name[];
-};
-
-/* commonly an fd_set represents 256 FDs */
-#define FD_SETSIZE 256
-typedef struct { uint32_t fd32[FD_SETSIZE/32]; } fd_set;
-
-/* needed by wait4() */
-struct rusage {
- struct timeval ru_utime;
- struct timeval ru_stime;
- long ru_maxrss;
- long ru_ixrss;
- long ru_idrss;
- long ru_isrss;
- long ru_minflt;
- long ru_majflt;
- long ru_nswap;
- long ru_inblock;
- long ru_oublock;
- long ru_msgsnd;
- long ru_msgrcv;
- long ru_nsignals;
- long ru_nvcsw;
- long ru_nivcsw;
-};
-
-/* stat flags (WARNING, octal here) */
-#define S_IFDIR 0040000
-#define S_IFCHR 0020000
-#define S_IFBLK 0060000
-#define S_IFREG 0100000
-#define S_IFIFO 0010000
-#define S_IFLNK 0120000
-#define S_IFSOCK 0140000
-#define S_IFMT 0170000
-
-#define S_ISDIR(mode) (((mode) & S_IFDIR) == S_IFDIR)
-#define S_ISCHR(mode) (((mode) & S_IFCHR) == S_IFCHR)
-#define S_ISBLK(mode) (((mode) & S_IFBLK) == S_IFBLK)
-#define S_ISREG(mode) (((mode) & S_IFREG) == S_IFREG)
-#define S_ISFIFO(mode) (((mode) & S_IFIFO) == S_IFIFO)
-#define S_ISLNK(mode) (((mode) & S_IFLNK) == S_IFLNK)
-#define S_ISSOCK(mode) (((mode) & S_IFSOCK) == S_IFSOCK)
-
-#define DT_UNKNOWN 0
-#define DT_FIFO 1
-#define DT_CHR 2
-#define DT_DIR 4
-#define DT_BLK 6
-#define DT_REG 8
-#define DT_LNK 10
-#define DT_SOCK 12
-
-/* all the *at functions */
-#ifndef AT_FDCWD
-#define AT_FDCWD -100
-#endif
-
-/* lseek */
-#define SEEK_SET 0
-#define SEEK_CUR 1
-#define SEEK_END 2
-
-/* reboot */
-#define LINUX_REBOOT_MAGIC1 0xfee1dead
-#define LINUX_REBOOT_MAGIC2 0x28121969
-#define LINUX_REBOOT_CMD_HALT 0xcdef0123
-#define LINUX_REBOOT_CMD_POWER_OFF 0x4321fedc
-#define LINUX_REBOOT_CMD_RESTART 0x01234567
-#define LINUX_REBOOT_CMD_SW_SUSPEND 0xd000fce2
-
-
-/* The format of the struct as returned by the libc to the application, which
- * significantly differs from the format returned by the stat() syscall flavours.
- */
-struct stat {
- dev_t st_dev; /* ID of device containing file */
- ino_t st_ino; /* inode number */
- mode_t st_mode; /* protection */
- nlink_t st_nlink; /* number of hard links */
- uid_t st_uid; /* user ID of owner */
- gid_t st_gid; /* group ID of owner */
- dev_t st_rdev; /* device ID (if special file) */
- off_t st_size; /* total size, in bytes */
- blksize_t st_blksize; /* blocksize for file system I/O */
- blkcnt_t st_blocks; /* number of 512B blocks allocated */
- time_t st_atime; /* time of last access */
- time_t st_mtime; /* time of last modification */
- time_t st_ctime; /* time of last status change */
-};
-
-#define WEXITSTATUS(status) (((status) & 0xff00) >> 8)
-#define WIFEXITED(status) (((status) & 0x7f) == 0)
-
-/* for SIGCHLD */
-#include <asm/signal.h>
-
-/* Below comes the architecture-specific code. For each architecture, we have
- * the syscall declarations and the _start code definition. This is the only
- * global part. On all architectures the kernel puts everything in the stack
- * before jumping to _start just above us, without any return address (_start
- * is not a function but an entry pint). So at the stack pointer we find argc.
- * Then argv[] begins, and ends at the first NULL. Then we have envp which
- * starts and ends with a NULL as well. So envp=argv+argc+1.
- */
-
-#if defined(__x86_64__)
-/* Syscalls for x86_64 :
- * - registers are 64-bit
- * - syscall number is passed in rax
- * - arguments are in rdi, rsi, rdx, r10, r8, r9 respectively
- * - the system call is performed by calling the syscall instruction
- * - syscall return comes in rax
- * - rcx and r11 are clobbered, others are preserved.
- * - the arguments are cast to long and assigned into the target registers
- * which are then simply passed as registers to the asm code, so that we
- * don't have to experience issues with register constraints.
- * - the syscall number is always specified last in order to allow to force
- * some registers before (gcc refuses a %-register at the last position).
- * - see also x86-64 ABI section A.2 AMD64 Linux Kernel Conventions, A.2.1
- * Calling Conventions.
- *
- * Link x86-64 ABI: https://gitlab.com/x86-psABIs/x86-64-ABI/-/wikis/x86-64-psABI
- *
- */
-
-#define my_syscall0(num) \
-({ \
- long _ret; \
- register long _num asm("rax") = (num); \
- \
- asm volatile ( \
- "syscall\n" \
- : "=a"(_ret) \
- : "0"(_num) \
- : "rcx", "r11", "memory", "cc" \
- ); \
- _ret; \
-})
-
-#define my_syscall1(num, arg1) \
-({ \
- long _ret; \
- register long _num asm("rax") = (num); \
- register long _arg1 asm("rdi") = (long)(arg1); \
- \
- asm volatile ( \
- "syscall\n" \
- : "=a"(_ret) \
- : "r"(_arg1), \
- "0"(_num) \
- : "rcx", "r11", "memory", "cc" \
- ); \
- _ret; \
-})
-
-#define my_syscall2(num, arg1, arg2) \
-({ \
- long _ret; \
- register long _num asm("rax") = (num); \
- register long _arg1 asm("rdi") = (long)(arg1); \
- register long _arg2 asm("rsi") = (long)(arg2); \
- \
- asm volatile ( \
- "syscall\n" \
- : "=a"(_ret) \
- : "r"(_arg1), "r"(_arg2), \
- "0"(_num) \
- : "rcx", "r11", "memory", "cc" \
- ); \
- _ret; \
-})
-
-#define my_syscall3(num, arg1, arg2, arg3) \
-({ \
- long _ret; \
- register long _num asm("rax") = (num); \
- register long _arg1 asm("rdi") = (long)(arg1); \
- register long _arg2 asm("rsi") = (long)(arg2); \
- register long _arg3 asm("rdx") = (long)(arg3); \
- \
- asm volatile ( \
- "syscall\n" \
- : "=a"(_ret) \
- : "r"(_arg1), "r"(_arg2), "r"(_arg3), \
- "0"(_num) \
- : "rcx", "r11", "memory", "cc" \
- ); \
- _ret; \
-})
-
-#define my_syscall4(num, arg1, arg2, arg3, arg4) \
-({ \
- long _ret; \
- register long _num asm("rax") = (num); \
- register long _arg1 asm("rdi") = (long)(arg1); \
- register long _arg2 asm("rsi") = (long)(arg2); \
- register long _arg3 asm("rdx") = (long)(arg3); \
- register long _arg4 asm("r10") = (long)(arg4); \
- \
- asm volatile ( \
- "syscall\n" \
- : "=a"(_ret) \
- : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), \
- "0"(_num) \
- : "rcx", "r11", "memory", "cc" \
- ); \
- _ret; \
-})
-
-#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \
-({ \
- long _ret; \
- register long _num asm("rax") = (num); \
- register long _arg1 asm("rdi") = (long)(arg1); \
- register long _arg2 asm("rsi") = (long)(arg2); \
- register long _arg3 asm("rdx") = (long)(arg3); \
- register long _arg4 asm("r10") = (long)(arg4); \
- register long _arg5 asm("r8") = (long)(arg5); \
- \
- asm volatile ( \
- "syscall\n" \
- : "=a"(_ret) \
- : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \
- "0"(_num) \
- : "rcx", "r11", "memory", "cc" \
- ); \
- _ret; \
-})
-
-#define my_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6) \
-({ \
- long _ret; \
- register long _num asm("rax") = (num); \
- register long _arg1 asm("rdi") = (long)(arg1); \
- register long _arg2 asm("rsi") = (long)(arg2); \
- register long _arg3 asm("rdx") = (long)(arg3); \
- register long _arg4 asm("r10") = (long)(arg4); \
- register long _arg5 asm("r8") = (long)(arg5); \
- register long _arg6 asm("r9") = (long)(arg6); \
- \
- asm volatile ( \
- "syscall\n" \
- : "=a"(_ret) \
- : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \
- "r"(_arg6), "0"(_num) \
- : "rcx", "r11", "memory", "cc" \
- ); \
- _ret; \
-})
-
-/* startup code */
-/*
- * x86-64 System V ABI mandates:
- * 1) %rsp must be 16-byte aligned right before the function call.
- * 2) The deepest stack frame should be zero (the %rbp).
- *
- */
-asm(".section .text\n"
- ".global _start\n"
- "_start:\n"
- "pop %rdi\n" // argc (first arg, %rdi)
- "mov %rsp, %rsi\n" // argv[] (second arg, %rsi)
- "lea 8(%rsi,%rdi,8),%rdx\n" // then a NULL then envp (third arg, %rdx)
- "xor %ebp, %ebp\n" // zero the stack frame
- "and $-16, %rsp\n" // x86 ABI : esp must be 16-byte aligned before call
- "call main\n" // main() returns the status code, we'll exit with it.
- "mov %eax, %edi\n" // retrieve exit code (32 bit)
- "mov $60, %eax\n" // NR_exit == 60
- "syscall\n" // really exit
- "hlt\n" // ensure it does not return
- "");
-
-/* fcntl / open */
-#define O_RDONLY 0
-#define O_WRONLY 1
-#define O_RDWR 2
-#define O_CREAT 0x40
-#define O_EXCL 0x80
-#define O_NOCTTY 0x100
-#define O_TRUNC 0x200
-#define O_APPEND 0x400
-#define O_NONBLOCK 0x800
-#define O_DIRECTORY 0x10000
-
-/* The struct returned by the stat() syscall, equivalent to stat64(). The
- * syscall returns 116 bytes and stops in the middle of __unused.
- */
-struct sys_stat_struct {
- unsigned long st_dev;
- unsigned long st_ino;
- unsigned long st_nlink;
- unsigned int st_mode;
- unsigned int st_uid;
-
- unsigned int st_gid;
- unsigned int __pad0;
- unsigned long st_rdev;
- long st_size;
- long st_blksize;
-
- long st_blocks;
- unsigned long st_atime;
- unsigned long st_atime_nsec;
- unsigned long st_mtime;
-
- unsigned long st_mtime_nsec;
- unsigned long st_ctime;
- unsigned long st_ctime_nsec;
- long __unused[3];
-};
-
-#elif defined(__i386__) || defined(__i486__) || defined(__i586__) || defined(__i686__)
-/* Syscalls for i386 :
- * - mostly similar to x86_64
- * - registers are 32-bit
- * - syscall number is passed in eax
- * - arguments are in ebx, ecx, edx, esi, edi, ebp respectively
- * - all registers are preserved (except eax of course)
- * - the system call is performed by calling int $0x80
- * - syscall return comes in eax
- * - the arguments are cast to long and assigned into the target registers
- * which are then simply passed as registers to the asm code, so that we
- * don't have to experience issues with register constraints.
- * - the syscall number is always specified last in order to allow to force
- * some registers before (gcc refuses a %-register at the last position).
- *
- * Also, i386 supports the old_select syscall if newselect is not available
- */
-#define __ARCH_WANT_SYS_OLD_SELECT
-
-#define my_syscall0(num) \
-({ \
- long _ret; \
- register long _num asm("eax") = (num); \
- \
- asm volatile ( \
- "int $0x80\n" \
- : "=a" (_ret) \
- : "0"(_num) \
- : "memory", "cc" \
- ); \
- _ret; \
-})
-
-#define my_syscall1(num, arg1) \
-({ \
- long _ret; \
- register long _num asm("eax") = (num); \
- register long _arg1 asm("ebx") = (long)(arg1); \
- \
- asm volatile ( \
- "int $0x80\n" \
- : "=a" (_ret) \
- : "r"(_arg1), \
- "0"(_num) \
- : "memory", "cc" \
- ); \
- _ret; \
-})
-
-#define my_syscall2(num, arg1, arg2) \
-({ \
- long _ret; \
- register long _num asm("eax") = (num); \
- register long _arg1 asm("ebx") = (long)(arg1); \
- register long _arg2 asm("ecx") = (long)(arg2); \
- \
- asm volatile ( \
- "int $0x80\n" \
- : "=a" (_ret) \
- : "r"(_arg1), "r"(_arg2), \
- "0"(_num) \
- : "memory", "cc" \
- ); \
- _ret; \
-})
-
-#define my_syscall3(num, arg1, arg2, arg3) \
-({ \
- long _ret; \
- register long _num asm("eax") = (num); \
- register long _arg1 asm("ebx") = (long)(arg1); \
- register long _arg2 asm("ecx") = (long)(arg2); \
- register long _arg3 asm("edx") = (long)(arg3); \
- \
- asm volatile ( \
- "int $0x80\n" \
- : "=a" (_ret) \
- : "r"(_arg1), "r"(_arg2), "r"(_arg3), \
- "0"(_num) \
- : "memory", "cc" \
- ); \
- _ret; \
-})
-
-#define my_syscall4(num, arg1, arg2, arg3, arg4) \
-({ \
- long _ret; \
- register long _num asm("eax") = (num); \
- register long _arg1 asm("ebx") = (long)(arg1); \
- register long _arg2 asm("ecx") = (long)(arg2); \
- register long _arg3 asm("edx") = (long)(arg3); \
- register long _arg4 asm("esi") = (long)(arg4); \
- \
- asm volatile ( \
- "int $0x80\n" \
- : "=a" (_ret) \
- : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), \
- "0"(_num) \
- : "memory", "cc" \
- ); \
- _ret; \
-})
-
-#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \
-({ \
- long _ret; \
- register long _num asm("eax") = (num); \
- register long _arg1 asm("ebx") = (long)(arg1); \
- register long _arg2 asm("ecx") = (long)(arg2); \
- register long _arg3 asm("edx") = (long)(arg3); \
- register long _arg4 asm("esi") = (long)(arg4); \
- register long _arg5 asm("edi") = (long)(arg5); \
- \
- asm volatile ( \
- "int $0x80\n" \
- : "=a" (_ret) \
- : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \
- "0"(_num) \
- : "memory", "cc" \
- ); \
- _ret; \
-})
-
-/* startup code */
-/*
- * i386 System V ABI mandates:
- * 1) last pushed argument must be 16-byte aligned.
- * 2) The deepest stack frame should be set to zero
- *
- */
-asm(".section .text\n"
- ".global _start\n"
- "_start:\n"
- "pop %eax\n" // argc (first arg, %eax)
- "mov %esp, %ebx\n" // argv[] (second arg, %ebx)
- "lea 4(%ebx,%eax,4),%ecx\n" // then a NULL then envp (third arg, %ecx)
- "xor %ebp, %ebp\n" // zero the stack frame
- "and $-16, %esp\n" // x86 ABI : esp must be 16-byte aligned before
- "sub $4, %esp\n" // the call instruction (args are aligned)
- "push %ecx\n" // push all registers on the stack so that we
- "push %ebx\n" // support both regparm and plain stack modes
- "push %eax\n"
- "call main\n" // main() returns the status code in %eax
- "mov %eax, %ebx\n" // retrieve exit code (32-bit int)
- "movl $1, %eax\n" // NR_exit == 1
- "int $0x80\n" // exit now
- "hlt\n" // ensure it does not
- "");
-
-/* fcntl / open */
-#define O_RDONLY 0
-#define O_WRONLY 1
-#define O_RDWR 2
-#define O_CREAT 0x40
-#define O_EXCL 0x80
-#define O_NOCTTY 0x100
-#define O_TRUNC 0x200
-#define O_APPEND 0x400
-#define O_NONBLOCK 0x800
-#define O_DIRECTORY 0x10000
-
-/* The struct returned by the stat() syscall, 32-bit only, the syscall returns
- * exactly 56 bytes (stops before the unused array).
- */
-struct sys_stat_struct {
- unsigned long st_dev;
- unsigned long st_ino;
- unsigned short st_mode;
- unsigned short st_nlink;
- unsigned short st_uid;
- unsigned short st_gid;
-
- unsigned long st_rdev;
- unsigned long st_size;
- unsigned long st_blksize;
- unsigned long st_blocks;
-
- unsigned long st_atime;
- unsigned long st_atime_nsec;
- unsigned long st_mtime;
- unsigned long st_mtime_nsec;
-
- unsigned long st_ctime;
- unsigned long st_ctime_nsec;
- unsigned long __unused[2];
-};
-
-#elif defined(__ARM_EABI__)
-/* Syscalls for ARM in ARM or Thumb modes :
- * - registers are 32-bit
- * - stack is 8-byte aligned
- * ( http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.faqs/ka4127.html)
- * - syscall number is passed in r7
- * - arguments are in r0, r1, r2, r3, r4, r5
- * - the system call is performed by calling svc #0
- * - syscall return comes in r0.
- * - only lr is clobbered.
- * - the arguments are cast to long and assigned into the target registers
- * which are then simply passed as registers to the asm code, so that we
- * don't have to experience issues with register constraints.
- * - the syscall number is always specified last in order to allow to force
- * some registers before (gcc refuses a %-register at the last position).
- *
- * Also, ARM supports the old_select syscall if newselect is not available
- */
-#define __ARCH_WANT_SYS_OLD_SELECT
-
-#define my_syscall0(num) \
-({ \
- register long _num asm("r7") = (num); \
- register long _arg1 asm("r0"); \
- \
- asm volatile ( \
- "svc #0\n" \
- : "=r"(_arg1) \
- : "r"(_num) \
- : "memory", "cc", "lr" \
- ); \
- _arg1; \
-})
-
-#define my_syscall1(num, arg1) \
-({ \
- register long _num asm("r7") = (num); \
- register long _arg1 asm("r0") = (long)(arg1); \
- \
- asm volatile ( \
- "svc #0\n" \
- : "=r"(_arg1) \
- : "r"(_arg1), \
- "r"(_num) \
- : "memory", "cc", "lr" \
- ); \
- _arg1; \
-})
-
-#define my_syscall2(num, arg1, arg2) \
-({ \
- register long _num asm("r7") = (num); \
- register long _arg1 asm("r0") = (long)(arg1); \
- register long _arg2 asm("r1") = (long)(arg2); \
- \
- asm volatile ( \
- "svc #0\n" \
- : "=r"(_arg1) \
- : "r"(_arg1), "r"(_arg2), \
- "r"(_num) \
- : "memory", "cc", "lr" \
- ); \
- _arg1; \
-})
-
-#define my_syscall3(num, arg1, arg2, arg3) \
-({ \
- register long _num asm("r7") = (num); \
- register long _arg1 asm("r0") = (long)(arg1); \
- register long _arg2 asm("r1") = (long)(arg2); \
- register long _arg3 asm("r2") = (long)(arg3); \
- \
- asm volatile ( \
- "svc #0\n" \
- : "=r"(_arg1) \
- : "r"(_arg1), "r"(_arg2), "r"(_arg3), \
- "r"(_num) \
- : "memory", "cc", "lr" \
- ); \
- _arg1; \
-})
-
-#define my_syscall4(num, arg1, arg2, arg3, arg4) \
-({ \
- register long _num asm("r7") = (num); \
- register long _arg1 asm("r0") = (long)(arg1); \
- register long _arg2 asm("r1") = (long)(arg2); \
- register long _arg3 asm("r2") = (long)(arg3); \
- register long _arg4 asm("r3") = (long)(arg4); \
- \
- asm volatile ( \
- "svc #0\n" \
- : "=r"(_arg1) \
- : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), \
- "r"(_num) \
- : "memory", "cc", "lr" \
- ); \
- _arg1; \
-})
-
-#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \
-({ \
- register long _num asm("r7") = (num); \
- register long _arg1 asm("r0") = (long)(arg1); \
- register long _arg2 asm("r1") = (long)(arg2); \
- register long _arg3 asm("r2") = (long)(arg3); \
- register long _arg4 asm("r3") = (long)(arg4); \
- register long _arg5 asm("r4") = (long)(arg5); \
- \
- asm volatile ( \
- "svc #0\n" \
- : "=r" (_arg1) \
- : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \
- "r"(_num) \
- : "memory", "cc", "lr" \
- ); \
- _arg1; \
-})
-
-/* startup code */
-asm(".section .text\n"
- ".global _start\n"
- "_start:\n"
-#if defined(__THUMBEB__) || defined(__THUMBEL__)
- /* We enter here in 32-bit mode but if some previous functions were in
- * 16-bit mode, the assembler cannot know, so we need to tell it we're in
- * 32-bit now, then switch to 16-bit (is there a better way to do it than
- * adding 1 by hand ?) and tell the asm we're now in 16-bit mode so that
- * it generates correct instructions. Note that we do not support thumb1.
- */
- ".code 32\n"
- "add r0, pc, #1\n"
- "bx r0\n"
- ".code 16\n"
-#endif
- "pop {%r0}\n" // argc was in the stack
- "mov %r1, %sp\n" // argv = sp
- "add %r2, %r1, %r0, lsl #2\n" // envp = argv + 4*argc ...
- "add %r2, %r2, $4\n" // ... + 4
- "and %r3, %r1, $-8\n" // AAPCS : sp must be 8-byte aligned in the
- "mov %sp, %r3\n" // callee, an bl doesn't push (lr=pc)
- "bl main\n" // main() returns the status code, we'll exit with it.
- "movs r7, $1\n" // NR_exit == 1
- "svc $0x00\n"
- "");
-
-/* fcntl / open */
-#define O_RDONLY 0
-#define O_WRONLY 1
-#define O_RDWR 2
-#define O_CREAT 0x40
-#define O_EXCL 0x80
-#define O_NOCTTY 0x100
-#define O_TRUNC 0x200
-#define O_APPEND 0x400
-#define O_NONBLOCK 0x800
-#define O_DIRECTORY 0x4000
-
-/* The struct returned by the stat() syscall, 32-bit only, the syscall returns
- * exactly 56 bytes (stops before the unused array). In big endian, the format
- * differs as devices are returned as short only.
- */
-struct sys_stat_struct {
-#if defined(__ARMEB__)
- unsigned short st_dev;
- unsigned short __pad1;
-#else
- unsigned long st_dev;
-#endif
- unsigned long st_ino;
- unsigned short st_mode;
- unsigned short st_nlink;
- unsigned short st_uid;
- unsigned short st_gid;
-#if defined(__ARMEB__)
- unsigned short st_rdev;
- unsigned short __pad2;
-#else
- unsigned long st_rdev;
-#endif
- unsigned long st_size;
- unsigned long st_blksize;
- unsigned long st_blocks;
- unsigned long st_atime;
- unsigned long st_atime_nsec;
- unsigned long st_mtime;
- unsigned long st_mtime_nsec;
- unsigned long st_ctime;
- unsigned long st_ctime_nsec;
- unsigned long __unused[2];
-};
-
-#elif defined(__aarch64__)
-/* Syscalls for AARCH64 :
- * - registers are 64-bit
- * - stack is 16-byte aligned
- * - syscall number is passed in x8
- * - arguments are in x0, x1, x2, x3, x4, x5
- * - the system call is performed by calling svc 0
- * - syscall return comes in x0.
- * - the arguments are cast to long and assigned into the target registers
- * which are then simply passed as registers to the asm code, so that we
- * don't have to experience issues with register constraints.
- *
- * On aarch64, select() is not implemented so we have to use pselect6().
- */
-#define __ARCH_WANT_SYS_PSELECT6
-
-#define my_syscall0(num) \
-({ \
- register long _num asm("x8") = (num); \
- register long _arg1 asm("x0"); \
- \
- asm volatile ( \
- "svc #0\n" \
- : "=r"(_arg1) \
- : "r"(_num) \
- : "memory", "cc" \
- ); \
- _arg1; \
-})
-
-#define my_syscall1(num, arg1) \
-({ \
- register long _num asm("x8") = (num); \
- register long _arg1 asm("x0") = (long)(arg1); \
- \
- asm volatile ( \
- "svc #0\n" \
- : "=r"(_arg1) \
- : "r"(_arg1), \
- "r"(_num) \
- : "memory", "cc" \
- ); \
- _arg1; \
-})
-
-#define my_syscall2(num, arg1, arg2) \
-({ \
- register long _num asm("x8") = (num); \
- register long _arg1 asm("x0") = (long)(arg1); \
- register long _arg2 asm("x1") = (long)(arg2); \
- \
- asm volatile ( \
- "svc #0\n" \
- : "=r"(_arg1) \
- : "r"(_arg1), "r"(_arg2), \
- "r"(_num) \
- : "memory", "cc" \
- ); \
- _arg1; \
-})
-
-#define my_syscall3(num, arg1, arg2, arg3) \
-({ \
- register long _num asm("x8") = (num); \
- register long _arg1 asm("x0") = (long)(arg1); \
- register long _arg2 asm("x1") = (long)(arg2); \
- register long _arg3 asm("x2") = (long)(arg3); \
- \
- asm volatile ( \
- "svc #0\n" \
- : "=r"(_arg1) \
- : "r"(_arg1), "r"(_arg2), "r"(_arg3), \
- "r"(_num) \
- : "memory", "cc" \
- ); \
- _arg1; \
-})
-
-#define my_syscall4(num, arg1, arg2, arg3, arg4) \
-({ \
- register long _num asm("x8") = (num); \
- register long _arg1 asm("x0") = (long)(arg1); \
- register long _arg2 asm("x1") = (long)(arg2); \
- register long _arg3 asm("x2") = (long)(arg3); \
- register long _arg4 asm("x3") = (long)(arg4); \
- \
- asm volatile ( \
- "svc #0\n" \
- : "=r"(_arg1) \
- : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), \
- "r"(_num) \
- : "memory", "cc" \
- ); \
- _arg1; \
-})
-
-#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \
-({ \
- register long _num asm("x8") = (num); \
- register long _arg1 asm("x0") = (long)(arg1); \
- register long _arg2 asm("x1") = (long)(arg2); \
- register long _arg3 asm("x2") = (long)(arg3); \
- register long _arg4 asm("x3") = (long)(arg4); \
- register long _arg5 asm("x4") = (long)(arg5); \
- \
- asm volatile ( \
- "svc #0\n" \
- : "=r" (_arg1) \
- : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \
- "r"(_num) \
- : "memory", "cc" \
- ); \
- _arg1; \
-})
-
-#define my_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6) \
-({ \
- register long _num asm("x8") = (num); \
- register long _arg1 asm("x0") = (long)(arg1); \
- register long _arg2 asm("x1") = (long)(arg2); \
- register long _arg3 asm("x2") = (long)(arg3); \
- register long _arg4 asm("x3") = (long)(arg4); \
- register long _arg5 asm("x4") = (long)(arg5); \
- register long _arg6 asm("x5") = (long)(arg6); \
- \
- asm volatile ( \
- "svc #0\n" \
- : "=r" (_arg1) \
- : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \
- "r"(_arg6), "r"(_num) \
- : "memory", "cc" \
- ); \
- _arg1; \
-})
-
-/* startup code */
-asm(".section .text\n"
- ".global _start\n"
- "_start:\n"
- "ldr x0, [sp]\n" // argc (x0) was in the stack
- "add x1, sp, 8\n" // argv (x1) = sp
- "lsl x2, x0, 3\n" // envp (x2) = 8*argc ...
- "add x2, x2, 8\n" // + 8 (skip null)
- "add x2, x2, x1\n" // + argv
- "and sp, x1, -16\n" // sp must be 16-byte aligned in the callee
- "bl main\n" // main() returns the status code, we'll exit with it.
- "mov x8, 93\n" // NR_exit == 93
- "svc #0\n"
- "");
-
-/* fcntl / open */
-#define O_RDONLY 0
-#define O_WRONLY 1
-#define O_RDWR 2
-#define O_CREAT 0x40
-#define O_EXCL 0x80
-#define O_NOCTTY 0x100
-#define O_TRUNC 0x200
-#define O_APPEND 0x400
-#define O_NONBLOCK 0x800
-#define O_DIRECTORY 0x4000
-
-/* The struct returned by the newfstatat() syscall. Differs slightly from the
- * x86_64's stat one by field ordering, so be careful.
- */
-struct sys_stat_struct {
- unsigned long st_dev;
- unsigned long st_ino;
- unsigned int st_mode;
- unsigned int st_nlink;
- unsigned int st_uid;
- unsigned int st_gid;
-
- unsigned long st_rdev;
- unsigned long __pad1;
- long st_size;
- int st_blksize;
- int __pad2;
-
- long st_blocks;
- long st_atime;
- unsigned long st_atime_nsec;
- long st_mtime;
-
- unsigned long st_mtime_nsec;
- long st_ctime;
- unsigned long st_ctime_nsec;
- unsigned int __unused[2];
-};
-
-#elif defined(__mips__) && defined(_ABIO32)
-/* Syscalls for MIPS ABI O32 :
- * - WARNING! there's always a delayed slot!
- * - WARNING again, the syntax is different, registers take a '$' and numbers
- * do not.
- * - registers are 32-bit
- * - stack is 8-byte aligned
- * - syscall number is passed in v0 (starts at 0xfa0).
- * - arguments are in a0, a1, a2, a3, then the stack. The caller needs to
- * leave some room in the stack for the callee to save a0..a3 if needed.
- * - Many registers are clobbered, in fact only a0..a2 and s0..s8 are
- * preserved. See: https://www.linux-mips.org/wiki/Syscall as well as
- * scall32-o32.S in the kernel sources.
- * - the system call is performed by calling "syscall"
- * - syscall return comes in v0, and register a3 needs to be checked to know
- * if an error occurred, in which case errno is in v0.
- * - the arguments are cast to long and assigned into the target registers
- * which are then simply passed as registers to the asm code, so that we
- * don't have to experience issues with register constraints.
- */
-
-#define my_syscall0(num) \
-({ \
- register long _num asm("v0") = (num); \
- register long _arg4 asm("a3"); \
- \
- asm volatile ( \
- "addiu $sp, $sp, -32\n" \
- "syscall\n" \
- "addiu $sp, $sp, 32\n" \
- : "=r"(_num), "=r"(_arg4) \
- : "r"(_num) \
- : "memory", "cc", "at", "v1", "hi", "lo", \
- "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9" \
- ); \
- _arg4 ? -_num : _num; \
-})
-
-#define my_syscall1(num, arg1) \
-({ \
- register long _num asm("v0") = (num); \
- register long _arg1 asm("a0") = (long)(arg1); \
- register long _arg4 asm("a3"); \
- \
- asm volatile ( \
- "addiu $sp, $sp, -32\n" \
- "syscall\n" \
- "addiu $sp, $sp, 32\n" \
- : "=r"(_num), "=r"(_arg4) \
- : "0"(_num), \
- "r"(_arg1) \
- : "memory", "cc", "at", "v1", "hi", "lo", \
- "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9" \
- ); \
- _arg4 ? -_num : _num; \
-})
-
-#define my_syscall2(num, arg1, arg2) \
-({ \
- register long _num asm("v0") = (num); \
- register long _arg1 asm("a0") = (long)(arg1); \
- register long _arg2 asm("a1") = (long)(arg2); \
- register long _arg4 asm("a3"); \
- \
- asm volatile ( \
- "addiu $sp, $sp, -32\n" \
- "syscall\n" \
- "addiu $sp, $sp, 32\n" \
- : "=r"(_num), "=r"(_arg4) \
- : "0"(_num), \
- "r"(_arg1), "r"(_arg2) \
- : "memory", "cc", "at", "v1", "hi", "lo", \
- "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9" \
- ); \
- _arg4 ? -_num : _num; \
-})
-
-#define my_syscall3(num, arg1, arg2, arg3) \
-({ \
- register long _num asm("v0") = (num); \
- register long _arg1 asm("a0") = (long)(arg1); \
- register long _arg2 asm("a1") = (long)(arg2); \
- register long _arg3 asm("a2") = (long)(arg3); \
- register long _arg4 asm("a3"); \
- \
- asm volatile ( \
- "addiu $sp, $sp, -32\n" \
- "syscall\n" \
- "addiu $sp, $sp, 32\n" \
- : "=r"(_num), "=r"(_arg4) \
- : "0"(_num), \
- "r"(_arg1), "r"(_arg2), "r"(_arg3) \
- : "memory", "cc", "at", "v1", "hi", "lo", \
- "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9" \
- ); \
- _arg4 ? -_num : _num; \
-})
-
-#define my_syscall4(num, arg1, arg2, arg3, arg4) \
-({ \
- register long _num asm("v0") = (num); \
- register long _arg1 asm("a0") = (long)(arg1); \
- register long _arg2 asm("a1") = (long)(arg2); \
- register long _arg3 asm("a2") = (long)(arg3); \
- register long _arg4 asm("a3") = (long)(arg4); \
- \
- asm volatile ( \
- "addiu $sp, $sp, -32\n" \
- "syscall\n" \
- "addiu $sp, $sp, 32\n" \
- : "=r" (_num), "=r"(_arg4) \
- : "0"(_num), \
- "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4) \
- : "memory", "cc", "at", "v1", "hi", "lo", \
- "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9" \
- ); \
- _arg4 ? -_num : _num; \
-})
-
-#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \
-({ \
- register long _num asm("v0") = (num); \
- register long _arg1 asm("a0") = (long)(arg1); \
- register long _arg2 asm("a1") = (long)(arg2); \
- register long _arg3 asm("a2") = (long)(arg3); \
- register long _arg4 asm("a3") = (long)(arg4); \
- register long _arg5 = (long)(arg5); \
- \
- asm volatile ( \
- "addiu $sp, $sp, -32\n" \
- "sw %7, 16($sp)\n" \
- "syscall\n " \
- "addiu $sp, $sp, 32\n" \
- : "=r" (_num), "=r"(_arg4) \
- : "0"(_num), \
- "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5) \
- : "memory", "cc", "at", "v1", "hi", "lo", \
- "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9" \
- ); \
- _arg4 ? -_num : _num; \
-})
-
-/* startup code, note that it's called __start on MIPS */
-asm(".section .text\n"
- ".set nomips16\n"
- ".global __start\n"
- ".set noreorder\n"
- ".option pic0\n"
- ".ent __start\n"
- "__start:\n"
- "lw $a0,($sp)\n" // argc was in the stack
- "addiu $a1, $sp, 4\n" // argv = sp + 4
- "sll $a2, $a0, 2\n" // a2 = argc * 4
- "add $a2, $a2, $a1\n" // envp = argv + 4*argc ...
- "addiu $a2, $a2, 4\n" // ... + 4
- "li $t0, -8\n"
- "and $sp, $sp, $t0\n" // sp must be 8-byte aligned
- "addiu $sp,$sp,-16\n" // the callee expects to save a0..a3 there!
- "jal main\n" // main() returns the status code, we'll exit with it.
- "nop\n" // delayed slot
- "move $a0, $v0\n" // retrieve 32-bit exit code from v0
- "li $v0, 4001\n" // NR_exit == 4001
- "syscall\n"
- ".end __start\n"
- "");
-
-/* fcntl / open */
-#define O_RDONLY 0
-#define O_WRONLY 1
-#define O_RDWR 2
-#define O_APPEND 0x0008
-#define O_NONBLOCK 0x0080
-#define O_CREAT 0x0100
-#define O_TRUNC 0x0200
-#define O_EXCL 0x0400
-#define O_NOCTTY 0x0800
-#define O_DIRECTORY 0x10000
-
-/* The struct returned by the stat() syscall. 88 bytes are returned by the
- * syscall.
- */
-struct sys_stat_struct {
- unsigned int st_dev;
- long st_pad1[3];
- unsigned long st_ino;
- unsigned int st_mode;
- unsigned int st_nlink;
- unsigned int st_uid;
- unsigned int st_gid;
- unsigned int st_rdev;
- long st_pad2[2];
- long st_size;
- long st_pad3;
- long st_atime;
- long st_atime_nsec;
- long st_mtime;
- long st_mtime_nsec;
- long st_ctime;
- long st_ctime_nsec;
- long st_blksize;
- long st_blocks;
- long st_pad4[14];
-};
-
-#elif defined(__riscv)
-
-#if __riscv_xlen == 64
-#define PTRLOG "3"
-#define SZREG "8"
-#elif __riscv_xlen == 32
-#define PTRLOG "2"
-#define SZREG "4"
-#endif
-
-/* Syscalls for RISCV :
- * - stack is 16-byte aligned
- * - syscall number is passed in a7
- * - arguments are in a0, a1, a2, a3, a4, a5
- * - the system call is performed by calling ecall
- * - syscall return comes in a0
- * - the arguments are cast to long and assigned into the target
- * registers which are then simply passed as registers to the asm code,
- * so that we don't have to experience issues with register constraints.
- */
-
-#define my_syscall0(num) \
-({ \
- register long _num asm("a7") = (num); \
- register long _arg1 asm("a0"); \
- \
- asm volatile ( \
- "ecall\n\t" \
- : "=r"(_arg1) \
- : "r"(_num) \
- : "memory", "cc" \
- ); \
- _arg1; \
-})
-
-#define my_syscall1(num, arg1) \
-({ \
- register long _num asm("a7") = (num); \
- register long _arg1 asm("a0") = (long)(arg1); \
- \
- asm volatile ( \
- "ecall\n" \
- : "+r"(_arg1) \
- : "r"(_num) \
- : "memory", "cc" \
- ); \
- _arg1; \
-})
-
-#define my_syscall2(num, arg1, arg2) \
-({ \
- register long _num asm("a7") = (num); \
- register long _arg1 asm("a0") = (long)(arg1); \
- register long _arg2 asm("a1") = (long)(arg2); \
- \
- asm volatile ( \
- "ecall\n" \
- : "+r"(_arg1) \
- : "r"(_arg2), \
- "r"(_num) \
- : "memory", "cc" \
- ); \
- _arg1; \
-})
-
-#define my_syscall3(num, arg1, arg2, arg3) \
-({ \
- register long _num asm("a7") = (num); \
- register long _arg1 asm("a0") = (long)(arg1); \
- register long _arg2 asm("a1") = (long)(arg2); \
- register long _arg3 asm("a2") = (long)(arg3); \
- \
- asm volatile ( \
- "ecall\n\t" \
- : "+r"(_arg1) \
- : "r"(_arg2), "r"(_arg3), \
- "r"(_num) \
- : "memory", "cc" \
- ); \
- _arg1; \
-})
-
-#define my_syscall4(num, arg1, arg2, arg3, arg4) \
-({ \
- register long _num asm("a7") = (num); \
- register long _arg1 asm("a0") = (long)(arg1); \
- register long _arg2 asm("a1") = (long)(arg2); \
- register long _arg3 asm("a2") = (long)(arg3); \
- register long _arg4 asm("a3") = (long)(arg4); \
- \
- asm volatile ( \
- "ecall\n" \
- : "+r"(_arg1) \
- : "r"(_arg2), "r"(_arg3), "r"(_arg4), \
- "r"(_num) \
- : "memory", "cc" \
- ); \
- _arg1; \
-})
-
-#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \
-({ \
- register long _num asm("a7") = (num); \
- register long _arg1 asm("a0") = (long)(arg1); \
- register long _arg2 asm("a1") = (long)(arg2); \
- register long _arg3 asm("a2") = (long)(arg3); \
- register long _arg4 asm("a3") = (long)(arg4); \
- register long _arg5 asm("a4") = (long)(arg5); \
- \
- asm volatile ( \
- "ecall\n" \
- : "+r"(_arg1) \
- : "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \
- "r"(_num) \
- : "memory", "cc" \
- ); \
- _arg1; \
-})
-
-#define my_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6) \
-({ \
- register long _num asm("a7") = (num); \
- register long _arg1 asm("a0") = (long)(arg1); \
- register long _arg2 asm("a1") = (long)(arg2); \
- register long _arg3 asm("a2") = (long)(arg3); \
- register long _arg4 asm("a3") = (long)(arg4); \
- register long _arg5 asm("a4") = (long)(arg5); \
- register long _arg6 asm("a5") = (long)(arg6); \
- \
- asm volatile ( \
- "ecall\n" \
- : "+r"(_arg1) \
- : "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), "r"(_arg6), \
- "r"(_num) \
- : "memory", "cc" \
- ); \
- _arg1; \
-})
-
-/* startup code */
-asm(".section .text\n"
- ".global _start\n"
- "_start:\n"
- ".option push\n"
- ".option norelax\n"
- "lla gp, __global_pointer$\n"
- ".option pop\n"
- "ld a0, 0(sp)\n" // argc (a0) was in the stack
- "add a1, sp, "SZREG"\n" // argv (a1) = sp
- "slli a2, a0, "PTRLOG"\n" // envp (a2) = SZREG*argc ...
- "add a2, a2, "SZREG"\n" // + SZREG (skip null)
- "add a2,a2,a1\n" // + argv
- "andi sp,a1,-16\n" // sp must be 16-byte aligned
- "call main\n" // main() returns the status code, we'll exit with it.
- "li a7, 93\n" // NR_exit == 93
- "ecall\n"
- "");
-
-/* fcntl / open */
-#define O_RDONLY 0
-#define O_WRONLY 1
-#define O_RDWR 2
-#define O_CREAT 0x100
-#define O_EXCL 0x200
-#define O_NOCTTY 0x400
-#define O_TRUNC 0x1000
-#define O_APPEND 0x2000
-#define O_NONBLOCK 0x4000
-#define O_DIRECTORY 0x200000
-
-struct sys_stat_struct {
- unsigned long st_dev; /* Device. */
- unsigned long st_ino; /* File serial number. */
- unsigned int st_mode; /* File mode. */
- unsigned int st_nlink; /* Link count. */
- unsigned int st_uid; /* User ID of the file's owner. */
- unsigned int st_gid; /* Group ID of the file's group. */
- unsigned long st_rdev; /* Device number, if device. */
- unsigned long __pad1;
- long st_size; /* Size of file, in bytes. */
- int st_blksize; /* Optimal block size for I/O. */
- int __pad2;
- long st_blocks; /* Number 512-byte blocks allocated. */
- long st_atime; /* Time of last access. */
- unsigned long st_atime_nsec;
- long st_mtime; /* Time of last modification. */
- unsigned long st_mtime_nsec;
- long st_ctime; /* Time of last status change. */
- unsigned long st_ctime_nsec;
- unsigned int __unused4;
- unsigned int __unused5;
-};
-
-#endif
-
-
-/* Below are the C functions used to declare the raw syscalls. They try to be
- * architecture-agnostic, and return either a success or -errno. Declaring them
- * static will lead to them being inlined in most cases, but it's still possible
- * to reference them by a pointer if needed.
- */
-static __attribute__((unused))
-void *sys_brk(void *addr)
-{
- return (void *)my_syscall1(__NR_brk, addr);
-}
-
-static __attribute__((noreturn,unused))
-void sys_exit(int status)
-{
- my_syscall1(__NR_exit, status & 255);
- while(1); // shut the "noreturn" warnings.
-}
-
-static __attribute__((unused))
-int sys_chdir(const char *path)
-{
- return my_syscall1(__NR_chdir, path);
-}
-
-static __attribute__((unused))
-int sys_chmod(const char *path, mode_t mode)
-{
-#ifdef __NR_fchmodat
- return my_syscall4(__NR_fchmodat, AT_FDCWD, path, mode, 0);
-#elif defined(__NR_chmod)
- return my_syscall2(__NR_chmod, path, mode);
-#else
-#error Neither __NR_fchmodat nor __NR_chmod defined, cannot implement sys_chmod()
-#endif
-}
-
-static __attribute__((unused))
-int sys_chown(const char *path, uid_t owner, gid_t group)
-{
-#ifdef __NR_fchownat
- return my_syscall5(__NR_fchownat, AT_FDCWD, path, owner, group, 0);
-#elif defined(__NR_chown)
- return my_syscall3(__NR_chown, path, owner, group);
-#else
-#error Neither __NR_fchownat nor __NR_chown defined, cannot implement sys_chown()
-#endif
-}
-
-static __attribute__((unused))
-int sys_chroot(const char *path)
-{
- return my_syscall1(__NR_chroot, path);
-}
-
-static __attribute__((unused))
-int sys_close(int fd)
-{
- return my_syscall1(__NR_close, fd);
-}
-
-static __attribute__((unused))
-int sys_dup(int fd)
-{
- return my_syscall1(__NR_dup, fd);
-}
-
-#ifdef __NR_dup3
-static __attribute__((unused))
-int sys_dup3(int old, int new, int flags)
-{
- return my_syscall3(__NR_dup3, old, new, flags);
-}
-#endif
-
-static __attribute__((unused))
-int sys_dup2(int old, int new)
-{
-#ifdef __NR_dup3
- return my_syscall3(__NR_dup3, old, new, 0);
-#elif defined(__NR_dup2)
- return my_syscall2(__NR_dup2, old, new);
-#else
-#error Neither __NR_dup3 nor __NR_dup2 defined, cannot implement sys_dup2()
-#endif
-}
-
-static __attribute__((unused))
-int sys_execve(const char *filename, char *const argv[], char *const envp[])
-{
- return my_syscall3(__NR_execve, filename, argv, envp);
-}
-
-static __attribute__((unused))
-pid_t sys_fork(void)
-{
-#ifdef __NR_clone
- /* note: some archs only have clone() and not fork(). Different archs
- * have a different API, but most archs have the flags on first arg and
- * will not use the rest with no other flag.
- */
- return my_syscall5(__NR_clone, SIGCHLD, 0, 0, 0, 0);
-#elif defined(__NR_fork)
- return my_syscall0(__NR_fork);
-#else
-#error Neither __NR_clone nor __NR_fork defined, cannot implement sys_fork()
-#endif
-}
-
-static __attribute__((unused))
-int sys_fsync(int fd)
-{
- return my_syscall1(__NR_fsync, fd);
-}
-
-static __attribute__((unused))
-int sys_getdents64(int fd, struct linux_dirent64 *dirp, int count)
-{
- return my_syscall3(__NR_getdents64, fd, dirp, count);
-}
-
-static __attribute__((unused))
-pid_t sys_getpgid(pid_t pid)
-{
- return my_syscall1(__NR_getpgid, pid);
-}
-
-static __attribute__((unused))
-pid_t sys_getpgrp(void)
-{
- return sys_getpgid(0);
-}
-
-static __attribute__((unused))
-pid_t sys_getpid(void)
-{
- return my_syscall0(__NR_getpid);
-}
-
-static __attribute__((unused))
-pid_t sys_gettid(void)
-{
- return my_syscall0(__NR_gettid);
-}
-
-static __attribute__((unused))
-int sys_gettimeofday(struct timeval *tv, struct timezone *tz)
-{
- return my_syscall2(__NR_gettimeofday, tv, tz);
-}
-
-static __attribute__((unused))
-int sys_ioctl(int fd, unsigned long req, void *value)
-{
- return my_syscall3(__NR_ioctl, fd, req, value);
-}
-
-static __attribute__((unused))
-int sys_kill(pid_t pid, int signal)
-{
- return my_syscall2(__NR_kill, pid, signal);
-}
-
-static __attribute__((unused))
-int sys_link(const char *old, const char *new)
-{
-#ifdef __NR_linkat
- return my_syscall5(__NR_linkat, AT_FDCWD, old, AT_FDCWD, new, 0);
-#elif defined(__NR_link)
- return my_syscall2(__NR_link, old, new);
-#else
-#error Neither __NR_linkat nor __NR_link defined, cannot implement sys_link()
-#endif
-}
-
-static __attribute__((unused))
-off_t sys_lseek(int fd, off_t offset, int whence)
-{
- return my_syscall3(__NR_lseek, fd, offset, whence);
-}
-
-static __attribute__((unused))
-int sys_mkdir(const char *path, mode_t mode)
-{
-#ifdef __NR_mkdirat
- return my_syscall3(__NR_mkdirat, AT_FDCWD, path, mode);
-#elif defined(__NR_mkdir)
- return my_syscall2(__NR_mkdir, path, mode);
-#else
-#error Neither __NR_mkdirat nor __NR_mkdir defined, cannot implement sys_mkdir()
-#endif
-}
-
-static __attribute__((unused))
-long sys_mknod(const char *path, mode_t mode, dev_t dev)
-{
-#ifdef __NR_mknodat
- return my_syscall4(__NR_mknodat, AT_FDCWD, path, mode, dev);
-#elif defined(__NR_mknod)
- return my_syscall3(__NR_mknod, path, mode, dev);
-#else
-#error Neither __NR_mknodat nor __NR_mknod defined, cannot implement sys_mknod()
-#endif
-}
-
-static __attribute__((unused))
-int sys_mount(const char *src, const char *tgt, const char *fst,
- unsigned long flags, const void *data)
-{
- return my_syscall5(__NR_mount, src, tgt, fst, flags, data);
-}
-
-static __attribute__((unused))
-int sys_open(const char *path, int flags, mode_t mode)
-{
-#ifdef __NR_openat
- return my_syscall4(__NR_openat, AT_FDCWD, path, flags, mode);
-#elif defined(__NR_open)
- return my_syscall3(__NR_open, path, flags, mode);
-#else
-#error Neither __NR_openat nor __NR_open defined, cannot implement sys_open()
-#endif
-}
-
-static __attribute__((unused))
-int sys_pivot_root(const char *new, const char *old)
-{
- return my_syscall2(__NR_pivot_root, new, old);
-}
-
-static __attribute__((unused))
-int sys_poll(struct pollfd *fds, int nfds, int timeout)
-{
-#if defined(__NR_ppoll)
- struct timespec t;
-
- if (timeout >= 0) {
- t.tv_sec = timeout / 1000;
- t.tv_nsec = (timeout % 1000) * 1000000;
- }
- return my_syscall4(__NR_ppoll, fds, nfds, (timeout >= 0) ? &t : NULL, NULL);
-#elif defined(__NR_poll)
- return my_syscall3(__NR_poll, fds, nfds, timeout);
-#else
-#error Neither __NR_ppoll nor __NR_poll defined, cannot implement sys_poll()
-#endif
-}
-
-static __attribute__((unused))
-ssize_t sys_read(int fd, void *buf, size_t count)
-{
- return my_syscall3(__NR_read, fd, buf, count);
-}
-
-static __attribute__((unused))
-ssize_t sys_reboot(int magic1, int magic2, int cmd, void *arg)
-{
- return my_syscall4(__NR_reboot, magic1, magic2, cmd, arg);
-}
-
-static __attribute__((unused))
-int sys_sched_yield(void)
-{
- return my_syscall0(__NR_sched_yield);
-}
-
-static __attribute__((unused))
-int sys_select(int nfds, fd_set *rfds, fd_set *wfds, fd_set *efds, struct timeval *timeout)
-{
-#if defined(__ARCH_WANT_SYS_OLD_SELECT) && !defined(__NR__newselect)
- struct sel_arg_struct {
- unsigned long n;
- fd_set *r, *w, *e;
- struct timeval *t;
- } arg = { .n = nfds, .r = rfds, .w = wfds, .e = efds, .t = timeout };
- return my_syscall1(__NR_select, &arg);
-#elif defined(__ARCH_WANT_SYS_PSELECT6) && defined(__NR_pselect6)
- struct timespec t;
-
- if (timeout) {
- t.tv_sec = timeout->tv_sec;
- t.tv_nsec = timeout->tv_usec * 1000;
- }
- return my_syscall6(__NR_pselect6, nfds, rfds, wfds, efds, timeout ? &t : NULL, NULL);
-#elif defined(__NR__newselect) || defined(__NR_select)
-#ifndef __NR__newselect
-#define __NR__newselect __NR_select
-#endif
- return my_syscall5(__NR__newselect, nfds, rfds, wfds, efds, timeout);
-#else
-#error None of __NR_select, __NR_pselect6, nor __NR__newselect defined, cannot implement sys_select()
-#endif
-}
-
-static __attribute__((unused))
-int sys_setpgid(pid_t pid, pid_t pgid)
-{
- return my_syscall2(__NR_setpgid, pid, pgid);
-}
-
-static __attribute__((unused))
-pid_t sys_setsid(void)
-{
- return my_syscall0(__NR_setsid);
-}
-
-static __attribute__((unused))
-int sys_stat(const char *path, struct stat *buf)
-{
- struct sys_stat_struct stat;
- long ret;
-
-#ifdef __NR_newfstatat
- /* only solution for arm64 */
- ret = my_syscall4(__NR_newfstatat, AT_FDCWD, path, &stat, 0);
-#elif defined(__NR_stat)
- ret = my_syscall2(__NR_stat, path, &stat);
-#else
-#error Neither __NR_newfstatat nor __NR_stat defined, cannot implement sys_stat()
-#endif
- buf->st_dev = stat.st_dev;
- buf->st_ino = stat.st_ino;
- buf->st_mode = stat.st_mode;
- buf->st_nlink = stat.st_nlink;
- buf->st_uid = stat.st_uid;
- buf->st_gid = stat.st_gid;
- buf->st_rdev = stat.st_rdev;
- buf->st_size = stat.st_size;
- buf->st_blksize = stat.st_blksize;
- buf->st_blocks = stat.st_blocks;
- buf->st_atime = stat.st_atime;
- buf->st_mtime = stat.st_mtime;
- buf->st_ctime = stat.st_ctime;
- return ret;
-}
-
-
-static __attribute__((unused))
-int sys_symlink(const char *old, const char *new)
-{
-#ifdef __NR_symlinkat
- return my_syscall3(__NR_symlinkat, old, AT_FDCWD, new);
-#elif defined(__NR_symlink)
- return my_syscall2(__NR_symlink, old, new);
-#else
-#error Neither __NR_symlinkat nor __NR_symlink defined, cannot implement sys_symlink()
-#endif
-}
-
-static __attribute__((unused))
-mode_t sys_umask(mode_t mode)
-{
- return my_syscall1(__NR_umask, mode);
-}
-
-static __attribute__((unused))
-int sys_umount2(const char *path, int flags)
-{
- return my_syscall2(__NR_umount2, path, flags);
-}
-
-static __attribute__((unused))
-int sys_unlink(const char *path)
-{
-#ifdef __NR_unlinkat
- return my_syscall3(__NR_unlinkat, AT_FDCWD, path, 0);
-#elif defined(__NR_unlink)
- return my_syscall1(__NR_unlink, path);
-#else
-#error Neither __NR_unlinkat nor __NR_unlink defined, cannot implement sys_unlink()
-#endif
-}
-
-static __attribute__((unused))
-pid_t sys_wait4(pid_t pid, int *status, int options, struct rusage *rusage)
-{
- return my_syscall4(__NR_wait4, pid, status, options, rusage);
-}
-
-static __attribute__((unused))
-pid_t sys_waitpid(pid_t pid, int *status, int options)
-{
- return sys_wait4(pid, status, options, 0);
-}
-
-static __attribute__((unused))
-pid_t sys_wait(int *status)
-{
- return sys_waitpid(-1, status, 0);
-}
-
-static __attribute__((unused))
-ssize_t sys_write(int fd, const void *buf, size_t count)
-{
- return my_syscall3(__NR_write, fd, buf, count);
-}
-
-
-/* Below are the libc-compatible syscalls which return x or -1 and set errno.
- * They rely on the functions above. Similarly they're marked static so that it
- * is possible to assign pointers to them if needed.
- */
-
-static __attribute__((unused))
-int brk(void *addr)
-{
- void *ret = sys_brk(addr);
-
- if (!ret) {
- SET_ERRNO(ENOMEM);
- return -1;
- }
- return 0;
-}
-
-static __attribute__((noreturn,unused))
-void exit(int status)
-{
- sys_exit(status);
-}
-
-static __attribute__((unused))
-int chdir(const char *path)
-{
- int ret = sys_chdir(path);
-
- if (ret < 0) {
- SET_ERRNO(-ret);
- ret = -1;
- }
- return ret;
-}
-
-static __attribute__((unused))
-int chmod(const char *path, mode_t mode)
-{
- int ret = sys_chmod(path, mode);
-
- if (ret < 0) {
- SET_ERRNO(-ret);
- ret = -1;
- }
- return ret;
-}
-
-static __attribute__((unused))
-int chown(const char *path, uid_t owner, gid_t group)
-{
- int ret = sys_chown(path, owner, group);
-
- if (ret < 0) {
- SET_ERRNO(-ret);
- ret = -1;
- }
- return ret;
-}
-
-static __attribute__((unused))
-int chroot(const char *path)
-{
- int ret = sys_chroot(path);
-
- if (ret < 0) {
- SET_ERRNO(-ret);
- ret = -1;
- }
- return ret;
-}
-
-static __attribute__((unused))
-int close(int fd)
-{
- int ret = sys_close(fd);
-
- if (ret < 0) {
- SET_ERRNO(-ret);
- ret = -1;
- }
- return ret;
-}
-
-static __attribute__((unused))
-int dup(int fd)
-{
- int ret = sys_dup(fd);
-
- if (ret < 0) {
- SET_ERRNO(-ret);
- ret = -1;
- }
- return ret;
-}
-
-static __attribute__((unused))
-int dup2(int old, int new)
-{
- int ret = sys_dup2(old, new);
-
- if (ret < 0) {
- SET_ERRNO(-ret);
- ret = -1;
- }
- return ret;
-}
-
-#ifdef __NR_dup3
-static __attribute__((unused))
-int dup3(int old, int new, int flags)
-{
- int ret = sys_dup3(old, new, flags);
-
- if (ret < 0) {
- SET_ERRNO(-ret);
- ret = -1;
- }
- return ret;
-}
-#endif
-
-static __attribute__((unused))
-int execve(const char *filename, char *const argv[], char *const envp[])
-{
- int ret = sys_execve(filename, argv, envp);
-
- if (ret < 0) {
- SET_ERRNO(-ret);
- ret = -1;
- }
- return ret;
-}
-
-static __attribute__((unused))
-pid_t fork(void)
-{
- pid_t ret = sys_fork();
-
- if (ret < 0) {
- SET_ERRNO(-ret);
- ret = -1;
- }
- return ret;
-}
-
-static __attribute__((unused))
-int fsync(int fd)
-{
- int ret = sys_fsync(fd);
-
- if (ret < 0) {
- SET_ERRNO(-ret);
- ret = -1;
- }
- return ret;
-}
-
-static __attribute__((unused))
-int getdents64(int fd, struct linux_dirent64 *dirp, int count)
-{
- int ret = sys_getdents64(fd, dirp, count);
-
- if (ret < 0) {
- SET_ERRNO(-ret);
- ret = -1;
- }
- return ret;
-}
-
-static __attribute__((unused))
-pid_t getpgid(pid_t pid)
-{
- pid_t ret = sys_getpgid(pid);
-
- if (ret < 0) {
- SET_ERRNO(-ret);
- ret = -1;
- }
- return ret;
-}
-
-static __attribute__((unused))
-pid_t getpgrp(void)
-{
- pid_t ret = sys_getpgrp();
-
- if (ret < 0) {
- SET_ERRNO(-ret);
- ret = -1;
- }
- return ret;
-}
-
-static __attribute__((unused))
-pid_t getpid(void)
-{
- pid_t ret = sys_getpid();
-
- if (ret < 0) {
- SET_ERRNO(-ret);
- ret = -1;
- }
- return ret;
-}
-
-static __attribute__((unused))
-pid_t gettid(void)
-{
- pid_t ret = sys_gettid();
-
- if (ret < 0) {
- SET_ERRNO(-ret);
- ret = -1;
- }
- return ret;
-}
-
-static __attribute__((unused))
-int gettimeofday(struct timeval *tv, struct timezone *tz)
-{
- int ret = sys_gettimeofday(tv, tz);
-
- if (ret < 0) {
- SET_ERRNO(-ret);
- ret = -1;
- }
- return ret;
-}
-
-static __attribute__((unused))
-int ioctl(int fd, unsigned long req, void *value)
-{
- int ret = sys_ioctl(fd, req, value);
-
- if (ret < 0) {
- SET_ERRNO(-ret);
- ret = -1;
- }
- return ret;
-}
-
-static __attribute__((unused))
-int kill(pid_t pid, int signal)
-{
- int ret = sys_kill(pid, signal);
-
- if (ret < 0) {
- SET_ERRNO(-ret);
- ret = -1;
- }
- return ret;
-}
-
-static __attribute__((unused))
-int link(const char *old, const char *new)
-{
- int ret = sys_link(old, new);
-
- if (ret < 0) {
- SET_ERRNO(-ret);
- ret = -1;
- }
- return ret;
-}
-
-static __attribute__((unused))
-off_t lseek(int fd, off_t offset, int whence)
-{
- off_t ret = sys_lseek(fd, offset, whence);
-
- if (ret < 0) {
- SET_ERRNO(-ret);
- ret = -1;
- }
- return ret;
-}
-
-static __attribute__((unused))
-int mkdir(const char *path, mode_t mode)
-{
- int ret = sys_mkdir(path, mode);
-
- if (ret < 0) {
- SET_ERRNO(-ret);
- ret = -1;
- }
- return ret;
-}
-
-static __attribute__((unused))
-int mknod(const char *path, mode_t mode, dev_t dev)
-{
- int ret = sys_mknod(path, mode, dev);
-
- if (ret < 0) {
- SET_ERRNO(-ret);
- ret = -1;
- }
- return ret;
-}
-
-static __attribute__((unused))
-int mount(const char *src, const char *tgt,
- const char *fst, unsigned long flags,
- const void *data)
-{
- int ret = sys_mount(src, tgt, fst, flags, data);
-
- if (ret < 0) {
- SET_ERRNO(-ret);
- ret = -1;
- }
- return ret;
-}
-
-static __attribute__((unused))
-int open(const char *path, int flags, mode_t mode)
-{
- int ret = sys_open(path, flags, mode);
-
- if (ret < 0) {
- SET_ERRNO(-ret);
- ret = -1;
- }
- return ret;
-}
-
-static __attribute__((unused))
-int pivot_root(const char *new, const char *old)
-{
- int ret = sys_pivot_root(new, old);
-
- if (ret < 0) {
- SET_ERRNO(-ret);
- ret = -1;
- }
- return ret;
-}
-
-static __attribute__((unused))
-int poll(struct pollfd *fds, int nfds, int timeout)
-{
- int ret = sys_poll(fds, nfds, timeout);
-
- if (ret < 0) {
- SET_ERRNO(-ret);
- ret = -1;
- }
- return ret;
-}
-
-static __attribute__((unused))
-ssize_t read(int fd, void *buf, size_t count)
-{
- ssize_t ret = sys_read(fd, buf, count);
-
- if (ret < 0) {
- SET_ERRNO(-ret);
- ret = -1;
- }
- return ret;
-}
-
-static __attribute__((unused))
-int reboot(int cmd)
-{
- int ret = sys_reboot(LINUX_REBOOT_MAGIC1, LINUX_REBOOT_MAGIC2, cmd, 0);
-
- if (ret < 0) {
- SET_ERRNO(-ret);
- ret = -1;
- }
- return ret;
-}
-
-static __attribute__((unused))
-void *sbrk(intptr_t inc)
-{
- void *ret;
-
- /* first call to find current end */
- if ((ret = sys_brk(0)) && (sys_brk(ret + inc) == ret + inc))
- return ret + inc;
-
- SET_ERRNO(ENOMEM);
- return (void *)-1;
-}
-
-static __attribute__((unused))
-int sched_yield(void)
-{
- int ret = sys_sched_yield();
-
- if (ret < 0) {
- SET_ERRNO(-ret);
- ret = -1;
- }
- return ret;
-}
-
-static __attribute__((unused))
-int select(int nfds, fd_set *rfds, fd_set *wfds, fd_set *efds, struct timeval *timeout)
-{
- int ret = sys_select(nfds, rfds, wfds, efds, timeout);
-
- if (ret < 0) {
- SET_ERRNO(-ret);
- ret = -1;
- }
- return ret;
-}
-
-static __attribute__((unused))
-int setpgid(pid_t pid, pid_t pgid)
-{
- int ret = sys_setpgid(pid, pgid);
-
- if (ret < 0) {
- SET_ERRNO(-ret);
- ret = -1;
- }
- return ret;
-}
-
-static __attribute__((unused))
-pid_t setsid(void)
-{
- pid_t ret = sys_setsid();
-
- if (ret < 0) {
- SET_ERRNO(-ret);
- ret = -1;
- }
- return ret;
-}
-
-static __attribute__((unused))
-unsigned int sleep(unsigned int seconds)
-{
- struct timeval my_timeval = { seconds, 0 };
-
- if (sys_select(0, 0, 0, 0, &my_timeval) < 0)
- return my_timeval.tv_sec + !!my_timeval.tv_usec;
- else
- return 0;
-}
-
-static __attribute__((unused))
-int msleep(unsigned int msecs)
-{
- struct timeval my_timeval = { msecs / 1000, (msecs % 1000) * 1000 };
-
- if (sys_select(0, 0, 0, 0, &my_timeval) < 0)
- return (my_timeval.tv_sec * 1000) +
- (my_timeval.tv_usec / 1000) +
- !!(my_timeval.tv_usec % 1000);
- else
- return 0;
-}
-
-static __attribute__((unused))
-int stat(const char *path, struct stat *buf)
-{
- int ret = sys_stat(path, buf);
-
- if (ret < 0) {
- SET_ERRNO(-ret);
- ret = -1;
- }
- return ret;
-}
-
-static __attribute__((unused))
-int symlink(const char *old, const char *new)
-{
- int ret = sys_symlink(old, new);
-
- if (ret < 0) {
- SET_ERRNO(-ret);
- ret = -1;
- }
- return ret;
-}
-
-static __attribute__((unused))
-int tcsetpgrp(int fd, pid_t pid)
-{
- return ioctl(fd, TIOCSPGRP, &pid);
-}
-
-static __attribute__((unused))
-mode_t umask(mode_t mode)
-{
- return sys_umask(mode);
-}
-
-static __attribute__((unused))
-int umount2(const char *path, int flags)
-{
- int ret = sys_umount2(path, flags);
-
- if (ret < 0) {
- SET_ERRNO(-ret);
- ret = -1;
- }
- return ret;
-}
-
-static __attribute__((unused))
-int unlink(const char *path)
-{
- int ret = sys_unlink(path);
-
- if (ret < 0) {
- SET_ERRNO(-ret);
- ret = -1;
- }
- return ret;
-}
-
-static __attribute__((unused))
-pid_t wait4(pid_t pid, int *status, int options, struct rusage *rusage)
-{
- pid_t ret = sys_wait4(pid, status, options, rusage);
-
- if (ret < 0) {
- SET_ERRNO(-ret);
- ret = -1;
- }
- return ret;
-}
-
-static __attribute__((unused))
-pid_t waitpid(pid_t pid, int *status, int options)
-{
- pid_t ret = sys_waitpid(pid, status, options);
-
- if (ret < 0) {
- SET_ERRNO(-ret);
- ret = -1;
- }
- return ret;
-}
-
-static __attribute__((unused))
-pid_t wait(int *status)
-{
- pid_t ret = sys_wait(status);
-
- if (ret < 0) {
- SET_ERRNO(-ret);
- ret = -1;
- }
- return ret;
-}
-
-static __attribute__((unused))
-ssize_t write(int fd, const void *buf, size_t count)
-{
- ssize_t ret = sys_write(fd, buf, count);
-
- if (ret < 0) {
- SET_ERRNO(-ret);
- ret = -1;
- }
- return ret;
-}
-
-/* some size-optimized reimplementations of a few common str* and mem*
- * functions. They're marked static, except memcpy() and raise() which are used
- * by libgcc on ARM, so they are marked weak instead in order not to cause an
- * error when building a program made of multiple files (not recommended).
- */
-
-static __attribute__((unused))
-void *memmove(void *dst, const void *src, size_t len)
-{
- ssize_t pos = (dst <= src) ? -1 : (long)len;
- void *ret = dst;
-
- while (len--) {
- pos += (dst <= src) ? 1 : -1;
- ((char *)dst)[pos] = ((char *)src)[pos];
- }
- return ret;
-}
-
-static __attribute__((unused))
-void *memset(void *dst, int b, size_t len)
-{
- char *p = dst;
-
- while (len--)
- *(p++) = b;
- return dst;
-}
-
-static __attribute__((unused))
-int memcmp(const void *s1, const void *s2, size_t n)
-{
- size_t ofs = 0;
- char c1 = 0;
-
- while (ofs < n && !(c1 = ((char *)s1)[ofs] - ((char *)s2)[ofs])) {
- ofs++;
- }
- return c1;
-}
-
-static __attribute__((unused))
-char *strcpy(char *dst, const char *src)
-{
- char *ret = dst;
-
- while ((*dst++ = *src++));
- return ret;
-}
-
-static __attribute__((unused))
-char *strchr(const char *s, int c)
-{
- while (*s) {
- if (*s == (char)c)
- return (char *)s;
- s++;
- }
- return NULL;
-}
-
-static __attribute__((unused))
-char *strrchr(const char *s, int c)
-{
- const char *ret = NULL;
-
- while (*s) {
- if (*s == (char)c)
- ret = s;
- s++;
- }
- return (char *)ret;
-}
-
-static __attribute__((unused))
-size_t nolibc_strlen(const char *str)
-{
- size_t len;
-
- for (len = 0; str[len]; len++);
- return len;
-}
-
-#define strlen(str) ({ \
- __builtin_constant_p((str)) ? \
- __builtin_strlen((str)) : \
- nolibc_strlen((str)); \
-})
-
-static __attribute__((unused))
-int isdigit(int c)
-{
- return (unsigned int)(c - '0') <= 9;
-}
-
-static __attribute__((unused))
-long atol(const char *s)
-{
- unsigned long ret = 0;
- unsigned long d;
- int neg = 0;
-
- if (*s == '-') {
- neg = 1;
- s++;
- }
-
- while (1) {
- d = (*s++) - '0';
- if (d > 9)
- break;
- ret *= 10;
- ret += d;
- }
-
- return neg ? -ret : ret;
-}
-
-static __attribute__((unused))
-int atoi(const char *s)
-{
- return atol(s);
-}
-
-static __attribute__((unused))
-const char *ltoa(long in)
-{
- /* large enough for -9223372036854775808 */
- static char buffer[21];
- char *pos = buffer + sizeof(buffer) - 1;
- int neg = in < 0;
- unsigned long n = neg ? -in : in;
-
- *pos-- = '\0';
- do {
- *pos-- = '0' + n % 10;
- n /= 10;
- if (pos < buffer)
- return pos + 1;
- } while (n);
-
- if (neg)
- *pos-- = '-';
- return pos + 1;
-}
-
-__attribute__((weak,unused))
-void *memcpy(void *dst, const void *src, size_t len)
-{
- return memmove(dst, src, len);
-}
-
-/* needed by libgcc for divide by zero */
-__attribute__((weak,unused))
-int raise(int signal)
-{
- return kill(getpid(), signal);
-}
-
-/* Here come a few helper functions */
-
-static __attribute__((unused))
-void FD_ZERO(fd_set *set)
-{
- memset(set, 0, sizeof(*set));
-}
-
-static __attribute__((unused))
-void FD_SET(int fd, fd_set *set)
-{
- if (fd < 0 || fd >= FD_SETSIZE)
- return;
- set->fd32[fd / 32] |= 1 << (fd & 31);
-}
-
-/* WARNING, it only deals with the 4096 first majors and 256 first minors */
-static __attribute__((unused))
-dev_t makedev(unsigned int major, unsigned int minor)
-{
- return ((major & 0xfff) << 8) | (minor & 0xff);
-}
+#endif /* _NOLIBC_H */
diff --git a/tools/include/nolibc/signal.h b/tools/include/nolibc/signal.h
new file mode 100644
index 000000000000..ef47e71e2be3
--- /dev/null
+++ b/tools/include/nolibc/signal.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * signal function definitions for NOLIBC
+ * Copyright (C) 2017-2022 Willy Tarreau <w@1wt.eu>
+ */
+
+#ifndef _NOLIBC_SIGNAL_H
+#define _NOLIBC_SIGNAL_H
+
+#include "std.h"
+#include "arch.h"
+#include "types.h"
+#include "sys.h"
+
+/* This one is not marked static as it's needed by libgcc for divide by zero */
+__attribute__((weak,unused,section(".text.nolibc_raise")))
+int raise(int signal)
+{
+ return sys_kill(sys_getpid(), signal);
+}
+
+#endif /* _NOLIBC_SIGNAL_H */
diff --git a/tools/include/nolibc/std.h b/tools/include/nolibc/std.h
new file mode 100644
index 000000000000..1747ae125392
--- /dev/null
+++ b/tools/include/nolibc/std.h
@@ -0,0 +1,49 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * Standard definitions and types for NOLIBC
+ * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu>
+ */
+
+#ifndef _NOLIBC_STD_H
+#define _NOLIBC_STD_H
+
+/* Declare a few quite common macros and types that usually are in stdlib.h,
+ * stdint.h, ctype.h, unistd.h and a few other common locations. Please place
+ * integer type definitions and generic macros here, but avoid OS-specific and
+ * syscall-specific stuff, as this file is expected to be included very early.
+ */
+
+/* note: may already be defined */
+#ifndef NULL
+#define NULL ((void *)0)
+#endif
+
+/* stdint types */
+typedef unsigned char uint8_t;
+typedef signed char int8_t;
+typedef unsigned short uint16_t;
+typedef signed short int16_t;
+typedef unsigned int uint32_t;
+typedef signed int int32_t;
+typedef unsigned long long uint64_t;
+typedef signed long long int64_t;
+typedef unsigned long size_t;
+typedef signed long ssize_t;
+typedef unsigned long uintptr_t;
+typedef signed long intptr_t;
+typedef signed long ptrdiff_t;
+
+/* those are commonly provided by sys/types.h */
+typedef unsigned int dev_t;
+typedef unsigned long ino_t;
+typedef unsigned int mode_t;
+typedef signed int pid_t;
+typedef unsigned int uid_t;
+typedef unsigned int gid_t;
+typedef unsigned long nlink_t;
+typedef signed long off_t;
+typedef signed long blksize_t;
+typedef signed long blkcnt_t;
+typedef signed long time_t;
+
+#endif /* _NOLIBC_STD_H */
diff --git a/tools/include/nolibc/stdio.h b/tools/include/nolibc/stdio.h
new file mode 100644
index 000000000000..15dedf8d0902
--- /dev/null
+++ b/tools/include/nolibc/stdio.h
@@ -0,0 +1,306 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * minimal stdio function definitions for NOLIBC
+ * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu>
+ */
+
+#ifndef _NOLIBC_STDIO_H
+#define _NOLIBC_STDIO_H
+
+#include <stdarg.h>
+
+#include "std.h"
+#include "arch.h"
+#include "errno.h"
+#include "types.h"
+#include "sys.h"
+#include "stdlib.h"
+#include "string.h"
+
+#ifndef EOF
+#define EOF (-1)
+#endif
+
+/* just define FILE as a non-empty type */
+typedef struct FILE {
+ char dummy[1];
+} FILE;
+
+/* We define the 3 common stdio files as constant invalid pointers that
+ * are easily recognized.
+ */
+static __attribute__((unused)) FILE* const stdin = (FILE*)-3;
+static __attribute__((unused)) FILE* const stdout = (FILE*)-2;
+static __attribute__((unused)) FILE* const stderr = (FILE*)-1;
+
+/* getc(), fgetc(), getchar() */
+
+#define getc(stream) fgetc(stream)
+
+static __attribute__((unused))
+int fgetc(FILE* stream)
+{
+ unsigned char ch;
+ int fd;
+
+ if (stream < stdin || stream > stderr)
+ return EOF;
+
+ fd = 3 + (long)stream;
+
+ if (read(fd, &ch, 1) <= 0)
+ return EOF;
+ return ch;
+}
+
+static __attribute__((unused))
+int getchar(void)
+{
+ return fgetc(stdin);
+}
+
+
+/* putc(), fputc(), putchar() */
+
+#define putc(c, stream) fputc(c, stream)
+
+static __attribute__((unused))
+int fputc(int c, FILE* stream)
+{
+ unsigned char ch = c;
+ int fd;
+
+ if (stream < stdin || stream > stderr)
+ return EOF;
+
+ fd = 3 + (long)stream;
+
+ if (write(fd, &ch, 1) <= 0)
+ return EOF;
+ return ch;
+}
+
+static __attribute__((unused))
+int putchar(int c)
+{
+ return fputc(c, stdout);
+}
+
+
+/* fwrite(), puts(), fputs(). Note that puts() emits '\n' but not fputs(). */
+
+/* internal fwrite()-like function which only takes a size and returns 0 on
+ * success or EOF on error. It automatically retries on short writes.
+ */
+static __attribute__((unused))
+int _fwrite(const void *buf, size_t size, FILE *stream)
+{
+ ssize_t ret;
+ int fd;
+
+ if (stream < stdin || stream > stderr)
+ return EOF;
+
+ fd = 3 + (long)stream;
+
+ while (size) {
+ ret = write(fd, buf, size);
+ if (ret <= 0)
+ return EOF;
+ size -= ret;
+ buf += ret;
+ }
+ return 0;
+}
+
+static __attribute__((unused))
+size_t fwrite(const void *s, size_t size, size_t nmemb, FILE *stream)
+{
+ size_t written;
+
+ for (written = 0; written < nmemb; written++) {
+ if (_fwrite(s, size, stream) != 0)
+ break;
+ s += size;
+ }
+ return written;
+}
+
+static __attribute__((unused))
+int fputs(const char *s, FILE *stream)
+{
+ return _fwrite(s, strlen(s), stream);
+}
+
+static __attribute__((unused))
+int puts(const char *s)
+{
+ if (fputs(s, stdout) == EOF)
+ return EOF;
+ return putchar('\n');
+}
+
+
+/* fgets() */
+static __attribute__((unused))
+char *fgets(char *s, int size, FILE *stream)
+{
+ int ofs;
+ int c;
+
+ for (ofs = 0; ofs + 1 < size;) {
+ c = fgetc(stream);
+ if (c == EOF)
+ break;
+ s[ofs++] = c;
+ if (c == '\n')
+ break;
+ }
+ if (ofs < size)
+ s[ofs] = 0;
+ return ofs ? s : NULL;
+}
+
+
+/* minimal vfprintf(). It supports the following formats:
+ * - %[l*]{d,u,c,x,p}
+ * - %s
+ * - unknown modifiers are ignored.
+ */
+static __attribute__((unused))
+int vfprintf(FILE *stream, const char *fmt, va_list args)
+{
+ char escape, lpref, c;
+ unsigned long long v;
+ unsigned int written;
+ size_t len, ofs;
+ char tmpbuf[21];
+ const char *outstr;
+
+ written = ofs = escape = lpref = 0;
+ while (1) {
+ c = fmt[ofs++];
+
+ if (escape) {
+ /* we're in an escape sequence, ofs == 1 */
+ escape = 0;
+ if (c == 'c' || c == 'd' || c == 'u' || c == 'x' || c == 'p') {
+ char *out = tmpbuf;
+
+ if (c == 'p')
+ v = va_arg(args, unsigned long);
+ else if (lpref) {
+ if (lpref > 1)
+ v = va_arg(args, unsigned long long);
+ else
+ v = va_arg(args, unsigned long);
+ } else
+ v = va_arg(args, unsigned int);
+
+ if (c == 'd') {
+ /* sign-extend the value */
+ if (lpref == 0)
+ v = (long long)(int)v;
+ else if (lpref == 1)
+ v = (long long)(long)v;
+ }
+
+ switch (c) {
+ case 'c':
+ out[0] = v;
+ out[1] = 0;
+ break;
+ case 'd':
+ i64toa_r(v, out);
+ break;
+ case 'u':
+ u64toa_r(v, out);
+ break;
+ case 'p':
+ *(out++) = '0';
+ *(out++) = 'x';
+ /* fall through */
+ default: /* 'x' and 'p' above */
+ u64toh_r(v, out);
+ break;
+ }
+ outstr = tmpbuf;
+ }
+ else if (c == 's') {
+ outstr = va_arg(args, char *);
+ if (!outstr)
+ outstr="(null)";
+ }
+ else if (c == '%') {
+ /* queue it verbatim */
+ continue;
+ }
+ else {
+ /* modifiers or final 0 */
+ if (c == 'l') {
+ /* long format prefix, maintain the escape */
+ lpref++;
+ }
+ escape = 1;
+ goto do_escape;
+ }
+ len = strlen(outstr);
+ goto flush_str;
+ }
+
+ /* not an escape sequence */
+ if (c == 0 || c == '%') {
+ /* flush pending data on escape or end */
+ escape = 1;
+ lpref = 0;
+ outstr = fmt;
+ len = ofs - 1;
+ flush_str:
+ if (_fwrite(outstr, len, stream) != 0)
+ break;
+
+ written += len;
+ do_escape:
+ if (c == 0)
+ break;
+ fmt += ofs;
+ ofs = 0;
+ continue;
+ }
+
+ /* literal char, just queue it */
+ }
+ return written;
+}
+
+static __attribute__((unused))
+int fprintf(FILE *stream, const char *fmt, ...)
+{
+ va_list args;
+ int ret;
+
+ va_start(args, fmt);
+ ret = vfprintf(stream, fmt, args);
+ va_end(args);
+ return ret;
+}
+
+static __attribute__((unused))
+int printf(const char *fmt, ...)
+{
+ va_list args;
+ int ret;
+
+ va_start(args, fmt);
+ ret = vfprintf(stdout, fmt, args);
+ va_end(args);
+ return ret;
+}
+
+static __attribute__((unused))
+void perror(const char *msg)
+{
+ fprintf(stderr, "%s%serrno=%d\n", (msg && *msg) ? msg : "", (msg && *msg) ? ": " : "", errno);
+}
+
+#endif /* _NOLIBC_STDIO_H */
diff --git a/tools/include/nolibc/stdlib.h b/tools/include/nolibc/stdlib.h
new file mode 100644
index 000000000000..8fd32eaf8037
--- /dev/null
+++ b/tools/include/nolibc/stdlib.h
@@ -0,0 +1,423 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * stdlib function definitions for NOLIBC
+ * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu>
+ */
+
+#ifndef _NOLIBC_STDLIB_H
+#define _NOLIBC_STDLIB_H
+
+#include "std.h"
+#include "arch.h"
+#include "types.h"
+#include "sys.h"
+#include "string.h"
+
+struct nolibc_heap {
+ size_t len;
+ char user_p[] __attribute__((__aligned__));
+};
+
+/* Buffer used to store int-to-ASCII conversions. Will only be implemented if
+ * any of the related functions is implemented. The area is large enough to
+ * store "18446744073709551615" or "-9223372036854775808" and the final zero.
+ */
+static __attribute__((unused)) char itoa_buffer[21];
+
+/*
+ * As much as possible, please keep functions alphabetically sorted.
+ */
+
+/* must be exported, as it's used by libgcc for various divide functions */
+__attribute__((weak,unused,noreturn,section(".text.nolibc_abort")))
+void abort(void)
+{
+ sys_kill(sys_getpid(), SIGABRT);
+ for (;;);
+}
+
+static __attribute__((unused))
+long atol(const char *s)
+{
+ unsigned long ret = 0;
+ unsigned long d;
+ int neg = 0;
+
+ if (*s == '-') {
+ neg = 1;
+ s++;
+ }
+
+ while (1) {
+ d = (*s++) - '0';
+ if (d > 9)
+ break;
+ ret *= 10;
+ ret += d;
+ }
+
+ return neg ? -ret : ret;
+}
+
+static __attribute__((unused))
+int atoi(const char *s)
+{
+ return atol(s);
+}
+
+static __attribute__((unused))
+void free(void *ptr)
+{
+ struct nolibc_heap *heap;
+
+ if (!ptr)
+ return;
+
+ heap = container_of(ptr, struct nolibc_heap, user_p);
+ munmap(heap, heap->len);
+}
+
+/* getenv() tries to find the environment variable named <name> in the
+ * environment array pointed to by global variable "environ" which must be
+ * declared as a char **, and must be terminated by a NULL (it is recommended
+ * to set this variable to the "envp" argument of main()). If the requested
+ * environment variable exists its value is returned otherwise NULL is
+ * returned. getenv() is forcefully inlined so that the reference to "environ"
+ * will be dropped if unused, even at -O0.
+ */
+static __attribute__((unused))
+char *_getenv(const char *name, char **environ)
+{
+ int idx, i;
+
+ if (environ) {
+ for (idx = 0; environ[idx]; idx++) {
+ for (i = 0; name[i] && name[i] == environ[idx][i];)
+ i++;
+ if (!name[i] && environ[idx][i] == '=')
+ return &environ[idx][i+1];
+ }
+ }
+ return NULL;
+}
+
+static inline __attribute__((unused,always_inline))
+char *getenv(const char *name)
+{
+ extern char **environ;
+ return _getenv(name, environ);
+}
+
+static __attribute__((unused))
+void *malloc(size_t len)
+{
+ struct nolibc_heap *heap;
+
+ /* Always allocate memory with size multiple of 4096. */
+ len = sizeof(*heap) + len;
+ len = (len + 4095UL) & -4096UL;
+ heap = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_PRIVATE,
+ -1, 0);
+ if (__builtin_expect(heap == MAP_FAILED, 0))
+ return NULL;
+
+ heap->len = len;
+ return heap->user_p;
+}
+
+static __attribute__((unused))
+void *calloc(size_t size, size_t nmemb)
+{
+ void *orig;
+ size_t res = 0;
+
+ if (__builtin_expect(__builtin_mul_overflow(nmemb, size, &res), 0)) {
+ SET_ERRNO(ENOMEM);
+ return NULL;
+ }
+
+ /*
+ * No need to zero the heap, the MAP_ANONYMOUS in malloc()
+ * already does it.
+ */
+ return malloc(res);
+}
+
+static __attribute__((unused))
+void *realloc(void *old_ptr, size_t new_size)
+{
+ struct nolibc_heap *heap;
+ size_t user_p_len;
+ void *ret;
+
+ if (!old_ptr)
+ return malloc(new_size);
+
+ heap = container_of(old_ptr, struct nolibc_heap, user_p);
+ user_p_len = heap->len - sizeof(*heap);
+ /*
+ * Don't realloc() if @user_p_len >= @new_size, this block of
+ * memory is still enough to handle the @new_size. Just return
+ * the same pointer.
+ */
+ if (user_p_len >= new_size)
+ return old_ptr;
+
+ ret = malloc(new_size);
+ if (__builtin_expect(!ret, 0))
+ return NULL;
+
+ memcpy(ret, heap->user_p, heap->len);
+ munmap(heap, heap->len);
+ return ret;
+}
+
+/* Converts the unsigned long integer <in> to its hex representation into
+ * buffer <buffer>, which must be long enough to store the number and the
+ * trailing zero (17 bytes for "ffffffffffffffff" or 9 for "ffffffff"). The
+ * buffer is filled from the first byte, and the number of characters emitted
+ * (not counting the trailing zero) is returned. The function is constructed
+ * in a way to optimize the code size and avoid any divide that could add a
+ * dependency on large external functions.
+ */
+static __attribute__((unused))
+int utoh_r(unsigned long in, char *buffer)
+{
+ signed char pos = (~0UL > 0xfffffffful) ? 60 : 28;
+ int digits = 0;
+ int dig;
+
+ do {
+ dig = in >> pos;
+ in -= (uint64_t)dig << pos;
+ pos -= 4;
+ if (dig || digits || pos < 0) {
+ if (dig > 9)
+ dig += 'a' - '0' - 10;
+ buffer[digits++] = '0' + dig;
+ }
+ } while (pos >= 0);
+
+ buffer[digits] = 0;
+ return digits;
+}
+
+/* converts unsigned long <in> to an hex string using the static itoa_buffer
+ * and returns the pointer to that string.
+ */
+static inline __attribute__((unused))
+char *utoh(unsigned long in)
+{
+ utoh_r(in, itoa_buffer);
+ return itoa_buffer;
+}
+
+/* Converts the unsigned long integer <in> to its string representation into
+ * buffer <buffer>, which must be long enough to store the number and the
+ * trailing zero (21 bytes for 18446744073709551615 in 64-bit, 11 for
+ * 4294967295 in 32-bit). The buffer is filled from the first byte, and the
+ * number of characters emitted (not counting the trailing zero) is returned.
+ * The function is constructed in a way to optimize the code size and avoid
+ * any divide that could add a dependency on large external functions.
+ */
+static __attribute__((unused))
+int utoa_r(unsigned long in, char *buffer)
+{
+ unsigned long lim;
+ int digits = 0;
+ int pos = (~0UL > 0xfffffffful) ? 19 : 9;
+ int dig;
+
+ do {
+ for (dig = 0, lim = 1; dig < pos; dig++)
+ lim *= 10;
+
+ if (digits || in >= lim || !pos) {
+ for (dig = 0; in >= lim; dig++)
+ in -= lim;
+ buffer[digits++] = '0' + dig;
+ }
+ } while (pos--);
+
+ buffer[digits] = 0;
+ return digits;
+}
+
+/* Converts the signed long integer <in> to its string representation into
+ * buffer <buffer>, which must be long enough to store the number and the
+ * trailing zero (21 bytes for -9223372036854775808 in 64-bit, 12 for
+ * -2147483648 in 32-bit). The buffer is filled from the first byte, and the
+ * number of characters emitted (not counting the trailing zero) is returned.
+ */
+static __attribute__((unused))
+int itoa_r(long in, char *buffer)
+{
+ char *ptr = buffer;
+ int len = 0;
+
+ if (in < 0) {
+ in = -in;
+ *(ptr++) = '-';
+ len++;
+ }
+ len += utoa_r(in, ptr);
+ return len;
+}
+
+/* for historical compatibility, same as above but returns the pointer to the
+ * buffer.
+ */
+static inline __attribute__((unused))
+char *ltoa_r(long in, char *buffer)
+{
+ itoa_r(in, buffer);
+ return buffer;
+}
+
+/* converts long integer <in> to a string using the static itoa_buffer and
+ * returns the pointer to that string.
+ */
+static inline __attribute__((unused))
+char *itoa(long in)
+{
+ itoa_r(in, itoa_buffer);
+ return itoa_buffer;
+}
+
+/* converts long integer <in> to a string using the static itoa_buffer and
+ * returns the pointer to that string. Same as above, for compatibility.
+ */
+static inline __attribute__((unused))
+char *ltoa(long in)
+{
+ itoa_r(in, itoa_buffer);
+ return itoa_buffer;
+}
+
+/* converts unsigned long integer <in> to a string using the static itoa_buffer
+ * and returns the pointer to that string.
+ */
+static inline __attribute__((unused))
+char *utoa(unsigned long in)
+{
+ utoa_r(in, itoa_buffer);
+ return itoa_buffer;
+}
+
+/* Converts the unsigned 64-bit integer <in> to its hex representation into
+ * buffer <buffer>, which must be long enough to store the number and the
+ * trailing zero (17 bytes for "ffffffffffffffff"). The buffer is filled from
+ * the first byte, and the number of characters emitted (not counting the
+ * trailing zero) is returned. The function is constructed in a way to optimize
+ * the code size and avoid any divide that could add a dependency on large
+ * external functions.
+ */
+static __attribute__((unused))
+int u64toh_r(uint64_t in, char *buffer)
+{
+ signed char pos = 60;
+ int digits = 0;
+ int dig;
+
+ do {
+ if (sizeof(long) >= 8) {
+ dig = (in >> pos) & 0xF;
+ } else {
+ /* 32-bit platforms: avoid a 64-bit shift */
+ uint32_t d = (pos >= 32) ? (in >> 32) : in;
+ dig = (d >> (pos & 31)) & 0xF;
+ }
+ if (dig > 9)
+ dig += 'a' - '0' - 10;
+ pos -= 4;
+ if (dig || digits || pos < 0)
+ buffer[digits++] = '0' + dig;
+ } while (pos >= 0);
+
+ buffer[digits] = 0;
+ return digits;
+}
+
+/* converts uint64_t <in> to an hex string using the static itoa_buffer and
+ * returns the pointer to that string.
+ */
+static inline __attribute__((unused))
+char *u64toh(uint64_t in)
+{
+ u64toh_r(in, itoa_buffer);
+ return itoa_buffer;
+}
+
+/* Converts the unsigned 64-bit integer <in> to its string representation into
+ * buffer <buffer>, which must be long enough to store the number and the
+ * trailing zero (21 bytes for 18446744073709551615). The buffer is filled from
+ * the first byte, and the number of characters emitted (not counting the
+ * trailing zero) is returned. The function is constructed in a way to optimize
+ * the code size and avoid any divide that could add a dependency on large
+ * external functions.
+ */
+static __attribute__((unused))
+int u64toa_r(uint64_t in, char *buffer)
+{
+ unsigned long long lim;
+ int digits = 0;
+ int pos = 19; /* start with the highest possible digit */
+ int dig;
+
+ do {
+ for (dig = 0, lim = 1; dig < pos; dig++)
+ lim *= 10;
+
+ if (digits || in >= lim || !pos) {
+ for (dig = 0; in >= lim; dig++)
+ in -= lim;
+ buffer[digits++] = '0' + dig;
+ }
+ } while (pos--);
+
+ buffer[digits] = 0;
+ return digits;
+}
+
+/* Converts the signed 64-bit integer <in> to its string representation into
+ * buffer <buffer>, which must be long enough to store the number and the
+ * trailing zero (21 bytes for -9223372036854775808). The buffer is filled from
+ * the first byte, and the number of characters emitted (not counting the
+ * trailing zero) is returned.
+ */
+static __attribute__((unused))
+int i64toa_r(int64_t in, char *buffer)
+{
+ char *ptr = buffer;
+ int len = 0;
+
+ if (in < 0) {
+ in = -in;
+ *(ptr++) = '-';
+ len++;
+ }
+ len += u64toa_r(in, ptr);
+ return len;
+}
+
+/* converts int64_t <in> to a string using the static itoa_buffer and returns
+ * the pointer to that string.
+ */
+static inline __attribute__((unused))
+char *i64toa(int64_t in)
+{
+ i64toa_r(in, itoa_buffer);
+ return itoa_buffer;
+}
+
+/* converts uint64_t <in> to a string using the static itoa_buffer and returns
+ * the pointer to that string.
+ */
+static inline __attribute__((unused))
+char *u64toa(uint64_t in)
+{
+ u64toa_r(in, itoa_buffer);
+ return itoa_buffer;
+}
+
+#endif /* _NOLIBC_STDLIB_H */
diff --git a/tools/include/nolibc/string.h b/tools/include/nolibc/string.h
new file mode 100644
index 000000000000..bef35bee9c44
--- /dev/null
+++ b/tools/include/nolibc/string.h
@@ -0,0 +1,285 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * string function definitions for NOLIBC
+ * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu>
+ */
+
+#ifndef _NOLIBC_STRING_H
+#define _NOLIBC_STRING_H
+
+#include "std.h"
+
+static void *malloc(size_t len);
+
+/*
+ * As much as possible, please keep functions alphabetically sorted.
+ */
+
+static __attribute__((unused))
+int memcmp(const void *s1, const void *s2, size_t n)
+{
+ size_t ofs = 0;
+ char c1 = 0;
+
+ while (ofs < n && !(c1 = ((char *)s1)[ofs] - ((char *)s2)[ofs])) {
+ ofs++;
+ }
+ return c1;
+}
+
+static __attribute__((unused))
+void *_nolibc_memcpy_up(void *dst, const void *src, size_t len)
+{
+ size_t pos = 0;
+
+ while (pos < len) {
+ ((char *)dst)[pos] = ((const char *)src)[pos];
+ pos++;
+ }
+ return dst;
+}
+
+static __attribute__((unused))
+void *_nolibc_memcpy_down(void *dst, const void *src, size_t len)
+{
+ while (len) {
+ len--;
+ ((char *)dst)[len] = ((const char *)src)[len];
+ }
+ return dst;
+}
+
+/* might be ignored by the compiler without -ffreestanding, then found as
+ * missing.
+ */
+__attribute__((weak,unused,section(".text.nolibc_memmove")))
+void *memmove(void *dst, const void *src, size_t len)
+{
+ size_t dir, pos;
+
+ pos = len;
+ dir = -1;
+
+ if (dst < src) {
+ pos = -1;
+ dir = 1;
+ }
+
+ while (len) {
+ pos += dir;
+ ((char *)dst)[pos] = ((const char *)src)[pos];
+ len--;
+ }
+ return dst;
+}
+
+/* must be exported, as it's used by libgcc on ARM */
+__attribute__((weak,unused,section(".text.nolibc_memcpy")))
+void *memcpy(void *dst, const void *src, size_t len)
+{
+ return _nolibc_memcpy_up(dst, src, len);
+}
+
+/* might be ignored by the compiler without -ffreestanding, then found as
+ * missing.
+ */
+__attribute__((weak,unused,section(".text.nolibc_memset")))
+void *memset(void *dst, int b, size_t len)
+{
+ char *p = dst;
+
+ while (len--)
+ *(p++) = b;
+ return dst;
+}
+
+static __attribute__((unused))
+char *strchr(const char *s, int c)
+{
+ while (*s) {
+ if (*s == (char)c)
+ return (char *)s;
+ s++;
+ }
+ return NULL;
+}
+
+static __attribute__((unused))
+int strcmp(const char *a, const char *b)
+{
+ unsigned int c;
+ int diff;
+
+ while (!(diff = (unsigned char)*a++ - (c = (unsigned char)*b++)) && c)
+ ;
+ return diff;
+}
+
+static __attribute__((unused))
+char *strcpy(char *dst, const char *src)
+{
+ char *ret = dst;
+
+ while ((*dst++ = *src++));
+ return ret;
+}
+
+/* this function is only used with arguments that are not constants or when
+ * it's not known because optimizations are disabled.
+ */
+static __attribute__((unused))
+size_t nolibc_strlen(const char *str)
+{
+ size_t len;
+
+ for (len = 0; str[len]; len++);
+ return len;
+}
+
+/* do not trust __builtin_constant_p() at -O0, as clang will emit a test and
+ * the two branches, then will rely on an external definition of strlen().
+ */
+#if defined(__OPTIMIZE__)
+#define strlen(str) ({ \
+ __builtin_constant_p((str)) ? \
+ __builtin_strlen((str)) : \
+ nolibc_strlen((str)); \
+})
+#else
+#define strlen(str) nolibc_strlen((str))
+#endif
+
+static __attribute__((unused))
+size_t strnlen(const char *str, size_t maxlen)
+{
+ size_t len;
+
+ for (len = 0; (len < maxlen) && str[len]; len++);
+ return len;
+}
+
+static __attribute__((unused))
+char *strdup(const char *str)
+{
+ size_t len;
+ char *ret;
+
+ len = strlen(str);
+ ret = malloc(len + 1);
+ if (__builtin_expect(ret != NULL, 1))
+ memcpy(ret, str, len + 1);
+
+ return ret;
+}
+
+static __attribute__((unused))
+char *strndup(const char *str, size_t maxlen)
+{
+ size_t len;
+ char *ret;
+
+ len = strnlen(str, maxlen);
+ ret = malloc(len + 1);
+ if (__builtin_expect(ret != NULL, 1)) {
+ memcpy(ret, str, len);
+ ret[len] = '\0';
+ }
+
+ return ret;
+}
+
+static __attribute__((unused))
+size_t strlcat(char *dst, const char *src, size_t size)
+{
+ size_t len;
+ char c;
+
+ for (len = 0; dst[len]; len++)
+ ;
+
+ for (;;) {
+ c = *src;
+ if (len < size)
+ dst[len] = c;
+ if (!c)
+ break;
+ len++;
+ src++;
+ }
+
+ return len;
+}
+
+static __attribute__((unused))
+size_t strlcpy(char *dst, const char *src, size_t size)
+{
+ size_t len;
+ char c;
+
+ for (len = 0;;) {
+ c = src[len];
+ if (len < size)
+ dst[len] = c;
+ if (!c)
+ break;
+ len++;
+ }
+ return len;
+}
+
+static __attribute__((unused))
+char *strncat(char *dst, const char *src, size_t size)
+{
+ char *orig = dst;
+
+ while (*dst)
+ dst++;
+
+ while (size && (*dst = *src)) {
+ src++;
+ dst++;
+ size--;
+ }
+
+ *dst = 0;
+ return orig;
+}
+
+static __attribute__((unused))
+int strncmp(const char *a, const char *b, size_t size)
+{
+ unsigned int c;
+ int diff = 0;
+
+ while (size-- &&
+ !(diff = (unsigned char)*a++ - (c = (unsigned char)*b++)) && c)
+ ;
+
+ return diff;
+}
+
+static __attribute__((unused))
+char *strncpy(char *dst, const char *src, size_t size)
+{
+ size_t len;
+
+ for (len = 0; len < size; len++)
+ if ((dst[len] = *src))
+ src++;
+ return dst;
+}
+
+static __attribute__((unused))
+char *strrchr(const char *s, int c)
+{
+ const char *ret = NULL;
+
+ while (*s) {
+ if (*s == (char)c)
+ ret = s;
+ s++;
+ }
+ return (char *)ret;
+}
+
+#endif /* _NOLIBC_STRING_H */
diff --git a/tools/include/nolibc/sys.h b/tools/include/nolibc/sys.h
new file mode 100644
index 000000000000..08491070387b
--- /dev/null
+++ b/tools/include/nolibc/sys.h
@@ -0,0 +1,1247 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * Syscall definitions for NOLIBC (those in man(2))
+ * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu>
+ */
+
+#ifndef _NOLIBC_SYS_H
+#define _NOLIBC_SYS_H
+
+#include <stdarg.h>
+#include "std.h"
+
+/* system includes */
+#include <asm/unistd.h>
+#include <asm/signal.h> // for SIGCHLD
+#include <asm/ioctls.h>
+#include <asm/mman.h>
+#include <linux/fs.h>
+#include <linux/loop.h>
+#include <linux/time.h>
+
+#include "arch.h"
+#include "errno.h"
+#include "types.h"
+
+
+/* Functions in this file only describe syscalls. They're declared static so
+ * that the compiler usually decides to inline them while still being allowed
+ * to pass a pointer to one of their instances. Each syscall exists in two
+ * versions:
+ * - the "internal" ones, which matches the raw syscall interface at the
+ * kernel level, which may sometimes slightly differ from the documented
+ * libc-level ones. For example most of them return either a valid value
+ * or -errno. All of these are prefixed with "sys_". They may be called
+ * by non-portable applications if desired.
+ *
+ * - the "exported" ones, whose interface must closely match the one
+ * documented in man(2), that applications are supposed to expect. These
+ * ones rely on the internal ones, and set errno.
+ *
+ * Each syscall will be defined with the two functions, sorted in alphabetical
+ * order applied to the exported names.
+ *
+ * In case of doubt about the relevance of a function here, only those which
+ * set errno should be defined here. Wrappers like those appearing in man(3)
+ * should not be placed here.
+ */
+
+
+/*
+ * int brk(void *addr);
+ * void *sbrk(intptr_t inc)
+ */
+
+static __attribute__((unused))
+void *sys_brk(void *addr)
+{
+ return (void *)my_syscall1(__NR_brk, addr);
+}
+
+static __attribute__((unused))
+int brk(void *addr)
+{
+ void *ret = sys_brk(addr);
+
+ if (!ret) {
+ SET_ERRNO(ENOMEM);
+ return -1;
+ }
+ return 0;
+}
+
+static __attribute__((unused))
+void *sbrk(intptr_t inc)
+{
+ void *ret;
+
+ /* first call to find current end */
+ if ((ret = sys_brk(0)) && (sys_brk(ret + inc) == ret + inc))
+ return ret + inc;
+
+ SET_ERRNO(ENOMEM);
+ return (void *)-1;
+}
+
+
+/*
+ * int chdir(const char *path);
+ */
+
+static __attribute__((unused))
+int sys_chdir(const char *path)
+{
+ return my_syscall1(__NR_chdir, path);
+}
+
+static __attribute__((unused))
+int chdir(const char *path)
+{
+ int ret = sys_chdir(path);
+
+ if (ret < 0) {
+ SET_ERRNO(-ret);
+ ret = -1;
+ }
+ return ret;
+}
+
+
+/*
+ * int chmod(const char *path, mode_t mode);
+ */
+
+static __attribute__((unused))
+int sys_chmod(const char *path, mode_t mode)
+{
+#ifdef __NR_fchmodat
+ return my_syscall4(__NR_fchmodat, AT_FDCWD, path, mode, 0);
+#elif defined(__NR_chmod)
+ return my_syscall2(__NR_chmod, path, mode);
+#else
+#error Neither __NR_fchmodat nor __NR_chmod defined, cannot implement sys_chmod()
+#endif
+}
+
+static __attribute__((unused))
+int chmod(const char *path, mode_t mode)
+{
+ int ret = sys_chmod(path, mode);
+
+ if (ret < 0) {
+ SET_ERRNO(-ret);
+ ret = -1;
+ }
+ return ret;
+}
+
+
+/*
+ * int chown(const char *path, uid_t owner, gid_t group);
+ */
+
+static __attribute__((unused))
+int sys_chown(const char *path, uid_t owner, gid_t group)
+{
+#ifdef __NR_fchownat
+ return my_syscall5(__NR_fchownat, AT_FDCWD, path, owner, group, 0);
+#elif defined(__NR_chown)
+ return my_syscall3(__NR_chown, path, owner, group);
+#else
+#error Neither __NR_fchownat nor __NR_chown defined, cannot implement sys_chown()
+#endif
+}
+
+static __attribute__((unused))
+int chown(const char *path, uid_t owner, gid_t group)
+{
+ int ret = sys_chown(path, owner, group);
+
+ if (ret < 0) {
+ SET_ERRNO(-ret);
+ ret = -1;
+ }
+ return ret;
+}
+
+
+/*
+ * int chroot(const char *path);
+ */
+
+static __attribute__((unused))
+int sys_chroot(const char *path)
+{
+ return my_syscall1(__NR_chroot, path);
+}
+
+static __attribute__((unused))
+int chroot(const char *path)
+{
+ int ret = sys_chroot(path);
+
+ if (ret < 0) {
+ SET_ERRNO(-ret);
+ ret = -1;
+ }
+ return ret;
+}
+
+
+/*
+ * int close(int fd);
+ */
+
+static __attribute__((unused))
+int sys_close(int fd)
+{
+ return my_syscall1(__NR_close, fd);
+}
+
+static __attribute__((unused))
+int close(int fd)
+{
+ int ret = sys_close(fd);
+
+ if (ret < 0) {
+ SET_ERRNO(-ret);
+ ret = -1;
+ }
+ return ret;
+}
+
+
+/*
+ * int dup(int fd);
+ */
+
+static __attribute__((unused))
+int sys_dup(int fd)
+{
+ return my_syscall1(__NR_dup, fd);
+}
+
+static __attribute__((unused))
+int dup(int fd)
+{
+ int ret = sys_dup(fd);
+
+ if (ret < 0) {
+ SET_ERRNO(-ret);
+ ret = -1;
+ }
+ return ret;
+}
+
+
+/*
+ * int dup2(int old, int new);
+ */
+
+static __attribute__((unused))
+int sys_dup2(int old, int new)
+{
+#ifdef __NR_dup3
+ return my_syscall3(__NR_dup3, old, new, 0);
+#elif defined(__NR_dup2)
+ return my_syscall2(__NR_dup2, old, new);
+#else
+#error Neither __NR_dup3 nor __NR_dup2 defined, cannot implement sys_dup2()
+#endif
+}
+
+static __attribute__((unused))
+int dup2(int old, int new)
+{
+ int ret = sys_dup2(old, new);
+
+ if (ret < 0) {
+ SET_ERRNO(-ret);
+ ret = -1;
+ }
+ return ret;
+}
+
+
+/*
+ * int dup3(int old, int new, int flags);
+ */
+
+#ifdef __NR_dup3
+static __attribute__((unused))
+int sys_dup3(int old, int new, int flags)
+{
+ return my_syscall3(__NR_dup3, old, new, flags);
+}
+
+static __attribute__((unused))
+int dup3(int old, int new, int flags)
+{
+ int ret = sys_dup3(old, new, flags);
+
+ if (ret < 0) {
+ SET_ERRNO(-ret);
+ ret = -1;
+ }
+ return ret;
+}
+#endif
+
+
+/*
+ * int execve(const char *filename, char *const argv[], char *const envp[]);
+ */
+
+static __attribute__((unused))
+int sys_execve(const char *filename, char *const argv[], char *const envp[])
+{
+ return my_syscall3(__NR_execve, filename, argv, envp);
+}
+
+static __attribute__((unused))
+int execve(const char *filename, char *const argv[], char *const envp[])
+{
+ int ret = sys_execve(filename, argv, envp);
+
+ if (ret < 0) {
+ SET_ERRNO(-ret);
+ ret = -1;
+ }
+ return ret;
+}
+
+
+/*
+ * void exit(int status);
+ */
+
+static __attribute__((noreturn,unused))
+void sys_exit(int status)
+{
+ my_syscall1(__NR_exit, status & 255);
+ while(1); // shut the "noreturn" warnings.
+}
+
+static __attribute__((noreturn,unused))
+void exit(int status)
+{
+ sys_exit(status);
+}
+
+
+/*
+ * pid_t fork(void);
+ */
+
+static __attribute__((unused))
+pid_t sys_fork(void)
+{
+#ifdef __NR_clone
+ /* note: some archs only have clone() and not fork(). Different archs
+ * have a different API, but most archs have the flags on first arg and
+ * will not use the rest with no other flag.
+ */
+ return my_syscall5(__NR_clone, SIGCHLD, 0, 0, 0, 0);
+#elif defined(__NR_fork)
+ return my_syscall0(__NR_fork);
+#else
+#error Neither __NR_clone nor __NR_fork defined, cannot implement sys_fork()
+#endif
+}
+
+static __attribute__((unused))
+pid_t fork(void)
+{
+ pid_t ret = sys_fork();
+
+ if (ret < 0) {
+ SET_ERRNO(-ret);
+ ret = -1;
+ }
+ return ret;
+}
+
+
+/*
+ * int fsync(int fd);
+ */
+
+static __attribute__((unused))
+int sys_fsync(int fd)
+{
+ return my_syscall1(__NR_fsync, fd);
+}
+
+static __attribute__((unused))
+int fsync(int fd)
+{
+ int ret = sys_fsync(fd);
+
+ if (ret < 0) {
+ SET_ERRNO(-ret);
+ ret = -1;
+ }
+ return ret;
+}
+
+
+/*
+ * int getdents64(int fd, struct linux_dirent64 *dirp, int count);
+ */
+
+static __attribute__((unused))
+int sys_getdents64(int fd, struct linux_dirent64 *dirp, int count)
+{
+ return my_syscall3(__NR_getdents64, fd, dirp, count);
+}
+
+static __attribute__((unused))
+int getdents64(int fd, struct linux_dirent64 *dirp, int count)
+{
+ int ret = sys_getdents64(fd, dirp, count);
+
+ if (ret < 0) {
+ SET_ERRNO(-ret);
+ ret = -1;
+ }
+ return ret;
+}
+
+
+/*
+ * pid_t getpgid(pid_t pid);
+ */
+
+static __attribute__((unused))
+pid_t sys_getpgid(pid_t pid)
+{
+ return my_syscall1(__NR_getpgid, pid);
+}
+
+static __attribute__((unused))
+pid_t getpgid(pid_t pid)
+{
+ pid_t ret = sys_getpgid(pid);
+
+ if (ret < 0) {
+ SET_ERRNO(-ret);
+ ret = -1;
+ }
+ return ret;
+}
+
+
+/*
+ * pid_t getpgrp(void);
+ */
+
+static __attribute__((unused))
+pid_t sys_getpgrp(void)
+{
+ return sys_getpgid(0);
+}
+
+static __attribute__((unused))
+pid_t getpgrp(void)
+{
+ return sys_getpgrp();
+}
+
+
+/*
+ * pid_t getpid(void);
+ */
+
+static __attribute__((unused))
+pid_t sys_getpid(void)
+{
+ return my_syscall0(__NR_getpid);
+}
+
+static __attribute__((unused))
+pid_t getpid(void)
+{
+ return sys_getpid();
+}
+
+
+/*
+ * pid_t getppid(void);
+ */
+
+static __attribute__((unused))
+pid_t sys_getppid(void)
+{
+ return my_syscall0(__NR_getppid);
+}
+
+static __attribute__((unused))
+pid_t getppid(void)
+{
+ return sys_getppid();
+}
+
+
+/*
+ * pid_t gettid(void);
+ */
+
+static __attribute__((unused))
+pid_t sys_gettid(void)
+{
+ return my_syscall0(__NR_gettid);
+}
+
+static __attribute__((unused))
+pid_t gettid(void)
+{
+ return sys_gettid();
+}
+
+
+/*
+ * int gettimeofday(struct timeval *tv, struct timezone *tz);
+ */
+
+static __attribute__((unused))
+int sys_gettimeofday(struct timeval *tv, struct timezone *tz)
+{
+ return my_syscall2(__NR_gettimeofday, tv, tz);
+}
+
+static __attribute__((unused))
+int gettimeofday(struct timeval *tv, struct timezone *tz)
+{
+ int ret = sys_gettimeofday(tv, tz);
+
+ if (ret < 0) {
+ SET_ERRNO(-ret);
+ ret = -1;
+ }
+ return ret;
+}
+
+
+/*
+ * int ioctl(int fd, unsigned long req, void *value);
+ */
+
+static __attribute__((unused))
+int sys_ioctl(int fd, unsigned long req, void *value)
+{
+ return my_syscall3(__NR_ioctl, fd, req, value);
+}
+
+static __attribute__((unused))
+int ioctl(int fd, unsigned long req, void *value)
+{
+ int ret = sys_ioctl(fd, req, value);
+
+ if (ret < 0) {
+ SET_ERRNO(-ret);
+ ret = -1;
+ }
+ return ret;
+}
+
+/*
+ * int kill(pid_t pid, int signal);
+ */
+
+static __attribute__((unused))
+int sys_kill(pid_t pid, int signal)
+{
+ return my_syscall2(__NR_kill, pid, signal);
+}
+
+static __attribute__((unused))
+int kill(pid_t pid, int signal)
+{
+ int ret = sys_kill(pid, signal);
+
+ if (ret < 0) {
+ SET_ERRNO(-ret);
+ ret = -1;
+ }
+ return ret;
+}
+
+
+/*
+ * int link(const char *old, const char *new);
+ */
+
+static __attribute__((unused))
+int sys_link(const char *old, const char *new)
+{
+#ifdef __NR_linkat
+ return my_syscall5(__NR_linkat, AT_FDCWD, old, AT_FDCWD, new, 0);
+#elif defined(__NR_link)
+ return my_syscall2(__NR_link, old, new);
+#else
+#error Neither __NR_linkat nor __NR_link defined, cannot implement sys_link()
+#endif
+}
+
+static __attribute__((unused))
+int link(const char *old, const char *new)
+{
+ int ret = sys_link(old, new);
+
+ if (ret < 0) {
+ SET_ERRNO(-ret);
+ ret = -1;
+ }
+ return ret;
+}
+
+
+/*
+ * off_t lseek(int fd, off_t offset, int whence);
+ */
+
+static __attribute__((unused))
+off_t sys_lseek(int fd, off_t offset, int whence)
+{
+ return my_syscall3(__NR_lseek, fd, offset, whence);
+}
+
+static __attribute__((unused))
+off_t lseek(int fd, off_t offset, int whence)
+{
+ off_t ret = sys_lseek(fd, offset, whence);
+
+ if (ret < 0) {
+ SET_ERRNO(-ret);
+ ret = -1;
+ }
+ return ret;
+}
+
+
+/*
+ * int mkdir(const char *path, mode_t mode);
+ */
+
+static __attribute__((unused))
+int sys_mkdir(const char *path, mode_t mode)
+{
+#ifdef __NR_mkdirat
+ return my_syscall3(__NR_mkdirat, AT_FDCWD, path, mode);
+#elif defined(__NR_mkdir)
+ return my_syscall2(__NR_mkdir, path, mode);
+#else
+#error Neither __NR_mkdirat nor __NR_mkdir defined, cannot implement sys_mkdir()
+#endif
+}
+
+static __attribute__((unused))
+int mkdir(const char *path, mode_t mode)
+{
+ int ret = sys_mkdir(path, mode);
+
+ if (ret < 0) {
+ SET_ERRNO(-ret);
+ ret = -1;
+ }
+ return ret;
+}
+
+
+/*
+ * int mknod(const char *path, mode_t mode, dev_t dev);
+ */
+
+static __attribute__((unused))
+long sys_mknod(const char *path, mode_t mode, dev_t dev)
+{
+#ifdef __NR_mknodat
+ return my_syscall4(__NR_mknodat, AT_FDCWD, path, mode, dev);
+#elif defined(__NR_mknod)
+ return my_syscall3(__NR_mknod, path, mode, dev);
+#else
+#error Neither __NR_mknodat nor __NR_mknod defined, cannot implement sys_mknod()
+#endif
+}
+
+static __attribute__((unused))
+int mknod(const char *path, mode_t mode, dev_t dev)
+{
+ int ret = sys_mknod(path, mode, dev);
+
+ if (ret < 0) {
+ SET_ERRNO(-ret);
+ ret = -1;
+ }
+ return ret;
+}
+
+#ifndef MAP_SHARED
+#define MAP_SHARED 0x01 /* Share changes */
+#define MAP_PRIVATE 0x02 /* Changes are private */
+#define MAP_SHARED_VALIDATE 0x03 /* share + validate extension flags */
+#endif
+
+#ifndef MAP_FAILED
+#define MAP_FAILED ((void *)-1)
+#endif
+
+static __attribute__((unused))
+void *sys_mmap(void *addr, size_t length, int prot, int flags, int fd,
+ off_t offset)
+{
+#ifndef my_syscall6
+ /* Function not implemented. */
+ return -ENOSYS;
+#else
+
+ int n;
+
+#if defined(__i386__)
+ n = __NR_mmap2;
+ offset >>= 12;
+#else
+ n = __NR_mmap;
+#endif
+
+ return (void *)my_syscall6(n, addr, length, prot, flags, fd, offset);
+#endif
+}
+
+static __attribute__((unused))
+void *mmap(void *addr, size_t length, int prot, int flags, int fd, off_t offset)
+{
+ void *ret = sys_mmap(addr, length, prot, flags, fd, offset);
+
+ if ((unsigned long)ret >= -4095UL) {
+ SET_ERRNO(-(long)ret);
+ ret = MAP_FAILED;
+ }
+ return ret;
+}
+
+static __attribute__((unused))
+int sys_munmap(void *addr, size_t length)
+{
+ return my_syscall2(__NR_munmap, addr, length);
+}
+
+static __attribute__((unused))
+int munmap(void *addr, size_t length)
+{
+ int ret = sys_munmap(addr, length);
+
+ if (ret < 0) {
+ SET_ERRNO(-ret);
+ ret = -1;
+ }
+ return ret;
+}
+
+/*
+ * int mount(const char *source, const char *target,
+ * const char *fstype, unsigned long flags,
+ * const void *data);
+ */
+static __attribute__((unused))
+int sys_mount(const char *src, const char *tgt, const char *fst,
+ unsigned long flags, const void *data)
+{
+ return my_syscall5(__NR_mount, src, tgt, fst, flags, data);
+}
+
+static __attribute__((unused))
+int mount(const char *src, const char *tgt,
+ const char *fst, unsigned long flags,
+ const void *data)
+{
+ int ret = sys_mount(src, tgt, fst, flags, data);
+
+ if (ret < 0) {
+ SET_ERRNO(-ret);
+ ret = -1;
+ }
+ return ret;
+}
+
+
+/*
+ * int open(const char *path, int flags[, mode_t mode]);
+ */
+
+static __attribute__((unused))
+int sys_open(const char *path, int flags, mode_t mode)
+{
+#ifdef __NR_openat
+ return my_syscall4(__NR_openat, AT_FDCWD, path, flags, mode);
+#elif defined(__NR_open)
+ return my_syscall3(__NR_open, path, flags, mode);
+#else
+#error Neither __NR_openat nor __NR_open defined, cannot implement sys_open()
+#endif
+}
+
+static __attribute__((unused))
+int open(const char *path, int flags, ...)
+{
+ mode_t mode = 0;
+ int ret;
+
+ if (flags & O_CREAT) {
+ va_list args;
+
+ va_start(args, flags);
+ mode = va_arg(args, mode_t);
+ va_end(args);
+ }
+
+ ret = sys_open(path, flags, mode);
+
+ if (ret < 0) {
+ SET_ERRNO(-ret);
+ ret = -1;
+ }
+ return ret;
+}
+
+
+/*
+ * int pivot_root(const char *new, const char *old);
+ */
+
+static __attribute__((unused))
+int sys_pivot_root(const char *new, const char *old)
+{
+ return my_syscall2(__NR_pivot_root, new, old);
+}
+
+static __attribute__((unused))
+int pivot_root(const char *new, const char *old)
+{
+ int ret = sys_pivot_root(new, old);
+
+ if (ret < 0) {
+ SET_ERRNO(-ret);
+ ret = -1;
+ }
+ return ret;
+}
+
+
+/*
+ * int poll(struct pollfd *fds, int nfds, int timeout);
+ */
+
+static __attribute__((unused))
+int sys_poll(struct pollfd *fds, int nfds, int timeout)
+{
+#if defined(__NR_ppoll)
+ struct timespec t;
+
+ if (timeout >= 0) {
+ t.tv_sec = timeout / 1000;
+ t.tv_nsec = (timeout % 1000) * 1000000;
+ }
+ return my_syscall4(__NR_ppoll, fds, nfds, (timeout >= 0) ? &t : NULL, NULL);
+#elif defined(__NR_poll)
+ return my_syscall3(__NR_poll, fds, nfds, timeout);
+#else
+#error Neither __NR_ppoll nor __NR_poll defined, cannot implement sys_poll()
+#endif
+}
+
+static __attribute__((unused))
+int poll(struct pollfd *fds, int nfds, int timeout)
+{
+ int ret = sys_poll(fds, nfds, timeout);
+
+ if (ret < 0) {
+ SET_ERRNO(-ret);
+ ret = -1;
+ }
+ return ret;
+}
+
+
+/*
+ * ssize_t read(int fd, void *buf, size_t count);
+ */
+
+static __attribute__((unused))
+ssize_t sys_read(int fd, void *buf, size_t count)
+{
+ return my_syscall3(__NR_read, fd, buf, count);
+}
+
+static __attribute__((unused))
+ssize_t read(int fd, void *buf, size_t count)
+{
+ ssize_t ret = sys_read(fd, buf, count);
+
+ if (ret < 0) {
+ SET_ERRNO(-ret);
+ ret = -1;
+ }
+ return ret;
+}
+
+
+/*
+ * int reboot(int cmd);
+ * <cmd> is among LINUX_REBOOT_CMD_*
+ */
+
+static __attribute__((unused))
+ssize_t sys_reboot(int magic1, int magic2, int cmd, void *arg)
+{
+ return my_syscall4(__NR_reboot, magic1, magic2, cmd, arg);
+}
+
+static __attribute__((unused))
+int reboot(int cmd)
+{
+ int ret = sys_reboot(LINUX_REBOOT_MAGIC1, LINUX_REBOOT_MAGIC2, cmd, 0);
+
+ if (ret < 0) {
+ SET_ERRNO(-ret);
+ ret = -1;
+ }
+ return ret;
+}
+
+
+/*
+ * int sched_yield(void);
+ */
+
+static __attribute__((unused))
+int sys_sched_yield(void)
+{
+ return my_syscall0(__NR_sched_yield);
+}
+
+static __attribute__((unused))
+int sched_yield(void)
+{
+ int ret = sys_sched_yield();
+
+ if (ret < 0) {
+ SET_ERRNO(-ret);
+ ret = -1;
+ }
+ return ret;
+}
+
+
+/*
+ * int select(int nfds, fd_set *read_fds, fd_set *write_fds,
+ * fd_set *except_fds, struct timeval *timeout);
+ */
+
+static __attribute__((unused))
+int sys_select(int nfds, fd_set *rfds, fd_set *wfds, fd_set *efds, struct timeval *timeout)
+{
+#if defined(__ARCH_WANT_SYS_OLD_SELECT) && !defined(__NR__newselect)
+ struct sel_arg_struct {
+ unsigned long n;
+ fd_set *r, *w, *e;
+ struct timeval *t;
+ } arg = { .n = nfds, .r = rfds, .w = wfds, .e = efds, .t = timeout };
+ return my_syscall1(__NR_select, &arg);
+#elif defined(__ARCH_WANT_SYS_PSELECT6) && defined(__NR_pselect6)
+ struct timespec t;
+
+ if (timeout) {
+ t.tv_sec = timeout->tv_sec;
+ t.tv_nsec = timeout->tv_usec * 1000;
+ }
+ return my_syscall6(__NR_pselect6, nfds, rfds, wfds, efds, timeout ? &t : NULL, NULL);
+#elif defined(__NR__newselect) || defined(__NR_select)
+#ifndef __NR__newselect
+#define __NR__newselect __NR_select
+#endif
+ return my_syscall5(__NR__newselect, nfds, rfds, wfds, efds, timeout);
+#else
+#error None of __NR_select, __NR_pselect6, nor __NR__newselect defined, cannot implement sys_select()
+#endif
+}
+
+static __attribute__((unused))
+int select(int nfds, fd_set *rfds, fd_set *wfds, fd_set *efds, struct timeval *timeout)
+{
+ int ret = sys_select(nfds, rfds, wfds, efds, timeout);
+
+ if (ret < 0) {
+ SET_ERRNO(-ret);
+ ret = -1;
+ }
+ return ret;
+}
+
+
+/*
+ * int setpgid(pid_t pid, pid_t pgid);
+ */
+
+static __attribute__((unused))
+int sys_setpgid(pid_t pid, pid_t pgid)
+{
+ return my_syscall2(__NR_setpgid, pid, pgid);
+}
+
+static __attribute__((unused))
+int setpgid(pid_t pid, pid_t pgid)
+{
+ int ret = sys_setpgid(pid, pgid);
+
+ if (ret < 0) {
+ SET_ERRNO(-ret);
+ ret = -1;
+ }
+ return ret;
+}
+
+
+/*
+ * pid_t setsid(void);
+ */
+
+static __attribute__((unused))
+pid_t sys_setsid(void)
+{
+ return my_syscall0(__NR_setsid);
+}
+
+static __attribute__((unused))
+pid_t setsid(void)
+{
+ pid_t ret = sys_setsid();
+
+ if (ret < 0) {
+ SET_ERRNO(-ret);
+ ret = -1;
+ }
+ return ret;
+}
+
+
+/*
+ * int stat(const char *path, struct stat *buf);
+ * Warning: the struct stat's layout is arch-dependent.
+ */
+
+static __attribute__((unused))
+int sys_stat(const char *path, struct stat *buf)
+{
+ struct sys_stat_struct stat;
+ long ret;
+
+#ifdef __NR_newfstatat
+ /* only solution for arm64 */
+ ret = my_syscall4(__NR_newfstatat, AT_FDCWD, path, &stat, 0);
+#elif defined(__NR_stat)
+ ret = my_syscall2(__NR_stat, path, &stat);
+#else
+#error Neither __NR_newfstatat nor __NR_stat defined, cannot implement sys_stat()
+#endif
+ buf->st_dev = stat.st_dev;
+ buf->st_ino = stat.st_ino;
+ buf->st_mode = stat.st_mode;
+ buf->st_nlink = stat.st_nlink;
+ buf->st_uid = stat.st_uid;
+ buf->st_gid = stat.st_gid;
+ buf->st_rdev = stat.st_rdev;
+ buf->st_size = stat.st_size;
+ buf->st_blksize = stat.st_blksize;
+ buf->st_blocks = stat.st_blocks;
+ buf->st_atime = stat.st_atime;
+ buf->st_mtime = stat.st_mtime;
+ buf->st_ctime = stat.st_ctime;
+ return ret;
+}
+
+static __attribute__((unused))
+int stat(const char *path, struct stat *buf)
+{
+ int ret = sys_stat(path, buf);
+
+ if (ret < 0) {
+ SET_ERRNO(-ret);
+ ret = -1;
+ }
+ return ret;
+}
+
+
+/*
+ * int symlink(const char *old, const char *new);
+ */
+
+static __attribute__((unused))
+int sys_symlink(const char *old, const char *new)
+{
+#ifdef __NR_symlinkat
+ return my_syscall3(__NR_symlinkat, old, AT_FDCWD, new);
+#elif defined(__NR_symlink)
+ return my_syscall2(__NR_symlink, old, new);
+#else
+#error Neither __NR_symlinkat nor __NR_symlink defined, cannot implement sys_symlink()
+#endif
+}
+
+static __attribute__((unused))
+int symlink(const char *old, const char *new)
+{
+ int ret = sys_symlink(old, new);
+
+ if (ret < 0) {
+ SET_ERRNO(-ret);
+ ret = -1;
+ }
+ return ret;
+}
+
+
+/*
+ * mode_t umask(mode_t mode);
+ */
+
+static __attribute__((unused))
+mode_t sys_umask(mode_t mode)
+{
+ return my_syscall1(__NR_umask, mode);
+}
+
+static __attribute__((unused))
+mode_t umask(mode_t mode)
+{
+ return sys_umask(mode);
+}
+
+
+/*
+ * int umount2(const char *path, int flags);
+ */
+
+static __attribute__((unused))
+int sys_umount2(const char *path, int flags)
+{
+ return my_syscall2(__NR_umount2, path, flags);
+}
+
+static __attribute__((unused))
+int umount2(const char *path, int flags)
+{
+ int ret = sys_umount2(path, flags);
+
+ if (ret < 0) {
+ SET_ERRNO(-ret);
+ ret = -1;
+ }
+ return ret;
+}
+
+
+/*
+ * int unlink(const char *path);
+ */
+
+static __attribute__((unused))
+int sys_unlink(const char *path)
+{
+#ifdef __NR_unlinkat
+ return my_syscall3(__NR_unlinkat, AT_FDCWD, path, 0);
+#elif defined(__NR_unlink)
+ return my_syscall1(__NR_unlink, path);
+#else
+#error Neither __NR_unlinkat nor __NR_unlink defined, cannot implement sys_unlink()
+#endif
+}
+
+static __attribute__((unused))
+int unlink(const char *path)
+{
+ int ret = sys_unlink(path);
+
+ if (ret < 0) {
+ SET_ERRNO(-ret);
+ ret = -1;
+ }
+ return ret;
+}
+
+
+/*
+ * pid_t wait(int *status);
+ * pid_t wait4(pid_t pid, int *status, int options, struct rusage *rusage);
+ * pid_t waitpid(pid_t pid, int *status, int options);
+ */
+
+static __attribute__((unused))
+pid_t sys_wait4(pid_t pid, int *status, int options, struct rusage *rusage)
+{
+ return my_syscall4(__NR_wait4, pid, status, options, rusage);
+}
+
+static __attribute__((unused))
+pid_t wait(int *status)
+{
+ pid_t ret = sys_wait4(-1, status, 0, NULL);
+
+ if (ret < 0) {
+ SET_ERRNO(-ret);
+ ret = -1;
+ }
+ return ret;
+}
+
+static __attribute__((unused))
+pid_t wait4(pid_t pid, int *status, int options, struct rusage *rusage)
+{
+ pid_t ret = sys_wait4(pid, status, options, rusage);
+
+ if (ret < 0) {
+ SET_ERRNO(-ret);
+ ret = -1;
+ }
+ return ret;
+}
+
+
+static __attribute__((unused))
+pid_t waitpid(pid_t pid, int *status, int options)
+{
+ pid_t ret = sys_wait4(pid, status, options, NULL);
+
+ if (ret < 0) {
+ SET_ERRNO(-ret);
+ ret = -1;
+ }
+ return ret;
+}
+
+
+/*
+ * ssize_t write(int fd, const void *buf, size_t count);
+ */
+
+static __attribute__((unused))
+ssize_t sys_write(int fd, const void *buf, size_t count)
+{
+ return my_syscall3(__NR_write, fd, buf, count);
+}
+
+static __attribute__((unused))
+ssize_t write(int fd, const void *buf, size_t count)
+{
+ ssize_t ret = sys_write(fd, buf, count);
+
+ if (ret < 0) {
+ SET_ERRNO(-ret);
+ ret = -1;
+ }
+ return ret;
+}
+
+
+#endif /* _NOLIBC_SYS_H */
diff --git a/tools/include/nolibc/time.h b/tools/include/nolibc/time.h
new file mode 100644
index 000000000000..d18b7661fdd7
--- /dev/null
+++ b/tools/include/nolibc/time.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * time function definitions for NOLIBC
+ * Copyright (C) 2017-2022 Willy Tarreau <w@1wt.eu>
+ */
+
+#ifndef _NOLIBC_TIME_H
+#define _NOLIBC_TIME_H
+
+#include "std.h"
+#include "arch.h"
+#include "types.h"
+#include "sys.h"
+
+static __attribute__((unused))
+time_t time(time_t *tptr)
+{
+ struct timeval tv;
+
+ /* note, cannot fail here */
+ sys_gettimeofday(&tv, NULL);
+
+ if (tptr)
+ *tptr = tv.tv_sec;
+ return tv.tv_sec;
+}
+
+#endif /* _NOLIBC_TIME_H */
diff --git a/tools/include/nolibc/types.h b/tools/include/nolibc/types.h
new file mode 100644
index 000000000000..959997034e55
--- /dev/null
+++ b/tools/include/nolibc/types.h
@@ -0,0 +1,205 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * Special types used by various syscalls for NOLIBC
+ * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu>
+ */
+
+#ifndef _NOLIBC_TYPES_H
+#define _NOLIBC_TYPES_H
+
+#include "std.h"
+#include <linux/time.h>
+
+
+/* Only the generic macros and types may be defined here. The arch-specific
+ * ones such as the O_RDONLY and related macros used by fcntl() and open(), or
+ * the layout of sys_stat_struct must not be defined here.
+ */
+
+/* stat flags (WARNING, octal here) */
+#define S_IFDIR 0040000
+#define S_IFCHR 0020000
+#define S_IFBLK 0060000
+#define S_IFREG 0100000
+#define S_IFIFO 0010000
+#define S_IFLNK 0120000
+#define S_IFSOCK 0140000
+#define S_IFMT 0170000
+
+#define S_ISDIR(mode) (((mode) & S_IFDIR) == S_IFDIR)
+#define S_ISCHR(mode) (((mode) & S_IFCHR) == S_IFCHR)
+#define S_ISBLK(mode) (((mode) & S_IFBLK) == S_IFBLK)
+#define S_ISREG(mode) (((mode) & S_IFREG) == S_IFREG)
+#define S_ISFIFO(mode) (((mode) & S_IFIFO) == S_IFIFO)
+#define S_ISLNK(mode) (((mode) & S_IFLNK) == S_IFLNK)
+#define S_ISSOCK(mode) (((mode) & S_IFSOCK) == S_IFSOCK)
+
+/* dirent types */
+#define DT_UNKNOWN 0x0
+#define DT_FIFO 0x1
+#define DT_CHR 0x2
+#define DT_DIR 0x4
+#define DT_BLK 0x6
+#define DT_REG 0x8
+#define DT_LNK 0xa
+#define DT_SOCK 0xc
+
+/* commonly an fd_set represents 256 FDs */
+#ifndef FD_SETSIZE
+#define FD_SETSIZE 256
+#endif
+
+/* PATH_MAX and MAXPATHLEN are often used and found with plenty of different
+ * values.
+ */
+#ifndef PATH_MAX
+#define PATH_MAX 4096
+#endif
+
+#ifndef MAXPATHLEN
+#define MAXPATHLEN (PATH_MAX)
+#endif
+
+/* Special FD used by all the *at functions */
+#ifndef AT_FDCWD
+#define AT_FDCWD (-100)
+#endif
+
+/* whence values for lseek() */
+#define SEEK_SET 0
+#define SEEK_CUR 1
+#define SEEK_END 2
+
+/* cmd for reboot() */
+#define LINUX_REBOOT_MAGIC1 0xfee1dead
+#define LINUX_REBOOT_MAGIC2 0x28121969
+#define LINUX_REBOOT_CMD_HALT 0xcdef0123
+#define LINUX_REBOOT_CMD_POWER_OFF 0x4321fedc
+#define LINUX_REBOOT_CMD_RESTART 0x01234567
+#define LINUX_REBOOT_CMD_SW_SUSPEND 0xd000fce2
+
+/* Macros used on waitpid()'s return status */
+#define WEXITSTATUS(status) (((status) & 0xff00) >> 8)
+#define WIFEXITED(status) (((status) & 0x7f) == 0)
+
+/* waitpid() flags */
+#define WNOHANG 1
+
+/* standard exit() codes */
+#define EXIT_SUCCESS 0
+#define EXIT_FAILURE 1
+
+/* for select() */
+typedef struct {
+ uint32_t fd32[(FD_SETSIZE + 31) / 32];
+} fd_set;
+
+#define FD_CLR(fd, set) do { \
+ fd_set *__set = (set); \
+ int __fd = (fd); \
+ if (__fd >= 0) \
+ __set->fd32[__fd / 32] &= ~(1U << (__fd & 31)); \
+ } while (0)
+
+#define FD_SET(fd, set) do { \
+ fd_set *__set = (set); \
+ int __fd = (fd); \
+ if (__fd >= 0) \
+ __set->fd32[__fd / 32] |= 1U << (__fd & 31); \
+ } while (0)
+
+#define FD_ISSET(fd, set) ({ \
+ fd_set *__set = (set); \
+ int __fd = (fd); \
+ int __r = 0; \
+ if (__fd >= 0) \
+ __r = !!(__set->fd32[__fd / 32] & 1U << (__fd & 31)); \
+ __r; \
+ })
+
+#define FD_ZERO(set) do { \
+ fd_set *__set = (set); \
+ int __idx; \
+ for (__idx = 0; __idx < (FD_SETSIZE+31) / 32; __idx ++) \
+ __set->fd32[__idx] = 0; \
+ } while (0)
+
+/* for poll() */
+#define POLLIN 0x0001
+#define POLLPRI 0x0002
+#define POLLOUT 0x0004
+#define POLLERR 0x0008
+#define POLLHUP 0x0010
+#define POLLNVAL 0x0020
+
+struct pollfd {
+ int fd;
+ short int events;
+ short int revents;
+};
+
+/* for getdents64() */
+struct linux_dirent64 {
+ uint64_t d_ino;
+ int64_t d_off;
+ unsigned short d_reclen;
+ unsigned char d_type;
+ char d_name[];
+};
+
+/* needed by wait4() */
+struct rusage {
+ struct timeval ru_utime;
+ struct timeval ru_stime;
+ long ru_maxrss;
+ long ru_ixrss;
+ long ru_idrss;
+ long ru_isrss;
+ long ru_minflt;
+ long ru_majflt;
+ long ru_nswap;
+ long ru_inblock;
+ long ru_oublock;
+ long ru_msgsnd;
+ long ru_msgrcv;
+ long ru_nsignals;
+ long ru_nvcsw;
+ long ru_nivcsw;
+};
+
+/* The format of the struct as returned by the libc to the application, which
+ * significantly differs from the format returned by the stat() syscall flavours.
+ */
+struct stat {
+ dev_t st_dev; /* ID of device containing file */
+ ino_t st_ino; /* inode number */
+ mode_t st_mode; /* protection */
+ nlink_t st_nlink; /* number of hard links */
+ uid_t st_uid; /* user ID of owner */
+ gid_t st_gid; /* group ID of owner */
+ dev_t st_rdev; /* device ID (if special file) */
+ off_t st_size; /* total size, in bytes */
+ blksize_t st_blksize; /* blocksize for file system I/O */
+ blkcnt_t st_blocks; /* number of 512B blocks allocated */
+ time_t st_atime; /* time of last access */
+ time_t st_mtime; /* time of last modification */
+ time_t st_ctime; /* time of last status change */
+};
+
+/* WARNING, it only deals with the 4096 first majors and 256 first minors */
+#define makedev(major, minor) ((dev_t)((((major) & 0xfff) << 8) | ((minor) & 0xff)))
+#define major(dev) ((unsigned int)(((dev) >> 8) & 0xfff))
+#define minor(dev) ((unsigned int)(((dev) & 0xff))
+
+#ifndef offsetof
+#define offsetof(TYPE, FIELD) ((size_t) &((TYPE *)0)->FIELD)
+#endif
+
+#ifndef container_of
+#define container_of(PTR, TYPE, FIELD) ({ \
+ __typeof__(((TYPE *)0)->FIELD) *__FIELD_PTR = (PTR); \
+ (TYPE *)((char *) __FIELD_PTR - offsetof(TYPE, FIELD)); \
+})
+#endif
+
+#endif /* _NOLIBC_TYPES_H */
diff --git a/tools/include/nolibc/unistd.h b/tools/include/nolibc/unistd.h
new file mode 100644
index 000000000000..1c25e20ee360
--- /dev/null
+++ b/tools/include/nolibc/unistd.h
@@ -0,0 +1,54 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * unistd function definitions for NOLIBC
+ * Copyright (C) 2017-2022 Willy Tarreau <w@1wt.eu>
+ */
+
+#ifndef _NOLIBC_UNISTD_H
+#define _NOLIBC_UNISTD_H
+
+#include "std.h"
+#include "arch.h"
+#include "types.h"
+#include "sys.h"
+
+
+static __attribute__((unused))
+int msleep(unsigned int msecs)
+{
+ struct timeval my_timeval = { msecs / 1000, (msecs % 1000) * 1000 };
+
+ if (sys_select(0, 0, 0, 0, &my_timeval) < 0)
+ return (my_timeval.tv_sec * 1000) +
+ (my_timeval.tv_usec / 1000) +
+ !!(my_timeval.tv_usec % 1000);
+ else
+ return 0;
+}
+
+static __attribute__((unused))
+unsigned int sleep(unsigned int seconds)
+{
+ struct timeval my_timeval = { seconds, 0 };
+
+ if (sys_select(0, 0, 0, 0, &my_timeval) < 0)
+ return my_timeval.tv_sec + !!my_timeval.tv_usec;
+ else
+ return 0;
+}
+
+static __attribute__((unused))
+int usleep(unsigned int usecs)
+{
+ struct timeval my_timeval = { usecs / 1000000, usecs % 1000000 };
+
+ return sys_select(0, 0, 0, 0, &my_timeval);
+}
+
+static __attribute__((unused))
+int tcsetpgrp(int fd, pid_t pid)
+{
+ return ioctl(fd, TIOCSPGRP, &pid);
+}
+
+#endif /* _NOLIBC_UNISTD_H */
diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h
index 91a6fe4e02c0..6a184d260c7f 100644
--- a/tools/include/uapi/linux/kvm.h
+++ b/tools/include/uapi/linux/kvm.h
@@ -445,7 +445,13 @@ struct kvm_run {
#define KVM_SYSTEM_EVENT_RESET 2
#define KVM_SYSTEM_EVENT_CRASH 3
__u32 type;
- __u64 flags;
+ __u32 ndata;
+ union {
+#ifndef __KERNEL__
+ __u64 flags;
+#endif
+ __u64 data[16];
+ };
} system_event;
/* KVM_EXIT_S390_STSI */
struct {
@@ -1144,6 +1150,8 @@ struct kvm_ppc_resize_hpt {
#define KVM_CAP_S390_MEM_OP_EXTENSION 211
#define KVM_CAP_PMU_CAPABILITY 212
#define KVM_CAP_DISABLE_QUIRKS2 213
+/* #define KVM_CAP_VM_TSC_CONTROL 214 */
+#define KVM_CAP_SYSTEM_EVENT_DATA 215
#ifdef KVM_CAP_IRQ_ROUTING
diff --git a/tools/memory-model/README b/tools/memory-model/README
index 9edd402704c4..dab38904206a 100644
--- a/tools/memory-model/README
+++ b/tools/memory-model/README
@@ -54,7 +54,8 @@ klitmus7 Compatibility Table
-- 4.14 7.48 --
4.15 -- 4.19 7.49 --
4.20 -- 5.5 7.54 --
- 5.6 -- 7.56 --
+ 5.6 -- 5.16 7.56 --
+ 5.17 -- 7.56.1 --
============ ==========
diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index bd0c2c828940..ca5b74603008 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -184,6 +184,7 @@ static bool __dead_end_function(struct objtool_file *file, struct symbol *func,
"do_group_exit",
"stop_this_cpu",
"__invalid_creds",
+ "cpu_startup_entry",
};
if (!func)
@@ -559,12 +560,12 @@ static int add_dead_ends(struct objtool_file *file)
else if (reloc->addend == reloc->sym->sec->sh.sh_size) {
insn = find_last_insn(file, reloc->sym->sec);
if (!insn) {
- WARN("can't find unreachable insn at %s+0x%x",
+ WARN("can't find unreachable insn at %s+0x%lx",
reloc->sym->sec->name, reloc->addend);
return -1;
}
} else {
- WARN("can't find unreachable insn at %s+0x%x",
+ WARN("can't find unreachable insn at %s+0x%lx",
reloc->sym->sec->name, reloc->addend);
return -1;
}
@@ -594,12 +595,12 @@ reachable:
else if (reloc->addend == reloc->sym->sec->sh.sh_size) {
insn = find_last_insn(file, reloc->sym->sec);
if (!insn) {
- WARN("can't find reachable insn at %s+0x%x",
+ WARN("can't find reachable insn at %s+0x%lx",
reloc->sym->sec->name, reloc->addend);
return -1;
}
} else {
- WARN("can't find reachable insn at %s+0x%x",
+ WARN("can't find reachable insn at %s+0x%lx",
reloc->sym->sec->name, reloc->addend);
return -1;
}
@@ -1271,12 +1272,19 @@ static bool is_first_func_insn(struct objtool_file *file, struct instruction *in
*/
static int add_jump_destinations(struct objtool_file *file)
{
- struct instruction *insn;
+ struct instruction *insn, *jump_dest;
struct reloc *reloc;
struct section *dest_sec;
unsigned long dest_off;
for_each_insn(file, insn) {
+ if (insn->jump_dest) {
+ /*
+ * handle_group_alt() may have previously set
+ * 'jump_dest' for some alternatives.
+ */
+ continue;
+ }
if (!is_static_jump(insn))
continue;
@@ -1291,7 +1299,10 @@ static int add_jump_destinations(struct objtool_file *file)
add_retpoline_call(file, insn);
continue;
} else if (insn->func) {
- /* internal or external sibling call (with reloc) */
+ /*
+ * External sibling call or internal sibling call with
+ * STT_FUNC reloc.
+ */
add_call_dest(file, insn, reloc->sym, true);
continue;
} else if (reloc->sym->sec->idx) {
@@ -1303,17 +1314,8 @@ static int add_jump_destinations(struct objtool_file *file)
continue;
}
- insn->jump_dest = find_insn(file, dest_sec, dest_off);
- if (!insn->jump_dest) {
-
- /*
- * This is a special case where an alt instruction
- * jumps past the end of the section. These are
- * handled later in handle_group_alt().
- */
- if (!strcmp(insn->sec->name, ".altinstr_replacement"))
- continue;
-
+ jump_dest = find_insn(file, dest_sec, dest_off);
+ if (!jump_dest) {
WARN_FUNC("can't find jump dest instruction at %s+0x%lx",
insn->sec, insn->offset, dest_sec->name,
dest_off);
@@ -1323,8 +1325,8 @@ static int add_jump_destinations(struct objtool_file *file)
/*
* Cross-function jump.
*/
- if (insn->func && insn->jump_dest->func &&
- insn->func != insn->jump_dest->func) {
+ if (insn->func && jump_dest->func &&
+ insn->func != jump_dest->func) {
/*
* For GCC 8+, create parent/child links for any cold
@@ -1342,16 +1344,22 @@ static int add_jump_destinations(struct objtool_file *file)
* subfunction is through a jump table.
*/
if (!strstr(insn->func->name, ".cold") &&
- strstr(insn->jump_dest->func->name, ".cold")) {
- insn->func->cfunc = insn->jump_dest->func;
- insn->jump_dest->func->pfunc = insn->func;
+ strstr(jump_dest->func->name, ".cold")) {
+ insn->func->cfunc = jump_dest->func;
+ jump_dest->func->pfunc = insn->func;
- } else if (!same_function(insn, insn->jump_dest) &&
- is_first_func_insn(file, insn->jump_dest)) {
- /* internal sibling call (without reloc) */
- add_call_dest(file, insn, insn->jump_dest->func, true);
+ } else if (!same_function(insn, jump_dest) &&
+ is_first_func_insn(file, jump_dest)) {
+ /*
+ * Internal sibling call without reloc or with
+ * STT_SECTION reloc.
+ */
+ add_call_dest(file, insn, jump_dest->func, true);
+ continue;
}
}
+
+ insn->jump_dest = jump_dest;
}
return 0;
@@ -1540,13 +1548,13 @@ static int handle_group_alt(struct objtool_file *file,
continue;
dest_off = arch_jump_destination(insn);
- if (dest_off == special_alt->new_off + special_alt->new_len)
+ if (dest_off == special_alt->new_off + special_alt->new_len) {
insn->jump_dest = next_insn_same_sec(file, last_orig_insn);
-
- if (!insn->jump_dest) {
- WARN_FUNC("can't find alternative jump destination",
- insn->sec, insn->offset);
- return -1;
+ if (!insn->jump_dest) {
+ WARN_FUNC("can't find alternative jump destination",
+ insn->sec, insn->offset);
+ return -1;
+ }
}
}
@@ -2245,14 +2253,14 @@ static int decode_sections(struct objtool_file *file)
return ret;
/*
- * Must be before add_special_section_alts() as that depends on
- * jump_dest being set.
+ * Must be before add_jump_destinations(), which depends on 'func'
+ * being set for alternatives, to enable proper sibling call detection.
*/
- ret = add_jump_destinations(file);
+ ret = add_special_section_alts(file);
if (ret)
return ret;
- ret = add_special_section_alts(file);
+ ret = add_jump_destinations(file);
if (ret)
return ret;
@@ -3210,9 +3218,8 @@ validate_ibt_reloc(struct objtool_file *file, struct reloc *reloc)
static void warn_noendbr(const char *msg, struct section *sec, unsigned long offset,
struct instruction *dest)
{
- WARN_FUNC("%srelocation to !ENDBR: %s+0x%lx", sec, offset, msg,
- dest->func ? dest->func->name : dest->sec->name,
- dest->func ? dest->offset - dest->func->offset : dest->offset);
+ WARN_FUNC("%srelocation to !ENDBR: %s", sec, offset, msg,
+ offstr(dest->sec, dest->offset));
}
static void validate_ibt_dest(struct objtool_file *file, struct instruction *insn,
@@ -3303,7 +3310,7 @@ static int validate_branch(struct objtool_file *file, struct symbol *func,
while (1) {
next_insn = next_insn_to_validate(file, insn);
- if (file->c_file && func && insn->func && func != insn->func->pfunc) {
+ if (func && insn->func && func != insn->func->pfunc) {
WARN("%s() falls through to next function %s()",
func->name, insn->func->name);
return 1;
@@ -3816,11 +3823,8 @@ static int validate_ibt(struct objtool_file *file)
struct instruction *dest;
dest = validate_ibt_reloc(file, reloc);
- if (is_data && dest && !dest->noendbr) {
- warn_noendbr("data ", reloc->sym->sec,
- reloc->sym->offset + reloc->addend,
- dest);
- }
+ if (is_data && dest && !dest->noendbr)
+ warn_noendbr("data ", sec, reloc->offset, dest);
}
}
diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c
index d7b99a737496..ebf2ba5755c1 100644
--- a/tools/objtool/elf.c
+++ b/tools/objtool/elf.c
@@ -546,7 +546,7 @@ static struct section *elf_create_reloc_section(struct elf *elf,
int reltype);
int elf_add_reloc(struct elf *elf, struct section *sec, unsigned long offset,
- unsigned int type, struct symbol *sym, int addend)
+ unsigned int type, struct symbol *sym, long addend)
{
struct reloc *reloc;
@@ -575,37 +575,180 @@ int elf_add_reloc(struct elf *elf, struct section *sec, unsigned long offset,
return 0;
}
-int elf_add_reloc_to_insn(struct elf *elf, struct section *sec,
- unsigned long offset, unsigned int type,
- struct section *insn_sec, unsigned long insn_off)
+/*
+ * Ensure that any reloc section containing references to @sym is marked
+ * changed such that it will get re-generated in elf_rebuild_reloc_sections()
+ * with the new symbol index.
+ */
+static void elf_dirty_reloc_sym(struct elf *elf, struct symbol *sym)
+{
+ struct section *sec;
+
+ list_for_each_entry(sec, &elf->sections, list) {
+ struct reloc *reloc;
+
+ if (sec->changed)
+ continue;
+
+ list_for_each_entry(reloc, &sec->reloc_list, list) {
+ if (reloc->sym == sym) {
+ sec->changed = true;
+ break;
+ }
+ }
+ }
+}
+
+/*
+ * Move the first global symbol, as per sh_info, into a new, higher symbol
+ * index. This fees up the shndx for a new local symbol.
+ */
+static int elf_move_global_symbol(struct elf *elf, struct section *symtab,
+ struct section *symtab_shndx)
{
+ Elf_Data *data, *shndx_data = NULL;
+ Elf32_Word first_non_local;
struct symbol *sym;
- int addend;
+ Elf_Scn *s;
- if (insn_sec->sym) {
- sym = insn_sec->sym;
- addend = insn_off;
+ first_non_local = symtab->sh.sh_info;
- } else {
- /*
- * The Clang assembler strips section symbols, so we have to
- * reference the function symbol instead:
- */
- sym = find_symbol_containing(insn_sec, insn_off);
- if (!sym) {
- /*
- * Hack alert. This happens when we need to reference
- * the NOP pad insn immediately after the function.
- */
- sym = find_symbol_containing(insn_sec, insn_off - 1);
+ sym = find_symbol_by_index(elf, first_non_local);
+ if (!sym) {
+ WARN("no non-local symbols !?");
+ return first_non_local;
+ }
+
+ s = elf_getscn(elf->elf, symtab->idx);
+ if (!s) {
+ WARN_ELF("elf_getscn");
+ return -1;
+ }
+
+ data = elf_newdata(s);
+ if (!data) {
+ WARN_ELF("elf_newdata");
+ return -1;
+ }
+
+ data->d_buf = &sym->sym;
+ data->d_size = sizeof(sym->sym);
+ data->d_align = 1;
+ data->d_type = ELF_T_SYM;
+
+ sym->idx = symtab->sh.sh_size / sizeof(sym->sym);
+ elf_dirty_reloc_sym(elf, sym);
+
+ symtab->sh.sh_info += 1;
+ symtab->sh.sh_size += data->d_size;
+ symtab->changed = true;
+
+ if (symtab_shndx) {
+ s = elf_getscn(elf->elf, symtab_shndx->idx);
+ if (!s) {
+ WARN_ELF("elf_getscn");
+ return -1;
}
- if (!sym) {
- WARN("can't find symbol containing %s+0x%lx", insn_sec->name, insn_off);
+ shndx_data = elf_newdata(s);
+ if (!shndx_data) {
+ WARN_ELF("elf_newshndx_data");
return -1;
}
- addend = insn_off - sym->offset;
+ shndx_data->d_buf = &sym->sec->idx;
+ shndx_data->d_size = sizeof(Elf32_Word);
+ shndx_data->d_align = 4;
+ shndx_data->d_type = ELF_T_WORD;
+
+ symtab_shndx->sh.sh_size += 4;
+ symtab_shndx->changed = true;
+ }
+
+ return first_non_local;
+}
+
+static struct symbol *
+elf_create_section_symbol(struct elf *elf, struct section *sec)
+{
+ struct section *symtab, *symtab_shndx;
+ Elf_Data *shndx_data = NULL;
+ struct symbol *sym;
+ Elf32_Word shndx;
+
+ symtab = find_section_by_name(elf, ".symtab");
+ if (symtab) {
+ symtab_shndx = find_section_by_name(elf, ".symtab_shndx");
+ if (symtab_shndx)
+ shndx_data = symtab_shndx->data;
+ } else {
+ WARN("no .symtab");
+ return NULL;
+ }
+
+ sym = malloc(sizeof(*sym));
+ if (!sym) {
+ perror("malloc");
+ return NULL;
+ }
+ memset(sym, 0, sizeof(*sym));
+
+ sym->idx = elf_move_global_symbol(elf, symtab, symtab_shndx);
+ if (sym->idx < 0) {
+ WARN("elf_move_global_symbol");
+ return NULL;
+ }
+
+ sym->name = sec->name;
+ sym->sec = sec;
+
+ // st_name 0
+ sym->sym.st_info = GELF_ST_INFO(STB_LOCAL, STT_SECTION);
+ // st_other 0
+ // st_value 0
+ // st_size 0
+ shndx = sec->idx;
+ if (shndx >= SHN_UNDEF && shndx < SHN_LORESERVE) {
+ sym->sym.st_shndx = shndx;
+ if (!shndx_data)
+ shndx = 0;
+ } else {
+ sym->sym.st_shndx = SHN_XINDEX;
+ if (!shndx_data) {
+ WARN("no .symtab_shndx");
+ return NULL;
+ }
+ }
+
+ if (!gelf_update_symshndx(symtab->data, shndx_data, sym->idx, &sym->sym, shndx)) {
+ WARN_ELF("gelf_update_symshndx");
+ return NULL;
+ }
+
+ elf_add_symbol(elf, sym);
+
+ return sym;
+}
+
+int elf_add_reloc_to_insn(struct elf *elf, struct section *sec,
+ unsigned long offset, unsigned int type,
+ struct section *insn_sec, unsigned long insn_off)
+{
+ struct symbol *sym = insn_sec->sym;
+ int addend = insn_off;
+
+ if (!sym) {
+ /*
+ * Due to how weak functions work, we must use section based
+ * relocations. Symbol based relocations would result in the
+ * weak and non-weak function annotations being overlaid on the
+ * non-weak function after linking.
+ */
+ sym = elf_create_section_symbol(elf, insn_sec);
+ if (!sym)
+ return -1;
+
+ insn_sec->sym = sym;
}
return elf_add_reloc(elf, sec, offset, type, sym, addend);
diff --git a/tools/objtool/include/objtool/elf.h b/tools/objtool/include/objtool/elf.h
index 22ba7e2b816e..9b36802ed86f 100644
--- a/tools/objtool/include/objtool/elf.h
+++ b/tools/objtool/include/objtool/elf.h
@@ -73,7 +73,7 @@ struct reloc {
struct symbol *sym;
unsigned long offset;
unsigned int type;
- int addend;
+ long addend;
int idx;
bool jump_table_start;
};
@@ -135,7 +135,7 @@ struct elf *elf_open_read(const char *name, int flags);
struct section *elf_create_section(struct elf *elf, const char *name, unsigned int sh_flags, size_t entsize, int nr);
int elf_add_reloc(struct elf *elf, struct section *sec, unsigned long offset,
- unsigned int type, struct symbol *sym, int addend);
+ unsigned int type, struct symbol *sym, long addend);
int elf_add_reloc_to_insn(struct elf *elf, struct section *sec,
unsigned long offset, unsigned int type,
struct section *insn_sec, unsigned long insn_off);
diff --git a/tools/objtool/include/objtool/objtool.h b/tools/objtool/include/objtool/objtool.h
index 7a5c13a78f87..a6e72d916807 100644
--- a/tools/objtool/include/objtool/objtool.h
+++ b/tools/objtool/include/objtool/objtool.h
@@ -27,7 +27,7 @@ struct objtool_file {
struct list_head static_call_list;
struct list_head mcount_loc_list;
struct list_head endbr_list;
- bool ignore_unreachables, c_file, hints, rodata;
+ bool ignore_unreachables, hints, rodata;
unsigned int nr_endbr;
unsigned int nr_endbr_int;
diff --git a/tools/objtool/objtool.c b/tools/objtool/objtool.c
index b09946f4e1d6..843ff3c2f28e 100644
--- a/tools/objtool/objtool.c
+++ b/tools/objtool/objtool.c
@@ -129,7 +129,6 @@ struct objtool_file *objtool_open_read(const char *_objname)
INIT_LIST_HEAD(&file.static_call_list);
INIT_LIST_HEAD(&file.mcount_loc_list);
INIT_LIST_HEAD(&file.endbr_list);
- file.c_file = !vmlinux && find_section_by_name(file.elf, ".comment");
file.ignore_unreachables = no_unreachable;
file.hints = false;
diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config
index f3bf9297bcc0..1bd64e7404b9 100644
--- a/tools/perf/Makefile.config
+++ b/tools/perf/Makefile.config
@@ -553,9 +553,16 @@ ifndef NO_LIBELF
ifeq ($(feature-libbpf), 1)
EXTLIBS += -lbpf
$(call detected,CONFIG_LIBBPF_DYNAMIC)
+
+ $(call feature_check,libbpf-btf__load_from_kernel_by_id)
+ ifeq ($(feature-libbpf-btf__load_from_kernel_by_id), 1)
+ CFLAGS += -DHAVE_LIBBPF_BTF__LOAD_FROM_KERNEL_BY_ID
+ endif
else
dummy := $(error Error: No libbpf devel library found, please install libbpf-devel);
endif
+ else
+ CFLAGS += -DHAVE_LIBBPF_BTF__LOAD_FROM_KERNEL_BY_ID
endif
endif
diff --git a/tools/perf/arch/arm64/util/arm-spe.c b/tools/perf/arch/arm64/util/arm-spe.c
index af4d63af8072..e8b577d33e53 100644
--- a/tools/perf/arch/arm64/util/arm-spe.c
+++ b/tools/perf/arch/arm64/util/arm-spe.c
@@ -148,6 +148,7 @@ static int arm_spe_recording_options(struct auxtrace_record *itr,
bool privileged = perf_event_paranoid_check(-1);
struct evsel *tracking_evsel;
int err;
+ u64 bit;
sper->evlist = evlist;
@@ -245,6 +246,15 @@ static int arm_spe_recording_options(struct auxtrace_record *itr,
*/
evsel__set_sample_bit(arm_spe_evsel, DATA_SRC);
+ /*
+ * The PHYS_ADDR flag does not affect the driver behaviour, it is used to
+ * inform that the resulting output's SPE samples contain physical addresses
+ * where applicable.
+ */
+ bit = perf_pmu__format_bits(&arm_spe_pmu->format, "pa_enable");
+ if (arm_spe_evsel->core.attr.config & bit)
+ evsel__set_sample_bit(arm_spe_evsel, PHYS_ADDR);
+
/* Add dummy event to keep tracking */
err = parse_events(evlist, "dummy:u", NULL);
if (err)
diff --git a/tools/perf/arch/arm64/util/machine.c b/tools/perf/arch/arm64/util/machine.c
index d2ce31e28cd7..41c1596e5207 100644
--- a/tools/perf/arch/arm64/util/machine.c
+++ b/tools/perf/arch/arm64/util/machine.c
@@ -8,27 +8,6 @@
#include "callchain.h"
#include "record.h"
-/* On arm64, kernel text segment starts at high memory address,
- * for example 0xffff 0000 8xxx xxxx. Modules start at a low memory
- * address, like 0xffff 0000 00ax xxxx. When only small amount of
- * memory is used by modules, gap between end of module's text segment
- * and start of kernel text segment may reach 2G.
- * Therefore do not fill this gap and do not assign it to the kernel dso map.
- */
-
-#define SYMBOL_LIMIT (1 << 12) /* 4K */
-
-void arch__symbols__fixup_end(struct symbol *p, struct symbol *c)
-{
- if ((strchr(p->name, '[') && strchr(c->name, '[') == NULL) ||
- (strchr(p->name, '[') == NULL && strchr(c->name, '[')))
- /* Limit range of last symbol in module and kernel */
- p->end += SYMBOL_LIMIT;
- else
- p->end = c->start;
- pr_debug4("%s sym:%s end:%#" PRIx64 "\n", __func__, p->name, p->end);
-}
-
void arch__add_leaf_frame_record_opts(struct record_opts *opts)
{
opts->sample_user_regs |= sample_reg_masks[PERF_REG_ARM64_LR].mask;
diff --git a/tools/perf/arch/powerpc/util/Build b/tools/perf/arch/powerpc/util/Build
index 8a79c4126e5b..0115f3166568 100644
--- a/tools/perf/arch/powerpc/util/Build
+++ b/tools/perf/arch/powerpc/util/Build
@@ -1,5 +1,4 @@
perf-y += header.o
-perf-y += machine.o
perf-y += kvm-stat.o
perf-y += perf_regs.o
perf-y += mem-events.o
diff --git a/tools/perf/arch/powerpc/util/machine.c b/tools/perf/arch/powerpc/util/machine.c
deleted file mode 100644
index e652a1aa8132..000000000000
--- a/tools/perf/arch/powerpc/util/machine.c
+++ /dev/null
@@ -1,25 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-
-#include <inttypes.h>
-#include <stdio.h>
-#include <string.h>
-#include <internal/lib.h> // page_size
-#include "debug.h"
-#include "symbol.h"
-
-/* On powerpc kernel text segment start at memory addresses, 0xc000000000000000
- * whereas the modules are located at very high memory addresses,
- * for example 0xc00800000xxxxxxx. The gap between end of kernel text segment
- * and beginning of first module's text segment is very high.
- * Therefore do not fill this gap and do not assign it to the kernel dso map.
- */
-
-void arch__symbols__fixup_end(struct symbol *p, struct symbol *c)
-{
- if (strchr(p->name, '[') == NULL && strchr(c->name, '['))
- /* Limit the range of last kernel symbol */
- p->end += page_size;
- else
- p->end = c->start;
- pr_debug4("%s sym:%s end:%#" PRIx64 "\n", __func__, p->name, p->end);
-}
diff --git a/tools/perf/arch/s390/util/machine.c b/tools/perf/arch/s390/util/machine.c
index 7644a4f6d4a4..98bc3f39d5f3 100644
--- a/tools/perf/arch/s390/util/machine.c
+++ b/tools/perf/arch/s390/util/machine.c
@@ -35,19 +35,3 @@ int arch__fix_module_text_start(u64 *start, u64 *size, const char *name)
return 0;
}
-
-/* On s390 kernel text segment start is located at very low memory addresses,
- * for example 0x10000. Modules are located at very high memory addresses,
- * for example 0x3ff xxxx xxxx. The gap between end of kernel text segment
- * and beginning of first module's text segment is very big.
- * Therefore do not fill this gap and do not assign it to the kernel dso map.
- */
-void arch__symbols__fixup_end(struct symbol *p, struct symbol *c)
-{
- if (strchr(p->name, '[') == NULL && strchr(c->name, '['))
- /* Last kernel symbol mapped to end of page */
- p->end = roundup(p->end, page_size);
- else
- p->end = c->start;
- pr_debug4("%s sym:%s end:%#" PRIx64 "\n", __func__, p->name, p->end);
-}
diff --git a/tools/perf/arch/x86/util/perf_regs.c b/tools/perf/arch/x86/util/perf_regs.c
index 207c56805c55..0ed177991ad0 100644
--- a/tools/perf/arch/x86/util/perf_regs.c
+++ b/tools/perf/arch/x86/util/perf_regs.c
@@ -9,6 +9,8 @@
#include "../../../util/perf_regs.h"
#include "../../../util/debug.h"
#include "../../../util/event.h"
+#include "../../../util/pmu.h"
+#include "../../../util/pmu-hybrid.h"
const struct sample_reg sample_reg_masks[] = {
SMPL_REG(AX, PERF_REG_X86_AX),
@@ -284,12 +286,22 @@ uint64_t arch__intr_reg_mask(void)
.disabled = 1,
.exclude_kernel = 1,
};
+ struct perf_pmu *pmu;
int fd;
/*
* In an unnamed union, init it here to build on older gcc versions
*/
attr.sample_period = 1;
+ if (perf_pmu__has_hybrid()) {
+ /*
+ * The same register set is supported among different hybrid PMUs.
+ * Only check the first available one.
+ */
+ pmu = list_first_entry(&perf_pmu__hybrid_pmus, typeof(*pmu), hybrid_list);
+ attr.config |= (__u64)pmu->type << PERF_PMU_TYPE_SHIFT;
+ }
+
event_attr_init(&attr);
fd = sys_perf_event_open(&attr, 0, -1, -1, 0);
diff --git a/tools/perf/bench/numa.c b/tools/perf/bench/numa.c
index 44e1f8a44087..20eed1e53f80 100644
--- a/tools/perf/bench/numa.c
+++ b/tools/perf/bench/numa.c
@@ -311,6 +311,7 @@ err_out:
/* BUG_ON due to failure in allocation of orig_mask/mask */
BUG_ON(-1);
+ return NULL;
}
static cpu_set_t *bind_to_node(int target_node)
@@ -364,6 +365,7 @@ err_out:
/* BUG_ON due to failure in allocation of orig_mask/mask */
BUG_ON(-1);
+ return NULL;
}
static void bind_to_cpumask(cpu_set_t *mask)
@@ -1738,7 +1740,7 @@ static int __bench_numa(const char *name)
"GB/sec,", "total-speed", "GB/sec total speed");
if (g->p.show_details >= 2) {
- char tname[14 + 2 * 10 + 1];
+ char tname[14 + 2 * 11 + 1];
struct thread_data *td;
for (p = 0; p < g->p.nr_proc; p++) {
for (t = 0; t < g->p.nr_threads; t++) {
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 1ad75c7ba074..afe4a5539ecc 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -353,6 +353,7 @@ static int report__setup_sample_type(struct report *rep)
struct perf_session *session = rep->session;
u64 sample_type = evlist__combined_sample_type(session->evlist);
bool is_pipe = perf_data__is_pipe(session->data);
+ struct evsel *evsel;
if (session->itrace_synth_opts->callchain ||
session->itrace_synth_opts->add_callchain ||
@@ -407,6 +408,19 @@ static int report__setup_sample_type(struct report *rep)
}
if (sort__mode == SORT_MODE__MEMORY) {
+ /*
+ * FIXUP: prior to kernel 5.18, Arm SPE missed to set
+ * PERF_SAMPLE_DATA_SRC bit in sample type. For backward
+ * compatibility, set the bit if it's an old perf data file.
+ */
+ evlist__for_each_entry(session->evlist, evsel) {
+ if (strstr(evsel->name, "arm_spe") &&
+ !(sample_type & PERF_SAMPLE_DATA_SRC)) {
+ evsel->core.attr.sample_type |= PERF_SAMPLE_DATA_SRC;
+ sample_type |= PERF_SAMPLE_DATA_SRC;
+ }
+ }
+
if (!is_pipe && !(sample_type & PERF_SAMPLE_DATA_SRC)) {
ui__error("Selected --mem-mode but no mem data. "
"Did you call perf record without -d?\n");
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index a2f117936188..cf5eab5431b4 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -461,7 +461,7 @@ static int evsel__check_attr(struct evsel *evsel, struct perf_session *session)
return -EINVAL;
if (PRINT_FIELD(DATA_SRC) &&
- evsel__check_stype(evsel, PERF_SAMPLE_DATA_SRC, "DATA_SRC", PERF_OUTPUT_DATA_SRC))
+ evsel__do_check_stype(evsel, PERF_SAMPLE_DATA_SRC, "DATA_SRC", PERF_OUTPUT_DATA_SRC, allow_user_set))
return -EINVAL;
if (PRINT_FIELD(WEIGHT) &&
diff --git a/tools/perf/tests/attr/README b/tools/perf/tests/attr/README
index 454505d343fa..eb3f7d4bb324 100644
--- a/tools/perf/tests/attr/README
+++ b/tools/perf/tests/attr/README
@@ -60,6 +60,7 @@ Following tests are defined (with perf commands):
perf record -R kill (test-record-raw)
perf record -c 2 -e arm_spe_0// -- kill (test-record-spe-period)
perf record -e arm_spe_0/period=3/ -- kill (test-record-spe-period-term)
+ perf record -e arm_spe_0/pa_enable=1/ -- kill (test-record-spe-physical-address)
perf stat -e cycles kill (test-stat-basic)
perf stat kill (test-stat-default)
perf stat -d kill (test-stat-detailed-1)
diff --git a/tools/perf/tests/attr/test-record-spe-physical-address b/tools/perf/tests/attr/test-record-spe-physical-address
new file mode 100644
index 000000000000..7ebcf5012ce3
--- /dev/null
+++ b/tools/perf/tests/attr/test-record-spe-physical-address
@@ -0,0 +1,12 @@
+[config]
+command = record
+args = --no-bpf-event -e arm_spe_0/pa_enable=1/ -- kill >/dev/null 2>&1
+ret = 1
+arch = aarch64
+
+[event-10:base-record-spe]
+# 622727 is the decimal of IP|TID|TIME|CPU|IDENTIFIER|DATA_SRC|PHYS_ADDR
+sample_type=622727
+
+# dummy event
+[event-1:base-record-spe] \ No newline at end of file
diff --git a/tools/perf/tests/bpf.c b/tools/perf/tests/bpf.c
index 57b9591f7cbb..17c023823713 100644
--- a/tools/perf/tests/bpf.c
+++ b/tools/perf/tests/bpf.c
@@ -222,11 +222,11 @@ static int __test__bpf(int idx)
ret = test_llvm__fetch_bpf_obj(&obj_buf, &obj_buf_sz,
bpf_testcase_table[idx].prog_id,
- true, NULL);
+ false, NULL);
if (ret != TEST_OK || !obj_buf || !obj_buf_sz) {
pr_debug("Unable to get BPF object, %s\n",
bpf_testcase_table[idx].msg_compile_fail);
- if (idx == 0)
+ if ((idx == 0) || (ret == TEST_SKIP))
return TEST_SKIP;
else
return TEST_FAIL;
@@ -364,9 +364,11 @@ static int test__bpf_prologue_test(struct test_suite *test __maybe_unused,
static struct test_case bpf_tests[] = {
#ifdef HAVE_LIBBPF_SUPPORT
TEST_CASE("Basic BPF filtering", basic_bpf_test),
- TEST_CASE("BPF pinning", bpf_pinning),
+ TEST_CASE_REASON("BPF pinning", bpf_pinning,
+ "clang isn't installed or environment missing BPF support"),
#ifdef HAVE_BPF_PROLOGUE
- TEST_CASE("BPF prologue generation", bpf_prologue_test),
+ TEST_CASE_REASON("BPF prologue generation", bpf_prologue_test,
+ "clang isn't installed or environment missing BPF support"),
#else
TEST_CASE_REASON("BPF prologue generation", bpf_prologue_test, "not compiled in"),
#endif
diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c
index fac3717d9ba1..d336cda94a11 100644
--- a/tools/perf/tests/builtin-test.c
+++ b/tools/perf/tests/builtin-test.c
@@ -279,6 +279,7 @@ static const char *shell_test__description(char *description, size_t size,
{
FILE *fp;
char filename[PATH_MAX];
+ int ch;
path__join(filename, sizeof(filename), path, name);
fp = fopen(filename, "r");
@@ -286,7 +287,9 @@ static const char *shell_test__description(char *description, size_t size,
return NULL;
/* Skip shebang */
- while (fgetc(fp) != '\n');
+ do {
+ ch = fgetc(fp);
+ } while (ch != EOF && ch != '\n');
description = fgets(description, size, fp);
fclose(fp);
@@ -417,7 +420,8 @@ static int run_shell_tests(int argc, const char *argv[], int i, int width,
.priv = &st,
};
- if (!perf_test__matches(test_suite.desc, curr, argc, argv))
+ if (test_suite.desc == NULL ||
+ !perf_test__matches(test_suite.desc, curr, argc, argv))
continue;
st.file = ent->d_name;
diff --git a/tools/perf/tests/perf-time-to-tsc.c b/tools/perf/tests/perf-time-to-tsc.c
index cc6df49a65a1..4ad0dfbc8b21 100644
--- a/tools/perf/tests/perf-time-to-tsc.c
+++ b/tools/perf/tests/perf-time-to-tsc.c
@@ -123,6 +123,10 @@ static int test__perf_time_to_tsc(struct test_suite *test __maybe_unused, int su
evsel->core.attr.enable_on_exec = 0;
}
+ if (evlist__open(evlist) == -ENOENT) {
+ err = TEST_SKIP;
+ goto out_err;
+ }
CHECK__(evlist__open(evlist));
CHECK__(evlist__mmap(evlist, UINT_MAX));
diff --git a/tools/perf/tests/shell/stat_all_pmu.sh b/tools/perf/tests/shell/stat_all_pmu.sh
index b30dba455f36..9c9ef33e0b3c 100755
--- a/tools/perf/tests/shell/stat_all_pmu.sh
+++ b/tools/perf/tests/shell/stat_all_pmu.sh
@@ -5,6 +5,16 @@
set -e
for p in $(perf list --raw-dump pmu); do
+ # In powerpc, skip the events for hv_24x7 and hv_gpci.
+ # These events needs input values to be filled in for
+ # core, chip, partition id based on system.
+ # Example: hv_24x7/CPM_ADJUNCT_INST,domain=?,core=?/
+ # hv_gpci/event,partition_id=?/
+ # Hence skip these events for ppc.
+ if echo "$p" |grep -Eq 'hv_24x7|hv_gpci' ; then
+ echo "Skipping: Event '$p' in powerpc"
+ continue
+ fi
echo "Testing $p"
result=$(perf stat -e "$p" true 2>&1)
if ! echo "$result" | grep -q "$p" && ! echo "$result" | grep -q "<not supported>" ; then
diff --git a/tools/perf/tests/shell/test_arm_coresight.sh b/tools/perf/tests/shell/test_arm_coresight.sh
index 6de53b7ef5ff..e4cb4f1806ff 100755
--- a/tools/perf/tests/shell/test_arm_coresight.sh
+++ b/tools/perf/tests/shell/test_arm_coresight.sh
@@ -29,7 +29,6 @@ cleanup_files()
rm -f ${file}
rm -f "${perfdata}.old"
trap - exit term int
- kill -2 $$
exit $glb_err
}
diff --git a/tools/perf/tests/topology.c b/tools/perf/tests/topology.c
index ee1e3dcbc0bd..d23a9e322ff5 100644
--- a/tools/perf/tests/topology.c
+++ b/tools/perf/tests/topology.c
@@ -109,6 +109,17 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map)
&& strncmp(session->header.env.arch, "aarch64", 7))
return TEST_SKIP;
+ /*
+ * In powerpc pSeries platform, not all the topology information
+ * are exposed via sysfs. Due to restriction, detail like
+ * physical_package_id will be set to -1. Hence skip this
+ * test if physical_package_id returns -1 for cpu from perf_cpu_map.
+ */
+ if (strncmp(session->header.env.arch, "powerpc", 7)) {
+ if (cpu__get_socket_id(perf_cpu_map__cpu(map, 0)) == -1)
+ return TEST_SKIP;
+ }
+
TEST_ASSERT_VAL("Session header CPU map not set", session->header.env.cpu);
for (i = 0; i < session->header.env.nr_cpus_avail; i++) {
diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c
index d2b64e3f588b..1a80151baed9 100644
--- a/tools/perf/util/arm-spe.c
+++ b/tools/perf/util/arm-spe.c
@@ -1033,10 +1033,11 @@ arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session)
memset(&attr, 0, sizeof(struct perf_event_attr));
attr.size = sizeof(struct perf_event_attr);
attr.type = PERF_TYPE_HARDWARE;
- attr.sample_type = evsel->core.attr.sample_type & PERF_SAMPLE_MASK;
+ attr.sample_type = evsel->core.attr.sample_type &
+ (PERF_SAMPLE_MASK | PERF_SAMPLE_PHYS_ADDR);
attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID |
PERF_SAMPLE_PERIOD | PERF_SAMPLE_DATA_SRC |
- PERF_SAMPLE_WEIGHT;
+ PERF_SAMPLE_WEIGHT | PERF_SAMPLE_ADDR;
if (spe->timeless_decoding)
attr.sample_type &= ~(u64)PERF_SAMPLE_TIME;
else
diff --git a/tools/perf/util/bpf-event.c b/tools/perf/util/bpf-event.c
index 94624733af7e..8271ab764eb5 100644
--- a/tools/perf/util/bpf-event.c
+++ b/tools/perf/util/bpf-event.c
@@ -22,7 +22,8 @@
#include "record.h"
#include "util/synthetic-events.h"
-struct btf * __weak btf__load_from_kernel_by_id(__u32 id)
+#ifndef HAVE_LIBBPF_BTF__LOAD_FROM_KERNEL_BY_ID
+struct btf *btf__load_from_kernel_by_id(__u32 id)
{
struct btf *btf;
#pragma GCC diagnostic push
@@ -32,6 +33,7 @@ struct btf * __weak btf__load_from_kernel_by_id(__u32 id)
return err ? ERR_PTR(err) : btf;
}
+#endif
int __weak bpf_prog_load(enum bpf_prog_type prog_type,
const char *prog_name __maybe_unused,
diff --git a/tools/perf/util/c++/clang.cpp b/tools/perf/util/c++/clang.cpp
index df7b18fb6b6e..1aad7d6d34aa 100644
--- a/tools/perf/util/c++/clang.cpp
+++ b/tools/perf/util/c++/clang.cpp
@@ -20,7 +20,11 @@
#include "llvm/Option/Option.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/ManagedStatic.h"
+#if CLANG_VERSION_MAJOR >= 14
+#include "llvm/MC/TargetRegistry.h"
+#else
#include "llvm/Support/TargetRegistry.h"
+#endif
#include "llvm/Support/TargetSelect.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 45a30040ec8d..a7f93f5a1ac8 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -1151,9 +1151,20 @@ static void branch_stack__printf(struct perf_sample *sample, bool callstack)
struct branch_entry *entries = perf_sample__branch_entries(sample);
uint64_t i;
- printf("%s: nr:%" PRIu64 "\n",
- !callstack ? "... branch stack" : "... branch callstack",
- sample->branch_stack->nr);
+ if (!callstack) {
+ printf("%s: nr:%" PRIu64 "\n", "... branch stack", sample->branch_stack->nr);
+ } else {
+ /* the reason of adding 1 to nr is because after expanding
+ * branch stack it generates nr + 1 callstack records. e.g.,
+ * B()->C()
+ * A()->B()
+ * the final callstack should be:
+ * C()
+ * B()
+ * A()
+ */
+ printf("%s: nr:%" PRIu64 "\n", "... branch callstack", sample->branch_stack->nr+1);
+ }
for (i = 0; i < sample->branch_stack->nr; i++) {
struct branch_entry *e = &entries[i];
@@ -1169,8 +1180,13 @@ static void branch_stack__printf(struct perf_sample *sample, bool callstack)
(unsigned)e->flags.reserved,
e->flags.type ? branch_type_name(e->flags.type) : "");
} else {
- printf("..... %2"PRIu64": %016" PRIx64 "\n",
- i, i > 0 ? e->from : e->to);
+ if (i == 0) {
+ printf("..... %2"PRIu64": %016" PRIx64 "\n"
+ "..... %2"PRIu64": %016" PRIx64 "\n",
+ i, e->to, i+1, e->from);
+ } else {
+ printf("..... %2"PRIu64": %016" PRIx64 "\n", i+1, e->from);
+ }
}
}
}
@@ -2576,7 +2592,7 @@ int perf_session__process_events(struct perf_session *session)
if (perf_data__is_pipe(session->data))
return __perf_session__process_pipe_events(session);
- if (perf_data__is_dir(session->data))
+ if (perf_data__is_dir(session->data) && session->data->dir.nr)
return __perf_session__process_dir_events(session);
return __perf_session__process_events(session);
diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c
index 817a2de264b4..c1af37e11f98 100644
--- a/tools/perf/util/stat.c
+++ b/tools/perf/util/stat.c
@@ -472,9 +472,10 @@ int perf_stat_process_counter(struct perf_stat_config *config,
int perf_event__process_stat_event(struct perf_session *session,
union perf_event *event)
{
- struct perf_counts_values count;
+ struct perf_counts_values count, *ptr;
struct perf_record_stat *st = &event->stat;
struct evsel *counter;
+ int cpu_map_idx;
count.val = st->val;
count.ena = st->ena;
@@ -485,8 +486,18 @@ int perf_event__process_stat_event(struct perf_session *session,
pr_err("Failed to resolve counter for stat event.\n");
return -EINVAL;
}
-
- *perf_counts(counter->counts, st->cpu, st->thread) = count;
+ cpu_map_idx = perf_cpu_map__idx(evsel__cpus(counter), (struct perf_cpu){.cpu = st->cpu});
+ if (cpu_map_idx == -1) {
+ pr_err("Invalid CPU %d for event %s.\n", st->cpu, evsel__name(counter));
+ return -EINVAL;
+ }
+ ptr = perf_counts(counter->counts, cpu_map_idx, st->thread);
+ if (ptr == NULL) {
+ pr_err("Failed to find perf count for CPU %d thread %d on event %s.\n",
+ st->cpu, st->thread, evsel__name(counter));
+ return -EINVAL;
+ }
+ *ptr = count;
counter->supported = true;
return 0;
}
diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c
index 31cd59a2b66e..ecd377938eea 100644
--- a/tools/perf/util/symbol-elf.c
+++ b/tools/perf/util/symbol-elf.c
@@ -1290,7 +1290,7 @@ dso__load_sym_internal(struct dso *dso, struct map *map, struct symsrc *syms_ss,
* For misannotated, zeroed, ASM function sizes.
*/
if (nr > 0) {
- symbols__fixup_end(&dso->symbols);
+ symbols__fixup_end(&dso->symbols, false);
symbols__fixup_duplicate(&dso->symbols);
if (kmap) {
/*
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index dea0fc495185..f72baf636724 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -101,11 +101,6 @@ static int prefix_underscores_count(const char *str)
return tail - str;
}
-void __weak arch__symbols__fixup_end(struct symbol *p, struct symbol *c)
-{
- p->end = c->start;
-}
-
const char * __weak arch__normalize_symbol_name(const char *name)
{
return name;
@@ -217,7 +212,8 @@ again:
}
}
-void symbols__fixup_end(struct rb_root_cached *symbols)
+/* Update zero-sized symbols using the address of the next symbol */
+void symbols__fixup_end(struct rb_root_cached *symbols, bool is_kallsyms)
{
struct rb_node *nd, *prevnd = rb_first_cached(symbols);
struct symbol *curr, *prev;
@@ -231,8 +227,29 @@ void symbols__fixup_end(struct rb_root_cached *symbols)
prev = curr;
curr = rb_entry(nd, struct symbol, rb_node);
- if (prev->end == prev->start || prev->end != curr->start)
- arch__symbols__fixup_end(prev, curr);
+ /*
+ * On some architecture kernel text segment start is located at
+ * some low memory address, while modules are located at high
+ * memory addresses (or vice versa). The gap between end of
+ * kernel text segment and beginning of first module's text
+ * segment is very big. Therefore do not fill this gap and do
+ * not assign it to the kernel dso map (kallsyms).
+ *
+ * In kallsyms, it determines module symbols using '[' character
+ * like in:
+ * ffffffffc1937000 T hdmi_driver_init [snd_hda_codec_hdmi]
+ */
+ if (prev->end == prev->start) {
+ /* Last kernel/module symbol mapped to end of page */
+ if (is_kallsyms && (!strchr(prev->name, '[') !=
+ !strchr(curr->name, '[')))
+ prev->end = roundup(prev->end + 4096, 4096);
+ else
+ prev->end = curr->start;
+
+ pr_debug4("%s sym:%s end:%#" PRIx64 "\n",
+ __func__, prev->name, prev->end);
+ }
}
/* Last entry */
@@ -1467,7 +1484,7 @@ int __dso__load_kallsyms(struct dso *dso, const char *filename,
if (kallsyms__delta(kmap, filename, &delta))
return -1;
- symbols__fixup_end(&dso->symbols);
+ symbols__fixup_end(&dso->symbols, true);
symbols__fixup_duplicate(&dso->symbols);
if (dso->kernel == DSO_SPACE__KERNEL_GUEST)
@@ -1659,7 +1676,7 @@ int dso__load_bfd_symbols(struct dso *dso, const char *debugfile)
#undef bfd_asymbol_section
#endif
- symbols__fixup_end(&dso->symbols);
+ symbols__fixup_end(&dso->symbols, false);
symbols__fixup_duplicate(&dso->symbols);
dso->adjust_symbols = 1;
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index fbf866d82dcc..0b893dcc8ea6 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -203,7 +203,7 @@ void __symbols__insert(struct rb_root_cached *symbols, struct symbol *sym,
bool kernel);
void symbols__insert(struct rb_root_cached *symbols, struct symbol *sym);
void symbols__fixup_duplicate(struct rb_root_cached *symbols);
-void symbols__fixup_end(struct rb_root_cached *symbols);
+void symbols__fixup_end(struct rb_root_cached *symbols, bool is_kallsyms);
void maps__fixup_end(struct maps *maps);
typedef int (*mapfn_t)(u64 start, u64 len, u64 pgoff, void *data);
@@ -241,7 +241,6 @@ const char *arch__normalize_symbol_name(const char *name);
#define SYMBOL_A 0
#define SYMBOL_B 1
-void arch__symbols__fixup_end(struct symbol *p, struct symbol *c);
int arch__compare_symbol_names(const char *namea, const char *nameb);
int arch__compare_symbol_names_n(const char *namea, const char *nameb,
unsigned int n);
diff --git a/tools/power/x86/intel-speed-select/Makefile b/tools/power/x86/intel-speed-select/Makefile
index 846f785e278d..7221f2f55e8b 100644
--- a/tools/power/x86/intel-speed-select/Makefile
+++ b/tools/power/x86/intel-speed-select/Makefile
@@ -42,7 +42,7 @@ ISST_IN := $(OUTPUT)intel-speed-select-in.o
$(ISST_IN): prepare FORCE
$(Q)$(MAKE) $(build)=intel-speed-select
$(OUTPUT)intel-speed-select: $(ISST_IN)
- $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $< -o $@
+ $(QUIET_LINK)$(CC) $(CFLAGS) $< $(LDFLAGS) -o $@
clean:
rm -f $(ALL_PROGRAMS)
diff --git a/tools/testing/radix-tree/linux.c b/tools/testing/radix-tree/linux.c
index 81539f543954..d5c1bcba86fe 100644
--- a/tools/testing/radix-tree/linux.c
+++ b/tools/testing/radix-tree/linux.c
@@ -25,7 +25,8 @@ struct kmem_cache {
void (*ctor)(void *);
};
-void *kmem_cache_alloc(struct kmem_cache *cachep, int gfp)
+void *kmem_cache_alloc_lru(struct kmem_cache *cachep, struct list_lru *lru,
+ int gfp)
{
void *p;
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/vxlan_flooding_ipv6.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/vxlan_flooding_ipv6.sh
index 429f7ee735cf..fd23c80eba31 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/vxlan_flooding_ipv6.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/vxlan_flooding_ipv6.sh
@@ -159,6 +159,17 @@ flooding_remotes_add()
local lsb
local i
+ # Prevent unwanted packets from entering the bridge and interfering
+ # with the test.
+ tc qdisc add dev br0 clsact
+ tc filter add dev br0 egress protocol all pref 1 handle 1 \
+ matchall skip_hw action drop
+ tc qdisc add dev $h1 clsact
+ tc filter add dev $h1 egress protocol all pref 1 handle 1 \
+ flower skip_hw dst_mac de:ad:be:ef:13:37 action pass
+ tc filter add dev $h1 egress protocol all pref 2 handle 2 \
+ matchall skip_hw action drop
+
for i in $(eval echo {1..$num_remotes}); do
lsb=$((i + 1))
@@ -195,6 +206,12 @@ flooding_filters_del()
done
tc qdisc del dev $rp2 clsact
+
+ tc filter del dev $h1 egress protocol all pref 2 handle 2 matchall
+ tc filter del dev $h1 egress protocol all pref 1 handle 1 flower
+ tc qdisc del dev $h1 clsact
+ tc filter del dev br0 egress protocol all pref 1 handle 1 matchall
+ tc qdisc del dev br0 clsact
}
flooding_check_packets()
diff --git a/tools/testing/selftests/drivers/net/mlxsw/vxlan_flooding.sh b/tools/testing/selftests/drivers/net/mlxsw/vxlan_flooding.sh
index fedcb7b35af9..af5ea50ed5c0 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/vxlan_flooding.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/vxlan_flooding.sh
@@ -172,6 +172,17 @@ flooding_filters_add()
local lsb
local i
+ # Prevent unwanted packets from entering the bridge and interfering
+ # with the test.
+ tc qdisc add dev br0 clsact
+ tc filter add dev br0 egress protocol all pref 1 handle 1 \
+ matchall skip_hw action drop
+ tc qdisc add dev $h1 clsact
+ tc filter add dev $h1 egress protocol all pref 1 handle 1 \
+ flower skip_hw dst_mac de:ad:be:ef:13:37 action pass
+ tc filter add dev $h1 egress protocol all pref 2 handle 2 \
+ matchall skip_hw action drop
+
tc qdisc add dev $rp2 clsact
for i in $(eval echo {1..$num_remotes}); do
@@ -194,6 +205,12 @@ flooding_filters_del()
done
tc qdisc del dev $rp2 clsact
+
+ tc filter del dev $h1 egress protocol all pref 2 handle 2 matchall
+ tc filter del dev $h1 egress protocol all pref 1 handle 1 flower
+ tc qdisc del dev $h1 clsact
+ tc filter del dev br0 egress protocol all pref 1 handle 1 matchall
+ tc qdisc del dev br0 clsact
}
flooding_check_packets()
diff --git a/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh b/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh
index eaf8a04a7ca5..10e54bcca7a9 100755
--- a/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh
+++ b/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh
@@ -190,7 +190,7 @@ setup_prepare()
tc filter add dev $eth0 ingress chain $(IS2 0 0) pref 1 \
protocol ipv4 flower skip_sw ip_proto udp dst_port 5201 \
- action police rate 50mbit burst 64k \
+ action police rate 50mbit burst 64k conform-exceed drop/pipe \
action goto chain $(IS2 1 0)
}
diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h
index 37db341d4cc5..d0d51adec76e 100644
--- a/tools/testing/selftests/kvm/include/x86_64/processor.h
+++ b/tools/testing/selftests/kvm/include/x86_64/processor.h
@@ -60,6 +60,23 @@
/* CPUID.0x8000_0001.EDX */
#define CPUID_GBPAGES (1ul << 26)
+/* Page table bitfield declarations */
+#define PTE_PRESENT_MASK BIT_ULL(0)
+#define PTE_WRITABLE_MASK BIT_ULL(1)
+#define PTE_USER_MASK BIT_ULL(2)
+#define PTE_ACCESSED_MASK BIT_ULL(5)
+#define PTE_DIRTY_MASK BIT_ULL(6)
+#define PTE_LARGE_MASK BIT_ULL(7)
+#define PTE_GLOBAL_MASK BIT_ULL(8)
+#define PTE_NX_MASK BIT_ULL(63)
+
+#define PAGE_SHIFT 12
+#define PAGE_SIZE (1ULL << PAGE_SHIFT)
+#define PAGE_MASK (~(PAGE_SIZE-1))
+
+#define PHYSICAL_PAGE_MASK GENMASK_ULL(51, 12)
+#define PTE_GET_PFN(pte) (((pte) & PHYSICAL_PAGE_MASK) >> PAGE_SHIFT)
+
/* General Registers in 64-Bit Mode */
struct gpr64_regs {
u64 rax;
diff --git a/tools/testing/selftests/kvm/kvm_page_table_test.c b/tools/testing/selftests/kvm/kvm_page_table_test.c
index ba1fdc3dcf4a..2c4a7563a4f8 100644
--- a/tools/testing/selftests/kvm/kvm_page_table_test.c
+++ b/tools/testing/selftests/kvm/kvm_page_table_test.c
@@ -278,7 +278,7 @@ static struct kvm_vm *pre_init_before_test(enum vm_guest_mode mode, void *arg)
else
guest_test_phys_mem = p->phys_offset;
#ifdef __s390x__
- alignment = max(0x100000, alignment);
+ alignment = max(0x100000UL, alignment);
#endif
guest_test_phys_mem = align_down(guest_test_phys_mem, alignment);
diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c
index 9f000dfb5594..33ea5e9955d9 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/processor.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c
@@ -19,38 +19,6 @@
vm_vaddr_t exception_handlers;
-/* Virtual translation table structure declarations */
-struct pageUpperEntry {
- uint64_t present:1;
- uint64_t writable:1;
- uint64_t user:1;
- uint64_t write_through:1;
- uint64_t cache_disable:1;
- uint64_t accessed:1;
- uint64_t ignored_06:1;
- uint64_t page_size:1;
- uint64_t ignored_11_08:4;
- uint64_t pfn:40;
- uint64_t ignored_62_52:11;
- uint64_t execute_disable:1;
-};
-
-struct pageTableEntry {
- uint64_t present:1;
- uint64_t writable:1;
- uint64_t user:1;
- uint64_t write_through:1;
- uint64_t cache_disable:1;
- uint64_t accessed:1;
- uint64_t dirty:1;
- uint64_t reserved_07:1;
- uint64_t global:1;
- uint64_t ignored_11_09:3;
- uint64_t pfn:40;
- uint64_t ignored_62_52:11;
- uint64_t execute_disable:1;
-};
-
void regs_dump(FILE *stream, struct kvm_regs *regs,
uint8_t indent)
{
@@ -195,23 +163,21 @@ static void *virt_get_pte(struct kvm_vm *vm, uint64_t pt_pfn, uint64_t vaddr,
return &page_table[index];
}
-static struct pageUpperEntry *virt_create_upper_pte(struct kvm_vm *vm,
- uint64_t pt_pfn,
- uint64_t vaddr,
- uint64_t paddr,
- int level,
- enum x86_page_size page_size)
+static uint64_t *virt_create_upper_pte(struct kvm_vm *vm,
+ uint64_t pt_pfn,
+ uint64_t vaddr,
+ uint64_t paddr,
+ int level,
+ enum x86_page_size page_size)
{
- struct pageUpperEntry *pte = virt_get_pte(vm, pt_pfn, vaddr, level);
-
- if (!pte->present) {
- pte->writable = true;
- pte->present = true;
- pte->page_size = (level == page_size);
- if (pte->page_size)
- pte->pfn = paddr >> vm->page_shift;
+ uint64_t *pte = virt_get_pte(vm, pt_pfn, vaddr, level);
+
+ if (!(*pte & PTE_PRESENT_MASK)) {
+ *pte = PTE_PRESENT_MASK | PTE_WRITABLE_MASK;
+ if (level == page_size)
+ *pte |= PTE_LARGE_MASK | (paddr & PHYSICAL_PAGE_MASK);
else
- pte->pfn = vm_alloc_page_table(vm) >> vm->page_shift;
+ *pte |= vm_alloc_page_table(vm) & PHYSICAL_PAGE_MASK;
} else {
/*
* Entry already present. Assert that the caller doesn't want
@@ -221,7 +187,7 @@ static struct pageUpperEntry *virt_create_upper_pte(struct kvm_vm *vm,
TEST_ASSERT(level != page_size,
"Cannot create hugepage at level: %u, vaddr: 0x%lx\n",
page_size, vaddr);
- TEST_ASSERT(!pte->page_size,
+ TEST_ASSERT(!(*pte & PTE_LARGE_MASK),
"Cannot create page table at level: %u, vaddr: 0x%lx\n",
level, vaddr);
}
@@ -232,8 +198,8 @@ void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
enum x86_page_size page_size)
{
const uint64_t pg_size = 1ull << ((page_size * 9) + 12);
- struct pageUpperEntry *pml4e, *pdpe, *pde;
- struct pageTableEntry *pte;
+ uint64_t *pml4e, *pdpe, *pde;
+ uint64_t *pte;
TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K,
"Unknown or unsupported guest mode, mode: 0x%x", vm->mode);
@@ -257,24 +223,22 @@ void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
*/
pml4e = virt_create_upper_pte(vm, vm->pgd >> vm->page_shift,
vaddr, paddr, 3, page_size);
- if (pml4e->page_size)
+ if (*pml4e & PTE_LARGE_MASK)
return;
- pdpe = virt_create_upper_pte(vm, pml4e->pfn, vaddr, paddr, 2, page_size);
- if (pdpe->page_size)
+ pdpe = virt_create_upper_pte(vm, PTE_GET_PFN(*pml4e), vaddr, paddr, 2, page_size);
+ if (*pdpe & PTE_LARGE_MASK)
return;
- pde = virt_create_upper_pte(vm, pdpe->pfn, vaddr, paddr, 1, page_size);
- if (pde->page_size)
+ pde = virt_create_upper_pte(vm, PTE_GET_PFN(*pdpe), vaddr, paddr, 1, page_size);
+ if (*pde & PTE_LARGE_MASK)
return;
/* Fill in page table entry. */
- pte = virt_get_pte(vm, pde->pfn, vaddr, 0);
- TEST_ASSERT(!pte->present,
+ pte = virt_get_pte(vm, PTE_GET_PFN(*pde), vaddr, 0);
+ TEST_ASSERT(!(*pte & PTE_PRESENT_MASK),
"PTE already present for 4k page at vaddr: 0x%lx\n", vaddr);
- pte->pfn = paddr >> vm->page_shift;
- pte->writable = true;
- pte->present = 1;
+ *pte = PTE_PRESENT_MASK | PTE_WRITABLE_MASK | (paddr & PHYSICAL_PAGE_MASK);
}
void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr)
@@ -282,22 +246,22 @@ void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr)
__virt_pg_map(vm, vaddr, paddr, X86_PAGE_SIZE_4K);
}
-static struct pageTableEntry *_vm_get_page_table_entry(struct kvm_vm *vm, int vcpuid,
+static uint64_t *_vm_get_page_table_entry(struct kvm_vm *vm, int vcpuid,
uint64_t vaddr)
{
uint16_t index[4];
- struct pageUpperEntry *pml4e, *pdpe, *pde;
- struct pageTableEntry *pte;
+ uint64_t *pml4e, *pdpe, *pde;
+ uint64_t *pte;
struct kvm_cpuid_entry2 *entry;
struct kvm_sregs sregs;
int max_phy_addr;
- /* Set the bottom 52 bits. */
- uint64_t rsvd_mask = 0x000fffffffffffff;
+ uint64_t rsvd_mask = 0;
entry = kvm_get_supported_cpuid_index(0x80000008, 0);
max_phy_addr = entry->eax & 0x000000ff;
- /* Clear the bottom bits of the reserved mask. */
- rsvd_mask = (rsvd_mask >> max_phy_addr) << max_phy_addr;
+ /* Set the high bits in the reserved mask. */
+ if (max_phy_addr < 52)
+ rsvd_mask = GENMASK_ULL(51, max_phy_addr);
/*
* SDM vol 3, fig 4-11 "Formats of CR3 and Paging-Structure Entries
@@ -307,7 +271,7 @@ static struct pageTableEntry *_vm_get_page_table_entry(struct kvm_vm *vm, int vc
*/
vcpu_sregs_get(vm, vcpuid, &sregs);
if ((sregs.efer & EFER_NX) == 0) {
- rsvd_mask |= (1ull << 63);
+ rsvd_mask |= PTE_NX_MASK;
}
TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use "
@@ -329,30 +293,29 @@ static struct pageTableEntry *_vm_get_page_table_entry(struct kvm_vm *vm, int vc
index[3] = (vaddr >> 39) & 0x1ffu;
pml4e = addr_gpa2hva(vm, vm->pgd);
- TEST_ASSERT(pml4e[index[3]].present,
+ TEST_ASSERT(pml4e[index[3]] & PTE_PRESENT_MASK,
"Expected pml4e to be present for gva: 0x%08lx", vaddr);
- TEST_ASSERT((*(uint64_t*)(&pml4e[index[3]]) &
- (rsvd_mask | (1ull << 7))) == 0,
+ TEST_ASSERT((pml4e[index[3]] & (rsvd_mask | PTE_LARGE_MASK)) == 0,
"Unexpected reserved bits set.");
- pdpe = addr_gpa2hva(vm, pml4e[index[3]].pfn * vm->page_size);
- TEST_ASSERT(pdpe[index[2]].present,
+ pdpe = addr_gpa2hva(vm, PTE_GET_PFN(pml4e[index[3]]) * vm->page_size);
+ TEST_ASSERT(pdpe[index[2]] & PTE_PRESENT_MASK,
"Expected pdpe to be present for gva: 0x%08lx", vaddr);
- TEST_ASSERT(pdpe[index[2]].page_size == 0,
+ TEST_ASSERT(!(pdpe[index[2]] & PTE_LARGE_MASK),
"Expected pdpe to map a pde not a 1-GByte page.");
- TEST_ASSERT((*(uint64_t*)(&pdpe[index[2]]) & rsvd_mask) == 0,
+ TEST_ASSERT((pdpe[index[2]] & rsvd_mask) == 0,
"Unexpected reserved bits set.");
- pde = addr_gpa2hva(vm, pdpe[index[2]].pfn * vm->page_size);
- TEST_ASSERT(pde[index[1]].present,
+ pde = addr_gpa2hva(vm, PTE_GET_PFN(pdpe[index[2]]) * vm->page_size);
+ TEST_ASSERT(pde[index[1]] & PTE_PRESENT_MASK,
"Expected pde to be present for gva: 0x%08lx", vaddr);
- TEST_ASSERT(pde[index[1]].page_size == 0,
+ TEST_ASSERT(!(pde[index[1]] & PTE_LARGE_MASK),
"Expected pde to map a pte not a 2-MByte page.");
- TEST_ASSERT((*(uint64_t*)(&pde[index[1]]) & rsvd_mask) == 0,
+ TEST_ASSERT((pde[index[1]] & rsvd_mask) == 0,
"Unexpected reserved bits set.");
- pte = addr_gpa2hva(vm, pde[index[1]].pfn * vm->page_size);
- TEST_ASSERT(pte[index[0]].present,
+ pte = addr_gpa2hva(vm, PTE_GET_PFN(pde[index[1]]) * vm->page_size);
+ TEST_ASSERT(pte[index[0]] & PTE_PRESENT_MASK,
"Expected pte to be present for gva: 0x%08lx", vaddr);
return &pte[index[0]];
@@ -360,7 +323,7 @@ static struct pageTableEntry *_vm_get_page_table_entry(struct kvm_vm *vm, int vc
uint64_t vm_get_page_table_entry(struct kvm_vm *vm, int vcpuid, uint64_t vaddr)
{
- struct pageTableEntry *pte = _vm_get_page_table_entry(vm, vcpuid, vaddr);
+ uint64_t *pte = _vm_get_page_table_entry(vm, vcpuid, vaddr);
return *(uint64_t *)pte;
}
@@ -368,18 +331,17 @@ uint64_t vm_get_page_table_entry(struct kvm_vm *vm, int vcpuid, uint64_t vaddr)
void vm_set_page_table_entry(struct kvm_vm *vm, int vcpuid, uint64_t vaddr,
uint64_t pte)
{
- struct pageTableEntry *new_pte = _vm_get_page_table_entry(vm, vcpuid,
- vaddr);
+ uint64_t *new_pte = _vm_get_page_table_entry(vm, vcpuid, vaddr);
*(uint64_t *)new_pte = pte;
}
void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
{
- struct pageUpperEntry *pml4e, *pml4e_start;
- struct pageUpperEntry *pdpe, *pdpe_start;
- struct pageUpperEntry *pde, *pde_start;
- struct pageTableEntry *pte, *pte_start;
+ uint64_t *pml4e, *pml4e_start;
+ uint64_t *pdpe, *pdpe_start;
+ uint64_t *pde, *pde_start;
+ uint64_t *pte, *pte_start;
if (!vm->pgd_created)
return;
@@ -389,58 +351,58 @@ void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
fprintf(stream, "%*s index hvaddr gpaddr "
"addr w exec dirty\n",
indent, "");
- pml4e_start = (struct pageUpperEntry *) addr_gpa2hva(vm, vm->pgd);
+ pml4e_start = (uint64_t *) addr_gpa2hva(vm, vm->pgd);
for (uint16_t n1 = 0; n1 <= 0x1ffu; n1++) {
pml4e = &pml4e_start[n1];
- if (!pml4e->present)
+ if (!(*pml4e & PTE_PRESENT_MASK))
continue;
- fprintf(stream, "%*spml4e 0x%-3zx %p 0x%-12lx 0x%-10lx %u "
+ fprintf(stream, "%*spml4e 0x%-3zx %p 0x%-12lx 0x%-10llx %u "
" %u\n",
indent, "",
pml4e - pml4e_start, pml4e,
- addr_hva2gpa(vm, pml4e), (uint64_t) pml4e->pfn,
- pml4e->writable, pml4e->execute_disable);
+ addr_hva2gpa(vm, pml4e), PTE_GET_PFN(*pml4e),
+ !!(*pml4e & PTE_WRITABLE_MASK), !!(*pml4e & PTE_NX_MASK));
- pdpe_start = addr_gpa2hva(vm, pml4e->pfn * vm->page_size);
+ pdpe_start = addr_gpa2hva(vm, *pml4e & PHYSICAL_PAGE_MASK);
for (uint16_t n2 = 0; n2 <= 0x1ffu; n2++) {
pdpe = &pdpe_start[n2];
- if (!pdpe->present)
+ if (!(*pdpe & PTE_PRESENT_MASK))
continue;
- fprintf(stream, "%*spdpe 0x%-3zx %p 0x%-12lx 0x%-10lx "
+ fprintf(stream, "%*spdpe 0x%-3zx %p 0x%-12lx 0x%-10llx "
"%u %u\n",
indent, "",
pdpe - pdpe_start, pdpe,
addr_hva2gpa(vm, pdpe),
- (uint64_t) pdpe->pfn, pdpe->writable,
- pdpe->execute_disable);
+ PTE_GET_PFN(*pdpe), !!(*pdpe & PTE_WRITABLE_MASK),
+ !!(*pdpe & PTE_NX_MASK));
- pde_start = addr_gpa2hva(vm, pdpe->pfn * vm->page_size);
+ pde_start = addr_gpa2hva(vm, *pdpe & PHYSICAL_PAGE_MASK);
for (uint16_t n3 = 0; n3 <= 0x1ffu; n3++) {
pde = &pde_start[n3];
- if (!pde->present)
+ if (!(*pde & PTE_PRESENT_MASK))
continue;
fprintf(stream, "%*spde 0x%-3zx %p "
- "0x%-12lx 0x%-10lx %u %u\n",
+ "0x%-12lx 0x%-10llx %u %u\n",
indent, "", pde - pde_start, pde,
addr_hva2gpa(vm, pde),
- (uint64_t) pde->pfn, pde->writable,
- pde->execute_disable);
+ PTE_GET_PFN(*pde), !!(*pde & PTE_WRITABLE_MASK),
+ !!(*pde & PTE_NX_MASK));
- pte_start = addr_gpa2hva(vm, pde->pfn * vm->page_size);
+ pte_start = addr_gpa2hva(vm, *pde & PHYSICAL_PAGE_MASK);
for (uint16_t n4 = 0; n4 <= 0x1ffu; n4++) {
pte = &pte_start[n4];
- if (!pte->present)
+ if (!(*pte & PTE_PRESENT_MASK))
continue;
fprintf(stream, "%*spte 0x%-3zx %p "
- "0x%-12lx 0x%-10lx %u %u "
+ "0x%-12lx 0x%-10llx %u %u "
" %u 0x%-10lx\n",
indent, "",
pte - pte_start, pte,
addr_hva2gpa(vm, pte),
- (uint64_t) pte->pfn,
- pte->writable,
- pte->execute_disable,
- pte->dirty,
+ PTE_GET_PFN(*pte),
+ !!(*pte & PTE_WRITABLE_MASK),
+ !!(*pte & PTE_NX_MASK),
+ !!(*pte & PTE_DIRTY_MASK),
((uint64_t) n1 << 27)
| ((uint64_t) n2 << 18)
| ((uint64_t) n3 << 9)
@@ -558,8 +520,8 @@ static void kvm_seg_set_kernel_data_64bit(struct kvm_vm *vm, uint16_t selector,
vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
{
uint16_t index[4];
- struct pageUpperEntry *pml4e, *pdpe, *pde;
- struct pageTableEntry *pte;
+ uint64_t *pml4e, *pdpe, *pde;
+ uint64_t *pte;
TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use "
"unknown or unsupported guest mode, mode: 0x%x", vm->mode);
@@ -572,22 +534,22 @@ vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
if (!vm->pgd_created)
goto unmapped_gva;
pml4e = addr_gpa2hva(vm, vm->pgd);
- if (!pml4e[index[3]].present)
+ if (!(pml4e[index[3]] & PTE_PRESENT_MASK))
goto unmapped_gva;
- pdpe = addr_gpa2hva(vm, pml4e[index[3]].pfn * vm->page_size);
- if (!pdpe[index[2]].present)
+ pdpe = addr_gpa2hva(vm, PTE_GET_PFN(pml4e[index[3]]) * vm->page_size);
+ if (!(pdpe[index[2]] & PTE_PRESENT_MASK))
goto unmapped_gva;
- pde = addr_gpa2hva(vm, pdpe[index[2]].pfn * vm->page_size);
- if (!pde[index[1]].present)
+ pde = addr_gpa2hva(vm, PTE_GET_PFN(pdpe[index[2]]) * vm->page_size);
+ if (!(pde[index[1]] & PTE_PRESENT_MASK))
goto unmapped_gva;
- pte = addr_gpa2hva(vm, pde[index[1]].pfn * vm->page_size);
- if (!pte[index[0]].present)
+ pte = addr_gpa2hva(vm, PTE_GET_PFN(pde[index[1]]) * vm->page_size);
+ if (!(pte[index[0]] & PTE_PRESENT_MASK))
goto unmapped_gva;
- return (pte[index[0]].pfn * vm->page_size) + (gva & 0xfffu);
+ return (PTE_GET_PFN(pte[index[0]]) * vm->page_size) + (gva & ~PAGE_MASK);
unmapped_gva:
TEST_FAIL("No mapping for vm virtual address, gva: 0x%lx", gva);
diff --git a/tools/testing/selftests/kvm/x86_64/amx_test.c b/tools/testing/selftests/kvm/x86_64/amx_test.c
index 52a3ef6629e8..76f65c22796f 100644
--- a/tools/testing/selftests/kvm/x86_64/amx_test.c
+++ b/tools/testing/selftests/kvm/x86_64/amx_test.c
@@ -29,7 +29,6 @@
#define X86_FEATURE_XSAVE (1 << 26)
#define X86_FEATURE_OSXSAVE (1 << 27)
-#define PAGE_SIZE (1 << 12)
#define NUM_TILES 8
#define TILE_SIZE 1024
#define XSAVE_SIZE ((NUM_TILES * TILE_SIZE) + PAGE_SIZE)
diff --git a/tools/testing/selftests/kvm/x86_64/emulator_error_test.c b/tools/testing/selftests/kvm/x86_64/emulator_error_test.c
index f070ff0224fa..aeb3850f81bd 100644
--- a/tools/testing/selftests/kvm/x86_64/emulator_error_test.c
+++ b/tools/testing/selftests/kvm/x86_64/emulator_error_test.c
@@ -12,7 +12,6 @@
#include "vmx.h"
#define VCPU_ID 1
-#define PAGE_SIZE 4096
#define MAXPHYADDR 36
#define MEM_REGION_GVA 0x0000123456789000
diff --git a/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c b/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c
index 0d06ffa95d9d..93d77574b255 100644
--- a/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c
+++ b/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c
@@ -208,7 +208,7 @@ static bool sanity_check_pmu(struct kvm_vm *vm)
return success;
}
-static struct kvm_pmu_event_filter *make_pmu_event_filter(uint32_t nevents)
+static struct kvm_pmu_event_filter *alloc_pmu_event_filter(uint32_t nevents)
{
struct kvm_pmu_event_filter *f;
int size = sizeof(*f) + nevents * sizeof(f->events[0]);
@@ -220,19 +220,29 @@ static struct kvm_pmu_event_filter *make_pmu_event_filter(uint32_t nevents)
return f;
}
-static struct kvm_pmu_event_filter *event_filter(uint32_t action)
+
+static struct kvm_pmu_event_filter *
+create_pmu_event_filter(const uint64_t event_list[],
+ int nevents, uint32_t action)
{
struct kvm_pmu_event_filter *f;
int i;
- f = make_pmu_event_filter(ARRAY_SIZE(event_list));
+ f = alloc_pmu_event_filter(nevents);
f->action = action;
- for (i = 0; i < ARRAY_SIZE(event_list); i++)
+ for (i = 0; i < nevents; i++)
f->events[i] = event_list[i];
return f;
}
+static struct kvm_pmu_event_filter *event_filter(uint32_t action)
+{
+ return create_pmu_event_filter(event_list,
+ ARRAY_SIZE(event_list),
+ action);
+}
+
/*
* Remove the first occurrence of 'event' (if any) from the filter's
* event list.
@@ -271,6 +281,22 @@ static uint64_t test_with_filter(struct kvm_vm *vm,
return run_vm_to_sync(vm);
}
+static void test_amd_deny_list(struct kvm_vm *vm)
+{
+ uint64_t event = EVENT(0x1C2, 0);
+ struct kvm_pmu_event_filter *f;
+ uint64_t count;
+
+ f = create_pmu_event_filter(&event, 1, KVM_PMU_EVENT_DENY);
+ count = test_with_filter(vm, f);
+
+ free(f);
+ if (count != NUM_BRANCHES)
+ pr_info("%s: Branch instructions retired = %lu (expected %u)\n",
+ __func__, count, NUM_BRANCHES);
+ TEST_ASSERT(count, "Allowed PMU event is not counting");
+}
+
static void test_member_deny_list(struct kvm_vm *vm)
{
struct kvm_pmu_event_filter *f = event_filter(KVM_PMU_EVENT_DENY);
@@ -453,6 +479,9 @@ int main(int argc, char *argv[])
exit(KSFT_SKIP);
}
+ if (use_amd_pmu())
+ test_amd_deny_list(vm);
+
test_without_filter(vm);
test_member_deny_list(vm);
test_member_allow_list(vm);
diff --git a/tools/testing/selftests/kvm/x86_64/smm_test.c b/tools/testing/selftests/kvm/x86_64/smm_test.c
index a626d40fdb48..b4e0c860769e 100644
--- a/tools/testing/selftests/kvm/x86_64/smm_test.c
+++ b/tools/testing/selftests/kvm/x86_64/smm_test.c
@@ -21,8 +21,6 @@
#define VCPU_ID 1
-#define PAGE_SIZE 4096
-
#define SMRAM_SIZE 65536
#define SMRAM_MEMSLOT ((1 << 16) | 1)
#define SMRAM_PAGES (SMRAM_SIZE / PAGE_SIZE)
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c b/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c
index e683d0ac3e45..19b35c607dc6 100644
--- a/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c
+++ b/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c
@@ -32,7 +32,6 @@
#define MSR_IA32_TSC_ADJUST 0x3b
#endif
-#define PAGE_SIZE 4096
#define VCPU_ID 5
#define TSC_ADJUST_VALUE (1ll << 32)
diff --git a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c
index 865e17146815..bcd370827859 100644
--- a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c
+++ b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c
@@ -23,7 +23,6 @@
#define SHINFO_REGION_GVA 0xc0000000ULL
#define SHINFO_REGION_GPA 0xc0000000ULL
#define SHINFO_REGION_SLOT 10
-#define PAGE_SIZE 4096
#define DUMMY_REGION_GPA (SHINFO_REGION_GPA + (2 * PAGE_SIZE))
#define DUMMY_REGION_SLOT 11
diff --git a/tools/testing/selftests/kvm/x86_64/xen_vmcall_test.c b/tools/testing/selftests/kvm/x86_64/xen_vmcall_test.c
index adc94452b57c..b30fe9de1d4f 100644
--- a/tools/testing/selftests/kvm/x86_64/xen_vmcall_test.c
+++ b/tools/testing/selftests/kvm/x86_64/xen_vmcall_test.c
@@ -15,7 +15,6 @@
#define HCALL_REGION_GPA 0xc0000000ULL
#define HCALL_REGION_SLOT 10
-#define PAGE_SIZE 4096
static struct kvm_vm *vm;
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index 3fe2515aa616..e1f998defd10 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -25,12 +25,13 @@ TEST_PROGS += bareudp.sh
TEST_PROGS += amt.sh
TEST_PROGS += unicast_extensions.sh
TEST_PROGS += udpgro_fwd.sh
+TEST_PROGS += udpgro_frglist.sh
TEST_PROGS += veth.sh
TEST_PROGS += ioam6.sh
TEST_PROGS += gro.sh
TEST_PROGS += gre_gso.sh
TEST_PROGS += cmsg_so_mark.sh
-TEST_PROGS += cmsg_time.sh
+TEST_PROGS += cmsg_time.sh cmsg_ipv6.sh
TEST_PROGS += srv6_end_dt46_l3vpn_test.sh
TEST_PROGS += srv6_end_dt4_l3vpn_test.sh
TEST_PROGS += srv6_end_dt6_l3vpn_test.sh
@@ -54,12 +55,15 @@ TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa
TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict tls
TEST_GEN_FILES += toeplitz
TEST_GEN_FILES += cmsg_sender
+TEST_PROGS += test_vxlan_vnifiltering.sh
TEST_FILES := settings
KSFT_KHDR_INSTALL := 1
include ../lib.mk
+include bpf/Makefile
+
$(OUTPUT)/reuseport_bpf_numa: LDLIBS += -lnuma
$(OUTPUT)/tcp_mmap: LDLIBS += -lpthread
$(OUTPUT)/tcp_inq: LDLIBS += -lpthread
diff --git a/tools/testing/selftests/net/bpf/Makefile b/tools/testing/selftests/net/bpf/Makefile
new file mode 100644
index 000000000000..f91bf14bbee7
--- /dev/null
+++ b/tools/testing/selftests/net/bpf/Makefile
@@ -0,0 +1,14 @@
+# SPDX-License-Identifier: GPL-2.0
+
+CLANG ?= clang
+CCINCLUDE += -I../../bpf
+CCINCLUDE += -I../../../../../usr/include/
+
+TEST_CUSTOM_PROGS = $(OUTPUT)/bpf/nat6to4.o
+all: $(TEST_CUSTOM_PROGS)
+
+$(OUTPUT)/%.o: %.c
+ $(CLANG) -O2 -target bpf -c $< $(CCINCLUDE) -o $@
+
+clean:
+ rm -f $(TEST_CUSTOM_PROGS)
diff --git a/tools/testing/selftests/net/bpf/nat6to4.c b/tools/testing/selftests/net/bpf/nat6to4.c
new file mode 100644
index 000000000000..ac54c36b25fc
--- /dev/null
+++ b/tools/testing/selftests/net/bpf/nat6to4.c
@@ -0,0 +1,285 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * This code is taken from the Android Open Source Project and the author
+ * (Maciej Żenczykowski) has gave permission to relicense it under the
+ * GPLv2. Therefore this program is free software;
+ * You can redistribute it and/or modify it under the terms of the GNU
+ * General Public License version 2 as published by the Free Software
+ * Foundation
+
+ * The original headers, including the original license headers, are
+ * included below for completeness.
+ *
+ * Copyright (C) 2019 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <linux/bpf.h>
+#include <linux/if.h>
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <linux/in.h>
+#include <linux/in6.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/pkt_cls.h>
+#include <linux/swab.h>
+#include <stdbool.h>
+#include <stdint.h>
+
+
+#include <linux/udp.h>
+
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+#define IP_DF 0x4000 // Flag: "Don't Fragment"
+
+SEC("schedcls/ingress6/nat_6")
+int sched_cls_ingress6_nat_6_prog(struct __sk_buff *skb)
+{
+ const int l2_header_size = sizeof(struct ethhdr);
+ void *data = (void *)(long)skb->data;
+ const void *data_end = (void *)(long)skb->data_end;
+ const struct ethhdr * const eth = data; // used iff is_ethernet
+ const struct ipv6hdr * const ip6 = (void *)(eth + 1);
+
+ // Require ethernet dst mac address to be our unicast address.
+ if (skb->pkt_type != PACKET_HOST)
+ return TC_ACT_OK;
+
+ // Must be meta-ethernet IPv6 frame
+ if (skb->protocol != bpf_htons(ETH_P_IPV6))
+ return TC_ACT_OK;
+
+ // Must have (ethernet and) ipv6 header
+ if (data + l2_header_size + sizeof(*ip6) > data_end)
+ return TC_ACT_OK;
+
+ // Ethertype - if present - must be IPv6
+ if (eth->h_proto != bpf_htons(ETH_P_IPV6))
+ return TC_ACT_OK;
+
+ // IP version must be 6
+ if (ip6->version != 6)
+ return TC_ACT_OK;
+ // Maximum IPv6 payload length that can be translated to IPv4
+ if (bpf_ntohs(ip6->payload_len) > 0xFFFF - sizeof(struct iphdr))
+ return TC_ACT_OK;
+ switch (ip6->nexthdr) {
+ case IPPROTO_TCP: // For TCP & UDP the checksum neutrality of the chosen IPv6
+ case IPPROTO_UDP: // address means there is no need to update their checksums.
+ case IPPROTO_GRE: // We do not need to bother looking at GRE/ESP headers,
+ case IPPROTO_ESP: // since there is never a checksum to update.
+ break;
+ default: // do not know how to handle anything else
+ return TC_ACT_OK;
+ }
+
+ struct ethhdr eth2; // used iff is_ethernet
+
+ eth2 = *eth; // Copy over the ethernet header (src/dst mac)
+ eth2.h_proto = bpf_htons(ETH_P_IP); // But replace the ethertype
+
+ struct iphdr ip = {
+ .version = 4, // u4
+ .ihl = sizeof(struct iphdr) / sizeof(__u32), // u4
+ .tos = (ip6->priority << 4) + (ip6->flow_lbl[0] >> 4), // u8
+ .tot_len = bpf_htons(bpf_ntohs(ip6->payload_len) + sizeof(struct iphdr)), // u16
+ .id = 0, // u16
+ .frag_off = bpf_htons(IP_DF), // u16
+ .ttl = ip6->hop_limit, // u8
+ .protocol = ip6->nexthdr, // u8
+ .check = 0, // u16
+ .saddr = 0x0201a8c0, // u32
+ .daddr = 0x0101a8c0, // u32
+ };
+
+ // Calculate the IPv4 one's complement checksum of the IPv4 header.
+ __wsum sum4 = 0;
+
+ for (int i = 0; i < sizeof(ip) / sizeof(__u16); ++i)
+ sum4 += ((__u16 *)&ip)[i];
+
+ // Note that sum4 is guaranteed to be non-zero by virtue of ip.version == 4
+ sum4 = (sum4 & 0xFFFF) + (sum4 >> 16); // collapse u32 into range 1 .. 0x1FFFE
+ sum4 = (sum4 & 0xFFFF) + (sum4 >> 16); // collapse any potential carry into u16
+ ip.check = (__u16)~sum4; // sum4 cannot be zero, so this is never 0xFFFF
+
+ // Calculate the *negative* IPv6 16-bit one's complement checksum of the IPv6 header.
+ __wsum sum6 = 0;
+ // We'll end up with a non-zero sum due to ip6->version == 6 (which has '0' bits)
+ for (int i = 0; i < sizeof(*ip6) / sizeof(__u16); ++i)
+ sum6 += ~((__u16 *)ip6)[i]; // note the bitwise negation
+
+ // Note that there is no L4 checksum update: we are relying on the checksum neutrality
+ // of the ipv6 address chosen by netd's ClatdController.
+
+ // Packet mutations begin - point of no return, but if this first modification fails
+ // the packet is probably still pristine, so let clatd handle it.
+ if (bpf_skb_change_proto(skb, bpf_htons(ETH_P_IP), 0))
+ return TC_ACT_OK;
+ bpf_csum_update(skb, sum6);
+
+ data = (void *)(long)skb->data;
+ data_end = (void *)(long)skb->data_end;
+ if (data + l2_header_size + sizeof(struct iphdr) > data_end)
+ return TC_ACT_SHOT;
+
+ struct ethhdr *new_eth = data;
+
+ // Copy over the updated ethernet header
+ *new_eth = eth2;
+
+ // Copy over the new ipv4 header.
+ *(struct iphdr *)(new_eth + 1) = ip;
+ return bpf_redirect(skb->ifindex, BPF_F_INGRESS);
+}
+
+SEC("schedcls/egress4/snat4")
+int sched_cls_egress4_snat4_prog(struct __sk_buff *skb)
+{
+ const int l2_header_size = sizeof(struct ethhdr);
+ void *data = (void *)(long)skb->data;
+ const void *data_end = (void *)(long)skb->data_end;
+ const struct ethhdr *const eth = data; // used iff is_ethernet
+ const struct iphdr *const ip4 = (void *)(eth + 1);
+
+ // Must be meta-ethernet IPv4 frame
+ if (skb->protocol != bpf_htons(ETH_P_IP))
+ return TC_ACT_OK;
+
+ // Must have ipv4 header
+ if (data + l2_header_size + sizeof(struct ipv6hdr) > data_end)
+ return TC_ACT_OK;
+
+ // Ethertype - if present - must be IPv4
+ if (eth->h_proto != bpf_htons(ETH_P_IP))
+ return TC_ACT_OK;
+
+ // IP version must be 4
+ if (ip4->version != 4)
+ return TC_ACT_OK;
+
+ // We cannot handle IP options, just standard 20 byte == 5 dword minimal IPv4 header
+ if (ip4->ihl != 5)
+ return TC_ACT_OK;
+
+ // Maximum IPv6 payload length that can be translated to IPv4
+ if (bpf_htons(ip4->tot_len) > 0xFFFF - sizeof(struct ipv6hdr))
+ return TC_ACT_OK;
+
+ // Calculate the IPv4 one's complement checksum of the IPv4 header.
+ __wsum sum4 = 0;
+
+ for (int i = 0; i < sizeof(*ip4) / sizeof(__u16); ++i)
+ sum4 += ((__u16 *)ip4)[i];
+
+ // Note that sum4 is guaranteed to be non-zero by virtue of ip4->version == 4
+ sum4 = (sum4 & 0xFFFF) + (sum4 >> 16); // collapse u32 into range 1 .. 0x1FFFE
+ sum4 = (sum4 & 0xFFFF) + (sum4 >> 16); // collapse any potential carry into u16
+ // for a correct checksum we should get *a* zero, but sum4 must be positive, ie 0xFFFF
+ if (sum4 != 0xFFFF)
+ return TC_ACT_OK;
+
+ // Minimum IPv4 total length is the size of the header
+ if (bpf_ntohs(ip4->tot_len) < sizeof(*ip4))
+ return TC_ACT_OK;
+
+ // We are incapable of dealing with IPv4 fragments
+ if (ip4->frag_off & ~bpf_htons(IP_DF))
+ return TC_ACT_OK;
+
+ switch (ip4->protocol) {
+ case IPPROTO_TCP: // For TCP & UDP the checksum neutrality of the chosen IPv6
+ case IPPROTO_GRE: // address means there is no need to update their checksums.
+ case IPPROTO_ESP: // We do not need to bother looking at GRE/ESP headers,
+ break; // since there is never a checksum to update.
+
+ case IPPROTO_UDP: // See above comment, but must also have UDP header...
+ if (data + sizeof(*ip4) + sizeof(struct udphdr) > data_end)
+ return TC_ACT_OK;
+ const struct udphdr *uh = (const struct udphdr *)(ip4 + 1);
+ // If IPv4/UDP checksum is 0 then fallback to clatd so it can calculate the
+ // checksum. Otherwise the network or more likely the NAT64 gateway might
+ // drop the packet because in most cases IPv6/UDP packets with a zero checksum
+ // are invalid. See RFC 6935. TODO: calculate checksum via bpf_csum_diff()
+ if (!uh->check)
+ return TC_ACT_OK;
+ break;
+
+ default: // do not know how to handle anything else
+ return TC_ACT_OK;
+ }
+ struct ethhdr eth2; // used iff is_ethernet
+
+ eth2 = *eth; // Copy over the ethernet header (src/dst mac)
+ eth2.h_proto = bpf_htons(ETH_P_IPV6); // But replace the ethertype
+
+ struct ipv6hdr ip6 = {
+ .version = 6, // __u8:4
+ .priority = ip4->tos >> 4, // __u8:4
+ .flow_lbl = {(ip4->tos & 0xF) << 4, 0, 0}, // __u8[3]
+ .payload_len = bpf_htons(bpf_ntohs(ip4->tot_len) - 20), // __be16
+ .nexthdr = ip4->protocol, // __u8
+ .hop_limit = ip4->ttl, // __u8
+ };
+ ip6.saddr.in6_u.u6_addr32[0] = bpf_htonl(0x20010db8);
+ ip6.saddr.in6_u.u6_addr32[1] = 0;
+ ip6.saddr.in6_u.u6_addr32[2] = 0;
+ ip6.saddr.in6_u.u6_addr32[3] = bpf_htonl(1);
+ ip6.daddr.in6_u.u6_addr32[0] = bpf_htonl(0x20010db8);
+ ip6.daddr.in6_u.u6_addr32[1] = 0;
+ ip6.daddr.in6_u.u6_addr32[2] = 0;
+ ip6.daddr.in6_u.u6_addr32[3] = bpf_htonl(2);
+
+ // Calculate the IPv6 16-bit one's complement checksum of the IPv6 header.
+ __wsum sum6 = 0;
+ // We'll end up with a non-zero sum due to ip6.version == 6
+ for (int i = 0; i < sizeof(ip6) / sizeof(__u16); ++i)
+ sum6 += ((__u16 *)&ip6)[i];
+
+ // Packet mutations begin - point of no return, but if this first modification fails
+ // the packet is probably still pristine, so let clatd handle it.
+ if (bpf_skb_change_proto(skb, bpf_htons(ETH_P_IPV6), 0))
+ return TC_ACT_OK;
+
+ // This takes care of updating the skb->csum field for a CHECKSUM_COMPLETE packet.
+ // In such a case, skb->csum is a 16-bit one's complement sum of the entire payload,
+ // thus we need to subtract out the ipv4 header's sum, and add in the ipv6 header's sum.
+ // However, we've already verified the ipv4 checksum is correct and thus 0.
+ // Thus we only need to add the ipv6 header's sum.
+ //
+ // bpf_csum_update() always succeeds if the skb is CHECKSUM_COMPLETE and returns an error
+ // (-ENOTSUPP) if it isn't. So we just ignore the return code (see above for more details).
+ bpf_csum_update(skb, sum6);
+
+ // bpf_skb_change_proto() invalidates all pointers - reload them.
+ data = (void *)(long)skb->data;
+ data_end = (void *)(long)skb->data_end;
+
+ // I cannot think of any valid way for this error condition to trigger, however I do
+ // believe the explicit check is required to keep the in kernel ebpf verifier happy.
+ if (data + l2_header_size + sizeof(ip6) > data_end)
+ return TC_ACT_SHOT;
+
+ struct ethhdr *new_eth = data;
+
+ // Copy over the updated ethernet header
+ *new_eth = eth2;
+ // Copy over the new ipv4 header.
+ *(struct ipv6hdr *)(new_eth + 1) = ip6;
+ return TC_ACT_OK;
+}
+
+char _license[] SEC("license") = ("GPL");
diff --git a/tools/testing/selftests/net/fcnal-test.sh b/tools/testing/selftests/net/fcnal-test.sh
index 47c4d4b4a44a..54701c8b0cd7 100755
--- a/tools/testing/selftests/net/fcnal-test.sh
+++ b/tools/testing/selftests/net/fcnal-test.sh
@@ -810,10 +810,16 @@ ipv4_ping()
setup
set_sysctl net.ipv4.raw_l3mdev_accept=1 2>/dev/null
ipv4_ping_novrf
+ setup
+ set_sysctl net.ipv4.ping_group_range='0 2147483647' 2>/dev/null
+ ipv4_ping_novrf
log_subsection "With VRF"
setup "yes"
ipv4_ping_vrf
+ setup "yes"
+ set_sysctl net.ipv4.ping_group_range='0 2147483647' 2>/dev/null
+ ipv4_ping_vrf
}
################################################################################
@@ -2348,10 +2354,16 @@ ipv6_ping()
log_subsection "No VRF"
setup
ipv6_ping_novrf
+ setup
+ set_sysctl net.ipv4.ping_group_range='0 2147483647' 2>/dev/null
+ ipv6_ping_novrf
log_subsection "With VRF"
setup "yes"
ipv6_ping_vrf
+ setup "yes"
+ set_sysctl net.ipv4.ping_group_range='0 2147483647' 2>/dev/null
+ ipv6_ping_vrf
}
################################################################################
diff --git a/tools/testing/selftests/net/forwarding/Makefile b/tools/testing/selftests/net/forwarding/Makefile
index 8fa97ae9af9e..e811090f7748 100644
--- a/tools/testing/selftests/net/forwarding/Makefile
+++ b/tools/testing/selftests/net/forwarding/Makefile
@@ -2,15 +2,31 @@
TEST_PROGS = bridge_igmp.sh \
bridge_locked_port.sh \
+ bridge_mld.sh \
bridge_port_isolation.sh \
bridge_sticky_fdb.sh \
bridge_vlan_aware.sh \
+ bridge_vlan_mcast.sh \
bridge_vlan_unaware.sh \
+ custom_multipath_hash.sh \
+ dual_vxlan_bridge.sh \
+ ethtool_extended_state.sh \
ethtool.sh \
+ gre_custom_multipath_hash.sh \
gre_inner_v4_multipath.sh \
gre_inner_v6_multipath.sh \
+ gre_multipath_nh_res.sh \
+ gre_multipath_nh.sh \
gre_multipath.sh \
+ hw_stats_l3.sh \
ip6_forward_instats_vrf.sh \
+ ip6gre_custom_multipath_hash.sh \
+ ip6gre_flat_key.sh \
+ ip6gre_flat_keys.sh \
+ ip6gre_flat.sh \
+ ip6gre_hier_key.sh \
+ ip6gre_hier_keys.sh \
+ ip6gre_hier.sh \
ip6gre_inner_v4_multipath.sh \
ip6gre_inner_v6_multipath.sh \
ipip_flat_gre_key.sh \
@@ -34,36 +50,53 @@ TEST_PROGS = bridge_igmp.sh \
mirror_gre_vlan_bridge_1q.sh \
mirror_gre_vlan.sh \
mirror_vlan.sh \
+ pedit_dsfield.sh \
+ pedit_ip.sh \
+ pedit_l4port.sh \
+ q_in_vni_ipv6.sh \
+ q_in_vni.sh \
router_bridge.sh \
router_bridge_vlan.sh \
router_broadcast.sh \
+ router_mpath_nh_res.sh \
router_mpath_nh.sh \
router_multicast.sh \
router_multipath.sh \
+ router_nh.sh \
router.sh \
router_vid_1.sh \
sch_ets.sh \
+ sch_red.sh \
sch_tbf_ets.sh \
sch_tbf_prio.sh \
sch_tbf_root.sh \
+ skbedit_priority.sh \
tc_actions.sh \
tc_chains.sh \
tc_flower_router.sh \
tc_flower.sh \
tc_mpls_l2vpn.sh \
+ tc_police.sh \
tc_shblocks.sh \
tc_vlan_modify.sh \
+ vxlan_asymmetric_ipv6.sh \
vxlan_asymmetric.sh \
+ vxlan_bridge_1d_ipv6.sh \
+ vxlan_bridge_1d_port_8472_ipv6.sh \
vxlan_bridge_1d_port_8472.sh \
vxlan_bridge_1d.sh \
+ vxlan_bridge_1q_ipv6.sh \
+ vxlan_bridge_1q_port_8472_ipv6.sh \
vxlan_bridge_1q_port_8472.sh \
vxlan_bridge_1q.sh \
+ vxlan_symmetric_ipv6.sh \
vxlan_symmetric.sh
TEST_PROGS_EXTENDED := devlink_lib.sh \
ethtool_lib.sh \
fib_offload_lib.sh \
forwarding.config.sample \
+ ip6gre_lib.sh \
ipip_lib.sh \
lib.sh \
mirror_gre_lib.sh \
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q.sh b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q.sh
index a3402cd8d5b6..9ff22f28032d 100755
--- a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q.sh
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q.sh
@@ -61,9 +61,12 @@ setup_prepare()
vrf_prepare
mirror_gre_topo_create
+ # Avoid changing br1's PVID while it is operational as a L3 interface.
+ ip link set dev br1 down
ip link set dev $swp3 master br1
bridge vlan add dev br1 vid 555 pvid untagged self
+ ip link set dev br1 up
ip address add dev br1 192.0.2.129/28
ip address add dev br1 2001:db8:2::1/64
diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index 7314257d248a..48ef112f42c2 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -1444,6 +1444,33 @@ chk_prio_nr()
[ "${dump_stats}" = 1 ] && dump_stats
}
+chk_subflow_nr()
+{
+ local need_title="$1"
+ local msg="$2"
+ local subflow_nr=$3
+ local cnt1
+ local cnt2
+
+ if [ -n "${need_title}" ]; then
+ printf "%03u %-36s %s" "${TEST_COUNT}" "${TEST_NAME}" "${msg}"
+ else
+ printf "%-${nr_blank}s %s" " " "${msg}"
+ fi
+
+ cnt1=$(ss -N $ns1 -tOni | grep -c token)
+ cnt2=$(ss -N $ns2 -tOni | grep -c token)
+ if [ "$cnt1" != "$subflow_nr" -o "$cnt2" != "$subflow_nr" ]; then
+ echo "[fail] got $cnt1:$cnt2 subflows expected $subflow_nr"
+ fail_test
+ dump_stats=1
+ else
+ echo "[ ok ]"
+ fi
+
+ [ "${dump_stats}" = 1 ] && ( ss -N $ns1 -tOni ; ss -N $ns1 -tOni | grep token; ip -n $ns1 mptcp endpoint )
+}
+
chk_link_usage()
{
local ns=$1
@@ -2556,7 +2583,7 @@ fastclose_tests()
fi
}
-implicit_tests()
+endpoint_tests()
{
# userspace pm type prevents add_addr
if reset "implicit EP"; then
@@ -2578,6 +2605,23 @@ implicit_tests()
$ns2 10.0.2.2 id 1 flags signal
wait
fi
+
+ if reset "delete and re-add"; then
+ pm_nl_set_limits $ns1 1 1
+ pm_nl_set_limits $ns2 1 1
+ pm_nl_add_endpoint $ns2 10.0.2.2 id 2 dev ns2eth2 flags subflow
+ run_tests $ns1 $ns2 10.0.1.1 4 0 0 slow &
+
+ wait_mpj $ns2
+ pm_nl_del_endpoint $ns2 2 10.0.2.2
+ sleep 0.5
+ chk_subflow_nr needtitle "after delete" 1
+
+ pm_nl_add_endpoint $ns2 10.0.2.2 dev ns2eth2 flags subflow
+ wait_mpj $ns2
+ chk_subflow_nr "" "after re-add" 2
+ wait
+ fi
}
# [$1: error message]
@@ -2624,7 +2668,7 @@ all_tests_sorted=(
d@deny_join_id0_tests
m@fullmesh_tests
z@fastclose_tests
- I@implicit_tests
+ I@endpoint_tests
)
all_tests_args=""
diff --git a/tools/testing/selftests/net/so_txtime.c b/tools/testing/selftests/net/so_txtime.c
index 59067f64b775..2672ac0b6d1f 100644
--- a/tools/testing/selftests/net/so_txtime.c
+++ b/tools/testing/selftests/net/so_txtime.c
@@ -421,7 +421,7 @@ static void usage(const char *progname)
"Options:\n"
" -4 only IPv4\n"
" -6 only IPv6\n"
- " -c <clock> monotonic (default) or tai\n"
+ " -c <clock> monotonic or tai (default)\n"
" -D <addr> destination IP address (server)\n"
" -S <addr> source IP address (client)\n"
" -r run rx mode\n"
@@ -475,7 +475,7 @@ static void parse_opts(int argc, char **argv)
cfg_rx = true;
break;
case 't':
- cfg_start_time_ns = strtol(optarg, NULL, 0);
+ cfg_start_time_ns = strtoll(optarg, NULL, 0);
break;
case 'm':
cfg_mark = strtol(optarg, NULL, 0);
diff --git a/tools/testing/selftests/net/udpgro_frglist.sh b/tools/testing/selftests/net/udpgro_frglist.sh
new file mode 100755
index 000000000000..807b74c8fd80
--- /dev/null
+++ b/tools/testing/selftests/net/udpgro_frglist.sh
@@ -0,0 +1,101 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Run a series of udpgro benchmarks
+
+readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)"
+
+cleanup() {
+ local -r jobs="$(jobs -p)"
+ local -r ns="$(ip netns list|grep $PEER_NS)"
+
+ [ -n "${jobs}" ] && kill -INT ${jobs} 2>/dev/null
+ [ -n "$ns" ] && ip netns del $ns 2>/dev/null
+}
+trap cleanup EXIT
+
+run_one() {
+ # use 'rx' as separator between sender args and receiver args
+ local -r all="$@"
+ local -r tx_args=${all%rx*}
+ local rx_args=${all#*rx}
+
+
+
+ ip netns add "${PEER_NS}"
+ ip -netns "${PEER_NS}" link set lo up
+ ip link add type veth
+ ip link set dev veth0 up
+ ip addr add dev veth0 192.168.1.2/24
+ ip addr add dev veth0 2001:db8::2/64 nodad
+
+ ip link set dev veth1 netns "${PEER_NS}"
+ ip -netns "${PEER_NS}" addr add dev veth1 192.168.1.1/24
+ ip -netns "${PEER_NS}" addr add dev veth1 2001:db8::1/64 nodad
+ ip -netns "${PEER_NS}" link set dev veth1 up
+ ip netns exec "${PEER_NS}" ethtool -K veth1 rx-gro-list on
+
+
+ ip -n "${PEER_NS}" link set veth1 xdp object ../bpf/xdp_dummy.o section xdp_dummy
+ tc -n "${PEER_NS}" qdisc add dev veth1 clsact
+ tc -n "${PEER_NS}" filter add dev veth1 ingress prio 4 protocol ipv6 bpf object-file ../bpf/nat6to4.o section schedcls/ingress6/nat_6 direct-action
+ tc -n "${PEER_NS}" filter add dev veth1 egress prio 4 protocol ip bpf object-file ../bpf/nat6to4.o section schedcls/egress4/snat4 direct-action
+ echo ${rx_args}
+ ip netns exec "${PEER_NS}" ./udpgso_bench_rx ${rx_args} -r &
+
+ # Hack: let bg programs complete the startup
+ sleep 0.1
+ ./udpgso_bench_tx ${tx_args}
+}
+
+run_in_netns() {
+ local -r args=$@
+ echo ${args}
+ ./in_netns.sh $0 __subprocess ${args}
+}
+
+run_udp() {
+ local -r args=$@
+
+ echo "udp gso - over veth touching data"
+ run_in_netns ${args} -u -S 0 rx -4 -v
+
+ echo "udp gso and gro - over veth touching data"
+ run_in_netns ${args} -S 0 rx -4 -G
+}
+
+run_tcp() {
+ local -r args=$@
+
+ echo "tcp - over veth touching data"
+ run_in_netns ${args} -t rx -4 -t
+}
+
+run_all() {
+ local -r core_args="-l 4"
+ local -r ipv4_args="${core_args} -4 -D 192.168.1.1"
+ local -r ipv6_args="${core_args} -6 -D 2001:db8::1"
+
+ echo "ipv6"
+ run_tcp "${ipv6_args}"
+ run_udp "${ipv6_args}"
+}
+
+if [ ! -f ../bpf/xdp_dummy.o ]; then
+ echo "Missing xdp_dummy helper. Build bpf selftest first"
+ exit -1
+fi
+
+if [ ! -f bpf/nat6to4.o ]; then
+ echo "Missing nat6to4 helper. Build bpfnat6to4.o selftest first"
+ exit -1
+fi
+
+if [[ $# -eq 0 ]]; then
+ run_all
+elif [[ $1 == "__subprocess" ]]; then
+ shift
+ run_one $@
+else
+ run_in_netns $@
+fi
diff --git a/tools/testing/selftests/rcutorture/bin/functions.sh b/tools/testing/selftests/rcutorture/bin/functions.sh
index c35ba24f994c..66d0414d8e4b 100644
--- a/tools/testing/selftests/rcutorture/bin/functions.sh
+++ b/tools/testing/selftests/rcutorture/bin/functions.sh
@@ -301,7 +301,7 @@ specify_qemu_cpus () {
echo $2 -smp $3
;;
qemu-system-ppc64)
- nt="`lscpu | grep '^NUMA node0' | sed -e 's/^[^,]*,\([0-9]*\),.*$/\1/'`"
+ nt="`lscpu | sed -n 's/^Thread(s) per core:\s*//p'`"
echo $2 -smp cores=`expr \( $3 + $nt - 1 \) / $nt`,threads=$nt
;;
esac
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh b/tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh
index 5f682fc892dd..88983cba7956 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh
@@ -36,7 +36,7 @@ do
then
egrep "error:|warning:|^ld: .*undefined reference to" < $i > $i.diags
files="$files $i.diags $i"
- elif ! test -f ${scenariobasedir}/vmlinux
+ elif ! test -f ${scenariobasedir}/vmlinux && ! test -f "${rundir}/re-run"
then
echo No ${scenariobasedir}/vmlinux file > $i.diags
files="$files $i.diags $i"
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh
index 0a5419982ab3..0789c5606d2a 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh
@@ -33,7 +33,12 @@ do
TORTURE_SUITE="`cat $i/../torture_suite`"
configfile=`echo $i | sed -e 's,^.*/,,'`
rm -f $i/console.log.*.diags
- kvm-recheck-${TORTURE_SUITE}.sh $i
+ case "${TORTURE_SUITE}" in
+ X*)
+ ;;
+ *)
+ kvm-recheck-${TORTURE_SUITE}.sh $i
+ esac
if test -f "$i/qemu-retval" && test "`cat $i/qemu-retval`" -ne 0 && test "`cat $i/qemu-retval`" -ne 137
then
echo QEMU error, output:
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-remote.sh b/tools/testing/selftests/rcutorture/bin/kvm-remote.sh
index 8c4c1e4792d0..0ff59bd8b640 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-remote.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-remote.sh
@@ -138,14 +138,14 @@ chmod +x $T/bin/kvm-remote-*.sh
# Check first to avoid the need for cleanup for system-name typos
for i in $systems
do
- ncpus="`ssh $i getconf _NPROCESSORS_ONLN 2> /dev/null`"
- echo $i: $ncpus CPUs " " `date` | tee -a "$oldrun/remote-log"
+ ncpus="`ssh -o BatchMode=yes $i getconf _NPROCESSORS_ONLN 2> /dev/null`"
ret=$?
if test "$ret" -ne 0
then
echo System $i unreachable, giving up. | tee -a "$oldrun/remote-log"
exit 4
fi
+ echo $i: $ncpus CPUs " " `date` | tee -a "$oldrun/remote-log"
done
# Download and expand the tarball on all systems.
@@ -153,14 +153,14 @@ echo Build-products tarball: `du -h $T/binres.tgz` | tee -a "$oldrun/remote-log"
for i in $systems
do
echo Downloading tarball to $i `date` | tee -a "$oldrun/remote-log"
- cat $T/binres.tgz | ssh $i "cd /tmp; tar -xzf -"
+ cat $T/binres.tgz | ssh -o BatchMode=yes $i "cd /tmp; tar -xzf -"
ret=$?
tries=0
while test "$ret" -ne 0
do
echo Unable to download $T/binres.tgz to system $i, waiting and then retrying. $tries prior retries. | tee -a "$oldrun/remote-log"
sleep 60
- cat $T/binres.tgz | ssh $i "cd /tmp; tar -xzf -"
+ cat $T/binres.tgz | ssh -o BatchMode=yes $i "cd /tmp; tar -xzf -"
ret=$?
if test "$ret" -ne 0
then
@@ -185,7 +185,7 @@ checkremotefile () {
while :
do
- ssh $1 "test -f \"$2\""
+ ssh -o BatchMode=yes $1 "test -f \"$2\""
ret=$?
if test "$ret" -eq 255
then
@@ -228,7 +228,7 @@ startbatches () {
then
continue # System still running last test, skip.
fi
- ssh "$i" "cd \"$resdir/$ds\"; touch remote.run; PATH=\"$T/bin:$PATH\" nohup kvm-remote-$curbatch.sh > kvm-remote-$curbatch.sh.out 2>&1 &" 1>&2
+ ssh -o BatchMode=yes "$i" "cd \"$resdir/$ds\"; touch remote.run; PATH=\"$T/bin:$PATH\" nohup kvm-remote-$curbatch.sh > kvm-remote-$curbatch.sh.out 2>&1 &" 1>&2
ret=$?
if test "$ret" -ne 0
then
@@ -267,7 +267,7 @@ do
sleep 30
done
echo " ---" Collecting results from $i `date` | tee -a "$oldrun/remote-log"
- ( cd "$oldrun"; ssh $i "cd $rundir; tar -czf - kvm-remote-*.sh.out */console.log */kvm-test-1-run*.sh.out */qemu[_-]pid */qemu-retval */qemu-affinity; rm -rf $T > /dev/null 2>&1" | tar -xzf - )
+ ( cd "$oldrun"; ssh -o BatchMode=yes $i "cd $rundir; tar -czf - kvm-remote-*.sh.out */console.log */kvm-test-1-run*.sh.out */qemu[_-]pid */qemu-retval */qemu-affinity; rm -rf $T > /dev/null 2>&1" | tar -xzf - )
done
( kvm-end-run-stats.sh "$oldrun" "$starttime"; echo $? > $T/exitcode ) | tee -a "$oldrun/remote-log"
diff --git a/tools/testing/selftests/rcutorture/bin/kvm.sh b/tools/testing/selftests/rcutorture/bin/kvm.sh
index 55b2c1533282..263e16aeca0e 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm.sh
@@ -44,6 +44,7 @@ TORTURE_KCONFIG_KASAN_ARG=""
TORTURE_KCONFIG_KCSAN_ARG=""
TORTURE_KMAKE_ARG=""
TORTURE_QEMU_MEM=512
+torture_qemu_mem_default=1
TORTURE_REMOTE=
TORTURE_SHUTDOWN_GRACE=180
TORTURE_SUITE=rcu
@@ -86,7 +87,7 @@ usage () {
echo " --remote"
echo " --results absolute-pathname"
echo " --shutdown-grace seconds"
- echo " --torture lock|rcu|rcuscale|refscale|scf"
+ echo " --torture lock|rcu|rcuscale|refscale|scf|X*"
echo " --trust-make"
exit 1
}
@@ -180,6 +181,10 @@ do
;;
--kasan)
TORTURE_KCONFIG_KASAN_ARG="CONFIG_DEBUG_INFO=y CONFIG_KASAN=y"; export TORTURE_KCONFIG_KASAN_ARG
+ if test -n "$torture_qemu_mem_default"
+ then
+ TORTURE_QEMU_MEM=2G
+ fi
;;
--kconfig|--kconfigs)
checkarg --kconfig "(Kconfig options)" $# "$2" '^CONFIG_[A-Z0-9_]\+=\([ynm]\|[0-9]\+\)\( CONFIG_[A-Z0-9_]\+=\([ynm]\|[0-9]\+\)\)*$' '^error$'
@@ -202,6 +207,7 @@ do
--memory)
checkarg --memory "(memory size)" $# "$2" '^[0-9]\+[MG]\?$' error
TORTURE_QEMU_MEM=$2
+ torture_qemu_mem_default=
shift
;;
--no-initrd)
@@ -231,7 +237,7 @@ do
shift
;;
--torture)
- checkarg --torture "(suite name)" "$#" "$2" '^\(lock\|rcu\|rcuscale\|refscale\|scf\)$' '^--'
+ checkarg --torture "(suite name)" "$#" "$2" '^\(lock\|rcu\|rcuscale\|refscale\|scf\|X.*\)$' '^--'
TORTURE_SUITE=$2
TORTURE_MOD="`echo $TORTURE_SUITE | sed -e 's/^\(lock\|rcu\|scf\)$/\1torture/'`"
shift
diff --git a/tools/testing/selftests/rcutorture/bin/torture.sh b/tools/testing/selftests/rcutorture/bin/torture.sh
index bfe09e2829c8..d477618e7261 100755
--- a/tools/testing/selftests/rcutorture/bin/torture.sh
+++ b/tools/testing/selftests/rcutorture/bin/torture.sh
@@ -54,6 +54,7 @@ do_kvfree=yes
do_kasan=yes
do_kcsan=no
do_clocksourcewd=yes
+do_rt=yes
# doyesno - Helper function for yes/no arguments
function doyesno () {
@@ -82,6 +83,7 @@ usage () {
echo " --do-rcuscale / --do-no-rcuscale"
echo " --do-rcutorture / --do-no-rcutorture"
echo " --do-refscale / --do-no-refscale"
+ echo " --do-rt / --do-no-rt"
echo " --do-scftorture / --do-no-scftorture"
echo " --duration [ <minutes> | <hours>h | <days>d ]"
echo " --kcsan-kmake-arg kernel-make-arguments"
@@ -118,6 +120,7 @@ do
do_scftorture=yes
do_rcuscale=yes
do_refscale=yes
+ do_rt=yes
do_kvfree=yes
do_kasan=yes
do_kcsan=yes
@@ -148,6 +151,7 @@ do
do_scftorture=no
do_rcuscale=no
do_refscale=no
+ do_rt=no
do_kvfree=no
do_kasan=no
do_kcsan=no
@@ -162,6 +166,9 @@ do
--do-refscale|--do-no-refscale)
do_refscale=`doyesno "$1" --do-refscale`
;;
+ --do-rt|--do-no-rt)
+ do_rt=`doyesno "$1" --do-rt`
+ ;;
--do-scftorture|--do-no-scftorture)
do_scftorture=`doyesno "$1" --do-scftorture`
;;
@@ -322,6 +329,7 @@ then
echo " --- make clean" > "$amcdir/Make.out" 2>&1
make -j$MAKE_ALLOTED_CPUS clean >> "$amcdir/Make.out" 2>&1
echo " --- make allmodconfig" >> "$amcdir/Make.out" 2>&1
+ cp .config $amcdir
make -j$MAKE_ALLOTED_CPUS allmodconfig >> "$amcdir/Make.out" 2>&1
echo " --- make " >> "$amcdir/Make.out" 2>&1
make -j$MAKE_ALLOTED_CPUS >> "$amcdir/Make.out" 2>&1
@@ -350,8 +358,19 @@ fi
if test "$do_scftorture" = "yes"
then
- torture_bootargs="scftorture.nthreads=$HALF_ALLOTED_CPUS torture.disable_onoff_at_boot"
- torture_set "scftorture" tools/testing/selftests/rcutorture/bin/kvm.sh --torture scf --allcpus --duration "$duration_scftorture" --configs "$configs_scftorture" --kconfig "CONFIG_NR_CPUS=$HALF_ALLOTED_CPUS" --memory 1G --trust-make
+ torture_bootargs="scftorture.nthreads=$HALF_ALLOTED_CPUS torture.disable_onoff_at_boot csdlock_debug=1"
+ torture_set "scftorture" tools/testing/selftests/rcutorture/bin/kvm.sh --torture scf --allcpus --duration "$duration_scftorture" --configs "$configs_scftorture" --kconfig "CONFIG_NR_CPUS=$HALF_ALLOTED_CPUS" --memory 2G --trust-make
+fi
+
+if test "$do_rt" = "yes"
+then
+ # With all post-boot grace periods forced to normal.
+ torture_bootargs="rcupdate.rcu_cpu_stall_suppress_at_boot=1 torture.disable_onoff_at_boot rcupdate.rcu_task_stall_timeout=30000 rcupdate.rcu_normal=1"
+ torture_set "rcurttorture" tools/testing/selftests/rcutorture/bin/kvm.sh --allcpus --duration "$duration_rcutorture" --configs "TREE03" --trust-make
+
+ # With all post-boot grace periods forced to expedited.
+ torture_bootargs="rcupdate.rcu_cpu_stall_suppress_at_boot=1 torture.disable_onoff_at_boot rcupdate.rcu_task_stall_timeout=30000 rcupdate.rcu_expedited=1"
+ torture_set "rcurttorture-exp" tools/testing/selftests/rcutorture/bin/kvm.sh --allcpus --duration "$duration_rcutorture" --configs "TREE03" --trust-make
fi
if test "$do_refscale" = yes
@@ -363,7 +382,7 @@ fi
for prim in $primlist
do
torture_bootargs="refscale.scale_type="$prim" refscale.nreaders=$HALF_ALLOTED_CPUS refscale.loops=10000 refscale.holdoff=20 torture.disable_onoff_at_boot"
- torture_set "refscale-$prim" tools/testing/selftests/rcutorture/bin/kvm.sh --torture refscale --allcpus --duration 5 --kconfig "CONFIG_NR_CPUS=$HALF_ALLOTED_CPUS" --bootargs "verbose_batched=$VERBOSE_BATCH_CPUS torture.verbose_sleep_frequency=8 torture.verbose_sleep_duration=$VERBOSE_BATCH_CPUS" --trust-make
+ torture_set "refscale-$prim" tools/testing/selftests/rcutorture/bin/kvm.sh --torture refscale --allcpus --duration 5 --kconfig "CONFIG_TASKS_TRACE_RCU=y CONFIG_NR_CPUS=$HALF_ALLOTED_CPUS" --bootargs "verbose_batched=$VERBOSE_BATCH_CPUS torture.verbose_sleep_frequency=8 torture.verbose_sleep_duration=$VERBOSE_BATCH_CPUS" --trust-make
done
if test "$do_rcuscale" = yes
@@ -375,13 +394,13 @@ fi
for prim in $primlist
do
torture_bootargs="rcuscale.scale_type="$prim" rcuscale.nwriters=$HALF_ALLOTED_CPUS rcuscale.holdoff=20 torture.disable_onoff_at_boot"
- torture_set "rcuscale-$prim" tools/testing/selftests/rcutorture/bin/kvm.sh --torture rcuscale --allcpus --duration 5 --kconfig "CONFIG_NR_CPUS=$HALF_ALLOTED_CPUS" --trust-make
+ torture_set "rcuscale-$prim" tools/testing/selftests/rcutorture/bin/kvm.sh --torture rcuscale --allcpus --duration 5 --kconfig "CONFIG_TASKS_TRACE_RCU=y CONFIG_NR_CPUS=$HALF_ALLOTED_CPUS" --trust-make
done
if test "$do_kvfree" = "yes"
then
torture_bootargs="rcuscale.kfree_rcu_test=1 rcuscale.kfree_nthreads=16 rcuscale.holdoff=20 rcuscale.kfree_loops=10000 torture.disable_onoff_at_boot"
- torture_set "rcuscale-kvfree" tools/testing/selftests/rcutorture/bin/kvm.sh --torture rcuscale --allcpus --duration 10 --kconfig "CONFIG_NR_CPUS=$HALF_ALLOTED_CPUS" --memory 1G --trust-make
+ torture_set "rcuscale-kvfree" tools/testing/selftests/rcutorture/bin/kvm.sh --torture rcuscale --allcpus --duration 10 --kconfig "CONFIG_NR_CPUS=$HALF_ALLOTED_CPUS" --memory 2G --trust-make
fi
if test "$do_clocksourcewd" = "yes"
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/RUDE01 b/tools/testing/selftests/rcutorture/configs/rcu/RUDE01
index 7093422050f6..6fd6acb94518 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/RUDE01
+++ b/tools/testing/selftests/rcutorture/configs/rcu/RUDE01
@@ -8,3 +8,5 @@ CONFIG_DEBUG_LOCK_ALLOC=y
CONFIG_PROVE_LOCKING=y
#CHECK#CONFIG_PROVE_RCU=y
CONFIG_RCU_EXPERT=y
+CONFIG_FORCE_TASKS_RUDE_RCU=y
+#CHECK#CONFIG_TASKS_RUDE_RCU=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-N b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-N
index 2da8b49589a0..07f5e0a70ae7 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-N
+++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-N
@@ -6,3 +6,5 @@ CONFIG_PREEMPT_NONE=y
CONFIG_PREEMPT_VOLUNTARY=n
CONFIG_PREEMPT=n
#CHECK#CONFIG_RCU_EXPERT=n
+CONFIG_KPROBES=n
+CONFIG_FTRACE=n
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TASKS01 b/tools/testing/selftests/rcutorture/configs/rcu/TASKS01
index 3ca112444ce7..d84801b9a7ae 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TASKS01
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TASKS01
@@ -7,4 +7,5 @@ CONFIG_PREEMPT=y
CONFIG_DEBUG_LOCK_ALLOC=y
CONFIG_PROVE_LOCKING=y
#CHECK#CONFIG_PROVE_RCU=y
+CONFIG_TASKS_RCU=y
CONFIG_RCU_EXPERT=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TASKS02 b/tools/testing/selftests/rcutorture/configs/rcu/TASKS02
index ad2be91e5ee7..2f9fcffff5ae 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TASKS02
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TASKS02
@@ -2,3 +2,7 @@ CONFIG_SMP=n
CONFIG_PREEMPT_NONE=y
CONFIG_PREEMPT_VOLUNTARY=n
CONFIG_PREEMPT=n
+CONFIG_PREEMPT_DYNAMIC=n
+#CHECK#CONFIG_TASKS_RCU=y
+CONFIG_FORCE_TASKS_RCU=y
+CONFIG_RCU_EXPERT=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TASKS02.boot b/tools/testing/selftests/rcutorture/configs/rcu/TASKS02.boot
index cd2a188eeb6d..b9b6d67cbc5f 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TASKS02.boot
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TASKS02.boot
@@ -1 +1,2 @@
rcutorture.torture_type=tasks
+rcutorture.stat_interval=60
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TASKS03 b/tools/testing/selftests/rcutorture/configs/rcu/TASKS03
index dc02083803ce..dea26c568678 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TASKS03
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TASKS03
@@ -7,3 +7,5 @@ CONFIG_HZ_PERIODIC=n
CONFIG_NO_HZ_IDLE=n
CONFIG_NO_HZ_FULL=y
#CHECK#CONFIG_RCU_EXPERT=n
+CONFIG_TASKS_RCU=y
+CONFIG_RCU_EXPERT=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TRACE01 b/tools/testing/selftests/rcutorture/configs/rcu/TRACE01
index e4d74e5fc1d0..85b407467454 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TRACE01
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TRACE01
@@ -4,8 +4,11 @@ CONFIG_HOTPLUG_CPU=y
CONFIG_PREEMPT_NONE=y
CONFIG_PREEMPT_VOLUNTARY=n
CONFIG_PREEMPT=n
+CONFIG_PREEMPT_DYNAMIC=n
CONFIG_DEBUG_LOCK_ALLOC=n
CONFIG_PROVE_LOCKING=n
#CHECK#CONFIG_PROVE_RCU=n
+CONFIG_FORCE_TASKS_TRACE_RCU=y
+#CHECK#CONFIG_TASKS_TRACE_RCU=y
CONFIG_TASKS_TRACE_RCU_READ_MB=y
CONFIG_RCU_EXPERT=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TRACE02 b/tools/testing/selftests/rcutorture/configs/rcu/TRACE02
index 77541eeb4e9f..093ea6e8e65c 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TRACE02
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TRACE02
@@ -7,5 +7,7 @@ CONFIG_PREEMPT=y
CONFIG_DEBUG_LOCK_ALLOC=y
CONFIG_PROVE_LOCKING=y
#CHECK#CONFIG_PROVE_RCU=y
+CONFIG_FORCE_TASKS_TRACE_RCU=y
+#CHECK#CONFIG_TASKS_TRACE_RCU=y
CONFIG_TASKS_TRACE_RCU_READ_MB=n
CONFIG_RCU_EXPERT=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE04 b/tools/testing/selftests/rcutorture/configs/rcu/TREE04
index 22ad0261728d..ae395981b5e5 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE04
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE04
@@ -1,8 +1,9 @@
CONFIG_SMP=y
CONFIG_NR_CPUS=8
-CONFIG_PREEMPT_NONE=y
-CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT_NONE=n
+CONFIG_PREEMPT_VOLUNTARY=y
CONFIG_PREEMPT=n
+CONFIG_PREEMPT_DYNAMIC=n
#CHECK#CONFIG_TREE_RCU=y
CONFIG_HZ_PERIODIC=n
CONFIG_NO_HZ_IDLE=n
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE07 b/tools/testing/selftests/rcutorture/configs/rcu/TREE07
index 2789b47e4ecd..d30922d8c883 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE07
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE07
@@ -3,6 +3,7 @@ CONFIG_NR_CPUS=16
CONFIG_PREEMPT_NONE=y
CONFIG_PREEMPT_VOLUNTARY=n
CONFIG_PREEMPT=n
+CONFIG_PREEMPT_DYNAMIC=n
#CHECK#CONFIG_TREE_RCU=y
CONFIG_HZ_PERIODIC=n
CONFIG_NO_HZ_IDLE=n
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE09 b/tools/testing/selftests/rcutorture/configs/rcu/TREE09
index 8523a7515cbf..fc45645bb5f4 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE09
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE09
@@ -13,3 +13,5 @@ CONFIG_DEBUG_LOCK_ALLOC=n
CONFIG_RCU_BOOST=n
CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
#CHECK#CONFIG_RCU_EXPERT=n
+CONFIG_KPROBES=n
+CONFIG_FTRACE=n
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE10 b/tools/testing/selftests/rcutorture/configs/rcu/TREE10
index 4a00539bfdd7..a323d8948b7c 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE10
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE10
@@ -3,6 +3,7 @@ CONFIG_NR_CPUS=56
CONFIG_PREEMPT_NONE=y
CONFIG_PREEMPT_VOLUNTARY=n
CONFIG_PREEMPT=n
+CONFIG_PREEMPT_DYNAMIC=n
#CHECK#CONFIG_TREE_RCU=y
CONFIG_HZ_PERIODIC=n
CONFIG_NO_HZ_IDLE=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/ver_functions.sh b/tools/testing/selftests/rcutorture/configs/rcu/ver_functions.sh
index effa415f9b92..e2bc99c785e7 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/ver_functions.sh
+++ b/tools/testing/selftests/rcutorture/configs/rcu/ver_functions.sh
@@ -9,7 +9,7 @@
# rcutorture_param_n_barrier_cbs bootparam-string
#
-# Adds n_barrier_cbs rcutorture module parameter to kernels having it.
+# Adds n_barrier_cbs rcutorture module parameter if not already specified.
rcutorture_param_n_barrier_cbs () {
if echo $1 | grep -q "rcutorture\.n_barrier_cbs"
then
@@ -30,13 +30,25 @@ rcutorture_param_onoff () {
fi
}
+# rcutorture_param_stat_interval bootparam-string
+#
+# Adds stat_interval rcutorture module parameter if not already specified.
+rcutorture_param_stat_interval () {
+ if echo $1 | grep -q "rcutorture\.stat_interval"
+ then
+ :
+ else
+ echo rcutorture.stat_interval=15
+ fi
+}
+
# per_version_boot_params bootparam-string config-file seconds
#
# Adds per-version torture-module parameters to kernels supporting them.
per_version_boot_params () {
echo $1 `rcutorture_param_onoff "$1" "$2"` \
`rcutorture_param_n_barrier_cbs "$1"` \
- rcutorture.stat_interval=15 \
+ `rcutorture_param_stat_interval "$1"` \
rcutorture.shutdown_secs=$3 \
rcutorture.test_no_idle_hz=1 \
rcutorture.verbose=1
diff --git a/tools/testing/selftests/rcutorture/configs/rcuscale/CFcommon b/tools/testing/selftests/rcutorture/configs/rcuscale/CFcommon
index 90942bb5bebc..6a00157bee5b 100644
--- a/tools/testing/selftests/rcutorture/configs/rcuscale/CFcommon
+++ b/tools/testing/selftests/rcutorture/configs/rcuscale/CFcommon
@@ -1,5 +1,6 @@
CONFIG_RCU_SCALE_TEST=y
CONFIG_PRINTK_TIME=y
-CONFIG_TASKS_RCU_GENERIC=y
-CONFIG_TASKS_RCU=y
-CONFIG_TASKS_TRACE_RCU=y
+CONFIG_FORCE_TASKS_RCU=y
+#CHECK#CONFIG_TASKS_RCU=y
+CONFIG_FORCE_TASKS_TRACE_RCU=y
+#CHECK#CONFIG_TASKS_TRACE_RCU=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcuscale/TREE b/tools/testing/selftests/rcutorture/configs/rcuscale/TREE
index f110d9ffbe4c..b10706fd03a4 100644
--- a/tools/testing/selftests/rcutorture/configs/rcuscale/TREE
+++ b/tools/testing/selftests/rcutorture/configs/rcuscale/TREE
@@ -16,3 +16,5 @@ CONFIG_RCU_BOOST=n
CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
CONFIG_RCU_EXPERT=y
CONFIG_RCU_TRACE=y
+CONFIG_KPROBES=n
+CONFIG_FTRACE=n
diff --git a/tools/testing/selftests/rcutorture/configs/refscale/CFcommon b/tools/testing/selftests/rcutorture/configs/refscale/CFcommon
index a98b58b54bb1..fbea3b13baba 100644
--- a/tools/testing/selftests/rcutorture/configs/refscale/CFcommon
+++ b/tools/testing/selftests/rcutorture/configs/refscale/CFcommon
@@ -1,2 +1,6 @@
CONFIG_RCU_REF_SCALE_TEST=y
CONFIG_PRINTK_TIME=y
+CONFIG_FORCE_TASKS_RCU=y
+#CHECK#CONFIG_TASKS_RCU=y
+CONFIG_FORCE_TASKS_TRACE_RCU=y
+#CHECK#CONFIG_TASKS_TRACE_RCU=y
diff --git a/tools/testing/selftests/rcutorture/configs/refscale/NOPREEMPT b/tools/testing/selftests/rcutorture/configs/refscale/NOPREEMPT
index 7f06838a91e6..ef2b501a6971 100644
--- a/tools/testing/selftests/rcutorture/configs/refscale/NOPREEMPT
+++ b/tools/testing/selftests/rcutorture/configs/refscale/NOPREEMPT
@@ -15,3 +15,5 @@ CONFIG_PROVE_LOCKING=n
CONFIG_RCU_BOOST=n
CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
CONFIG_RCU_EXPERT=y
+CONFIG_KPROBES=n
+CONFIG_FTRACE=n
diff --git a/tools/testing/selftests/rcutorture/configs/scf/NOPREEMPT b/tools/testing/selftests/rcutorture/configs/scf/NOPREEMPT
index b8429d6c6ebc..3a59346b3de7 100644
--- a/tools/testing/selftests/rcutorture/configs/scf/NOPREEMPT
+++ b/tools/testing/selftests/rcutorture/configs/scf/NOPREEMPT
@@ -7,3 +7,5 @@ CONFIG_NO_HZ_IDLE=n
CONFIG_NO_HZ_FULL=y
CONFIG_DEBUG_LOCK_ALLOC=n
CONFIG_PROVE_LOCKING=n
+CONFIG_KPROBES=n
+CONFIG_FTRACE=n
diff --git a/tools/testing/selftests/rcutorture/configs/scf/PREEMPT b/tools/testing/selftests/rcutorture/configs/scf/PREEMPT
index ae4992b141b0..cb37e08037d6 100644
--- a/tools/testing/selftests/rcutorture/configs/scf/PREEMPT
+++ b/tools/testing/selftests/rcutorture/configs/scf/PREEMPT
@@ -7,3 +7,4 @@ CONFIG_NO_HZ_IDLE=y
CONFIG_NO_HZ_FULL=n
CONFIG_DEBUG_LOCK_ALLOC=y
CONFIG_PROVE_LOCKING=y
+CONFIG_RCU_EXPERT=y
diff --git a/tools/testing/selftests/rcutorture/configs/scf/ver_functions.sh b/tools/testing/selftests/rcutorture/configs/scf/ver_functions.sh
index d3d9e35d3d55..2d949e58f5a5 100644
--- a/tools/testing/selftests/rcutorture/configs/scf/ver_functions.sh
+++ b/tools/testing/selftests/rcutorture/configs/scf/ver_functions.sh
@@ -25,6 +25,5 @@ per_version_boot_params () {
echo $1 `scftorture_param_onoff "$1" "$2"` \
scftorture.stat_interval=15 \
scftorture.shutdown_secs=$3 \
- scftorture.verbose=1 \
- scf
+ scftorture.verbose=1
}
diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c
index 9d126d7fabdb..313bb0cbfb1e 100644
--- a/tools/testing/selftests/seccomp/seccomp_bpf.c
+++ b/tools/testing/selftests/seccomp/seccomp_bpf.c
@@ -955,7 +955,7 @@ TEST(ERRNO_valid)
ASSERT_EQ(0, ret);
EXPECT_EQ(parent, syscall(__NR_getppid));
- EXPECT_EQ(-1, read(0, NULL, 0));
+ EXPECT_EQ(-1, read(-1, NULL, 0));
EXPECT_EQ(E2BIG, errno);
}
@@ -974,7 +974,7 @@ TEST(ERRNO_zero)
EXPECT_EQ(parent, syscall(__NR_getppid));
/* "errno" of 0 is ok. */
- EXPECT_EQ(0, read(0, NULL, 0));
+ EXPECT_EQ(0, read(-1, NULL, 0));
}
/*
@@ -995,7 +995,7 @@ TEST(ERRNO_capped)
ASSERT_EQ(0, ret);
EXPECT_EQ(parent, syscall(__NR_getppid));
- EXPECT_EQ(-1, read(0, NULL, 0));
+ EXPECT_EQ(-1, read(-1, NULL, 0));
EXPECT_EQ(4095, errno);
}
@@ -1026,7 +1026,7 @@ TEST(ERRNO_order)
ASSERT_EQ(0, ret);
EXPECT_EQ(parent, syscall(__NR_getppid));
- EXPECT_EQ(-1, read(0, NULL, 0));
+ EXPECT_EQ(-1, read(-1, NULL, 0));
EXPECT_EQ(12, errno);
}
@@ -2623,7 +2623,7 @@ void *tsync_sibling(void *data)
ret = prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0);
if (!ret)
return (void *)SIBLING_EXIT_NEWPRIVS;
- read(0, NULL, 0);
+ read(-1, NULL, 0);
return (void *)SIBLING_EXIT_UNKILLED;
}
diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile
index 04a49e876a46..5b1ecd00695b 100644
--- a/tools/testing/selftests/vm/Makefile
+++ b/tools/testing/selftests/vm/Makefile
@@ -57,9 +57,9 @@ CAN_BUILD_I386 := $(shell ./../x86/check_cc.sh "$(CC)" ../x86/trivial_32bit_prog
CAN_BUILD_X86_64 := $(shell ./../x86/check_cc.sh "$(CC)" ../x86/trivial_64bit_program.c)
CAN_BUILD_WITH_NOPIE := $(shell ./../x86/check_cc.sh "$(CC)" ../x86/trivial_program.c -no-pie)
-TARGETS := protection_keys
-BINARIES_32 := $(TARGETS:%=%_32)
-BINARIES_64 := $(TARGETS:%=%_64)
+VMTARGETS := protection_keys
+BINARIES_32 := $(VMTARGETS:%=%_32)
+BINARIES_64 := $(VMTARGETS:%=%_64)
ifeq ($(CAN_BUILD_WITH_NOPIE),1)
CFLAGS += -no-pie
@@ -112,7 +112,7 @@ $(BINARIES_32): CFLAGS += -m32 -mxsave
$(BINARIES_32): LDLIBS += -lrt -ldl -lm
$(BINARIES_32): $(OUTPUT)/%_32: %.c
$(CC) $(CFLAGS) $(EXTRA_CFLAGS) $(notdir $^) $(LDLIBS) -o $@
-$(foreach t,$(TARGETS),$(eval $(call gen-target-rule-32,$(t))))
+$(foreach t,$(VMTARGETS),$(eval $(call gen-target-rule-32,$(t))))
endif
ifeq ($(CAN_BUILD_X86_64),1)
@@ -120,7 +120,7 @@ $(BINARIES_64): CFLAGS += -m64 -mxsave
$(BINARIES_64): LDLIBS += -lrt -ldl
$(BINARIES_64): $(OUTPUT)/%_64: %.c
$(CC) $(CFLAGS) $(EXTRA_CFLAGS) $(notdir $^) $(LDLIBS) -o $@
-$(foreach t,$(TARGETS),$(eval $(call gen-target-rule-64,$(t))))
+$(foreach t,$(VMTARGETS),$(eval $(call gen-target-rule-64,$(t))))
endif
# x86_64 users should be encouraged to install 32-bit libraries
diff --git a/tools/testing/selftests/vm/mremap_test.c b/tools/testing/selftests/vm/mremap_test.c
index 7c0b0617b9f8..db0270127aeb 100644
--- a/tools/testing/selftests/vm/mremap_test.c
+++ b/tools/testing/selftests/vm/mremap_test.c
@@ -6,9 +6,11 @@
#include <errno.h>
#include <stdlib.h>
+#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <time.h>
+#include <stdbool.h>
#include "../kselftest.h"
@@ -64,6 +66,59 @@ enum {
}
/*
+ * Returns false if the requested remap region overlaps with an
+ * existing mapping (e.g text, stack) else returns true.
+ */
+static bool is_remap_region_valid(void *addr, unsigned long long size)
+{
+ void *remap_addr = NULL;
+ bool ret = true;
+
+ /* Use MAP_FIXED_NOREPLACE flag to ensure region is not mapped */
+ remap_addr = mmap(addr, size, PROT_READ | PROT_WRITE,
+ MAP_FIXED_NOREPLACE | MAP_ANONYMOUS | MAP_SHARED,
+ -1, 0);
+
+ if (remap_addr == MAP_FAILED) {
+ if (errno == EEXIST)
+ ret = false;
+ } else {
+ munmap(remap_addr, size);
+ }
+
+ return ret;
+}
+
+/* Returns mmap_min_addr sysctl tunable from procfs */
+static unsigned long long get_mmap_min_addr(void)
+{
+ FILE *fp;
+ int n_matched;
+ static unsigned long long addr;
+
+ if (addr)
+ return addr;
+
+ fp = fopen("/proc/sys/vm/mmap_min_addr", "r");
+ if (fp == NULL) {
+ ksft_print_msg("Failed to open /proc/sys/vm/mmap_min_addr: %s\n",
+ strerror(errno));
+ exit(KSFT_SKIP);
+ }
+
+ n_matched = fscanf(fp, "%llu", &addr);
+ if (n_matched != 1) {
+ ksft_print_msg("Failed to read /proc/sys/vm/mmap_min_addr: %s\n",
+ strerror(errno));
+ fclose(fp);
+ exit(KSFT_SKIP);
+ }
+
+ fclose(fp);
+ return addr;
+}
+
+/*
* Returns the start address of the mapping on success, else returns
* NULL on failure.
*/
@@ -71,11 +126,18 @@ static void *get_source_mapping(struct config c)
{
unsigned long long addr = 0ULL;
void *src_addr = NULL;
+ unsigned long long mmap_min_addr;
+
+ mmap_min_addr = get_mmap_min_addr();
+
retry:
addr += c.src_alignment;
+ if (addr < mmap_min_addr)
+ goto retry;
+
src_addr = mmap((void *) addr, c.region_size, PROT_READ | PROT_WRITE,
- MAP_FIXED_NOREPLACE | MAP_ANONYMOUS | MAP_SHARED,
- -1, 0);
+ MAP_FIXED_NOREPLACE | MAP_ANONYMOUS | MAP_SHARED,
+ -1, 0);
if (src_addr == MAP_FAILED) {
if (errno == EPERM || errno == EEXIST)
goto retry;
@@ -90,8 +152,10 @@ retry:
* alignment in the tests.
*/
if (((unsigned long long) src_addr & (c.src_alignment - 1)) ||
- !((unsigned long long) src_addr & c.src_alignment))
+ !((unsigned long long) src_addr & c.src_alignment)) {
+ munmap(src_addr, c.region_size);
goto retry;
+ }
if (!src_addr)
goto error;
@@ -140,9 +204,20 @@ static long long remap_region(struct config c, unsigned int threshold_mb,
if (!((unsigned long long) addr & c.dest_alignment))
addr = (void *) ((unsigned long long) addr | c.dest_alignment);
+ /* Don't destroy existing mappings unless expected to overlap */
+ while (!is_remap_region_valid(addr, c.region_size) && !c.overlapping) {
+ /* Check for unsigned overflow */
+ if (addr + c.dest_alignment < addr) {
+ ksft_print_msg("Couldn't find a valid region to remap to\n");
+ ret = -1;
+ goto out;
+ }
+ addr += c.dest_alignment;
+ }
+
clock_gettime(CLOCK_MONOTONIC, &t_start);
dest_addr = mremap(src_addr, c.region_size, c.region_size,
- MREMAP_MAYMOVE|MREMAP_FIXED, (char *) addr);
+ MREMAP_MAYMOVE|MREMAP_FIXED, (char *) addr);
clock_gettime(CLOCK_MONOTONIC, &t_end);
if (dest_addr == MAP_FAILED) {
@@ -193,7 +268,7 @@ static void run_mremap_test_case(struct test test_case, int *failures,
if (remap_time < 0) {
if (test_case.expect_failure)
- ksft_test_result_pass("%s\n\tExpected mremap failure\n",
+ ksft_test_result_xfail("%s\n\tExpected mremap failure\n",
test_case.name);
else {
ksft_test_result_fail("%s\n", test_case.name);
diff --git a/tools/testing/selftests/vm/run_vmtests.sh b/tools/testing/selftests/vm/run_vmtests.sh
index 3b265f140c25..352ba00cf26b 100755
--- a/tools/testing/selftests/vm/run_vmtests.sh
+++ b/tools/testing/selftests/vm/run_vmtests.sh
@@ -291,11 +291,16 @@ echo "-------------------"
echo "running mremap_test"
echo "-------------------"
./mremap_test
-if [ $? -ne 0 ]; then
+ret_val=$?
+
+if [ $ret_val -eq 0 ]; then
+ echo "[PASS]"
+elif [ $ret_val -eq $ksft_skip ]; then
+ echo "[SKIP]"
+ exitcode=$ksft_skip
+else
echo "[FAIL]"
exitcode=1
-else
- echo "[PASS]"
fi
echo "-----------------"
diff --git a/tools/testing/selftests/wireguard/netns.sh b/tools/testing/selftests/wireguard/netns.sh
index 8a9461aa0878..69c7796c7ca9 100755
--- a/tools/testing/selftests/wireguard/netns.sh
+++ b/tools/testing/selftests/wireguard/netns.sh
@@ -22,10 +22,12 @@
# interfaces in $ns1 and $ns2. See https://www.wireguard.com/netns/ for further
# details on how this is accomplished.
set -e
+shopt -s extglob
exec 3>&1
export LANG=C
export WG_HIDE_KEYS=never
+NPROC=( /sys/devices/system/cpu/cpu+([0-9]) ); NPROC=${#NPROC[@]}
netns0="wg-test-$$-0"
netns1="wg-test-$$-1"
netns2="wg-test-$$-2"
@@ -143,17 +145,15 @@ tests() {
n1 iperf3 -Z -t 3 -b 0 -u -c fd00::2
# TCP over IPv4, in parallel
- for max in 4 5 50; do
- local pids=( )
- for ((i=0; i < max; ++i)) do
- n2 iperf3 -p $(( 5200 + i )) -s -1 -B 192.168.241.2 &
- pids+=( $! ); waitiperf $netns2 $! $(( 5200 + i ))
- done
- for ((i=0; i < max; ++i)) do
- n1 iperf3 -Z -t 3 -p $(( 5200 + i )) -c 192.168.241.2 &
- done
- wait "${pids[@]}"
+ local pids=( ) i
+ for ((i=0; i < NPROC; ++i)) do
+ n2 iperf3 -p $(( 5200 + i )) -s -1 -B 192.168.241.2 &
+ pids+=( $! ); waitiperf $netns2 $! $(( 5200 + i ))
done
+ for ((i=0; i < NPROC; ++i)) do
+ n1 iperf3 -Z -t 3 -p $(( 5200 + i )) -c 192.168.241.2 &
+ done
+ wait "${pids[@]}"
}
[[ $(ip1 link show dev wg0) =~ mtu\ ([0-9]+) ]] && orig_mtu="${BASH_REMATCH[1]}"
@@ -280,7 +280,19 @@ read _ _ tx_bytes_before < <(n0 wg show wg1 transfer)
! n0 ping -W 1 -c 10 -f 192.168.241.2 || false
sleep 1
read _ _ tx_bytes_after < <(n0 wg show wg1 transfer)
-(( tx_bytes_after - tx_bytes_before < 70000 ))
+if ! (( tx_bytes_after - tx_bytes_before < 70000 )); then
+ errstart=$'\x1b[37m\x1b[41m\x1b[1m'
+ errend=$'\x1b[0m'
+ echo "${errstart} ${errend}"
+ echo "${errstart} E R R O R ${errend}"
+ echo "${errstart} ${errend}"
+ echo "${errstart} This architecture does not do the right thing ${errend}"
+ echo "${errstart} with cross-namespace routing loops. This test ${errend}"
+ echo "${errstart} has thus technically failed but, as this issue ${errend}"
+ echo "${errstart} is as yet unsolved, these tests will continue ${errend}"
+ echo "${errstart} onward. :( ${errend}"
+ echo "${errstart} ${errend}"
+fi
ip0 link del wg1
ip1 link del wg0
diff --git a/tools/testing/selftests/wireguard/qemu/.gitignore b/tools/testing/selftests/wireguard/qemu/.gitignore
index bfa15e6feb2f..42ab9d72b37b 100644
--- a/tools/testing/selftests/wireguard/qemu/.gitignore
+++ b/tools/testing/selftests/wireguard/qemu/.gitignore
@@ -1,3 +1,4 @@
# SPDX-License-Identifier: GPL-2.0-only
build/
distfiles/
+ccache/
diff --git a/tools/testing/selftests/wireguard/qemu/Makefile b/tools/testing/selftests/wireguard/qemu/Makefile
index 4bdd6c1a19d3..bca07b93eeb0 100644
--- a/tools/testing/selftests/wireguard/qemu/Makefile
+++ b/tools/testing/selftests/wireguard/qemu/Makefile
@@ -4,26 +4,24 @@
PWD := $(shell pwd)
-CHOST := $(shell gcc -dumpmachine)
-HOST_ARCH := $(firstword $(subst -, ,$(CHOST)))
-ifneq (,$(ARCH))
-CBUILD := $(subst -gcc,,$(lastword $(subst /, ,$(firstword $(wildcard $(foreach bindir,$(subst :, ,$(PATH)),$(bindir)/$(ARCH)-*-gcc))))))
-ifeq (,$(CBUILD))
-$(error The toolchain for $(ARCH) is not installed)
-endif
-else
-CBUILD := $(CHOST)
-ARCH := $(firstword $(subst -, ,$(CBUILD)))
-endif
-
# Set these from the environment to override
KERNEL_PATH ?= $(PWD)/../../../../..
BUILD_PATH ?= $(PWD)/build/$(ARCH)
DISTFILES_PATH ?= $(PWD)/distfiles
NR_CPUS ?= 4
+ARCH ?=
+CBUILD := $(shell gcc -dumpmachine)
+HOST_ARCH := $(firstword $(subst -, ,$(CBUILD)))
+ifeq ($(ARCH),)
+ARCH := $(HOST_ARCH)
+endif
MIRROR := https://download.wireguard.com/qemu-test/distfiles/
+KERNEL_BUILD_PATH := $(BUILD_PATH)/kernel$(if $(findstring yes,$(DEBUG_KERNEL)),-debug)
+rwildcard=$(foreach d,$(wildcard $1*),$(call rwildcard,$d/,$2) $(filter $(subst *,%,$2),$d))
+WIREGUARD_SOURCES := $(call rwildcard,$(KERNEL_PATH)/drivers/net/wireguard/,*)
+
default: qemu
# variable name, tarball project name, version, tarball extension, default URI base
@@ -36,42 +34,33 @@ $(call file_download,$$($(1)_NAME)$(4),$(5),$(6))
endef
define file_download =
-$(DISTFILES_PATH)/$(1):
+$(DISTFILES_PATH)/$(1): | $(4)
mkdir -p $(DISTFILES_PATH)
- flock -x $$@.lock -c '[ -f $$@ ] && exit 0; wget -O $$@.tmp $(MIRROR)$(1) || wget -O $$@.tmp $(2)$(1) || rm -f $$@.tmp; [ -f $$@.tmp ] || exit 1; if echo "$(3) $$@.tmp" | sha256sum -c -; then mv $$@.tmp $$@; else rm -f $$@.tmp; exit 71; fi'
+ flock -x $$@.lock -c '[ -f $$@ ] && exit 0; wget -O $$@.tmp $(MIRROR)$(1) || wget -O $$@.tmp $(2)$(1) || rm -f $$@.tmp; [ -f $$@.tmp ] || exit 1; if ([ -n "$(4)" ] && sed -n "s#^\([a-f0-9]\{64\}\) \($(1)\)\$$$$#\1 $(DISTFILES_PATH)/\2.tmp#p" "$(4)" || echo "$(3) $$@.tmp") | sha256sum -c -; then mv $$@.tmp $$@; else rm -f $$@.tmp; exit 71; fi'
endef
-$(eval $(call tar_download,MUSL,musl,1.2.0,.tar.gz,https://musl.libc.org/releases/,c6de7b191139142d3f9a7b5b702c9cae1b5ee6e7f57e582da9328629408fd4e8))
-$(eval $(call tar_download,IPERF,iperf,3.7,.tar.gz,https://downloads.es.net/pub/iperf/,d846040224317caf2f75c843d309a950a7db23f9b44b94688ccbe557d6d1710c))
-$(eval $(call tar_download,BASH,bash,5.0,.tar.gz,https://ftp.gnu.org/gnu/bash/,b4a80f2ac66170b2913efbfb9f2594f1f76c7b1afd11f799e22035d63077fb4d))
-$(eval $(call tar_download,IPROUTE2,iproute2,5.6.0,.tar.xz,https://www.kernel.org/pub/linux/utils/net/iproute2/,1b5b0e25ce6e23da7526ea1da044e814ad85ba761b10dd29c2b027c056b04692))
-$(eval $(call tar_download,IPTABLES,iptables,1.8.4,.tar.bz2,https://www.netfilter.org/projects/iptables/files/,993a3a5490a544c2cbf2ef15cf7e7ed21af1845baf228318d5c36ef8827e157c))
-$(eval $(call tar_download,NMAP,nmap,7.80,.tar.bz2,https://nmap.org/dist/,fcfa5a0e42099e12e4bf7a68ebe6fde05553383a682e816a7ec9256ab4773faa))
+$(eval $(call tar_download,IPERF,iperf,3.11,.tar.gz,https://downloads.es.net/pub/iperf/,de8cb409fad61a0574f4cb07eb19ce1159707403ac2dc01b5d175e91240b7e5f))
+$(eval $(call tar_download,BASH,bash,5.1.16,.tar.gz,https://ftp.gnu.org/gnu/bash/,5bac17218d3911834520dad13cd1f85ab944e1c09ae1aba55906be1f8192f558))
+$(eval $(call tar_download,IPROUTE2,iproute2,5.17.0,.tar.gz,https://www.kernel.org/pub/linux/utils/net/iproute2/,bda331d5c4606138892f23a565d78fca18919b4d508a0b7ca8391c2da2db68b9))
+$(eval $(call tar_download,IPTABLES,iptables,1.8.7,.tar.bz2,https://www.netfilter.org/projects/iptables/files/,c109c96bb04998cd44156622d36f8e04b140701ec60531a10668cfdff5e8d8f0))
+$(eval $(call tar_download,NMAP,nmap,7.92,.tgz,https://nmap.org/dist/,064183ea642dc4c12b1ab3b5358ce1cef7d2e7e11ffa2849f16d339f5b717117))
$(eval $(call tar_download,IPUTILS,iputils,s20190709,.tar.gz,https://github.com/iputils/iputils/archive/s20190709.tar.gz/#,a15720dd741d7538dd2645f9f516d193636ae4300ff7dbc8bfca757bf166490a))
-$(eval $(call tar_download,WIREGUARD_TOOLS,wireguard-tools,1.0.20200206,.tar.xz,https://git.zx2c4.com/wireguard-tools/snapshot/,f5207248c6a3c3e3bfc9ab30b91c1897b00802ed861e1f9faaed873366078c64))
-
-KERNEL_BUILD_PATH := $(BUILD_PATH)/kernel$(if $(findstring yes,$(DEBUG_KERNEL)),-debug)
-rwildcard=$(foreach d,$(wildcard $1*),$(call rwildcard,$d/,$2) $(filter $(subst *,%,$2),$d))
-WIREGUARD_SOURCES := $(call rwildcard,$(KERNEL_PATH)/drivers/net/wireguard/,*)
-
-export CFLAGS ?= -O3 -pipe
-export LDFLAGS ?=
-export CPPFLAGS := -I$(BUILD_PATH)/include
+$(eval $(call tar_download,WIREGUARD_TOOLS,wireguard-tools,1.0.20210914,.tar.xz,https://git.zx2c4.com/wireguard-tools/snapshot/,97ff31489217bb265b7ae850d3d0f335ab07d2652ba1feec88b734bc96bd05ac))
+export CFLAGS := -O3 -pipe
ifeq ($(HOST_ARCH),$(ARCH))
-CROSS_COMPILE_FLAG := --host=$(CHOST)
CFLAGS += -march=native
-STRIP := strip
-else
-$(info Cross compilation: building for $(CBUILD) using $(CHOST))
-CROSS_COMPILE_FLAG := --build=$(CBUILD) --host=$(CHOST)
-export CROSS_COMPILE=$(CBUILD)-
-STRIP := $(CBUILD)-strip
endif
+export LDFLAGS :=
+export CPPFLAGS :=
+
+QEMU_VPORT_RESULT :=
ifeq ($(ARCH),aarch64)
+CHOST := aarch64-linux-musl
QEMU_ARCH := aarch64
KERNEL_ARCH := arm64
KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/arm64/boot/Image
+QEMU_VPORT_RESULT := virtio-serial-device
ifeq ($(HOST_ARCH),$(ARCH))
QEMU_MACHINE := -cpu host -machine virt,gic_version=host,accel=kvm
else
@@ -79,9 +68,11 @@ QEMU_MACHINE := -cpu cortex-a53 -machine virt
CFLAGS += -march=armv8-a -mtune=cortex-a53
endif
else ifeq ($(ARCH),aarch64_be)
+CHOST := aarch64_be-linux-musl
QEMU_ARCH := aarch64
KERNEL_ARCH := arm64
KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/arm64/boot/Image
+QEMU_VPORT_RESULT := virtio-serial-device
ifeq ($(HOST_ARCH),$(ARCH))
QEMU_MACHINE := -cpu host -machine virt,gic_version=host,accel=kvm
else
@@ -89,9 +80,11 @@ QEMU_MACHINE := -cpu cortex-a53 -machine virt
CFLAGS += -march=armv8-a -mtune=cortex-a53
endif
else ifeq ($(ARCH),arm)
+CHOST := arm-linux-musleabi
QEMU_ARCH := arm
KERNEL_ARCH := arm
KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/arm/boot/zImage
+QEMU_VPORT_RESULT := virtio-serial-device
ifeq ($(HOST_ARCH),$(ARCH))
QEMU_MACHINE := -cpu host -machine virt,gic_version=host,accel=kvm
else
@@ -99,9 +92,11 @@ QEMU_MACHINE := -cpu cortex-a15 -machine virt
CFLAGS += -march=armv7-a -mtune=cortex-a15 -mabi=aapcs-linux
endif
else ifeq ($(ARCH),armeb)
+CHOST := armeb-linux-musleabi
QEMU_ARCH := arm
KERNEL_ARCH := arm
KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/arm/boot/zImage
+QEMU_VPORT_RESULT := virtio-serial-device
ifeq ($(HOST_ARCH),$(ARCH))
QEMU_MACHINE := -cpu host -machine virt,gic_version=host,accel=kvm
else
@@ -110,6 +105,7 @@ CFLAGS += -march=armv7-a -mabi=aapcs-linux # We don't pass -mtune=cortex-a15 due
LDFLAGS += -Wl,--be8
endif
else ifeq ($(ARCH),x86_64)
+CHOST := x86_64-linux-musl
QEMU_ARCH := x86_64
KERNEL_ARCH := x86_64
KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/x86/boot/bzImage
@@ -120,6 +116,7 @@ QEMU_MACHINE := -cpu Skylake-Server -machine q35
CFLAGS += -march=skylake-avx512
endif
else ifeq ($(ARCH),i686)
+CHOST := i686-linux-musl
QEMU_ARCH := i386
KERNEL_ARCH := x86
KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/x86/boot/bzImage
@@ -130,6 +127,7 @@ QEMU_MACHINE := -cpu coreduo -machine q35
CFLAGS += -march=prescott
endif
else ifeq ($(ARCH),mips64)
+CHOST := mips64-linux-musl
QEMU_ARCH := mips64
KERNEL_ARCH := mips
KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/vmlinux
@@ -141,6 +139,7 @@ QEMU_MACHINE := -cpu MIPS64R2-generic -machine malta -smp 1
CFLAGS += -march=mips64r2 -EB
endif
else ifeq ($(ARCH),mips64el)
+CHOST := mips64el-linux-musl
QEMU_ARCH := mips64el
KERNEL_ARCH := mips
KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/vmlinux
@@ -152,6 +151,7 @@ QEMU_MACHINE := -cpu MIPS64R2-generic -machine malta -smp 1
CFLAGS += -march=mips64r2 -EL
endif
else ifeq ($(ARCH),mips)
+CHOST := mips-linux-musl
QEMU_ARCH := mips
KERNEL_ARCH := mips
KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/vmlinux
@@ -163,6 +163,7 @@ QEMU_MACHINE := -cpu 24Kf -machine malta -smp 1
CFLAGS += -march=mips32r2 -EB
endif
else ifeq ($(ARCH),mipsel)
+CHOST := mipsel-linux-musl
QEMU_ARCH := mipsel
KERNEL_ARCH := mips
KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/vmlinux
@@ -173,7 +174,18 @@ else
QEMU_MACHINE := -cpu 24Kf -machine malta -smp 1
CFLAGS += -march=mips32r2 -EL
endif
+else ifeq ($(ARCH),powerpc64)
+CHOST := powerpc64-linux-musl
+QEMU_ARCH := ppc64
+KERNEL_ARCH := powerpc
+KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/vmlinux
+ifeq ($(HOST_ARCH),$(ARCH))
+QEMU_MACHINE := -cpu host,accel=kvm -machine pseries
+else
+QEMU_MACHINE := -machine pseries
+endif
else ifeq ($(ARCH),powerpc64le)
+CHOST := powerpc64le-linux-musl
QEMU_ARCH := ppc64
KERNEL_ARCH := powerpc
KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/vmlinux
@@ -182,8 +194,8 @@ QEMU_MACHINE := -cpu host,accel=kvm -machine pseries
else
QEMU_MACHINE := -machine pseries
endif
-CFLAGS += -mcpu=powerpc64le -mlong-double-64
else ifeq ($(ARCH),powerpc)
+CHOST := powerpc-linux-musl
QEMU_ARCH := ppc
KERNEL_ARCH := powerpc
KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/powerpc/boot/uImage
@@ -192,26 +204,79 @@ QEMU_MACHINE := -cpu host,accel=kvm -machine ppce500
else
QEMU_MACHINE := -machine ppce500
endif
-CFLAGS += -mcpu=powerpc -mlong-double-64 -msecure-plt
else ifeq ($(ARCH),m68k)
+CHOST := m68k-linux-musl
QEMU_ARCH := m68k
KERNEL_ARCH := m68k
KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/vmlinux
KERNEL_CMDLINE := $(shell sed -n 's/CONFIG_CMDLINE=\(.*\)/\1/p' arch/m68k.config)
ifeq ($(HOST_ARCH),$(ARCH))
-QEMU_MACHINE := -cpu host,accel=kvm -machine q800 -smp 1 -append $(KERNEL_CMDLINE)
+QEMU_MACHINE := -cpu host,accel=kvm -machine q800 -append $(KERNEL_CMDLINE)
else
QEMU_MACHINE := -machine q800 -smp 1 -append $(KERNEL_CMDLINE)
endif
+else ifeq ($(ARCH),riscv64)
+CHOST := riscv64-linux-musl
+QEMU_ARCH := riscv64
+KERNEL_ARCH := riscv
+KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/riscv/boot/Image
+QEMU_VPORT_RESULT := virtio-serial-device
+ifeq ($(HOST_ARCH),$(ARCH))
+QEMU_MACHINE := -cpu host,accel=kvm -machine virt
+else
+QEMU_MACHINE := -cpu rv64 -machine virt
+endif
+else ifeq ($(ARCH),riscv32)
+CHOST := riscv32-linux-musl
+QEMU_ARCH := riscv32
+KERNEL_ARCH := riscv
+KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/riscv/boot/Image
+QEMU_VPORT_RESULT := virtio-serial-device
+ifeq ($(HOST_ARCH),$(ARCH))
+QEMU_MACHINE := -cpu host,accel=kvm -machine virt
+else
+QEMU_MACHINE := -cpu rv32 -machine virt
+endif
+else ifeq ($(ARCH),s390x)
+CHOST := s390x-linux-musl
+QEMU_ARCH := s390x
+KERNEL_ARCH := s390
+KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/s390/boot/bzImage
+KERNEL_CMDLINE := $(shell sed -n 's/CONFIG_CMDLINE=\(.*\)/\1/p' arch/s390x.config)
+QEMU_VPORT_RESULT := virtio-serial-ccw
+ifeq ($(HOST_ARCH),$(ARCH))
+QEMU_MACHINE := -cpu host,accel=kvm -machine s390-ccw-virtio -append $(KERNEL_CMDLINE)
+else
+QEMU_MACHINE := -machine s390-ccw-virtio -append $(KERNEL_CMDLINE)
+endif
else
-$(error I only build: x86_64, i686, arm, armeb, aarch64, aarch64_be, mips, mipsel, mips64, mips64el, powerpc64le, powerpc, m68k)
+$(error I only build: x86_64, i686, arm, armeb, aarch64, aarch64_be, mips, mipsel, mips64, mips64el, powerpc64, powerpc64le, powerpc, m68k, riscv64, riscv32, s390x)
+endif
+
+TOOLCHAIN_FILENAME := $(CHOST)-cross.tgz
+TOOLCHAIN_TAR := $(DISTFILES_PATH)/$(TOOLCHAIN_FILENAME)
+TOOLCHAIN_PATH := $(BUILD_PATH)/$(CHOST)-cross
+TOOLCHAIN_DIR := https://download.wireguard.com/qemu-test/toolchains/20211123/
+$(eval $(call file_download,toolchain-sha256sums-20211123,$(TOOLCHAIN_DIR)SHA256SUMS#,83da033fd8c798df476c21d9612da2dfb896ec62fbed4ceec5eefc0e56b3f0c8))
+$(eval $(call file_download,$(TOOLCHAIN_FILENAME),$(TOOLCHAIN_DIR),,$(DISTFILES_PATH)/toolchain-sha256sums-20211123))
+
+STRIP := $(CHOST)-strip
+CROSS_COMPILE_FLAG := --build=$(CBUILD) --host=$(CHOST)
+$(info Building for $(CHOST) using $(CBUILD))
+export CROSS_COMPILE := $(CHOST)-
+export PATH := $(TOOLCHAIN_PATH)/bin:$(PATH)
+export CC := $(CHOST)-gcc
+CCACHE_PATH := $(shell which ccache 2>/dev/null)
+ifneq ($(CCACHE_PATH),)
+export KBUILD_BUILD_TIMESTAMP := Fri Jun 5 15:58:00 CEST 2015
+export PATH := $(TOOLCHAIN_PATH)/bin/ccache:$(PATH)
+export CCACHE_SLOPPINESS := file_macro,time_macros
+export CCACHE_DIR ?= $(PWD)/ccache
endif
-REAL_CC := $(CBUILD)-gcc
-MUSL_CC := $(BUILD_PATH)/musl-gcc
-export CC := $(MUSL_CC)
-USERSPACE_DEPS := $(MUSL_CC) $(BUILD_PATH)/include/.installed $(BUILD_PATH)/include/linux/.installed
+USERSPACE_DEPS := $(TOOLCHAIN_PATH)/.installed $(TOOLCHAIN_PATH)/$(CHOST)/include/linux/.installed
+comma := ,
build: $(KERNEL_BZIMAGE)
qemu: $(KERNEL_BZIMAGE)
rm -f $(BUILD_PATH)/result
@@ -222,13 +287,14 @@ qemu: $(KERNEL_BZIMAGE)
$(QEMU_MACHINE) \
-m $$(grep -q CONFIG_DEBUG_KMEMLEAK=y $(KERNEL_BUILD_PATH)/.config && echo 1G || echo 256M) \
-serial stdio \
- -serial file:$(BUILD_PATH)/result \
+ -chardev file,path=$(BUILD_PATH)/result,id=result \
+ $(if $(QEMU_VPORT_RESULT),-device $(QEMU_VPORT_RESULT) -device virtserialport$(comma)chardev=result,-serial chardev:result) \
-no-reboot \
-monitor none \
-kernel $<
grep -Fq success $(BUILD_PATH)/result
-$(BUILD_PATH)/init-cpio-spec.txt:
+$(BUILD_PATH)/init-cpio-spec.txt: $(TOOLCHAIN_PATH)/.installed $(BUILD_PATH)/init
mkdir -p $(BUILD_PATH)
echo "file /init $(BUILD_PATH)/init 755 0 0" > $@
echo "file /init.sh $(PWD)/../netns.sh 755 0 0" >> $@
@@ -246,10 +312,10 @@ $(BUILD_PATH)/init-cpio-spec.txt:
echo "slink /bin/iptables xtables-legacy-multi 777 0 0" >> $@
echo "slink /bin/ping6 ping 777 0 0" >> $@
echo "dir /lib 755 0 0" >> $@
- echo "file /lib/libc.so $(MUSL_PATH)/lib/libc.so 755 0 0" >> $@
- echo "slink /lib/ld-linux.so.1 libc.so 777 0 0" >> $@
+ echo "file /lib/libc.so $(TOOLCHAIN_PATH)/$(CHOST)/lib/libc.so 755 0 0" >> $@
+ echo "slink $$($(CHOST)-readelf -p .interp '$(BUILD_PATH)/init'| grep -o '/lib/.*') libc.so 777 0 0" >> $@
-$(KERNEL_BUILD_PATH)/.config: kernel.config arch/$(ARCH).config
+$(KERNEL_BUILD_PATH)/.config: $(TOOLCHAIN_PATH)/.installed kernel.config arch/$(ARCH).config
mkdir -p $(KERNEL_BUILD_PATH)
cp kernel.config $(KERNEL_BUILD_PATH)/minimal.config
printf 'CONFIG_NR_CPUS=$(NR_CPUS)\nCONFIG_INITRAMFS_SOURCE="$(BUILD_PATH)/init-cpio-spec.txt"\n' >> $(KERNEL_BUILD_PATH)/minimal.config
@@ -258,29 +324,24 @@ $(KERNEL_BUILD_PATH)/.config: kernel.config arch/$(ARCH).config
cd $(KERNEL_BUILD_PATH) && ARCH=$(KERNEL_ARCH) $(KERNEL_PATH)/scripts/kconfig/merge_config.sh -n $(KERNEL_BUILD_PATH)/.config $(KERNEL_BUILD_PATH)/minimal.config
$(if $(findstring yes,$(DEBUG_KERNEL)),cp debug.config $(KERNEL_BUILD_PATH) && cd $(KERNEL_BUILD_PATH) && ARCH=$(KERNEL_ARCH) $(KERNEL_PATH)/scripts/kconfig/merge_config.sh -n $(KERNEL_BUILD_PATH)/.config debug.config,)
-$(KERNEL_BZIMAGE): $(KERNEL_BUILD_PATH)/.config $(BUILD_PATH)/init-cpio-spec.txt $(MUSL_PATH)/lib/libc.so $(IPERF_PATH)/src/iperf3 $(IPUTILS_PATH)/ping $(BASH_PATH)/bash $(IPROUTE2_PATH)/misc/ss $(IPROUTE2_PATH)/ip/ip $(IPTABLES_PATH)/iptables/xtables-legacy-multi $(NMAP_PATH)/ncat/ncat $(WIREGUARD_TOOLS_PATH)/src/wg $(BUILD_PATH)/init ../netns.sh $(WIREGUARD_SOURCES)
+$(KERNEL_BZIMAGE): $(TOOLCHAIN_PATH)/.installed $(KERNEL_BUILD_PATH)/.config $(BUILD_PATH)/init-cpio-spec.txt $(IPERF_PATH)/src/iperf3 $(IPUTILS_PATH)/ping $(BASH_PATH)/bash $(IPROUTE2_PATH)/misc/ss $(IPROUTE2_PATH)/ip/ip $(IPTABLES_PATH)/iptables/xtables-legacy-multi $(NMAP_PATH)/ncat/ncat $(WIREGUARD_TOOLS_PATH)/src/wg $(BUILD_PATH)/init ../netns.sh $(WIREGUARD_SOURCES)
$(MAKE) -C $(KERNEL_PATH) O=$(KERNEL_BUILD_PATH) ARCH=$(KERNEL_ARCH) CROSS_COMPILE=$(CROSS_COMPILE)
-$(BUILD_PATH)/include/linux/.installed: | $(KERNEL_BUILD_PATH)/.config
- $(MAKE) -C $(KERNEL_PATH) O=$(KERNEL_BUILD_PATH) INSTALL_HDR_PATH=$(BUILD_PATH) ARCH=$(KERNEL_ARCH) CROSS_COMPILE=$(CROSS_COMPILE) headers_install
+$(TOOLCHAIN_PATH)/$(CHOST)/include/linux/.installed: | $(KERNEL_BUILD_PATH)/.config $(TOOLCHAIN_PATH)/.installed
+ rm -rf $(TOOLCHAIN_PATH)/$(CHOST)/include/linux
+ $(MAKE) -C $(KERNEL_PATH) O=$(KERNEL_BUILD_PATH) INSTALL_HDR_PATH=$(TOOLCHAIN_PATH)/$(CHOST) ARCH=$(KERNEL_ARCH) CROSS_COMPILE=$(CROSS_COMPILE) headers_install
touch $@
-$(MUSL_PATH)/lib/libc.so: $(MUSL_TAR)
+$(TOOLCHAIN_PATH)/.installed: $(TOOLCHAIN_TAR)
mkdir -p $(BUILD_PATH)
flock -s $<.lock tar -C $(BUILD_PATH) -xf $<
- cd $(MUSL_PATH) && CC=$(REAL_CC) ./configure --prefix=/ --disable-static --build=$(CBUILD)
- $(MAKE) -C $(MUSL_PATH)
- $(STRIP) -s $@
-
-$(BUILD_PATH)/include/.installed: $(MUSL_PATH)/lib/libc.so
- $(MAKE) -C $(MUSL_PATH) DESTDIR=$(BUILD_PATH) install-headers
+ $(STRIP) -s $(TOOLCHAIN_PATH)/$(CHOST)/lib/libc.so
+ifneq ($(CCACHE_PATH),)
+ mkdir -p $(TOOLCHAIN_PATH)/bin/ccache
+ ln -s $(CCACHE_PATH) $(TOOLCHAIN_PATH)/bin/ccache/$(CC)
+endif
touch $@
-$(MUSL_CC): $(MUSL_PATH)/lib/libc.so
- sh $(MUSL_PATH)/tools/musl-gcc.specs.sh $(BUILD_PATH)/include $(MUSL_PATH)/lib /lib/ld-linux.so.1 > $(BUILD_PATH)/musl-gcc.specs
- printf '#!/bin/sh\nexec "$(REAL_CC)" --specs="$(BUILD_PATH)/musl-gcc.specs" "$$@"\n' > $(BUILD_PATH)/musl-gcc
- chmod +x $(BUILD_PATH)/musl-gcc
-
$(IPERF_PATH)/.installed: $(IPERF_TAR)
mkdir -p $(BUILD_PATH)
flock -s $<.lock tar -C $(BUILD_PATH) -xf $<
@@ -289,6 +350,7 @@ $(IPERF_PATH)/.installed: $(IPERF_TAR)
touch $@
$(IPERF_PATH)/src/iperf3: | $(IPERF_PATH)/.installed $(USERSPACE_DEPS)
+ cd $(IPERF_PATH) && autoreconf -fi
cd $(IPERF_PATH) && CFLAGS="$(CFLAGS) -D_GNU_SOURCE" ./configure --prefix=/ $(CROSS_COMPILE_FLAG) --enable-static --disable-shared --with-openssl=no
$(MAKE) -C $(IPERF_PATH)
$(STRIP) -s $@
@@ -304,7 +366,7 @@ $(WIREGUARD_TOOLS_PATH)/src/wg: | $(WIREGUARD_TOOLS_PATH)/.installed $(USERSPACE
$(BUILD_PATH)/init: init.c | $(USERSPACE_DEPS)
mkdir -p $(BUILD_PATH)
- $(MUSL_CC) -o $@ $(CFLAGS) $(LDFLAGS) -std=gnu11 $<
+ $(CC) -o $@ $(CFLAGS) $(LDFLAGS) -std=gnu11 $<
$(STRIP) -s $@
$(IPUTILS_PATH)/.installed: $(IPUTILS_TAR)
@@ -323,15 +385,15 @@ $(BASH_PATH)/.installed: $(BASH_TAR)
touch $@
$(BASH_PATH)/bash: | $(BASH_PATH)/.installed $(USERSPACE_DEPS)
- cd $(BASH_PATH) && ./configure --prefix=/ $(CROSS_COMPILE_FLAG) --without-bash-malloc --disable-debugger --disable-help-builtin --disable-history --disable-multibyte --disable-progcomp --disable-readline --disable-mem-scramble
+ cd $(BASH_PATH) && ./configure --prefix=/ $(CROSS_COMPILE_FLAG) --without-bash-malloc --disable-debugger --disable-help-builtin --disable-history --disable-progcomp --disable-readline --disable-mem-scramble
$(MAKE) -C $(BASH_PATH)
$(STRIP) -s $@
$(IPROUTE2_PATH)/.installed: $(IPROUTE2_TAR)
mkdir -p $(BUILD_PATH)
flock -s $<.lock tar -C $(BUILD_PATH) -xf $<
- printf 'CC:=$(CC)\nPKG_CONFIG:=pkg-config\nTC_CONFIG_XT:=n\nTC_CONFIG_ATM:=n\nTC_CONFIG_IPSET:=n\nIP_CONFIG_SETNS:=y\nHAVE_ELF:=n\nHAVE_MNL:=n\nHAVE_BERKELEY_DB:=n\nHAVE_LATEX:=n\nHAVE_PDFLATEX:=n\nCFLAGS+=-DHAVE_SETNS\n' > $(IPROUTE2_PATH)/config.mk
- printf 'lib: snapshot\n\t$$(MAKE) -C lib\nip/ip: lib\n\t$$(MAKE) -C ip ip\nmisc/ss: lib\n\t$$(MAKE) -C misc ss\n' >> $(IPROUTE2_PATH)/Makefile
+ printf 'CC:=$(CC)\nPKG_CONFIG:=pkg-config\nTC_CONFIG_XT:=n\nTC_CONFIG_ATM:=n\nTC_CONFIG_IPSET:=n\nIP_CONFIG_SETNS:=y\nHAVE_ELF:=n\nHAVE_MNL:=n\nHAVE_BERKELEY_DB:=n\nHAVE_LATEX:=n\nHAVE_PDFLATEX:=n\nCFLAGS+=-DHAVE_SETNS -DHAVE_HANDLE_AT\n' > $(IPROUTE2_PATH)/config.mk
+ printf 'libutil.a.done:\n\tflock -x $$@.lock $$(MAKE) -C lib\n\ttouch $$@\nip/ip: libutil.a.done\n\t$$(MAKE) -C ip ip\nmisc/ss: libutil.a.done\n\t$$(MAKE) -C misc ss\n' >> $(IPROUTE2_PATH)/Makefile
touch $@
$(IPROUTE2_PATH)/ip/ip: | $(IPROUTE2_PATH)/.installed $(USERSPACE_DEPS)
@@ -370,8 +432,13 @@ clean:
distclean: clean
rm -rf $(DISTFILES_PATH)
+cacheclean: clean
+ifneq ($(CCACHE_DIR),)
+ rm -rf $(CCACHE_DIR)
+endif
+
menuconfig: $(KERNEL_BUILD_PATH)/.config
$(MAKE) -C $(KERNEL_PATH) O=$(KERNEL_BUILD_PATH) ARCH=$(KERNEL_ARCH) CROSS_COMPILE=$(CROSS_COMPILE) menuconfig
-.PHONY: qemu build clean distclean menuconfig
+.PHONY: qemu build clean distclean cacheclean menuconfig
.DELETE_ON_ERROR:
diff --git a/tools/testing/selftests/wireguard/qemu/arch/aarch64.config b/tools/testing/selftests/wireguard/qemu/arch/aarch64.config
index 3d063bb247bb..09016880ce03 100644
--- a/tools/testing/selftests/wireguard/qemu/arch/aarch64.config
+++ b/tools/testing/selftests/wireguard/qemu/arch/aarch64.config
@@ -1,5 +1,8 @@
CONFIG_SERIAL_AMBA_PL011=y
CONFIG_SERIAL_AMBA_PL011_CONSOLE=y
+CONFIG_VIRTIO_MENU=y
+CONFIG_VIRTIO_MMIO=y
+CONFIG_VIRTIO_CONSOLE=y
CONFIG_CMDLINE_BOOL=y
-CONFIG_CMDLINE="console=ttyAMA0 wg.success=ttyAMA1"
+CONFIG_CMDLINE="console=ttyAMA0 wg.success=vport0p1 panic_on_warn=1"
CONFIG_FRAME_WARN=1280
diff --git a/tools/testing/selftests/wireguard/qemu/arch/aarch64_be.config b/tools/testing/selftests/wireguard/qemu/arch/aarch64_be.config
index dbdc7e406a7b..19ff66e4c602 100644
--- a/tools/testing/selftests/wireguard/qemu/arch/aarch64_be.config
+++ b/tools/testing/selftests/wireguard/qemu/arch/aarch64_be.config
@@ -1,6 +1,9 @@
CONFIG_CPU_BIG_ENDIAN=y
CONFIG_SERIAL_AMBA_PL011=y
CONFIG_SERIAL_AMBA_PL011_CONSOLE=y
+CONFIG_VIRTIO_MENU=y
+CONFIG_VIRTIO_MMIO=y
+CONFIG_VIRTIO_CONSOLE=y
CONFIG_CMDLINE_BOOL=y
-CONFIG_CMDLINE="console=ttyAMA0 wg.success=ttyAMA1"
+CONFIG_CMDLINE="console=ttyAMA0 wg.success=vport0p1 panic_on_warn=1"
CONFIG_FRAME_WARN=1280
diff --git a/tools/testing/selftests/wireguard/qemu/arch/arm.config b/tools/testing/selftests/wireguard/qemu/arch/arm.config
index 148f49905418..fc7959bef9c2 100644
--- a/tools/testing/selftests/wireguard/qemu/arch/arm.config
+++ b/tools/testing/selftests/wireguard/qemu/arch/arm.config
@@ -4,6 +4,9 @@ CONFIG_ARCH_VIRT=y
CONFIG_THUMB2_KERNEL=n
CONFIG_SERIAL_AMBA_PL011=y
CONFIG_SERIAL_AMBA_PL011_CONSOLE=y
+CONFIG_VIRTIO_MENU=y
+CONFIG_VIRTIO_MMIO=y
+CONFIG_VIRTIO_CONSOLE=y
CONFIG_CMDLINE_BOOL=y
-CONFIG_CMDLINE="console=ttyAMA0 wg.success=ttyAMA1"
+CONFIG_CMDLINE="console=ttyAMA0 wg.success=vport0p1 panic_on_warn=1"
CONFIG_FRAME_WARN=1024
diff --git a/tools/testing/selftests/wireguard/qemu/arch/armeb.config b/tools/testing/selftests/wireguard/qemu/arch/armeb.config
index bd76b07d00a2..f3066be81c19 100644
--- a/tools/testing/selftests/wireguard/qemu/arch/armeb.config
+++ b/tools/testing/selftests/wireguard/qemu/arch/armeb.config
@@ -4,7 +4,10 @@ CONFIG_ARCH_VIRT=y
CONFIG_THUMB2_KERNEL=n
CONFIG_SERIAL_AMBA_PL011=y
CONFIG_SERIAL_AMBA_PL011_CONSOLE=y
+CONFIG_VIRTIO_MENU=y
+CONFIG_VIRTIO_MMIO=y
+CONFIG_VIRTIO_CONSOLE=y
CONFIG_CMDLINE_BOOL=y
-CONFIG_CMDLINE="console=ttyAMA0 wg.success=ttyAMA1"
+CONFIG_CMDLINE="console=ttyAMA0 wg.success=vport0p1 panic_on_warn=1"
CONFIG_CPU_BIG_ENDIAN=y
CONFIG_FRAME_WARN=1024
diff --git a/tools/testing/selftests/wireguard/qemu/arch/i686.config b/tools/testing/selftests/wireguard/qemu/arch/i686.config
index a85025d7206e..6d90892a85a2 100644
--- a/tools/testing/selftests/wireguard/qemu/arch/i686.config
+++ b/tools/testing/selftests/wireguard/qemu/arch/i686.config
@@ -1,5 +1,6 @@
+CONFIG_ACPI=y
CONFIG_SERIAL_8250=y
CONFIG_SERIAL_8250_CONSOLE=y
CONFIG_CMDLINE_BOOL=y
-CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1"
+CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1 panic_on_warn=1"
CONFIG_FRAME_WARN=1024
diff --git a/tools/testing/selftests/wireguard/qemu/arch/m68k.config b/tools/testing/selftests/wireguard/qemu/arch/m68k.config
index 62a15bdb877e..82c925e49beb 100644
--- a/tools/testing/selftests/wireguard/qemu/arch/m68k.config
+++ b/tools/testing/selftests/wireguard/qemu/arch/m68k.config
@@ -5,5 +5,5 @@ CONFIG_MAC=y
CONFIG_SERIAL_PMACZILOG=y
CONFIG_SERIAL_PMACZILOG_TTYS=y
CONFIG_SERIAL_PMACZILOG_CONSOLE=y
-CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1"
+CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1 panic_on_warn=1"
CONFIG_FRAME_WARN=1024
diff --git a/tools/testing/selftests/wireguard/qemu/arch/mips.config b/tools/testing/selftests/wireguard/qemu/arch/mips.config
index df71d6b95546..d7ec63c17b30 100644
--- a/tools/testing/selftests/wireguard/qemu/arch/mips.config
+++ b/tools/testing/selftests/wireguard/qemu/arch/mips.config
@@ -7,5 +7,5 @@ CONFIG_POWER_RESET_SYSCON=y
CONFIG_SERIAL_8250=y
CONFIG_SERIAL_8250_CONSOLE=y
CONFIG_CMDLINE_BOOL=y
-CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1"
+CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1 panic_on_warn=1"
CONFIG_FRAME_WARN=1024
diff --git a/tools/testing/selftests/wireguard/qemu/arch/mips64.config b/tools/testing/selftests/wireguard/qemu/arch/mips64.config
index 90c783f725c4..0994947e3392 100644
--- a/tools/testing/selftests/wireguard/qemu/arch/mips64.config
+++ b/tools/testing/selftests/wireguard/qemu/arch/mips64.config
@@ -10,5 +10,5 @@ CONFIG_POWER_RESET_SYSCON=y
CONFIG_SERIAL_8250=y
CONFIG_SERIAL_8250_CONSOLE=y
CONFIG_CMDLINE_BOOL=y
-CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1"
+CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1 panic_on_warn=1"
CONFIG_FRAME_WARN=1280
diff --git a/tools/testing/selftests/wireguard/qemu/arch/mips64el.config b/tools/testing/selftests/wireguard/qemu/arch/mips64el.config
index 435b0b43e00c..591184342f47 100644
--- a/tools/testing/selftests/wireguard/qemu/arch/mips64el.config
+++ b/tools/testing/selftests/wireguard/qemu/arch/mips64el.config
@@ -11,5 +11,5 @@ CONFIG_POWER_RESET_SYSCON=y
CONFIG_SERIAL_8250=y
CONFIG_SERIAL_8250_CONSOLE=y
CONFIG_CMDLINE_BOOL=y
-CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1"
+CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1 panic_on_warn=1"
CONFIG_FRAME_WARN=1280
diff --git a/tools/testing/selftests/wireguard/qemu/arch/mipsel.config b/tools/testing/selftests/wireguard/qemu/arch/mipsel.config
index 62bb50c4a85f..18a498293737 100644
--- a/tools/testing/selftests/wireguard/qemu/arch/mipsel.config
+++ b/tools/testing/selftests/wireguard/qemu/arch/mipsel.config
@@ -8,5 +8,5 @@ CONFIG_POWER_RESET_SYSCON=y
CONFIG_SERIAL_8250=y
CONFIG_SERIAL_8250_CONSOLE=y
CONFIG_CMDLINE_BOOL=y
-CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1"
+CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1 panic_on_warn=1"
CONFIG_FRAME_WARN=1024
diff --git a/tools/testing/selftests/wireguard/qemu/arch/powerpc.config b/tools/testing/selftests/wireguard/qemu/arch/powerpc.config
index 57957093b71b..5e04882e8e35 100644
--- a/tools/testing/selftests/wireguard/qemu/arch/powerpc.config
+++ b/tools/testing/selftests/wireguard/qemu/arch/powerpc.config
@@ -6,5 +6,5 @@ CONFIG_SERIAL_8250=y
CONFIG_SERIAL_8250_CONSOLE=y
CONFIG_MATH_EMULATION=y
CONFIG_CMDLINE_BOOL=y
-CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1"
+CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1 panic_on_warn=1"
CONFIG_FRAME_WARN=1024
diff --git a/tools/testing/selftests/wireguard/qemu/arch/powerpc64.config b/tools/testing/selftests/wireguard/qemu/arch/powerpc64.config
new file mode 100644
index 000000000000..737194b7619e
--- /dev/null
+++ b/tools/testing/selftests/wireguard/qemu/arch/powerpc64.config
@@ -0,0 +1,13 @@
+CONFIG_PPC64=y
+CONFIG_PPC_PSERIES=y
+CONFIG_ALTIVEC=y
+CONFIG_VSX=y
+CONFIG_PPC_OF_BOOT_TRAMPOLINE=y
+CONFIG_PPC_RADIX_MMU=y
+CONFIG_HVC_CONSOLE=y
+CONFIG_CPU_BIG_ENDIAN=y
+CONFIG_CMDLINE_BOOL=y
+CONFIG_CMDLINE="console=hvc0 wg.success=hvc1 panic_on_warn=1"
+CONFIG_SECTION_MISMATCH_WARN_ONLY=y
+CONFIG_FRAME_WARN=1280
+CONFIG_THREAD_SHIFT=14
diff --git a/tools/testing/selftests/wireguard/qemu/arch/powerpc64le.config b/tools/testing/selftests/wireguard/qemu/arch/powerpc64le.config
index f52f1e2bc7f6..8148b9d1220a 100644
--- a/tools/testing/selftests/wireguard/qemu/arch/powerpc64le.config
+++ b/tools/testing/selftests/wireguard/qemu/arch/powerpc64le.config
@@ -7,7 +7,7 @@ CONFIG_PPC_RADIX_MMU=y
CONFIG_HVC_CONSOLE=y
CONFIG_CPU_LITTLE_ENDIAN=y
CONFIG_CMDLINE_BOOL=y
-CONFIG_CMDLINE="console=hvc0 wg.success=hvc1"
+CONFIG_CMDLINE="console=hvc0 wg.success=hvc1 panic_on_warn=1"
CONFIG_SECTION_MISMATCH_WARN_ONLY=y
CONFIG_FRAME_WARN=1280
CONFIG_THREAD_SHIFT=14
diff --git a/tools/testing/selftests/wireguard/qemu/arch/riscv32.config b/tools/testing/selftests/wireguard/qemu/arch/riscv32.config
new file mode 100644
index 000000000000..0bd0e72d95d4
--- /dev/null
+++ b/tools/testing/selftests/wireguard/qemu/arch/riscv32.config
@@ -0,0 +1,12 @@
+CONFIG_ARCH_RV32I=y
+CONFIG_MMU=y
+CONFIG_FPU=y
+CONFIG_SOC_VIRT=y
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_OF_PLATFORM=y
+CONFIG_VIRTIO_MENU=y
+CONFIG_VIRTIO_MMIO=y
+CONFIG_VIRTIO_CONSOLE=y
+CONFIG_CMDLINE="console=ttyS0 wg.success=vport0p1 panic_on_warn=1"
+CONFIG_CMDLINE_FORCE=y
diff --git a/tools/testing/selftests/wireguard/qemu/arch/riscv64.config b/tools/testing/selftests/wireguard/qemu/arch/riscv64.config
new file mode 100644
index 000000000000..dc266f3b1915
--- /dev/null
+++ b/tools/testing/selftests/wireguard/qemu/arch/riscv64.config
@@ -0,0 +1,12 @@
+CONFIG_ARCH_RV64I=y
+CONFIG_MMU=y
+CONFIG_FPU=y
+CONFIG_SOC_VIRT=y
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_OF_PLATFORM=y
+CONFIG_VIRTIO_MENU=y
+CONFIG_VIRTIO_MMIO=y
+CONFIG_VIRTIO_CONSOLE=y
+CONFIG_CMDLINE="console=ttyS0 wg.success=vport0p1 panic_on_warn=1"
+CONFIG_CMDLINE_FORCE=y
diff --git a/tools/testing/selftests/wireguard/qemu/arch/s390x.config b/tools/testing/selftests/wireguard/qemu/arch/s390x.config
new file mode 100644
index 000000000000..a7b44dca0b0a
--- /dev/null
+++ b/tools/testing/selftests/wireguard/qemu/arch/s390x.config
@@ -0,0 +1,6 @@
+CONFIG_SCLP_VT220_TTY=y
+CONFIG_SCLP_VT220_CONSOLE=y
+CONFIG_VIRTIO_MENU=y
+CONFIG_VIRTIO_CONSOLE=y
+CONFIG_S390_GUEST=y
+CONFIG_CMDLINE="console=ttysclp0 wg.success=vport0p1 panic_on_warn=1"
diff --git a/tools/testing/selftests/wireguard/qemu/arch/x86_64.config b/tools/testing/selftests/wireguard/qemu/arch/x86_64.config
index 00a1ef4869d5..efa00693e08b 100644
--- a/tools/testing/selftests/wireguard/qemu/arch/x86_64.config
+++ b/tools/testing/selftests/wireguard/qemu/arch/x86_64.config
@@ -1,5 +1,6 @@
+CONFIG_ACPI=y
CONFIG_SERIAL_8250=y
CONFIG_SERIAL_8250_CONSOLE=y
CONFIG_CMDLINE_BOOL=y
-CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1"
+CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1 panic_on_warn=1"
CONFIG_FRAME_WARN=1280
diff --git a/tools/testing/selftests/wireguard/qemu/init.c b/tools/testing/selftests/wireguard/qemu/init.c
index 0b45055d9de0..2a0f48fac925 100644
--- a/tools/testing/selftests/wireguard/qemu/init.c
+++ b/tools/testing/selftests/wireguard/qemu/init.c
@@ -110,12 +110,6 @@ static void enable_logging(void)
panic("write(exception-trace)");
close(fd);
}
- fd = open("/proc/sys/kernel/panic_on_warn", O_WRONLY);
- if (fd >= 0) {
- if (write(fd, "1\n", 2) != 2)
- panic("write(panic_on_warn)");
- close(fd);
- }
}
static void kmod_selftests(void)
diff --git a/virt/kvm/dirty_ring.c b/virt/kvm/dirty_ring.c
index 222ecc81d7df..f4c2a6eb1666 100644
--- a/virt/kvm/dirty_ring.c
+++ b/virt/kvm/dirty_ring.c
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
+// SPDX-License-Identifier: GPL-2.0-only
/*
* KVM dirty ring implementation
*
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index 59b1dd4a549e..2a3ed401ce46 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -77,7 +77,8 @@ irqfd_resampler_ack(struct kvm_irq_ack_notifier *kian)
idx = srcu_read_lock(&kvm->irq_srcu);
- list_for_each_entry_rcu(irqfd, &resampler->list, resampler_link)
+ list_for_each_entry_srcu(irqfd, &resampler->list, resampler_link,
+ srcu_read_lock_held(&kvm->irq_srcu))
eventfd_signal(irqfd->resamplefd, 1);
srcu_read_unlock(&kvm->irq_srcu, idx);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index dfb7dabdbc63..5ab12214e18d 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -164,6 +164,10 @@ __weak void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
{
}
+__weak void kvm_arch_guest_memory_reclaimed(struct kvm *kvm)
+{
+}
+
bool kvm_is_zone_device_pfn(kvm_pfn_t pfn)
{
/*
@@ -357,6 +361,12 @@ void kvm_flush_remote_tlbs(struct kvm *kvm)
EXPORT_SYMBOL_GPL(kvm_flush_remote_tlbs);
#endif
+static void kvm_flush_shadow_all(struct kvm *kvm)
+{
+ kvm_arch_flush_shadow_all(kvm);
+ kvm_arch_guest_memory_reclaimed(kvm);
+}
+
#ifdef KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE
static inline void *mmu_memory_cache_alloc_obj(struct kvm_mmu_memory_cache *mc,
gfp_t gfp_flags)
@@ -485,12 +495,15 @@ typedef bool (*hva_handler_t)(struct kvm *kvm, struct kvm_gfn_range *range);
typedef void (*on_lock_fn_t)(struct kvm *kvm, unsigned long start,
unsigned long end);
+typedef void (*on_unlock_fn_t)(struct kvm *kvm);
+
struct kvm_hva_range {
unsigned long start;
unsigned long end;
pte_t pte;
hva_handler_t handler;
on_lock_fn_t on_lock;
+ on_unlock_fn_t on_unlock;
bool flush_on_ret;
bool may_block;
};
@@ -578,8 +591,11 @@ static __always_inline int __kvm_handle_hva_range(struct kvm *kvm,
if (range->flush_on_ret && ret)
kvm_flush_remote_tlbs(kvm);
- if (locked)
+ if (locked) {
KVM_MMU_UNLOCK(kvm);
+ if (!IS_KVM_NULL_FN(range->on_unlock))
+ range->on_unlock(kvm);
+ }
srcu_read_unlock(&kvm->srcu, idx);
@@ -600,6 +616,7 @@ static __always_inline int kvm_handle_hva_range(struct mmu_notifier *mn,
.pte = pte,
.handler = handler,
.on_lock = (void *)kvm_null_fn,
+ .on_unlock = (void *)kvm_null_fn,
.flush_on_ret = true,
.may_block = false,
};
@@ -619,6 +636,7 @@ static __always_inline int kvm_handle_hva_range_no_flush(struct mmu_notifier *mn
.pte = __pte(0),
.handler = handler,
.on_lock = (void *)kvm_null_fn,
+ .on_unlock = (void *)kvm_null_fn,
.flush_on_ret = false,
.may_block = false,
};
@@ -662,7 +680,7 @@ void kvm_inc_notifier_count(struct kvm *kvm, unsigned long start,
kvm->mmu_notifier_range_end = end;
} else {
/*
- * Fully tracking multiple concurrent ranges has dimishing
+ * Fully tracking multiple concurrent ranges has diminishing
* returns. Keep things simple and just find the minimal range
* which includes the current and new ranges. As there won't be
* enough information to subtract a range after its invalidate
@@ -687,6 +705,7 @@ static int kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
.pte = __pte(0),
.handler = kvm_unmap_gfn_range,
.on_lock = kvm_inc_notifier_count,
+ .on_unlock = kvm_arch_guest_memory_reclaimed,
.flush_on_ret = true,
.may_block = mmu_notifier_range_blockable(range),
};
@@ -741,6 +760,7 @@ static void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn,
.pte = __pte(0),
.handler = (void *)kvm_null_fn,
.on_lock = kvm_dec_notifier_count,
+ .on_unlock = (void *)kvm_null_fn,
.flush_on_ret = false,
.may_block = mmu_notifier_range_blockable(range),
};
@@ -813,7 +833,7 @@ static void kvm_mmu_notifier_release(struct mmu_notifier *mn,
int idx;
idx = srcu_read_lock(&kvm->srcu);
- kvm_arch_flush_shadow_all(kvm);
+ kvm_flush_shadow_all(kvm);
srcu_read_unlock(&kvm->srcu, idx);
}
@@ -955,12 +975,6 @@ static int kvm_create_vm_debugfs(struct kvm *kvm, int fd)
int kvm_debugfs_num_entries = kvm_vm_stats_header.num_desc +
kvm_vcpu_stats_header.num_desc;
- /*
- * Force subsequent debugfs file creations to fail if the VM directory
- * is not created.
- */
- kvm->debugfs_dentry = ERR_PTR(-ENOENT);
-
if (!debugfs_initialized())
return 0;
@@ -1081,6 +1095,12 @@ static struct kvm *kvm_create_vm(unsigned long type)
BUILD_BUG_ON(KVM_MEM_SLOTS_NUM > SHRT_MAX);
+ /*
+ * Force subsequent debugfs file creations to fail if the VM directory
+ * is not created (by kvm_create_vm_debugfs()).
+ */
+ kvm->debugfs_dentry = ERR_PTR(-ENOENT);
+
if (init_srcu_struct(&kvm->srcu))
goto out_err_no_srcu;
if (init_srcu_struct(&kvm->irq_srcu))
@@ -1225,7 +1245,7 @@ static void kvm_destroy_vm(struct kvm *kvm)
WARN_ON(rcuwait_active(&kvm->mn_memslots_update_rcuwait));
kvm->mn_active_invalidate_count = 0;
#else
- kvm_arch_flush_shadow_all(kvm);
+ kvm_flush_shadow_all(kvm);
#endif
kvm_arch_destroy_vm(kvm);
kvm_destroy_devices(kvm);
@@ -1540,7 +1560,7 @@ static int kvm_prepare_memory_region(struct kvm *kvm,
r = kvm_arch_prepare_memory_region(kvm, old, new, change);
/* Free the bitmap on failure if it was allocated above. */
- if (r && new && new->dirty_bitmap && old && !old->dirty_bitmap)
+ if (r && new && new->dirty_bitmap && (!old || !old->dirty_bitmap))
kvm_destroy_dirty_bitmap(new);
return r;
@@ -1652,6 +1672,7 @@ static void kvm_invalidate_memslot(struct kvm *kvm,
* - kvm_is_visible_gfn (mmu_check_root)
*/
kvm_arch_flush_shadow_memslot(kvm, old);
+ kvm_arch_guest_memory_reclaimed(kvm);
/* Was released by kvm_swap_active_memslots, reacquire. */
mutex_lock(&kvm->slots_arch_lock);
@@ -1799,7 +1820,7 @@ static int kvm_set_memslot(struct kvm *kvm,
/*
* No need to refresh new->arch, changes after dropping slots_arch_lock
- * will directly hit the final, active memsot. Architectures are
+ * will directly hit the final, active memslot. Architectures are
* responsible for knowing that new->arch may be stale.
*/
kvm_commit_memory_region(kvm, old, new, change);
@@ -4333,6 +4354,7 @@ static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg)
return 0;
#endif
case KVM_CAP_BINARY_STATS_FD:
+ case KVM_CAP_SYSTEM_EVENT_DATA:
return 1;
default:
break;
diff --git a/virt/kvm/kvm_mm.h b/virt/kvm/kvm_mm.h
index 34ca40823260..41da467d99c9 100644
--- a/virt/kvm/kvm_mm.h
+++ b/virt/kvm/kvm_mm.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0-only
+/* SPDX-License-Identifier: GPL-2.0-only */
#ifndef __KVM_MM_H__
#define __KVM_MM_H__ 1