summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--CREDITS6
-rw-r--r--Documentation/filesystems/bcachefs/SubmittingPatches.rst98
-rw-r--r--Documentation/filesystems/bcachefs/index.rst1
-rw-r--r--Documentation/virt/kvm/api.rst2
-rw-r--r--MAINTAINERS14
-rw-r--r--Makefile2
-rw-r--r--arch/alpha/include/asm/elf.h6
-rw-r--r--arch/alpha/include/asm/pgtable.h2
-rw-r--r--arch/alpha/include/asm/processor.h8
-rw-r--r--arch/alpha/kernel/osf_sys.c11
-rw-r--r--arch/arm64/kvm/arch_timer.c49
-rw-r--r--arch/arm64/kvm/arm.c20
-rw-r--r--arch/arm64/kvm/hyp/nvhe/hyp-main.c24
-rw-r--r--arch/arm64/kvm/nested.c9
-rw-r--r--arch/arm64/kvm/sys_regs.c16
-rw-r--r--arch/loongarch/include/asm/cpu-info.h21
-rw-r--r--arch/loongarch/include/asm/smp.h2
-rw-r--r--arch/loongarch/kernel/genex.S28
-rw-r--r--arch/loongarch/kernel/idle.c3
-rw-r--r--arch/loongarch/kernel/proc.c29
-rw-r--r--arch/loongarch/kernel/reset.c6
-rw-r--r--arch/loongarch/kvm/main.c4
-rw-r--r--arch/loongarch/kvm/switch.S2
-rw-r--r--arch/loongarch/kvm/vcpu.c3
-rw-r--r--arch/loongarch/lib/csum.c2
-rw-r--r--arch/loongarch/mm/pageattr.c3
-rw-r--r--arch/powerpc/sysdev/fsl_msi.c2
-rw-r--r--arch/s390/include/asm/gmap.h20
-rw-r--r--arch/s390/include/asm/kvm_host.h6
-rw-r--r--arch/s390/include/asm/pgtable.h21
-rw-r--r--arch/s390/include/asm/uv.h6
-rw-r--r--arch/s390/kernel/uv.c292
-rw-r--r--arch/s390/kvm/Makefile2
-rw-r--r--arch/s390/kvm/gaccess.c44
-rw-r--r--arch/s390/kvm/gmap-vsie.c142
-rw-r--r--arch/s390/kvm/gmap.c212
-rw-r--r--arch/s390/kvm/gmap.h39
-rw-r--r--arch/s390/kvm/intercept.c7
-rw-r--r--arch/s390/kvm/interrupt.c19
-rw-r--r--arch/s390/kvm/kvm-s390.c237
-rw-r--r--arch/s390/kvm/kvm-s390.h19
-rw-r--r--arch/s390/kvm/pv.c21
-rw-r--r--arch/s390/kvm/vsie.c106
-rw-r--r--arch/s390/mm/gmap.c681
-rw-r--r--arch/s390/mm/pgalloc.c2
-rw-r--r--arch/x86/Kconfig3
-rw-r--r--arch/x86/boot/compressed/Makefile1
-rw-r--r--arch/x86/kernel/cpu/bugs.c21
-rw-r--r--arch/x86/kvm/cpuid.c2
-rw-r--r--arch/x86/kvm/mmu/mmu.c33
-rw-r--r--arch/x86/kvm/x86.c7
-rw-r--r--arch/x86/xen/xen-head.S11
-rw-r--r--drivers/accel/amdxdna/amdxdna_pci_drv.c5
-rw-r--r--drivers/accel/ivpu/ivpu_drv.c8
-rw-r--r--drivers/accel/ivpu/ivpu_pm.c84
-rw-r--r--drivers/acpi/prmt.c4
-rw-r--r--drivers/acpi/property.c10
-rw-r--r--drivers/acpi/resource.c6
-rw-r--r--drivers/base/power/main.c21
-rw-r--r--drivers/block/sunvdc.c4
-rw-r--r--drivers/bus/moxtet.c2
-rw-r--r--drivers/cpufreq/Kconfig.arm3
-rw-r--r--drivers/cpufreq/amd-pstate.c20
-rw-r--r--drivers/cpufreq/cpufreq.c3
-rw-r--r--drivers/firmware/Kconfig2
-rw-r--r--drivers/firmware/iscsi_ibft.c5
-rw-r--r--drivers/gpio/Kconfig1
-rw-r--r--drivers/gpio/gpio-pca953x.c19
-rw-r--r--drivers/gpio/gpio-sim.c13
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c8
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c5
-rw-r--r--drivers/gpu/drm/amd/display/dc/core/dc.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dmub_hw_lock_mgr.c3
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/Makefile14
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2/Makefile22
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c4
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c6
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubbub/dcn30/dcn30_hubbub.c3
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubbub/dcn31/dcn31_hubbub.c3
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubbub/dcn32/dcn32_hubbub.c3
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubbub/dcn35/dcn35_hubbub.c3
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubp/dcn30/dcn30_hubp.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubp/dcn32/dcn32_hubp.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c3
-rw-r--r--drivers/gpu/drm/arm/display/komeda/komeda_wb_connector.c4
-rw-r--r--drivers/gpu/drm/ast/ast_dp.c2
-rw-r--r--drivers/gpu/drm/display/drm_dp_cec.c14
-rw-r--r--drivers/gpu/drm/i915/display/intel_backlight.c5
-rw-r--r--drivers/gpu/drm/i915/display/intel_dp.c12
-rw-r--r--drivers/gpu/drm/i915/display/intel_dp_mst.c4
-rw-r--r--drivers/gpu/drm/i915/display/intel_hdcp.c15
-rw-r--r--drivers/gpu/drm/i915/display/skl_universal_plane.c4
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_shmem.c6
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c36
-rw-r--r--drivers/gpu/drm/xe/regs/xe_oa_regs.h6
-rw-r--r--drivers/gpu/drm/xe/xe_devcoredump.c40
-rw-r--r--drivers/gpu/drm/xe/xe_devcoredump.h2
-rw-r--r--drivers/gpu/drm/xe/xe_gt.c4
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_pf.c14
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_pf.h6
-rw-r--r--drivers/gpu/drm/xe/xe_guc_ct.c3
-rw-r--r--drivers/gpu/drm/xe/xe_guc_log.c4
-rw-r--r--drivers/gpu/drm/xe/xe_oa.c21
-rw-r--r--drivers/hid/Kconfig15
-rw-r--r--drivers/hid/amd-sfh-hid/Kconfig1
-rw-r--r--drivers/hid/hid-apple.c8
-rw-r--r--drivers/hid/hid-corsair-void.c3
-rw-r--r--drivers/hid/hid-ids.h3
-rw-r--r--drivers/hid/hid-lenovo.c7
-rw-r--r--drivers/hid/hid-multitouch.c5
-rw-r--r--drivers/hid/hid-quirks.c1
-rw-r--r--drivers/hid/hid-steam.c46
-rw-r--r--drivers/hid/hid-thrustmaster.c2
-rw-r--r--drivers/hid/hid-topre.c7
-rw-r--r--drivers/hid/hid-winwing.c2
-rw-r--r--drivers/hid/i2c-hid/Kconfig2
-rw-r--r--drivers/hid/intel-ish-hid/Kconfig1
-rw-r--r--drivers/hid/intel-ish-hid/ipc/hw-ish.h2
-rw-r--r--drivers/hid/intel-ish-hid/ipc/ipc.c15
-rw-r--r--drivers/hid/intel-ish-hid/ipc/pci-ish.c7
-rw-r--r--drivers/hid/intel-ish-hid/ishtp/ishtp-dev.h2
-rw-r--r--drivers/hid/intel-thc-hid/Kconfig1
-rw-r--r--drivers/hid/surface-hid/Kconfig2
-rw-r--r--drivers/hid/usbhid/Kconfig3
-rw-r--r--drivers/i2c/i2c-core-base.c113
-rw-r--r--drivers/irqchip/irq-partition-percpu.c2
-rw-r--r--drivers/md/md-linear.c4
-rw-r--r--drivers/mfd/syscon.c29
-rw-r--r--drivers/nvme/host/core.c8
-rw-r--r--drivers/nvme/host/fc.c35
-rw-r--r--drivers/nvme/host/pci.c12
-rw-r--r--drivers/nvme/host/sysfs.c2
-rw-r--r--drivers/nvme/target/admin-cmd.c1
-rw-r--r--drivers/nvme/target/fabrics-cmd.c2
-rw-r--r--drivers/nvme/target/io-cmd-bdev.c2
-rw-r--r--drivers/nvme/target/nvmet.h2
-rw-r--r--drivers/pci/pcie/aspm.c3
-rw-r--r--drivers/pci/tph.c2
-rw-r--r--drivers/pinctrl/pinconf-generic.c8
-rw-r--r--drivers/pinctrl/pinctrl-cy8c95x0.c42
-rw-r--r--drivers/platform/x86/intel/int3472/discrete.c85
-rw-r--r--drivers/platform/x86/thinkpad_acpi.c61
-rw-r--r--drivers/powercap/powercap_sys.c3
-rw-r--r--drivers/scsi/qla1280.c2
-rw-r--r--drivers/scsi/scsi_lib.c9
-rw-r--r--drivers/scsi/scsi_lib_test.c7
-rw-r--r--drivers/scsi/scsi_scan.c2
-rw-r--r--drivers/scsi/storvsc_drv.c1
-rw-r--r--drivers/soc/qcom/smp2p.c2
-rw-r--r--drivers/target/target_core_stat.c4
-rw-r--r--drivers/tty/pty.c2
-rw-r--r--drivers/ufs/core/ufshcd.c68
-rw-r--r--drivers/ufs/host/ufshcd-pci.c2
-rw-r--r--drivers/ufs/host/ufshcd-pltfrm.c28
-rw-r--r--fs/bcachefs/Kconfig7
-rw-r--r--fs/bcachefs/alloc_background.c47
-rw-r--r--fs/bcachefs/alloc_foreground.c10
-rw-r--r--fs/bcachefs/alloc_types.h1
-rw-r--r--fs/bcachefs/btree_iter.c33
-rw-r--r--fs/bcachefs/btree_iter.h14
-rw-r--r--fs/bcachefs/btree_key_cache.c1
-rw-r--r--fs/bcachefs/btree_trans_commit.c4
-rw-r--r--fs/bcachefs/btree_types.h3
-rw-r--r--fs/bcachefs/btree_update_interior.h4
-rw-r--r--fs/bcachefs/buckets_waiting_for_journal.c12
-rw-r--r--fs/bcachefs/buckets_waiting_for_journal.h4
-rw-r--r--fs/bcachefs/disk_accounting.h2
-rw-r--r--fs/bcachefs/inode.h4
-rw-r--r--fs/bcachefs/io_write.c12
-rw-r--r--fs/bcachefs/journal.c18
-rw-r--r--fs/bcachefs/journal.h1
-rw-r--r--fs/bcachefs/journal_reclaim.c37
-rw-r--r--fs/bcachefs/journal_types.h6
-rw-r--r--fs/bcachefs/opts.h14
-rw-r--r--fs/bcachefs/rebalance.c8
-rw-r--r--fs/bcachefs/rebalance.h20
-rw-r--r--fs/bcachefs/reflink.c2
-rw-r--r--fs/bcachefs/sb-errors_format.h4
-rw-r--r--fs/bcachefs/subvolume.c7
-rw-r--r--fs/bcachefs/super.c11
-rw-r--r--fs/bcachefs/super.h1
-rw-r--r--fs/bcachefs/trace.h14
-rw-r--r--fs/btrfs/extent_io.c29
-rw-r--r--fs/btrfs/file.c4
-rw-r--r--fs/dcache.c2
-rw-r--r--fs/erofs/zdata.c2
-rw-r--r--fs/file_table.c16
-rw-r--r--fs/gfs2/glock.c2
-rw-r--r--fs/gfs2/main.c1
-rw-r--r--fs/gfs2/quota.c4
-rw-r--r--fs/namespace.c54
-rw-r--r--fs/nfsd/filecache.c11
-rw-r--r--fs/nfsd/nfs2acl.c2
-rw-r--r--fs/nfsd/nfs3acl.c2
-rw-r--r--fs/nfsd/nfs4callback.c9
-rw-r--r--fs/nfsd/nfs4state.c3
-rw-r--r--fs/nfsd/nfsfh.c5
-rw-r--r--fs/notify/fsnotify.c18
-rw-r--r--fs/open.c11
-rw-r--r--fs/pidfs.c12
-rw-r--r--fs/pipe.c6
-rw-r--r--fs/smb/client/cifsglob.h14
-rw-r--r--fs/smb/client/dfs.c30
-rw-r--r--fs/smb/client/dfs.h7
-rw-r--r--fs/smb/client/dfs_cache.c27
-rw-r--r--fs/smb/client/smb1ops.c2
-rw-r--r--fs/smb/client/smb2ops.c18
-rw-r--r--fs/smb/client/smb2pdu.c4
-rw-r--r--fs/smb/client/smb2proto.h2
-rw-r--r--fs/stat.c4
-rw-r--r--fs/vboxsf/super.c3
-rw-r--r--include/asm-generic/vmlinux.lds.h1
-rw-r--r--include/drm/drm_print.h1
-rw-r--r--include/linux/compiler.h32
-rw-r--r--include/linux/fs.h20
-rw-r--r--include/linux/fsnotify.h4
-rw-r--r--include/linux/i2c.h10
-rw-r--r--include/linux/kvm_host.h1
-rw-r--r--include/linux/lockref.h7
-rw-r--r--include/linux/module.h5
-rw-r--r--include/linux/string.h12
-rw-r--r--include/uapi/drm/amdgpu_drm.h9
-rw-r--r--include/ufs/ufs.h4
-rw-r--r--include/ufs/ufshcd.h1
-rw-r--r--io_uring/futex.c2
-rw-r--r--kernel/futex/core.c5
-rw-r--r--kernel/futex/futex.h11
-rw-r--r--kernel/futex/pi.c2
-rw-r--r--kernel/futex/waitwake.c4
-rw-r--r--kernel/sched/debug.c2
-rw-r--r--kernel/sched/fair.c19
-rw-r--r--kernel/seccomp.c12
-rw-r--r--kernel/time/clocksource.c6
-rw-r--r--kernel/time/timer_migration.c10
-rw-r--r--kernel/trace/trace_functions_graph.c2
-rw-r--r--lib/stackinit_kunit.c6
-rw-r--r--net/bluetooth/hidp/Kconfig3
-rw-r--r--net/socket.c5
-rw-r--r--rust/Makefile5
-rw-r--r--rust/kernel/init.rs2
-rw-r--r--samples/hid/Makefile13
-rw-r--r--scripts/Makefile.extrawarn15
-rw-r--r--scripts/Makefile.lib2
-rw-r--r--scripts/generate_rust_target.rs18
-rw-r--r--scripts/mod/modpost.c35
-rw-r--r--scripts/mod/modpost.h6
-rw-r--r--scripts/module.lds.S1
-rwxr-xr-xscripts/package/install-extmod-build2
-rw-r--r--security/tomoyo/common.c145
-rw-r--r--security/tomoyo/domain.c2
-rw-r--r--security/tomoyo/securityfs_if.c6
-rw-r--r--security/tomoyo/tomoyo.c5
-rw-r--r--tools/testing/selftests/filesystems/statmount/statmount_test.c22
-rw-r--r--tools/testing/selftests/kvm/s390/cmma_test.c4
-rw-r--r--tools/testing/selftests/kvm/s390/ucontrol_test.c32
-rw-r--r--tools/testing/selftests/seccomp/seccomp_bpf.c199
-rw-r--r--virt/kvm/kvm_main.c25
259 files changed, 2963 insertions, 1882 deletions
diff --git a/CREDITS b/CREDITS
index 1f9f0f078b4a..53d11a46fd69 100644
--- a/CREDITS
+++ b/CREDITS
@@ -2515,11 +2515,9 @@ D: SLS distribution
D: Initial implementation of VC's, pty's and select()
N: Pavel Machek
-E: pavel@ucw.cz
+E: pavel@kernel.org
P: 4096R/92DFCE96 4FA7 9EEF FCD4 C44F C585 B8C7 C060 2241 92DF CE96
-D: Softcursor for vga, hypertech cdrom support, vcsa bugfix, nbd,
-D: sun4/330 port, capabilities for elf, speedup for rm on ext2, USB,
-D: work on suspend-to-ram/disk, killing duplicates from ioctl32,
+D: NBD, Sun4/330 port, USB, work on suspend-to-ram/disk,
D: Altera SoCFPGA and Nokia N900 support.
S: Czech Republic
diff --git a/Documentation/filesystems/bcachefs/SubmittingPatches.rst b/Documentation/filesystems/bcachefs/SubmittingPatches.rst
new file mode 100644
index 000000000000..026b12ae0d6a
--- /dev/null
+++ b/Documentation/filesystems/bcachefs/SubmittingPatches.rst
@@ -0,0 +1,98 @@
+Submitting patches to bcachefs:
+===============================
+
+Patches must be tested before being submitted, either with the xfstests suite
+[0], or the full bcachefs test suite in ktest [1], depending on what's being
+touched. Note that ktest wraps xfstests and will be an easier method to running
+it for most users; it includes single-command wrappers for all the mainstream
+in-kernel local filesystems.
+
+Patches will undergo more testing after being merged (including
+lockdep/kasan/preempt/etc. variants), these are not generally required to be
+run by the submitter - but do put some thought into what you're changing and
+which tests might be relevant, e.g. are you dealing with tricky memory layout
+work? kasan, are you doing locking work? then lockdep; and ktest includes
+single-command variants for the debug build types you'll most likely need.
+
+The exception to this rule is incomplete WIP/RFC patches: if you're working on
+something nontrivial, it's encouraged to send out a WIP patch to let people
+know what you're doing and make sure you're on the right track. Just make sure
+it includes a brief note as to what's done and what's incomplete, to avoid
+confusion.
+
+Rigorous checkpatch.pl adherence is not required (many of its warnings are
+considered out of date), but try not to deviate too much without reason.
+
+Focus on writing code that reads well and is organized well; code should be
+aesthetically pleasing.
+
+CI:
+===
+
+Instead of running your tests locally, when running the full test suite it's
+prefereable to let a server farm do it in parallel, and then have the results
+in a nice test dashboard (which can tell you which failures are new, and
+presents results in a git log view, avoiding the need for most bisecting).
+
+That exists [2], and community members may request an account. If you work for
+a big tech company, you'll need to help out with server costs to get access -
+but the CI is not restricted to running bcachefs tests: it runs any ktest test
+(which generally makes it easy to wrap other tests that can run in qemu).
+
+Other things to think about:
+============================
+
+- How will we debug this code? Is there sufficient introspection to diagnose
+ when something starts acting wonky on a user machine?
+
+ We don't necessarily need every single field of every data structure visible
+ with introspection, but having the important fields of all the core data
+ types wired up makes debugging drastically easier - a bit of thoughtful
+ foresight greatly reduces the need to have people build custom kernels with
+ debug patches.
+
+ More broadly, think about all the debug tooling that might be needed.
+
+- Does it make the codebase more or less of a mess? Can we also try to do some
+ organizing, too?
+
+- Do new tests need to be written? New assertions? How do we know and verify
+ that the code is correct, and what happens if something goes wrong?
+
+ We don't yet have automated code coverage analysis or easy fault injection -
+ but for now, pretend we did and ask what they might tell us.
+
+ Assertions are hugely important, given that we don't yet have a systems
+ language that can do ergonomic embedded correctness proofs. Hitting an assert
+ in testing is much better than wandering off into undefined behaviour la-la
+ land - use them. Use them judiciously, and not as a replacement for proper
+ error handling, but use them.
+
+- Does it need to be performance tested? Should we add new peformance counters?
+
+ bcachefs has a set of persistent runtime counters which can be viewed with
+ the 'bcachefs fs top' command; this should give users a basic idea of what
+ their filesystem is currently doing. If you're doing a new feature or looking
+ at old code, think if anything should be added.
+
+- If it's a new on disk format feature - have upgrades and downgrades been
+ tested? (Automated tests exists but aren't in the CI, due to the hassle of
+ disk image management; coordinate to have them run.)
+
+Mailing list, IRC:
+==================
+
+Patches should hit the list [3], but much discussion and code review happens on
+IRC as well [4]; many people appreciate the more conversational approach and
+quicker feedback.
+
+Additionally, we have a lively user community doing excellent QA work, which
+exists primarily on IRC. Please make use of that resource; user feedback is
+important for any nontrivial feature, and documenting it in commit messages
+would be a good idea.
+
+[0]: git://git.kernel.org/pub/scm/fs/xfs/xfstests-dev.git
+[1]: https://evilpiepirate.org/git/ktest.git/
+[2]: https://evilpiepirate.org/~testdashboard/ci/
+[3]: linux-bcachefs@vger.kernel.org
+[4]: irc.oftc.net#bcache, #bcachefs-dev
diff --git a/Documentation/filesystems/bcachefs/index.rst b/Documentation/filesystems/bcachefs/index.rst
index 95fc4b90739e..7db4d7ceab58 100644
--- a/Documentation/filesystems/bcachefs/index.rst
+++ b/Documentation/filesystems/bcachefs/index.rst
@@ -9,4 +9,5 @@ bcachefs Documentation
:numbered:
CodingStyle
+ SubmittingPatches
errorcodes
diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index 0d1c3a820ce6..2b52eb77e29c 100644
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -1419,7 +1419,7 @@ fetch) is injected in the guest.
S390:
^^^^^
-Returns -EINVAL if the VM has the KVM_VM_S390_UCONTROL flag set.
+Returns -EINVAL or -EEXIST if the VM has the KVM_VM_S390_UCONTROL flag set.
Returns -EINVAL if called on a protected VM.
4.36 KVM_SET_TSS_ADDR
diff --git a/MAINTAINERS b/MAINTAINERS
index b182021f4906..10893c91b1c1 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2209,7 +2209,6 @@ F: sound/soc/codecs/cs42l84.*
F: sound/soc/codecs/ssm3515.c
ARM/APPLE MACHINE SUPPORT
-M: Hector Martin <marcan@marcan.st>
M: Sven Peter <sven@svenpeter.dev>
R: Alyssa Rosenzweig <alyssa@rosenzweig.io>
L: asahi@lists.linux.dev
@@ -3954,6 +3953,7 @@ M: Kent Overstreet <kent.overstreet@linux.dev>
L: linux-bcachefs@vger.kernel.org
S: Supported
C: irc://irc.oftc.net/bcache
+P: Documentation/filesystems/bcachefs/SubmittingPatches.rst
T: git https://evilpiepirate.org/git/bcachefs.git
F: fs/bcachefs/
F: Documentation/filesystems/bcachefs/
@@ -9417,7 +9417,7 @@ F: fs/freevxfs/
FREEZER
M: "Rafael J. Wysocki" <rafael@kernel.org>
-M: Pavel Machek <pavel@ucw.cz>
+M: Pavel Machek <pavel@kernel.org>
L: linux-pm@vger.kernel.org
S: Supported
F: Documentation/power/freezing-of-tasks.rst
@@ -9877,7 +9877,7 @@ S: Maintained
F: drivers/staging/gpib/
GPIO ACPI SUPPORT
-M: Mika Westerberg <mika.westerberg@linux.intel.com>
+M: Mika Westerberg <westeri@kernel.org>
M: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
L: linux-gpio@vger.kernel.org
L: linux-acpi@vger.kernel.org
@@ -10252,7 +10252,7 @@ F: drivers/video/fbdev/hgafb.c
HIBERNATION (aka Software Suspend, aka swsusp)
M: "Rafael J. Wysocki" <rafael@kernel.org>
-M: Pavel Machek <pavel@ucw.cz>
+M: Pavel Machek <pavel@kernel.org>
L: linux-pm@vger.kernel.org
S: Supported
B: https://bugzilla.kernel.org
@@ -13123,8 +13123,8 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux.git for-next/har
F: scripts/leaking_addresses.pl
LED SUBSYSTEM
-M: Pavel Machek <pavel@ucw.cz>
M: Lee Jones <lee@kernel.org>
+M: Pavel Machek <pavel@kernel.org>
L: linux-leds@vger.kernel.org
S: Maintained
T: git git://git.kernel.org/pub/scm/linux/kernel/git/lee/leds.git
@@ -16824,7 +16824,7 @@ F: include/linux/tick.h
F: kernel/time/tick*.*
NOKIA N900 CAMERA SUPPORT (ET8EK8 SENSOR, AD5820 FOCUS)
-M: Pavel Machek <pavel@ucw.cz>
+M: Pavel Machek <pavel@kernel.org>
M: Sakari Ailus <sakari.ailus@iki.fi>
L: linux-media@vger.kernel.org
S: Maintained
@@ -22847,7 +22847,7 @@ F: drivers/sh/
SUSPEND TO RAM
M: "Rafael J. Wysocki" <rafael@kernel.org>
M: Len Brown <len.brown@intel.com>
-M: Pavel Machek <pavel@ucw.cz>
+M: Pavel Machek <pavel@kernel.org>
L: linux-pm@vger.kernel.org
S: Supported
B: https://bugzilla.kernel.org
diff --git a/Makefile b/Makefile
index 9e0d63d9d94b..89628e354ca7 100644
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@
VERSION = 6
PATCHLEVEL = 14
SUBLEVEL = 0
-EXTRAVERSION = -rc1
+EXTRAVERSION = -rc2
NAME = Baby Opossum Posse
# *DOCUMENTATION*
diff --git a/arch/alpha/include/asm/elf.h b/arch/alpha/include/asm/elf.h
index 4d7c46f50382..50c82187e60e 100644
--- a/arch/alpha/include/asm/elf.h
+++ b/arch/alpha/include/asm/elf.h
@@ -74,7 +74,7 @@ typedef elf_fpreg_t elf_fpregset_t[ELF_NFPREG];
/*
* This is used to ensure we don't load something for the wrong architecture.
*/
-#define elf_check_arch(x) ((x)->e_machine == EM_ALPHA)
+#define elf_check_arch(x) (((x)->e_machine == EM_ALPHA) && !((x)->e_flags & EF_ALPHA_32BIT))
/*
* These are used to set parameters in the core dumps.
@@ -137,10 +137,6 @@ extern int dump_elf_task(elf_greg_t *dest, struct task_struct *task);
: amask (AMASK_CIX) ? "ev6" : "ev67"); \
})
-#define SET_PERSONALITY(EX) \
- set_personality(((EX).e_flags & EF_ALPHA_32BIT) \
- ? PER_LINUX_32BIT : PER_LINUX)
-
extern int alpha_l1i_cacheshape;
extern int alpha_l1d_cacheshape;
extern int alpha_l2_cacheshape;
diff --git a/arch/alpha/include/asm/pgtable.h b/arch/alpha/include/asm/pgtable.h
index 635f0a5f5bbd..02e8817a8921 100644
--- a/arch/alpha/include/asm/pgtable.h
+++ b/arch/alpha/include/asm/pgtable.h
@@ -360,7 +360,7 @@ static inline pte_t pte_swp_clear_exclusive(pte_t pte)
extern void paging_init(void);
-/* We have our own get_unmapped_area to cope with ADDR_LIMIT_32BIT. */
+/* We have our own get_unmapped_area */
#define HAVE_ARCH_UNMAPPED_AREA
#endif /* _ALPHA_PGTABLE_H */
diff --git a/arch/alpha/include/asm/processor.h b/arch/alpha/include/asm/processor.h
index 55bb1c09fd39..5dce5518a211 100644
--- a/arch/alpha/include/asm/processor.h
+++ b/arch/alpha/include/asm/processor.h
@@ -8,23 +8,19 @@
#ifndef __ASM_ALPHA_PROCESSOR_H
#define __ASM_ALPHA_PROCESSOR_H
-#include <linux/personality.h> /* for ADDR_LIMIT_32BIT */
-
/*
* We have a 42-bit user address space: 4TB user VM...
*/
#define TASK_SIZE (0x40000000000UL)
-#define STACK_TOP \
- (current->personality & ADDR_LIMIT_32BIT ? 0x80000000 : 0x00120000000UL)
+#define STACK_TOP (0x00120000000UL)
#define STACK_TOP_MAX 0x00120000000UL
/* This decides where the kernel will search for a free chunk of vm
* space during mmap's.
*/
-#define TASK_UNMAPPED_BASE \
- ((current->personality & ADDR_LIMIT_32BIT) ? 0x40000000 : TASK_SIZE / 2)
+#define TASK_UNMAPPED_BASE (TASK_SIZE / 2)
/* This is dead. Everything has been moved to thread_info. */
struct thread_struct { };
diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c
index 86185021f75a..a08e8edef1a4 100644
--- a/arch/alpha/kernel/osf_sys.c
+++ b/arch/alpha/kernel/osf_sys.c
@@ -1210,8 +1210,7 @@ SYSCALL_DEFINE1(old_adjtimex, struct timex32 __user *, txc_p)
return ret;
}
-/* Get an address range which is currently unmapped. Similar to the
- generic version except that we know how to honor ADDR_LIMIT_32BIT. */
+/* Get an address range which is currently unmapped. */
static unsigned long
arch_get_unmapped_area_1(unsigned long addr, unsigned long len,
@@ -1230,13 +1229,7 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
unsigned long len, unsigned long pgoff,
unsigned long flags, vm_flags_t vm_flags)
{
- unsigned long limit;
-
- /* "32 bit" actually means 31 bit, since pointers sign extend. */
- if (current->personality & ADDR_LIMIT_32BIT)
- limit = 0x80000000;
- else
- limit = TASK_SIZE;
+ unsigned long limit = TASK_SIZE;
if (len > limit)
return -ENOMEM;
diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c
index d3d243366536..231c0cd9c7b4 100644
--- a/arch/arm64/kvm/arch_timer.c
+++ b/arch/arm64/kvm/arch_timer.c
@@ -471,10 +471,8 @@ static void timer_emulate(struct arch_timer_context *ctx)
trace_kvm_timer_emulate(ctx, should_fire);
- if (should_fire != ctx->irq.level) {
+ if (should_fire != ctx->irq.level)
kvm_timer_update_irq(ctx->vcpu, should_fire, ctx);
- return;
- }
kvm_timer_update_status(ctx, should_fire);
@@ -761,21 +759,6 @@ static void kvm_timer_vcpu_load_nested_switch(struct kvm_vcpu *vcpu,
timer_irq(map->direct_ptimer),
&arch_timer_irq_ops);
WARN_ON_ONCE(ret);
-
- /*
- * The virtual offset behaviour is "interesting", as it
- * always applies when HCR_EL2.E2H==0, but only when
- * accessed from EL1 when HCR_EL2.E2H==1. So make sure we
- * track E2H when putting the HV timer in "direct" mode.
- */
- if (map->direct_vtimer == vcpu_hvtimer(vcpu)) {
- struct arch_timer_offset *offs = &map->direct_vtimer->offset;
-
- if (vcpu_el2_e2h_is_set(vcpu))
- offs->vcpu_offset = NULL;
- else
- offs->vcpu_offset = &__vcpu_sys_reg(vcpu, CNTVOFF_EL2);
- }
}
}
@@ -976,31 +959,21 @@ void kvm_timer_sync_nested(struct kvm_vcpu *vcpu)
* which allows trapping of the timer registers even with NV2.
* Still, this is still worse than FEAT_NV on its own. Meh.
*/
- if (!vcpu_el2_e2h_is_set(vcpu)) {
- if (cpus_have_final_cap(ARM64_HAS_ECV))
- return;
-
- /*
- * A non-VHE guest hypervisor doesn't have any direct access
- * to its timers: the EL2 registers trap (and the HW is
- * fully emulated), while the EL0 registers access memory
- * despite the access being notionally direct. Boo.
- *
- * We update the hardware timer registers with the
- * latest value written by the guest to the VNCR page
- * and let the hardware take care of the rest.
- */
- write_sysreg_el0(__vcpu_sys_reg(vcpu, CNTV_CTL_EL0), SYS_CNTV_CTL);
- write_sysreg_el0(__vcpu_sys_reg(vcpu, CNTV_CVAL_EL0), SYS_CNTV_CVAL);
- write_sysreg_el0(__vcpu_sys_reg(vcpu, CNTP_CTL_EL0), SYS_CNTP_CTL);
- write_sysreg_el0(__vcpu_sys_reg(vcpu, CNTP_CVAL_EL0), SYS_CNTP_CVAL);
- } else {
+ if (!cpus_have_final_cap(ARM64_HAS_ECV)) {
/*
* For a VHE guest hypervisor, the EL2 state is directly
- * stored in the host EL1 timers, while the emulated EL0
+ * stored in the host EL1 timers, while the emulated EL1
* state is stored in the VNCR page. The latter could have
* been updated behind our back, and we must reset the
* emulation of the timers.
+ *
+ * A non-VHE guest hypervisor doesn't have any direct access
+ * to its timers: the EL2 registers trap despite being
+ * notionally direct (we use the EL1 HW, as for VHE), while
+ * the EL1 registers access memory.
+ *
+ * In both cases, process the emulated timers on each guest
+ * exit. Boo.
*/
struct timer_map map;
get_timer_map(vcpu, &map);
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index 646e806c6ca6..071a7d75be68 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -2290,6 +2290,19 @@ static int __init init_subsystems(void)
break;
case -ENODEV:
case -ENXIO:
+ /*
+ * No VGIC? No pKVM for you.
+ *
+ * Protected mode assumes that VGICv3 is present, so no point
+ * in trying to hobble along if vgic initialization fails.
+ */
+ if (is_protected_kvm_enabled())
+ goto out;
+
+ /*
+ * Otherwise, userspace could choose to implement a GIC for its
+ * guest on non-cooperative hardware.
+ */
vgic_present = false;
err = 0;
break;
@@ -2400,6 +2413,13 @@ static void kvm_hyp_init_symbols(void)
kvm_nvhe_sym(id_aa64smfr0_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64SMFR0_EL1);
kvm_nvhe_sym(__icache_flags) = __icache_flags;
kvm_nvhe_sym(kvm_arm_vmid_bits) = kvm_arm_vmid_bits;
+
+ /*
+ * Flush entire BSS since part of its data containing init symbols is read
+ * while the MMU is off.
+ */
+ kvm_flush_dcache_to_poc(kvm_ksym_ref(__hyp_bss_start),
+ kvm_ksym_ref(__hyp_bss_end) - kvm_ksym_ref(__hyp_bss_start));
}
static int __init kvm_hyp_init_protection(u32 hyp_va_bits)
diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
index 5c134520e180..6e12c070832f 100644
--- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c
+++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
@@ -91,11 +91,34 @@ static void fpsimd_sve_sync(struct kvm_vcpu *vcpu)
*host_data_ptr(fp_owner) = FP_STATE_HOST_OWNED;
}
+static void flush_debug_state(struct pkvm_hyp_vcpu *hyp_vcpu)
+{
+ struct kvm_vcpu *host_vcpu = hyp_vcpu->host_vcpu;
+
+ hyp_vcpu->vcpu.arch.debug_owner = host_vcpu->arch.debug_owner;
+
+ if (kvm_guest_owns_debug_regs(&hyp_vcpu->vcpu))
+ hyp_vcpu->vcpu.arch.vcpu_debug_state = host_vcpu->arch.vcpu_debug_state;
+ else if (kvm_host_owns_debug_regs(&hyp_vcpu->vcpu))
+ hyp_vcpu->vcpu.arch.external_debug_state = host_vcpu->arch.external_debug_state;
+}
+
+static void sync_debug_state(struct pkvm_hyp_vcpu *hyp_vcpu)
+{
+ struct kvm_vcpu *host_vcpu = hyp_vcpu->host_vcpu;
+
+ if (kvm_guest_owns_debug_regs(&hyp_vcpu->vcpu))
+ host_vcpu->arch.vcpu_debug_state = hyp_vcpu->vcpu.arch.vcpu_debug_state;
+ else if (kvm_host_owns_debug_regs(&hyp_vcpu->vcpu))
+ host_vcpu->arch.external_debug_state = hyp_vcpu->vcpu.arch.external_debug_state;
+}
+
static void flush_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu)
{
struct kvm_vcpu *host_vcpu = hyp_vcpu->host_vcpu;
fpsimd_sve_flush();
+ flush_debug_state(hyp_vcpu);
hyp_vcpu->vcpu.arch.ctxt = host_vcpu->arch.ctxt;
@@ -123,6 +146,7 @@ static void sync_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu)
unsigned int i;
fpsimd_sve_sync(&hyp_vcpu->vcpu);
+ sync_debug_state(hyp_vcpu);
host_vcpu->arch.ctxt = hyp_vcpu->vcpu.arch.ctxt;
diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c
index 33d2ace68665..0c9387d2f507 100644
--- a/arch/arm64/kvm/nested.c
+++ b/arch/arm64/kvm/nested.c
@@ -67,26 +67,27 @@ int kvm_vcpu_init_nested(struct kvm_vcpu *vcpu)
if (!tmp)
return -ENOMEM;
+ swap(kvm->arch.nested_mmus, tmp);
+
/*
* If we went through a realocation, adjust the MMU back-pointers in
* the previously initialised kvm_pgtable structures.
*/
if (kvm->arch.nested_mmus != tmp)
for (int i = 0; i < kvm->arch.nested_mmus_size; i++)
- tmp[i].pgt->mmu = &tmp[i];
+ kvm->arch.nested_mmus[i].pgt->mmu = &kvm->arch.nested_mmus[i];
for (int i = kvm->arch.nested_mmus_size; !ret && i < num_mmus; i++)
- ret = init_nested_s2_mmu(kvm, &tmp[i]);
+ ret = init_nested_s2_mmu(kvm, &kvm->arch.nested_mmus[i]);
if (ret) {
for (int i = kvm->arch.nested_mmus_size; i < num_mmus; i++)
- kvm_free_stage2_pgd(&tmp[i]);
+ kvm_free_stage2_pgd(&kvm->arch.nested_mmus[i]);
return ret;
}
kvm->arch.nested_mmus_size = num_mmus;
- kvm->arch.nested_mmus = tmp;
return 0;
}
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index f6cd1ea7fb55..82430c1e1dd0 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -1452,6 +1452,16 @@ static bool access_arch_timer(struct kvm_vcpu *vcpu,
return true;
}
+static bool access_hv_timer(struct kvm_vcpu *vcpu,
+ struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ if (!vcpu_el2_e2h_is_set(vcpu))
+ return undef_access(vcpu, p, r);
+
+ return access_arch_timer(vcpu, p, r);
+}
+
static s64 kvm_arm64_ftr_safe_value(u32 id, const struct arm64_ftr_bits *ftrp,
s64 new, s64 cur)
{
@@ -3103,9 +3113,9 @@ static const struct sys_reg_desc sys_reg_descs[] = {
EL2_REG(CNTHP_CTL_EL2, access_arch_timer, reset_val, 0),
EL2_REG(CNTHP_CVAL_EL2, access_arch_timer, reset_val, 0),
- { SYS_DESC(SYS_CNTHV_TVAL_EL2), access_arch_timer },
- EL2_REG(CNTHV_CTL_EL2, access_arch_timer, reset_val, 0),
- EL2_REG(CNTHV_CVAL_EL2, access_arch_timer, reset_val, 0),
+ { SYS_DESC(SYS_CNTHV_TVAL_EL2), access_hv_timer },
+ EL2_REG(CNTHV_CTL_EL2, access_hv_timer, reset_val, 0),
+ EL2_REG(CNTHV_CVAL_EL2, access_hv_timer, reset_val, 0),
{ SYS_DESC(SYS_CNTKCTL_EL12), access_cntkctl_el12 },
diff --git a/arch/loongarch/include/asm/cpu-info.h b/arch/loongarch/include/asm/cpu-info.h
index 35e0a230a484..7f5bc0ad9d50 100644
--- a/arch/loongarch/include/asm/cpu-info.h
+++ b/arch/loongarch/include/asm/cpu-info.h
@@ -76,27 +76,6 @@ extern const char *__cpu_full_name[];
#define cpu_family_string() __cpu_family[raw_smp_processor_id()]
#define cpu_full_name_string() __cpu_full_name[raw_smp_processor_id()]
-struct seq_file;
-struct notifier_block;
-
-extern int register_proc_cpuinfo_notifier(struct notifier_block *nb);
-extern int proc_cpuinfo_notifier_call_chain(unsigned long val, void *v);
-
-#define proc_cpuinfo_notifier(fn, pri) \
-({ \
- static struct notifier_block fn##_nb = { \
- .notifier_call = fn, \
- .priority = pri \
- }; \
- \
- register_proc_cpuinfo_notifier(&fn##_nb); \
-})
-
-struct proc_cpuinfo_notifier_args {
- struct seq_file *m;
- unsigned long n;
-};
-
static inline bool cpus_are_siblings(int cpua, int cpub)
{
struct cpuinfo_loongarch *infoa = &cpu_data[cpua];
diff --git a/arch/loongarch/include/asm/smp.h b/arch/loongarch/include/asm/smp.h
index 3383c9d24e94..b87d1d5e5890 100644
--- a/arch/loongarch/include/asm/smp.h
+++ b/arch/loongarch/include/asm/smp.h
@@ -77,6 +77,8 @@ extern int __cpu_logical_map[NR_CPUS];
#define SMP_IRQ_WORK BIT(ACTION_IRQ_WORK)
#define SMP_CLEAR_VECTOR BIT(ACTION_CLEAR_VECTOR)
+struct seq_file;
+
struct secondary_data {
unsigned long stack;
unsigned long thread_info;
diff --git a/arch/loongarch/kernel/genex.S b/arch/loongarch/kernel/genex.S
index 86d5d90ebefe..4f0912141781 100644
--- a/arch/loongarch/kernel/genex.S
+++ b/arch/loongarch/kernel/genex.S
@@ -18,16 +18,19 @@
.align 5
SYM_FUNC_START(__arch_cpu_idle)
- /* start of rollback region */
- LONG_L t0, tp, TI_FLAGS
- nop
- andi t0, t0, _TIF_NEED_RESCHED
- bnez t0, 1f
- nop
- nop
- nop
+ /* start of idle interrupt region */
+ ori t0, zero, CSR_CRMD_IE
+ /* idle instruction needs irq enabled */
+ csrxchg t0, t0, LOONGARCH_CSR_CRMD
+ /*
+ * If an interrupt lands here; between enabling interrupts above and
+ * going idle on the next instruction, we must *NOT* go idle since the
+ * interrupt could have set TIF_NEED_RESCHED or caused an timer to need
+ * reprogramming. Fall through -- see handle_vint() below -- and have
+ * the idle loop take care of things.
+ */
idle 0
- /* end of rollback region */
+ /* end of idle interrupt region */
1: jr ra
SYM_FUNC_END(__arch_cpu_idle)
@@ -35,11 +38,10 @@ SYM_CODE_START(handle_vint)
UNWIND_HINT_UNDEFINED
BACKUP_T0T1
SAVE_ALL
- la_abs t1, __arch_cpu_idle
+ la_abs t1, 1b
LONG_L t0, sp, PT_ERA
- /* 32 byte rollback region */
- ori t0, t0, 0x1f
- xori t0, t0, 0x1f
+ /* 3 instructions idle interrupt region */
+ ori t0, t0, 0b1100
bne t0, t1, 1f
LONG_S t0, sp, PT_ERA
1: move a0, sp
diff --git a/arch/loongarch/kernel/idle.c b/arch/loongarch/kernel/idle.c
index 0b5dd2faeb90..54b247d8cdb6 100644
--- a/arch/loongarch/kernel/idle.c
+++ b/arch/loongarch/kernel/idle.c
@@ -11,7 +11,6 @@
void __cpuidle arch_cpu_idle(void)
{
- raw_local_irq_enable();
- __arch_cpu_idle(); /* idle instruction needs irq enabled */
+ __arch_cpu_idle();
raw_local_irq_disable();
}
diff --git a/arch/loongarch/kernel/proc.c b/arch/loongarch/kernel/proc.c
index 6ce46d92f1f1..cea30768ae92 100644
--- a/arch/loongarch/kernel/proc.c
+++ b/arch/loongarch/kernel/proc.c
@@ -13,28 +13,12 @@
#include <asm/processor.h>
#include <asm/time.h>
-/*
- * No lock; only written during early bootup by CPU 0.
- */
-static RAW_NOTIFIER_HEAD(proc_cpuinfo_chain);
-
-int __ref register_proc_cpuinfo_notifier(struct notifier_block *nb)
-{
- return raw_notifier_chain_register(&proc_cpuinfo_chain, nb);
-}
-
-int proc_cpuinfo_notifier_call_chain(unsigned long val, void *v)
-{
- return raw_notifier_call_chain(&proc_cpuinfo_chain, val, v);
-}
-
static int show_cpuinfo(struct seq_file *m, void *v)
{
unsigned long n = (unsigned long) v - 1;
unsigned int isa = cpu_data[n].isa_level;
unsigned int version = cpu_data[n].processor_id & 0xff;
unsigned int fp_version = cpu_data[n].fpu_vers;
- struct proc_cpuinfo_notifier_args proc_cpuinfo_notifier_args;
#ifdef CONFIG_SMP
if (!cpu_online(n))
@@ -91,20 +75,13 @@ static int show_cpuinfo(struct seq_file *m, void *v)
if (cpu_has_lbt_mips) seq_printf(m, " lbt_mips");
seq_printf(m, "\n");
- seq_printf(m, "Hardware Watchpoint\t: %s",
- cpu_has_watch ? "yes, " : "no\n");
+ seq_printf(m, "Hardware Watchpoint\t: %s", str_yes_no(cpu_has_watch));
if (cpu_has_watch) {
- seq_printf(m, "iwatch count: %d, dwatch count: %d\n",
+ seq_printf(m, ", iwatch count: %d, dwatch count: %d",
cpu_data[n].watch_ireg_count, cpu_data[n].watch_dreg_count);
}
- proc_cpuinfo_notifier_args.m = m;
- proc_cpuinfo_notifier_args.n = n;
-
- raw_notifier_call_chain(&proc_cpuinfo_chain, 0,
- &proc_cpuinfo_notifier_args);
-
- seq_printf(m, "\n");
+ seq_printf(m, "\n\n");
return 0;
}
diff --git a/arch/loongarch/kernel/reset.c b/arch/loongarch/kernel/reset.c
index 1ef8c6383535..de8fa5a8a825 100644
--- a/arch/loongarch/kernel/reset.c
+++ b/arch/loongarch/kernel/reset.c
@@ -33,7 +33,7 @@ void machine_halt(void)
console_flush_on_panic(CONSOLE_FLUSH_PENDING);
while (true) {
- __arch_cpu_idle();
+ __asm__ __volatile__("idle 0" : : : "memory");
}
}
@@ -53,7 +53,7 @@ void machine_power_off(void)
#endif
while (true) {
- __arch_cpu_idle();
+ __asm__ __volatile__("idle 0" : : : "memory");
}
}
@@ -74,6 +74,6 @@ void machine_restart(char *command)
acpi_reboot();
while (true) {
- __arch_cpu_idle();
+ __asm__ __volatile__("idle 0" : : : "memory");
}
}
diff --git a/arch/loongarch/kvm/main.c b/arch/loongarch/kvm/main.c
index bf9268bf26d5..f6d3242b9234 100644
--- a/arch/loongarch/kvm/main.c
+++ b/arch/loongarch/kvm/main.c
@@ -303,9 +303,9 @@ int kvm_arch_enable_virtualization_cpu(void)
* TOE=0: Trap on Exception.
* TIT=0: Trap on Timer.
*/
- if (env & CSR_GCFG_GCIP_ALL)
+ if (env & CSR_GCFG_GCIP_SECURE)
gcfg |= CSR_GCFG_GCI_SECURE;
- if (env & CSR_GCFG_MATC_ROOT)
+ if (env & CSR_GCFG_MATP_ROOT)
gcfg |= CSR_GCFG_MATC_ROOT;
write_csr_gcfg(gcfg);
diff --git a/arch/loongarch/kvm/switch.S b/arch/loongarch/kvm/switch.S
index 0c292f818492..1be185e94807 100644
--- a/arch/loongarch/kvm/switch.S
+++ b/arch/loongarch/kvm/switch.S
@@ -85,7 +85,7 @@
* Guest CRMD comes from separate GCSR_CRMD register
*/
ori t0, zero, CSR_PRMD_PIE
- csrxchg t0, t0, LOONGARCH_CSR_PRMD
+ csrwr t0, LOONGARCH_CSR_PRMD
/* Set PVM bit to setup ertn to guest context */
ori t0, zero, CSR_GSTAT_PVM
diff --git a/arch/loongarch/kvm/vcpu.c b/arch/loongarch/kvm/vcpu.c
index fb72095c8077..20f941af3e9e 100644
--- a/arch/loongarch/kvm/vcpu.c
+++ b/arch/loongarch/kvm/vcpu.c
@@ -1548,9 +1548,6 @@ static int _kvm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
/* Restore timer state regardless */
kvm_restore_timer(vcpu);
-
- /* Control guest page CCA attribute */
- change_csr_gcfg(CSR_GCFG_MATC_MASK, CSR_GCFG_MATC_ROOT);
kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu);
/* Restore hardware PMU CSRs */
diff --git a/arch/loongarch/lib/csum.c b/arch/loongarch/lib/csum.c
index a5e84b403c3b..df309ae4045d 100644
--- a/arch/loongarch/lib/csum.c
+++ b/arch/loongarch/lib/csum.c
@@ -25,7 +25,7 @@ unsigned int __no_sanitize_address do_csum(const unsigned char *buff, int len)
const u64 *ptr;
u64 data, sum64 = 0;
- if (unlikely(len == 0))
+ if (unlikely(len <= 0))
return 0;
offset = (unsigned long)buff & 7;
diff --git a/arch/loongarch/mm/pageattr.c b/arch/loongarch/mm/pageattr.c
index bf8678248444..99165903908a 100644
--- a/arch/loongarch/mm/pageattr.c
+++ b/arch/loongarch/mm/pageattr.c
@@ -3,6 +3,7 @@
* Copyright (C) 2024 Loongson Technology Corporation Limited
*/
+#include <linux/memblock.h>
#include <linux/pagewalk.h>
#include <linux/pgtable.h>
#include <asm/set_memory.h>
@@ -167,7 +168,7 @@ bool kernel_page_present(struct page *page)
unsigned long addr = (unsigned long)page_address(page);
if (addr < vm_map_base)
- return true;
+ return memblock_is_memory(__pa(addr));
pgd = pgd_offset_k(addr);
if (pgd_none(pgdp_get(pgd)))
diff --git a/arch/powerpc/sysdev/fsl_msi.c b/arch/powerpc/sysdev/fsl_msi.c
index 1aa0cb097c9c..7b9a5ea9cad9 100644
--- a/arch/powerpc/sysdev/fsl_msi.c
+++ b/arch/powerpc/sysdev/fsl_msi.c
@@ -75,7 +75,7 @@ static void fsl_msi_print_chip(struct irq_data *irqd, struct seq_file *p)
srs = (hwirq >> msi_data->srs_shift) & MSI_SRS_MASK;
cascade_virq = msi_data->cascade_array[srs]->virq;
- seq_printf(p, " fsl-msi-%d", cascade_virq);
+ seq_printf(p, "fsl-msi-%d", cascade_virq);
}
diff --git a/arch/s390/include/asm/gmap.h b/arch/s390/include/asm/gmap.h
index 13f51a6a5bb1..4e73ef46d4b2 100644
--- a/arch/s390/include/asm/gmap.h
+++ b/arch/s390/include/asm/gmap.h
@@ -23,7 +23,6 @@
/**
* struct gmap_struct - guest address space
* @list: list head for the mm->context gmap list
- * @crst_list: list of all crst tables used in the guest address space
* @mm: pointer to the parent mm_struct
* @guest_to_host: radix tree with guest to host address translation
* @host_to_guest: radix tree with pointer to segment table entries
@@ -35,7 +34,6 @@
* @guest_handle: protected virtual machine handle for the ultravisor
* @host_to_rmap: radix tree with gmap_rmap lists
* @children: list of shadow gmap structures
- * @pt_list: list of all page tables used in the shadow guest address space
* @shadow_lock: spinlock to protect the shadow gmap list
* @parent: pointer to the parent gmap for shadow guest address spaces
* @orig_asce: ASCE for which the shadow page table has been created
@@ -45,7 +43,6 @@
*/
struct gmap {
struct list_head list;
- struct list_head crst_list;
struct mm_struct *mm;
struct radix_tree_root guest_to_host;
struct radix_tree_root host_to_guest;
@@ -61,7 +58,6 @@ struct gmap {
/* Additional data for shadow guest address spaces */
struct radix_tree_root host_to_rmap;
struct list_head children;
- struct list_head pt_list;
spinlock_t shadow_lock;
struct gmap *parent;
unsigned long orig_asce;
@@ -106,23 +102,21 @@ struct gmap *gmap_create(struct mm_struct *mm, unsigned long limit);
void gmap_remove(struct gmap *gmap);
struct gmap *gmap_get(struct gmap *gmap);
void gmap_put(struct gmap *gmap);
+void gmap_free(struct gmap *gmap);
+struct gmap *gmap_alloc(unsigned long limit);
int gmap_map_segment(struct gmap *gmap, unsigned long from,
unsigned long to, unsigned long len);
int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len);
unsigned long __gmap_translate(struct gmap *, unsigned long gaddr);
-unsigned long gmap_translate(struct gmap *, unsigned long gaddr);
int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr);
-int gmap_fault(struct gmap *, unsigned long gaddr, unsigned int fault_flags);
void gmap_discard(struct gmap *, unsigned long from, unsigned long to);
void __gmap_zap(struct gmap *, unsigned long gaddr);
void gmap_unlink(struct mm_struct *, unsigned long *table, unsigned long vmaddr);
int gmap_read_table(struct gmap *gmap, unsigned long gaddr, unsigned long *val);
-struct gmap *gmap_shadow(struct gmap *parent, unsigned long asce,
- int edat_level);
-int gmap_shadow_valid(struct gmap *sg, unsigned long asce, int edat_level);
+void gmap_unshadow(struct gmap *sg);
int gmap_shadow_r2t(struct gmap *sg, unsigned long saddr, unsigned long r2t,
int fake);
int gmap_shadow_r3t(struct gmap *sg, unsigned long saddr, unsigned long r3t,
@@ -131,24 +125,22 @@ int gmap_shadow_sgt(struct gmap *sg, unsigned long saddr, unsigned long sgt,
int fake);
int gmap_shadow_pgt(struct gmap *sg, unsigned long saddr, unsigned long pgt,
int fake);
-int gmap_shadow_pgt_lookup(struct gmap *sg, unsigned long saddr,
- unsigned long *pgt, int *dat_protection, int *fake);
int gmap_shadow_page(struct gmap *sg, unsigned long saddr, pte_t pte);
void gmap_register_pte_notifier(struct gmap_notifier *);
void gmap_unregister_pte_notifier(struct gmap_notifier *);
-int gmap_mprotect_notify(struct gmap *, unsigned long start,
- unsigned long len, int prot);
+int gmap_protect_one(struct gmap *gmap, unsigned long gaddr, int prot, unsigned long bits);
void gmap_sync_dirty_log_pmd(struct gmap *gmap, unsigned long dirty_bitmap[4],
unsigned long gaddr, unsigned long vmaddr);
int s390_disable_cow_sharing(void);
-void s390_unlist_old_asce(struct gmap *gmap);
int s390_replace_asce(struct gmap *gmap);
void s390_uv_destroy_pfns(unsigned long count, unsigned long *pfns);
int __s390_uv_destroy_range(struct mm_struct *mm, unsigned long start,
unsigned long end, bool interruptible);
+int kvm_s390_wiggle_split_folio(struct mm_struct *mm, struct folio *folio, bool split);
+unsigned long *gmap_table_walk(struct gmap *gmap, unsigned long gaddr, int level);
/**
* s390_uv_destroy_range - Destroy a range of pages in the given mm.
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 97c7c8127543..9a367866cab0 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -30,6 +30,8 @@
#define KVM_S390_ESCA_CPU_SLOTS 248
#define KVM_MAX_VCPUS 255
+#define KVM_INTERNAL_MEM_SLOTS 1
+
/*
* These seem to be used for allocating ->chip in the routing table, which we
* don't use. 1 is as small as we can get to reduce the needed memory. If we
@@ -931,12 +933,14 @@ struct sie_page2 {
u8 reserved928[0x1000 - 0x928]; /* 0x0928 */
};
+struct vsie_page;
+
struct kvm_s390_vsie {
struct mutex mutex;
struct radix_tree_root addr_to_page;
int page_count;
int next;
- struct page *pages[KVM_MAX_VCPUS];
+ struct vsie_page *pages[KVM_MAX_VCPUS];
};
struct kvm_s390_gisa_iam {
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index a3b51056a177..3ca5af4cfe43 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -420,9 +420,10 @@ void setup_protection_map(void);
#define PGSTE_HC_BIT 0x0020000000000000UL
#define PGSTE_GR_BIT 0x0004000000000000UL
#define PGSTE_GC_BIT 0x0002000000000000UL
-#define PGSTE_UC_BIT 0x0000800000000000UL /* user dirty (migration) */
-#define PGSTE_IN_BIT 0x0000400000000000UL /* IPTE notify bit */
-#define PGSTE_VSIE_BIT 0x0000200000000000UL /* ref'd in a shadow table */
+#define PGSTE_ST2_MASK 0x0000ffff00000000UL
+#define PGSTE_UC_BIT 0x0000000000008000UL /* user dirty (migration) */
+#define PGSTE_IN_BIT 0x0000000000004000UL /* IPTE notify bit */
+#define PGSTE_VSIE_BIT 0x0000000000002000UL /* ref'd in a shadow table */
/* Guest Page State used for virtualization */
#define _PGSTE_GPS_ZERO 0x0000000080000000UL
@@ -2007,4 +2008,18 @@ extern void s390_reset_cmma(struct mm_struct *mm);
#define pmd_pgtable(pmd) \
((pgtable_t)__va(pmd_val(pmd) & -sizeof(pte_t)*PTRS_PER_PTE))
+static inline unsigned long gmap_pgste_get_pgt_addr(unsigned long *pgt)
+{
+ unsigned long *pgstes, res;
+
+ pgstes = pgt + _PAGE_ENTRIES;
+
+ res = (pgstes[0] & PGSTE_ST2_MASK) << 16;
+ res |= pgstes[1] & PGSTE_ST2_MASK;
+ res |= (pgstes[2] & PGSTE_ST2_MASK) >> 16;
+ res |= (pgstes[3] & PGSTE_ST2_MASK) >> 32;
+
+ return res;
+}
+
#endif /* _S390_PAGE_H */
diff --git a/arch/s390/include/asm/uv.h b/arch/s390/include/asm/uv.h
index dc332609f2c3..b11f5b6d0bd1 100644
--- a/arch/s390/include/asm/uv.h
+++ b/arch/s390/include/asm/uv.h
@@ -628,12 +628,12 @@ static inline int is_prot_virt_host(void)
}
int uv_pin_shared(unsigned long paddr);
-int gmap_make_secure(struct gmap *gmap, unsigned long gaddr, void *uvcb);
-int gmap_destroy_page(struct gmap *gmap, unsigned long gaddr);
int uv_destroy_folio(struct folio *folio);
int uv_destroy_pte(pte_t pte);
int uv_convert_from_secure_pte(pte_t pte);
-int gmap_convert_to_secure(struct gmap *gmap, unsigned long gaddr);
+int make_folio_secure(struct folio *folio, struct uv_cb_header *uvcb);
+int uv_convert_from_secure(unsigned long paddr);
+int uv_convert_from_secure_folio(struct folio *folio);
void setup_uv(void);
diff --git a/arch/s390/kernel/uv.c b/arch/s390/kernel/uv.c
index 6f9654a191ad..9f05df2da2f7 100644
--- a/arch/s390/kernel/uv.c
+++ b/arch/s390/kernel/uv.c
@@ -19,19 +19,6 @@
#include <asm/sections.h>
#include <asm/uv.h>
-#if !IS_ENABLED(CONFIG_KVM)
-unsigned long __gmap_translate(struct gmap *gmap, unsigned long gaddr)
-{
- return 0;
-}
-
-int gmap_fault(struct gmap *gmap, unsigned long gaddr,
- unsigned int fault_flags)
-{
- return 0;
-}
-#endif
-
/* the bootdata_preserved fields come from ones in arch/s390/boot/uv.c */
int __bootdata_preserved(prot_virt_guest);
EXPORT_SYMBOL(prot_virt_guest);
@@ -159,6 +146,7 @@ int uv_destroy_folio(struct folio *folio)
folio_put(folio);
return rc;
}
+EXPORT_SYMBOL(uv_destroy_folio);
/*
* The present PTE still indirectly holds a folio reference through the mapping.
@@ -175,7 +163,7 @@ int uv_destroy_pte(pte_t pte)
*
* @paddr: Absolute host address of page to be exported
*/
-static int uv_convert_from_secure(unsigned long paddr)
+int uv_convert_from_secure(unsigned long paddr)
{
struct uv_cb_cfs uvcb = {
.header.cmd = UVC_CMD_CONV_FROM_SEC_STOR,
@@ -187,11 +175,12 @@ static int uv_convert_from_secure(unsigned long paddr)
return -EINVAL;
return 0;
}
+EXPORT_SYMBOL_GPL(uv_convert_from_secure);
/*
* The caller must already hold a reference to the folio.
*/
-static int uv_convert_from_secure_folio(struct folio *folio)
+int uv_convert_from_secure_folio(struct folio *folio)
{
int rc;
@@ -206,6 +195,7 @@ static int uv_convert_from_secure_folio(struct folio *folio)
folio_put(folio);
return rc;
}
+EXPORT_SYMBOL_GPL(uv_convert_from_secure_folio);
/*
* The present PTE still indirectly holds a folio reference through the mapping.
@@ -237,13 +227,33 @@ static int expected_folio_refs(struct folio *folio)
return res;
}
-static int make_folio_secure(struct folio *folio, struct uv_cb_header *uvcb)
+/**
+ * make_folio_secure() - make a folio secure
+ * @folio: the folio to make secure
+ * @uvcb: the uvcb that describes the UVC to be used
+ *
+ * The folio @folio will be made secure if possible, @uvcb will be passed
+ * as-is to the UVC.
+ *
+ * Return: 0 on success;
+ * -EBUSY if the folio is in writeback or has too many references;
+ * -E2BIG if the folio is large;
+ * -EAGAIN if the UVC needs to be attempted again;
+ * -ENXIO if the address is not mapped;
+ * -EINVAL if the UVC failed for other reasons.
+ *
+ * Context: The caller must hold exactly one extra reference on the folio
+ * (it's the same logic as split_folio())
+ */
+int make_folio_secure(struct folio *folio, struct uv_cb_header *uvcb)
{
int expected, cc = 0;
+ if (folio_test_large(folio))
+ return -E2BIG;
if (folio_test_writeback(folio))
- return -EAGAIN;
- expected = expected_folio_refs(folio);
+ return -EBUSY;
+ expected = expected_folio_refs(folio) + 1;
if (!folio_ref_freeze(folio, expected))
return -EBUSY;
set_bit(PG_arch_1, &folio->flags);
@@ -267,251 +277,7 @@ static int make_folio_secure(struct folio *folio, struct uv_cb_header *uvcb)
return -EAGAIN;
return uvcb->rc == 0x10a ? -ENXIO : -EINVAL;
}
-
-/**
- * should_export_before_import - Determine whether an export is needed
- * before an import-like operation
- * @uvcb: the Ultravisor control block of the UVC to be performed
- * @mm: the mm of the process
- *
- * Returns whether an export is needed before every import-like operation.
- * This is needed for shared pages, which don't trigger a secure storage
- * exception when accessed from a different guest.
- *
- * Although considered as one, the Unpin Page UVC is not an actual import,
- * so it is not affected.
- *
- * No export is needed also when there is only one protected VM, because the
- * page cannot belong to the wrong VM in that case (there is no "other VM"
- * it can belong to).
- *
- * Return: true if an export is needed before every import, otherwise false.
- */
-static bool should_export_before_import(struct uv_cb_header *uvcb, struct mm_struct *mm)
-{
- /*
- * The misc feature indicates, among other things, that importing a
- * shared page from a different protected VM will automatically also
- * transfer its ownership.
- */
- if (uv_has_feature(BIT_UV_FEAT_MISC))
- return false;
- if (uvcb->cmd == UVC_CMD_UNPIN_PAGE_SHARED)
- return false;
- return atomic_read(&mm->context.protected_count) > 1;
-}
-
-/*
- * Drain LRU caches: the local one on first invocation and the ones of all
- * CPUs on successive invocations. Returns "true" on the first invocation.
- */
-static bool drain_lru(bool *drain_lru_called)
-{
- /*
- * If we have tried a local drain and the folio refcount
- * still does not match our expected safe value, try with a
- * system wide drain. This is needed if the pagevecs holding
- * the page are on a different CPU.
- */
- if (*drain_lru_called) {
- lru_add_drain_all();
- /* We give up here, don't retry immediately. */
- return false;
- }
- /*
- * We are here if the folio refcount does not match the
- * expected safe value. The main culprits are usually
- * pagevecs. With lru_add_drain() we drain the pagevecs
- * on the local CPU so that hopefully the refcount will
- * reach the expected safe value.
- */
- lru_add_drain();
- *drain_lru_called = true;
- /* The caller should try again immediately */
- return true;
-}
-
-/*
- * Requests the Ultravisor to make a page accessible to a guest.
- * If it's brought in the first time, it will be cleared. If
- * it has been exported before, it will be decrypted and integrity
- * checked.
- */
-int gmap_make_secure(struct gmap *gmap, unsigned long gaddr, void *uvcb)
-{
- struct vm_area_struct *vma;
- bool drain_lru_called = false;
- spinlock_t *ptelock;
- unsigned long uaddr;
- struct folio *folio;
- pte_t *ptep;
- int rc;
-
-again:
- rc = -EFAULT;
- mmap_read_lock(gmap->mm);
-
- uaddr = __gmap_translate(gmap, gaddr);
- if (IS_ERR_VALUE(uaddr))
- goto out;
- vma = vma_lookup(gmap->mm, uaddr);
- if (!vma)
- goto out;
- /*
- * Secure pages cannot be huge and userspace should not combine both.
- * In case userspace does it anyway this will result in an -EFAULT for
- * the unpack. The guest is thus never reaching secure mode. If
- * userspace is playing dirty tricky with mapping huge pages later
- * on this will result in a segmentation fault.
- */
- if (is_vm_hugetlb_page(vma))
- goto out;
-
- rc = -ENXIO;
- ptep = get_locked_pte(gmap->mm, uaddr, &ptelock);
- if (!ptep)
- goto out;
- if (pte_present(*ptep) && !(pte_val(*ptep) & _PAGE_INVALID) && pte_write(*ptep)) {
- folio = page_folio(pte_page(*ptep));
- rc = -EAGAIN;
- if (folio_test_large(folio)) {
- rc = -E2BIG;
- } else if (folio_trylock(folio)) {
- if (should_export_before_import(uvcb, gmap->mm))
- uv_convert_from_secure(PFN_PHYS(folio_pfn(folio)));
- rc = make_folio_secure(folio, uvcb);
- folio_unlock(folio);
- }
-
- /*
- * Once we drop the PTL, the folio may get unmapped and
- * freed immediately. We need a temporary reference.
- */
- if (rc == -EAGAIN || rc == -E2BIG)
- folio_get(folio);
- }
- pte_unmap_unlock(ptep, ptelock);
-out:
- mmap_read_unlock(gmap->mm);
-
- switch (rc) {
- case -E2BIG:
- folio_lock(folio);
- rc = split_folio(folio);
- folio_unlock(folio);
- folio_put(folio);
-
- switch (rc) {
- case 0:
- /* Splitting succeeded, try again immediately. */
- goto again;
- case -EAGAIN:
- /* Additional folio references. */
- if (drain_lru(&drain_lru_called))
- goto again;
- return -EAGAIN;
- case -EBUSY:
- /* Unexpected race. */
- return -EAGAIN;
- }
- WARN_ON_ONCE(1);
- return -ENXIO;
- case -EAGAIN:
- /*
- * If we are here because the UVC returned busy or partial
- * completion, this is just a useless check, but it is safe.
- */
- folio_wait_writeback(folio);
- folio_put(folio);
- return -EAGAIN;
- case -EBUSY:
- /* Additional folio references. */
- if (drain_lru(&drain_lru_called))
- goto again;
- return -EAGAIN;
- case -ENXIO:
- if (gmap_fault(gmap, gaddr, FAULT_FLAG_WRITE))
- return -EFAULT;
- return -EAGAIN;
- }
- return rc;
-}
-EXPORT_SYMBOL_GPL(gmap_make_secure);
-
-int gmap_convert_to_secure(struct gmap *gmap, unsigned long gaddr)
-{
- struct uv_cb_cts uvcb = {
- .header.cmd = UVC_CMD_CONV_TO_SEC_STOR,
- .header.len = sizeof(uvcb),
- .guest_handle = gmap->guest_handle,
- .gaddr = gaddr,
- };
-
- return gmap_make_secure(gmap, gaddr, &uvcb);
-}
-EXPORT_SYMBOL_GPL(gmap_convert_to_secure);
-
-/**
- * gmap_destroy_page - Destroy a guest page.
- * @gmap: the gmap of the guest
- * @gaddr: the guest address to destroy
- *
- * An attempt will be made to destroy the given guest page. If the attempt
- * fails, an attempt is made to export the page. If both attempts fail, an
- * appropriate error is returned.
- */
-int gmap_destroy_page(struct gmap *gmap, unsigned long gaddr)
-{
- struct vm_area_struct *vma;
- struct folio_walk fw;
- unsigned long uaddr;
- struct folio *folio;
- int rc;
-
- rc = -EFAULT;
- mmap_read_lock(gmap->mm);
-
- uaddr = __gmap_translate(gmap, gaddr);
- if (IS_ERR_VALUE(uaddr))
- goto out;
- vma = vma_lookup(gmap->mm, uaddr);
- if (!vma)
- goto out;
- /*
- * Huge pages should not be able to become secure
- */
- if (is_vm_hugetlb_page(vma))
- goto out;
-
- rc = 0;
- folio = folio_walk_start(&fw, vma, uaddr, 0);
- if (!folio)
- goto out;
- /*
- * See gmap_make_secure(): large folios cannot be secure. Small
- * folio implies FW_LEVEL_PTE.
- */
- if (folio_test_large(folio) || !pte_write(fw.pte))
- goto out_walk_end;
- rc = uv_destroy_folio(folio);
- /*
- * Fault handlers can race; it is possible that two CPUs will fault
- * on the same secure page. One CPU can destroy the page, reboot,
- * re-enter secure mode and import it, while the second CPU was
- * stuck at the beginning of the handler. At some point the second
- * CPU will be able to progress, and it will not be able to destroy
- * the page. In that case we do not want to terminate the process,
- * we instead try to export the page.
- */
- if (rc)
- rc = uv_convert_from_secure_folio(folio);
-out_walk_end:
- folio_walk_end(&fw, vma);
-out:
- mmap_read_unlock(gmap->mm);
- return rc;
-}
-EXPORT_SYMBOL_GPL(gmap_destroy_page);
+EXPORT_SYMBOL_GPL(make_folio_secure);
/*
* To be called with the folio locked or with an extra reference! This will
diff --git a/arch/s390/kvm/Makefile b/arch/s390/kvm/Makefile
index 02217fb4ae10..f0ffe874adc2 100644
--- a/arch/s390/kvm/Makefile
+++ b/arch/s390/kvm/Makefile
@@ -8,7 +8,7 @@ include $(srctree)/virt/kvm/Makefile.kvm
ccflags-y := -Ivirt/kvm -Iarch/s390/kvm
kvm-y += kvm-s390.o intercept.o interrupt.o priv.o sigp.o
-kvm-y += diag.o gaccess.o guestdbg.o vsie.o pv.o
+kvm-y += diag.o gaccess.o guestdbg.o vsie.o pv.o gmap.o gmap-vsie.o
kvm-$(CONFIG_VFIO_PCI_ZDEV_KVM) += pci.o
obj-$(CONFIG_KVM) += kvm.o
diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
index 9816b0060fbe..f6fded15633a 100644
--- a/arch/s390/kvm/gaccess.c
+++ b/arch/s390/kvm/gaccess.c
@@ -16,6 +16,7 @@
#include <asm/gmap.h>
#include <asm/dat-bits.h>
#include "kvm-s390.h"
+#include "gmap.h"
#include "gaccess.h"
/*
@@ -1393,6 +1394,44 @@ shadow_pgt:
}
/**
+ * shadow_pgt_lookup() - find a shadow page table
+ * @sg: pointer to the shadow guest address space structure
+ * @saddr: the address in the shadow aguest address space
+ * @pgt: parent gmap address of the page table to get shadowed
+ * @dat_protection: if the pgtable is marked as protected by dat
+ * @fake: pgt references contiguous guest memory block, not a pgtable
+ *
+ * Returns 0 if the shadow page table was found and -EAGAIN if the page
+ * table was not found.
+ *
+ * Called with sg->mm->mmap_lock in read.
+ */
+static int shadow_pgt_lookup(struct gmap *sg, unsigned long saddr, unsigned long *pgt,
+ int *dat_protection, int *fake)
+{
+ unsigned long pt_index;
+ unsigned long *table;
+ struct page *page;
+ int rc;
+
+ spin_lock(&sg->guest_table_lock);
+ table = gmap_table_walk(sg, saddr, 1); /* get segment pointer */
+ if (table && !(*table & _SEGMENT_ENTRY_INVALID)) {
+ /* Shadow page tables are full pages (pte+pgste) */
+ page = pfn_to_page(*table >> PAGE_SHIFT);
+ pt_index = gmap_pgste_get_pgt_addr(page_to_virt(page));
+ *pgt = pt_index & ~GMAP_SHADOW_FAKE_TABLE;
+ *dat_protection = !!(*table & _SEGMENT_ENTRY_PROTECT);
+ *fake = !!(pt_index & GMAP_SHADOW_FAKE_TABLE);
+ rc = 0;
+ } else {
+ rc = -EAGAIN;
+ }
+ spin_unlock(&sg->guest_table_lock);
+ return rc;
+}
+
+/**
* kvm_s390_shadow_fault - handle fault on a shadow page table
* @vcpu: virtual cpu
* @sg: pointer to the shadow guest address space structure
@@ -1415,6 +1454,9 @@ int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *sg,
int dat_protection, fake;
int rc;
+ if (KVM_BUG_ON(!gmap_is_shadow(sg), vcpu->kvm))
+ return -EFAULT;
+
mmap_read_lock(sg->mm);
/*
* We don't want any guest-2 tables to change - so the parent
@@ -1423,7 +1465,7 @@ int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *sg,
*/
ipte_lock(vcpu->kvm);
- rc = gmap_shadow_pgt_lookup(sg, saddr, &pgt, &dat_protection, &fake);
+ rc = shadow_pgt_lookup(sg, saddr, &pgt, &dat_protection, &fake);
if (rc)
rc = kvm_s390_shadow_tables(sg, saddr, &pgt, &dat_protection,
&fake);
diff --git a/arch/s390/kvm/gmap-vsie.c b/arch/s390/kvm/gmap-vsie.c
new file mode 100644
index 000000000000..a6d1dbb04c97
--- /dev/null
+++ b/arch/s390/kvm/gmap-vsie.c
@@ -0,0 +1,142 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Guest memory management for KVM/s390 nested VMs.
+ *
+ * Copyright IBM Corp. 2008, 2020, 2024
+ *
+ * Author(s): Claudio Imbrenda <imbrenda@linux.ibm.com>
+ * Martin Schwidefsky <schwidefsky@de.ibm.com>
+ * David Hildenbrand <david@redhat.com>
+ * Janosch Frank <frankja@linux.vnet.ibm.com>
+ */
+
+#include <linux/compiler.h>
+#include <linux/kvm.h>
+#include <linux/kvm_host.h>
+#include <linux/pgtable.h>
+#include <linux/pagemap.h>
+#include <linux/mman.h>
+
+#include <asm/lowcore.h>
+#include <asm/gmap.h>
+#include <asm/uv.h>
+
+#include "kvm-s390.h"
+#include "gmap.h"
+
+/**
+ * gmap_find_shadow - find a specific asce in the list of shadow tables
+ * @parent: pointer to the parent gmap
+ * @asce: ASCE for which the shadow table is created
+ * @edat_level: edat level to be used for the shadow translation
+ *
+ * Returns the pointer to a gmap if a shadow table with the given asce is
+ * already available, ERR_PTR(-EAGAIN) if another one is just being created,
+ * otherwise NULL
+ *
+ * Context: Called with parent->shadow_lock held
+ */
+static struct gmap *gmap_find_shadow(struct gmap *parent, unsigned long asce, int edat_level)
+{
+ struct gmap *sg;
+
+ lockdep_assert_held(&parent->shadow_lock);
+ list_for_each_entry(sg, &parent->children, list) {
+ if (!gmap_shadow_valid(sg, asce, edat_level))
+ continue;
+ if (!sg->initialized)
+ return ERR_PTR(-EAGAIN);
+ refcount_inc(&sg->ref_count);
+ return sg;
+ }
+ return NULL;
+}
+
+/**
+ * gmap_shadow - create/find a shadow guest address space
+ * @parent: pointer to the parent gmap
+ * @asce: ASCE for which the shadow table is created
+ * @edat_level: edat level to be used for the shadow translation
+ *
+ * The pages of the top level page table referred by the asce parameter
+ * will be set to read-only and marked in the PGSTEs of the kvm process.
+ * The shadow table will be removed automatically on any change to the
+ * PTE mapping for the source table.
+ *
+ * Returns a guest address space structure, ERR_PTR(-ENOMEM) if out of memory,
+ * ERR_PTR(-EAGAIN) if the caller has to retry and ERR_PTR(-EFAULT) if the
+ * parent gmap table could not be protected.
+ */
+struct gmap *gmap_shadow(struct gmap *parent, unsigned long asce, int edat_level)
+{
+ struct gmap *sg, *new;
+ unsigned long limit;
+ int rc;
+
+ if (KVM_BUG_ON(parent->mm->context.allow_gmap_hpage_1m, (struct kvm *)parent->private) ||
+ KVM_BUG_ON(gmap_is_shadow(parent), (struct kvm *)parent->private))
+ return ERR_PTR(-EFAULT);
+ spin_lock(&parent->shadow_lock);
+ sg = gmap_find_shadow(parent, asce, edat_level);
+ spin_unlock(&parent->shadow_lock);
+ if (sg)
+ return sg;
+ /* Create a new shadow gmap */
+ limit = -1UL >> (33 - (((asce & _ASCE_TYPE_MASK) >> 2) * 11));
+ if (asce & _ASCE_REAL_SPACE)
+ limit = -1UL;
+ new = gmap_alloc(limit);
+ if (!new)
+ return ERR_PTR(-ENOMEM);
+ new->mm = parent->mm;
+ new->parent = gmap_get(parent);
+ new->private = parent->private;
+ new->orig_asce = asce;
+ new->edat_level = edat_level;
+ new->initialized = false;
+ spin_lock(&parent->shadow_lock);
+ /* Recheck if another CPU created the same shadow */
+ sg = gmap_find_shadow(parent, asce, edat_level);
+ if (sg) {
+ spin_unlock(&parent->shadow_lock);
+ gmap_free(new);
+ return sg;
+ }
+ if (asce & _ASCE_REAL_SPACE) {
+ /* only allow one real-space gmap shadow */
+ list_for_each_entry(sg, &parent->children, list) {
+ if (sg->orig_asce & _ASCE_REAL_SPACE) {
+ spin_lock(&sg->guest_table_lock);
+ gmap_unshadow(sg);
+ spin_unlock(&sg->guest_table_lock);
+ list_del(&sg->list);
+ gmap_put(sg);
+ break;
+ }
+ }
+ }
+ refcount_set(&new->ref_count, 2);
+ list_add(&new->list, &parent->children);
+ if (asce & _ASCE_REAL_SPACE) {
+ /* nothing to protect, return right away */
+ new->initialized = true;
+ spin_unlock(&parent->shadow_lock);
+ return new;
+ }
+ spin_unlock(&parent->shadow_lock);
+ /* protect after insertion, so it will get properly invalidated */
+ mmap_read_lock(parent->mm);
+ rc = __kvm_s390_mprotect_many(parent, asce & _ASCE_ORIGIN,
+ ((asce & _ASCE_TABLE_LENGTH) + 1),
+ PROT_READ, GMAP_NOTIFY_SHADOW);
+ mmap_read_unlock(parent->mm);
+ spin_lock(&parent->shadow_lock);
+ new->initialized = true;
+ if (rc) {
+ list_del(&new->list);
+ gmap_free(new);
+ new = ERR_PTR(rc);
+ }
+ spin_unlock(&parent->shadow_lock);
+ return new;
+}
diff --git a/arch/s390/kvm/gmap.c b/arch/s390/kvm/gmap.c
new file mode 100644
index 000000000000..02adf151d4de
--- /dev/null
+++ b/arch/s390/kvm/gmap.c
@@ -0,0 +1,212 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Guest memory management for KVM/s390
+ *
+ * Copyright IBM Corp. 2008, 2020, 2024
+ *
+ * Author(s): Claudio Imbrenda <imbrenda@linux.ibm.com>
+ * Martin Schwidefsky <schwidefsky@de.ibm.com>
+ * David Hildenbrand <david@redhat.com>
+ * Janosch Frank <frankja@linux.vnet.ibm.com>
+ */
+
+#include <linux/compiler.h>
+#include <linux/kvm.h>
+#include <linux/kvm_host.h>
+#include <linux/pgtable.h>
+#include <linux/pagemap.h>
+
+#include <asm/lowcore.h>
+#include <asm/gmap.h>
+#include <asm/uv.h>
+
+#include "gmap.h"
+
+/**
+ * should_export_before_import - Determine whether an export is needed
+ * before an import-like operation
+ * @uvcb: the Ultravisor control block of the UVC to be performed
+ * @mm: the mm of the process
+ *
+ * Returns whether an export is needed before every import-like operation.
+ * This is needed for shared pages, which don't trigger a secure storage
+ * exception when accessed from a different guest.
+ *
+ * Although considered as one, the Unpin Page UVC is not an actual import,
+ * so it is not affected.
+ *
+ * No export is needed also when there is only one protected VM, because the
+ * page cannot belong to the wrong VM in that case (there is no "other VM"
+ * it can belong to).
+ *
+ * Return: true if an export is needed before every import, otherwise false.
+ */
+static bool should_export_before_import(struct uv_cb_header *uvcb, struct mm_struct *mm)
+{
+ /*
+ * The misc feature indicates, among other things, that importing a
+ * shared page from a different protected VM will automatically also
+ * transfer its ownership.
+ */
+ if (uv_has_feature(BIT_UV_FEAT_MISC))
+ return false;
+ if (uvcb->cmd == UVC_CMD_UNPIN_PAGE_SHARED)
+ return false;
+ return atomic_read(&mm->context.protected_count) > 1;
+}
+
+static int __gmap_make_secure(struct gmap *gmap, struct page *page, void *uvcb)
+{
+ struct folio *folio = page_folio(page);
+ int rc;
+
+ /*
+ * Secure pages cannot be huge and userspace should not combine both.
+ * In case userspace does it anyway this will result in an -EFAULT for
+ * the unpack. The guest is thus never reaching secure mode.
+ * If userspace plays dirty tricks and decides to map huge pages at a
+ * later point in time, it will receive a segmentation fault or
+ * KVM_RUN will return -EFAULT.
+ */
+ if (folio_test_hugetlb(folio))
+ return -EFAULT;
+ if (folio_test_large(folio)) {
+ mmap_read_unlock(gmap->mm);
+ rc = kvm_s390_wiggle_split_folio(gmap->mm, folio, true);
+ mmap_read_lock(gmap->mm);
+ if (rc)
+ return rc;
+ folio = page_folio(page);
+ }
+
+ if (!folio_trylock(folio))
+ return -EAGAIN;
+ if (should_export_before_import(uvcb, gmap->mm))
+ uv_convert_from_secure(folio_to_phys(folio));
+ rc = make_folio_secure(folio, uvcb);
+ folio_unlock(folio);
+
+ /*
+ * In theory a race is possible and the folio might have become
+ * large again before the folio_trylock() above. In that case, no
+ * action is performed and -EAGAIN is returned; the callers will
+ * have to try again later.
+ * In most cases this implies running the VM again, getting the same
+ * exception again, and make another attempt in this function.
+ * This is expected to happen extremely rarely.
+ */
+ if (rc == -E2BIG)
+ return -EAGAIN;
+ /* The folio has too many references, try to shake some off */
+ if (rc == -EBUSY) {
+ mmap_read_unlock(gmap->mm);
+ kvm_s390_wiggle_split_folio(gmap->mm, folio, false);
+ mmap_read_lock(gmap->mm);
+ return -EAGAIN;
+ }
+
+ return rc;
+}
+
+/**
+ * gmap_make_secure() - make one guest page secure
+ * @gmap: the guest gmap
+ * @gaddr: the guest address that needs to be made secure
+ * @uvcb: the UVCB specifying which operation needs to be performed
+ *
+ * Context: needs to be called with kvm->srcu held.
+ * Return: 0 on success, < 0 in case of error (see __gmap_make_secure()).
+ */
+int gmap_make_secure(struct gmap *gmap, unsigned long gaddr, void *uvcb)
+{
+ struct kvm *kvm = gmap->private;
+ struct page *page;
+ int rc = 0;
+
+ lockdep_assert_held(&kvm->srcu);
+
+ page = gfn_to_page(kvm, gpa_to_gfn(gaddr));
+ mmap_read_lock(gmap->mm);
+ if (page)
+ rc = __gmap_make_secure(gmap, page, uvcb);
+ kvm_release_page_clean(page);
+ mmap_read_unlock(gmap->mm);
+
+ return rc;
+}
+
+int gmap_convert_to_secure(struct gmap *gmap, unsigned long gaddr)
+{
+ struct uv_cb_cts uvcb = {
+ .header.cmd = UVC_CMD_CONV_TO_SEC_STOR,
+ .header.len = sizeof(uvcb),
+ .guest_handle = gmap->guest_handle,
+ .gaddr = gaddr,
+ };
+
+ return gmap_make_secure(gmap, gaddr, &uvcb);
+}
+
+/**
+ * __gmap_destroy_page() - Destroy a guest page.
+ * @gmap: the gmap of the guest
+ * @page: the page to destroy
+ *
+ * An attempt will be made to destroy the given guest page. If the attempt
+ * fails, an attempt is made to export the page. If both attempts fail, an
+ * appropriate error is returned.
+ *
+ * Context: must be called holding the mm lock for gmap->mm
+ */
+static int __gmap_destroy_page(struct gmap *gmap, struct page *page)
+{
+ struct folio *folio = page_folio(page);
+ int rc;
+
+ /*
+ * See gmap_make_secure(): large folios cannot be secure. Small
+ * folio implies FW_LEVEL_PTE.
+ */
+ if (folio_test_large(folio))
+ return -EFAULT;
+
+ rc = uv_destroy_folio(folio);
+ /*
+ * Fault handlers can race; it is possible that two CPUs will fault
+ * on the same secure page. One CPU can destroy the page, reboot,
+ * re-enter secure mode and import it, while the second CPU was
+ * stuck at the beginning of the handler. At some point the second
+ * CPU will be able to progress, and it will not be able to destroy
+ * the page. In that case we do not want to terminate the process,
+ * we instead try to export the page.
+ */
+ if (rc)
+ rc = uv_convert_from_secure_folio(folio);
+
+ return rc;
+}
+
+/**
+ * gmap_destroy_page() - Destroy a guest page.
+ * @gmap: the gmap of the guest
+ * @gaddr: the guest address to destroy
+ *
+ * An attempt will be made to destroy the given guest page. If the attempt
+ * fails, an attempt is made to export the page. If both attempts fail, an
+ * appropriate error is returned.
+ *
+ * Context: may sleep.
+ */
+int gmap_destroy_page(struct gmap *gmap, unsigned long gaddr)
+{
+ struct page *page;
+ int rc = 0;
+
+ mmap_read_lock(gmap->mm);
+ page = gfn_to_page(gmap->private, gpa_to_gfn(gaddr));
+ if (page)
+ rc = __gmap_destroy_page(gmap, page);
+ kvm_release_page_clean(page);
+ mmap_read_unlock(gmap->mm);
+ return rc;
+}
diff --git a/arch/s390/kvm/gmap.h b/arch/s390/kvm/gmap.h
new file mode 100644
index 000000000000..c8f031c9ea5f
--- /dev/null
+++ b/arch/s390/kvm/gmap.h
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * KVM guest address space mapping code
+ *
+ * Copyright IBM Corp. 2007, 2016, 2025
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ * Claudio Imbrenda <imbrenda@linux.ibm.com>
+ */
+
+#ifndef ARCH_KVM_S390_GMAP_H
+#define ARCH_KVM_S390_GMAP_H
+
+#define GMAP_SHADOW_FAKE_TABLE 1ULL
+
+int gmap_make_secure(struct gmap *gmap, unsigned long gaddr, void *uvcb);
+int gmap_convert_to_secure(struct gmap *gmap, unsigned long gaddr);
+int gmap_destroy_page(struct gmap *gmap, unsigned long gaddr);
+struct gmap *gmap_shadow(struct gmap *parent, unsigned long asce, int edat_level);
+
+/**
+ * gmap_shadow_valid - check if a shadow guest address space matches the
+ * given properties and is still valid
+ * @sg: pointer to the shadow guest address space structure
+ * @asce: ASCE for which the shadow table is requested
+ * @edat_level: edat level to be used for the shadow translation
+ *
+ * Returns 1 if the gmap shadow is still valid and matches the given
+ * properties, the caller can continue using it. Returns 0 otherwise, the
+ * caller has to request a new shadow gmap in this case.
+ *
+ */
+static inline int gmap_shadow_valid(struct gmap *sg, unsigned long asce, int edat_level)
+{
+ if (sg->removed)
+ return 0;
+ return sg->orig_asce == asce && sg->edat_level == edat_level;
+}
+
+#endif
diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c
index 5bbaadf75dc6..610dd44a948b 100644
--- a/arch/s390/kvm/intercept.c
+++ b/arch/s390/kvm/intercept.c
@@ -21,6 +21,7 @@
#include "gaccess.h"
#include "trace.h"
#include "trace-s390.h"
+#include "gmap.h"
u8 kvm_s390_get_ilen(struct kvm_vcpu *vcpu)
{
@@ -367,7 +368,7 @@ static int handle_mvpg_pei(struct kvm_vcpu *vcpu)
reg2, &srcaddr, GACC_FETCH, 0);
if (rc)
return kvm_s390_inject_prog_cond(vcpu, rc);
- rc = gmap_fault(vcpu->arch.gmap, srcaddr, 0);
+ rc = kvm_s390_handle_dat_fault(vcpu, srcaddr, 0);
if (rc != 0)
return rc;
@@ -376,7 +377,7 @@ static int handle_mvpg_pei(struct kvm_vcpu *vcpu)
reg1, &dstaddr, GACC_STORE, 0);
if (rc)
return kvm_s390_inject_prog_cond(vcpu, rc);
- rc = gmap_fault(vcpu->arch.gmap, dstaddr, FAULT_FLAG_WRITE);
+ rc = kvm_s390_handle_dat_fault(vcpu, dstaddr, FOLL_WRITE);
if (rc != 0)
return rc;
@@ -549,7 +550,7 @@ static int handle_pv_uvc(struct kvm_vcpu *vcpu)
* If the unpin did not succeed, the guest will exit again for the UVC
* and we will retry the unpin.
*/
- if (rc == -EINVAL)
+ if (rc == -EINVAL || rc == -ENXIO)
return 0;
/*
* If we got -EAGAIN here, we simply return it. It will eventually
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index d4f031e086fc..07ff0e10cb7f 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -2893,7 +2893,8 @@ int kvm_set_routing_entry(struct kvm *kvm,
struct kvm_kernel_irq_routing_entry *e,
const struct kvm_irq_routing_entry *ue)
{
- u64 uaddr;
+ u64 uaddr_s, uaddr_i;
+ int idx;
switch (ue->type) {
/* we store the userspace addresses instead of the guest addresses */
@@ -2901,14 +2902,16 @@ int kvm_set_routing_entry(struct kvm *kvm,
if (kvm_is_ucontrol(kvm))
return -EINVAL;
e->set = set_adapter_int;
- uaddr = gmap_translate(kvm->arch.gmap, ue->u.adapter.summary_addr);
- if (uaddr == -EFAULT)
- return -EFAULT;
- e->adapter.summary_addr = uaddr;
- uaddr = gmap_translate(kvm->arch.gmap, ue->u.adapter.ind_addr);
- if (uaddr == -EFAULT)
+
+ idx = srcu_read_lock(&kvm->srcu);
+ uaddr_s = gpa_to_hva(kvm, ue->u.adapter.summary_addr);
+ uaddr_i = gpa_to_hva(kvm, ue->u.adapter.ind_addr);
+ srcu_read_unlock(&kvm->srcu, idx);
+
+ if (kvm_is_error_hva(uaddr_s) || kvm_is_error_hva(uaddr_i))
return -EFAULT;
- e->adapter.ind_addr = uaddr;
+ e->adapter.summary_addr = uaddr_s;
+ e->adapter.ind_addr = uaddr_i;
e->adapter.summary_offset = ue->u.adapter.summary_offset;
e->adapter.ind_offset = ue->u.adapter.ind_offset;
e->adapter.adapter_id = ue->u.adapter.adapter_id;
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index d8080c27d45b..ebecb96bacce 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -50,6 +50,7 @@
#include "kvm-s390.h"
#include "gaccess.h"
#include "pci.h"
+#include "gmap.h"
#define CREATE_TRACE_POINTS
#include "trace.h"
@@ -3428,8 +3429,20 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
VM_EVENT(kvm, 3, "vm created with type %lu", type);
if (type & KVM_VM_S390_UCONTROL) {
+ struct kvm_userspace_memory_region2 fake_memslot = {
+ .slot = KVM_S390_UCONTROL_MEMSLOT,
+ .guest_phys_addr = 0,
+ .userspace_addr = 0,
+ .memory_size = ALIGN_DOWN(TASK_SIZE, _SEGMENT_SIZE),
+ .flags = 0,
+ };
+
kvm->arch.gmap = NULL;
kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
+ /* one flat fake memslot covering the whole address-space */
+ mutex_lock(&kvm->slots_lock);
+ KVM_BUG_ON(kvm_set_internal_memslot(kvm, &fake_memslot), kvm);
+ mutex_unlock(&kvm->slots_lock);
} else {
if (sclp.hamax == U64_MAX)
kvm->arch.mem_limit = TASK_SIZE_MAX;
@@ -4498,6 +4511,75 @@ static bool ibs_enabled(struct kvm_vcpu *vcpu)
return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS);
}
+static int __kvm_s390_fixup_fault_sync(struct gmap *gmap, gpa_t gaddr, unsigned int flags)
+{
+ struct kvm *kvm = gmap->private;
+ gfn_t gfn = gpa_to_gfn(gaddr);
+ bool unlocked;
+ hva_t vmaddr;
+ gpa_t tmp;
+ int rc;
+
+ if (kvm_is_ucontrol(kvm)) {
+ tmp = __gmap_translate(gmap, gaddr);
+ gfn = gpa_to_gfn(tmp);
+ }
+
+ vmaddr = gfn_to_hva(kvm, gfn);
+ rc = fixup_user_fault(gmap->mm, vmaddr, FAULT_FLAG_WRITE, &unlocked);
+ if (!rc)
+ rc = __gmap_link(gmap, gaddr, vmaddr);
+ return rc;
+}
+
+/**
+ * __kvm_s390_mprotect_many() - Apply specified protection to guest pages
+ * @gmap: the gmap of the guest
+ * @gpa: the starting guest address
+ * @npages: how many pages to protect
+ * @prot: indicates access rights: PROT_NONE, PROT_READ or PROT_WRITE
+ * @bits: pgste notification bits to set
+ *
+ * Returns: 0 in case of success, < 0 in case of error - see gmap_protect_one()
+ *
+ * Context: kvm->srcu and gmap->mm need to be held in read mode
+ */
+int __kvm_s390_mprotect_many(struct gmap *gmap, gpa_t gpa, u8 npages, unsigned int prot,
+ unsigned long bits)
+{
+ unsigned int fault_flag = (prot & PROT_WRITE) ? FAULT_FLAG_WRITE : 0;
+ gpa_t end = gpa + npages * PAGE_SIZE;
+ int rc;
+
+ for (; gpa < end; gpa = ALIGN(gpa + 1, rc)) {
+ rc = gmap_protect_one(gmap, gpa, prot, bits);
+ if (rc == -EAGAIN) {
+ __kvm_s390_fixup_fault_sync(gmap, gpa, fault_flag);
+ rc = gmap_protect_one(gmap, gpa, prot, bits);
+ }
+ if (rc < 0)
+ return rc;
+ }
+
+ return 0;
+}
+
+static int kvm_s390_mprotect_notify_prefix(struct kvm_vcpu *vcpu)
+{
+ gpa_t gaddr = kvm_s390_get_prefix(vcpu);
+ int idx, rc;
+
+ idx = srcu_read_lock(&vcpu->kvm->srcu);
+ mmap_read_lock(vcpu->arch.gmap->mm);
+
+ rc = __kvm_s390_mprotect_many(vcpu->arch.gmap, gaddr, 2, PROT_WRITE, GMAP_NOTIFY_MPROT);
+
+ mmap_read_unlock(vcpu->arch.gmap->mm);
+ srcu_read_unlock(&vcpu->kvm->srcu, idx);
+
+ return rc;
+}
+
static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
{
retry:
@@ -4513,9 +4595,8 @@ retry:
*/
if (kvm_check_request(KVM_REQ_REFRESH_GUEST_PREFIX, vcpu)) {
int rc;
- rc = gmap_mprotect_notify(vcpu->arch.gmap,
- kvm_s390_get_prefix(vcpu),
- PAGE_SIZE * 2, PROT_WRITE);
+
+ rc = kvm_s390_mprotect_notify_prefix(vcpu);
if (rc) {
kvm_make_request(KVM_REQ_REFRESH_GUEST_PREFIX, vcpu);
return rc;
@@ -4766,11 +4847,111 @@ static int vcpu_post_run_addressing_exception(struct kvm_vcpu *vcpu)
return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
}
+static void kvm_s390_assert_primary_as(struct kvm_vcpu *vcpu)
+{
+ KVM_BUG(current->thread.gmap_teid.as != PSW_BITS_AS_PRIMARY, vcpu->kvm,
+ "Unexpected program interrupt 0x%x, TEID 0x%016lx",
+ current->thread.gmap_int_code, current->thread.gmap_teid.val);
+}
+
+/*
+ * __kvm_s390_handle_dat_fault() - handle a dat fault for the gmap of a vcpu
+ * @vcpu: the vCPU whose gmap is to be fixed up
+ * @gfn: the guest frame number used for memslots (including fake memslots)
+ * @gaddr: the gmap address, does not have to match @gfn for ucontrol gmaps
+ * @flags: FOLL_* flags
+ *
+ * Return: 0 on success, < 0 in case of error.
+ * Context: The mm lock must not be held before calling. May sleep.
+ */
+int __kvm_s390_handle_dat_fault(struct kvm_vcpu *vcpu, gfn_t gfn, gpa_t gaddr, unsigned int flags)
+{
+ struct kvm_memory_slot *slot;
+ unsigned int fault_flags;
+ bool writable, unlocked;
+ unsigned long vmaddr;
+ struct page *page;
+ kvm_pfn_t pfn;
+ int rc;
+
+ slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
+ if (!slot || slot->flags & KVM_MEMSLOT_INVALID)
+ return vcpu_post_run_addressing_exception(vcpu);
+
+ fault_flags = flags & FOLL_WRITE ? FAULT_FLAG_WRITE : 0;
+ if (vcpu->arch.gmap->pfault_enabled)
+ flags |= FOLL_NOWAIT;
+ vmaddr = __gfn_to_hva_memslot(slot, gfn);
+
+try_again:
+ pfn = __kvm_faultin_pfn(slot, gfn, flags, &writable, &page);
+
+ /* Access outside memory, inject addressing exception */
+ if (is_noslot_pfn(pfn))
+ return vcpu_post_run_addressing_exception(vcpu);
+ /* Signal pending: try again */
+ if (pfn == KVM_PFN_ERR_SIGPENDING)
+ return -EAGAIN;
+
+ /* Needs I/O, try to setup async pfault (only possible with FOLL_NOWAIT) */
+ if (pfn == KVM_PFN_ERR_NEEDS_IO) {
+ trace_kvm_s390_major_guest_pfault(vcpu);
+ if (kvm_arch_setup_async_pf(vcpu))
+ return 0;
+ vcpu->stat.pfault_sync++;
+ /* Could not setup async pfault, try again synchronously */
+ flags &= ~FOLL_NOWAIT;
+ goto try_again;
+ }
+ /* Any other error */
+ if (is_error_pfn(pfn))
+ return -EFAULT;
+
+ /* Success */
+ mmap_read_lock(vcpu->arch.gmap->mm);
+ /* Mark the userspace PTEs as young and/or dirty, to avoid page fault loops */
+ rc = fixup_user_fault(vcpu->arch.gmap->mm, vmaddr, fault_flags, &unlocked);
+ if (!rc)
+ rc = __gmap_link(vcpu->arch.gmap, gaddr, vmaddr);
+ scoped_guard(spinlock, &vcpu->kvm->mmu_lock) {
+ kvm_release_faultin_page(vcpu->kvm, page, false, writable);
+ }
+ mmap_read_unlock(vcpu->arch.gmap->mm);
+ return rc;
+}
+
+static int vcpu_dat_fault_handler(struct kvm_vcpu *vcpu, unsigned long gaddr, unsigned int flags)
+{
+ unsigned long gaddr_tmp;
+ gfn_t gfn;
+
+ gfn = gpa_to_gfn(gaddr);
+ if (kvm_is_ucontrol(vcpu->kvm)) {
+ /*
+ * This translates the per-vCPU guest address into a
+ * fake guest address, which can then be used with the
+ * fake memslots that are identity mapping userspace.
+ * This allows ucontrol VMs to use the normal fault
+ * resolution path, like normal VMs.
+ */
+ mmap_read_lock(vcpu->arch.gmap->mm);
+ gaddr_tmp = __gmap_translate(vcpu->arch.gmap, gaddr);
+ mmap_read_unlock(vcpu->arch.gmap->mm);
+ if (gaddr_tmp == -EFAULT) {
+ vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
+ vcpu->run->s390_ucontrol.trans_exc_code = gaddr;
+ vcpu->run->s390_ucontrol.pgm_code = PGM_SEGMENT_TRANSLATION;
+ return -EREMOTE;
+ }
+ gfn = gpa_to_gfn(gaddr_tmp);
+ }
+ return __kvm_s390_handle_dat_fault(vcpu, gfn, gaddr, flags);
+}
+
static int vcpu_post_run_handle_fault(struct kvm_vcpu *vcpu)
{
unsigned int flags = 0;
unsigned long gaddr;
- int rc = 0;
gaddr = current->thread.gmap_teid.addr * PAGE_SIZE;
if (kvm_s390_cur_gmap_fault_is_write())
@@ -4781,9 +4962,7 @@ static int vcpu_post_run_handle_fault(struct kvm_vcpu *vcpu)
vcpu->stat.exit_null++;
break;
case PGM_NON_SECURE_STORAGE_ACCESS:
- KVM_BUG(current->thread.gmap_teid.as != PSW_BITS_AS_PRIMARY, vcpu->kvm,
- "Unexpected program interrupt 0x%x, TEID 0x%016lx",
- current->thread.gmap_int_code, current->thread.gmap_teid.val);
+ kvm_s390_assert_primary_as(vcpu);
/*
* This is normal operation; a page belonging to a protected
* guest has not been imported yet. Try to import the page into
@@ -4794,9 +4973,7 @@ static int vcpu_post_run_handle_fault(struct kvm_vcpu *vcpu)
break;
case PGM_SECURE_STORAGE_ACCESS:
case PGM_SECURE_STORAGE_VIOLATION:
- KVM_BUG(current->thread.gmap_teid.as != PSW_BITS_AS_PRIMARY, vcpu->kvm,
- "Unexpected program interrupt 0x%x, TEID 0x%016lx",
- current->thread.gmap_int_code, current->thread.gmap_teid.val);
+ kvm_s390_assert_primary_as(vcpu);
/*
* This can happen after a reboot with asynchronous teardown;
* the new guest (normal or protected) will run on top of the
@@ -4825,40 +5002,15 @@ static int vcpu_post_run_handle_fault(struct kvm_vcpu *vcpu)
case PGM_REGION_FIRST_TRANS:
case PGM_REGION_SECOND_TRANS:
case PGM_REGION_THIRD_TRANS:
- KVM_BUG(current->thread.gmap_teid.as != PSW_BITS_AS_PRIMARY, vcpu->kvm,
- "Unexpected program interrupt 0x%x, TEID 0x%016lx",
- current->thread.gmap_int_code, current->thread.gmap_teid.val);
- if (vcpu->arch.gmap->pfault_enabled) {
- rc = gmap_fault(vcpu->arch.gmap, gaddr, flags | FAULT_FLAG_RETRY_NOWAIT);
- if (rc == -EFAULT)
- return vcpu_post_run_addressing_exception(vcpu);
- if (rc == -EAGAIN) {
- trace_kvm_s390_major_guest_pfault(vcpu);
- if (kvm_arch_setup_async_pf(vcpu))
- return 0;
- vcpu->stat.pfault_sync++;
- } else {
- return rc;
- }
- }
- rc = gmap_fault(vcpu->arch.gmap, gaddr, flags);
- if (rc == -EFAULT) {
- if (kvm_is_ucontrol(vcpu->kvm)) {
- vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
- vcpu->run->s390_ucontrol.trans_exc_code = gaddr;
- vcpu->run->s390_ucontrol.pgm_code = 0x10;
- return -EREMOTE;
- }
- return vcpu_post_run_addressing_exception(vcpu);
- }
- break;
+ kvm_s390_assert_primary_as(vcpu);
+ return vcpu_dat_fault_handler(vcpu, gaddr, flags);
default:
KVM_BUG(1, vcpu->kvm, "Unexpected program interrupt 0x%x, TEID 0x%016lx",
current->thread.gmap_int_code, current->thread.gmap_teid.val);
send_sig(SIGSEGV, current, 0);
break;
}
- return rc;
+ return 0;
}
static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
@@ -5737,7 +5889,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
}
#endif
case KVM_S390_VCPU_FAULT: {
- r = gmap_fault(vcpu->arch.gmap, arg, 0);
+ idx = srcu_read_lock(&vcpu->kvm->srcu);
+ r = vcpu_dat_fault_handler(vcpu, arg, 0);
+ srcu_read_unlock(&vcpu->kvm->srcu, idx);
break;
}
case KVM_ENABLE_CAP:
@@ -5853,7 +6007,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
{
gpa_t size;
- if (kvm_is_ucontrol(kvm))
+ if (kvm_is_ucontrol(kvm) && new->id < KVM_USER_MEM_SLOTS)
return -EINVAL;
/* When we are protected, we should not change the memory slots */
@@ -5905,6 +6059,9 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
{
int rc = 0;
+ if (kvm_is_ucontrol(kvm))
+ return;
+
switch (change) {
case KVM_MR_DELETE:
rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index 597d7a71deeb..8d3bbb2dd8d2 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -20,6 +20,8 @@
#include <asm/processor.h>
#include <asm/sclp.h>
+#define KVM_S390_UCONTROL_MEMSLOT (KVM_USER_MEM_SLOTS + 0)
+
static inline void kvm_s390_fpu_store(struct kvm_run *run)
{
fpu_stfpc(&run->s.regs.fpc);
@@ -279,6 +281,15 @@ static inline u32 kvm_s390_get_gisa_desc(struct kvm *kvm)
return gd;
}
+static inline hva_t gpa_to_hva(struct kvm *kvm, gpa_t gpa)
+{
+ hva_t hva = gfn_to_hva(kvm, gpa_to_gfn(gpa));
+
+ if (!kvm_is_error_hva(hva))
+ hva |= offset_in_page(gpa);
+ return hva;
+}
+
/* implemented in pv.c */
int kvm_s390_pv_destroy_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc);
int kvm_s390_pv_create_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc);
@@ -408,6 +419,14 @@ void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu);
void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm);
__u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu);
int kvm_s390_cpus_from_pv(struct kvm *kvm, u16 *rc, u16 *rrc);
+int __kvm_s390_handle_dat_fault(struct kvm_vcpu *vcpu, gfn_t gfn, gpa_t gaddr, unsigned int flags);
+int __kvm_s390_mprotect_many(struct gmap *gmap, gpa_t gpa, u8 npages, unsigned int prot,
+ unsigned long bits);
+
+static inline int kvm_s390_handle_dat_fault(struct kvm_vcpu *vcpu, gpa_t gaddr, unsigned int flags)
+{
+ return __kvm_s390_handle_dat_fault(vcpu, gpa_to_gfn(gaddr), gaddr, flags);
+}
/* implemented in diag.c */
int kvm_s390_handle_diag(struct kvm_vcpu *vcpu);
diff --git a/arch/s390/kvm/pv.c b/arch/s390/kvm/pv.c
index 75e81ba26d04..22c012aa5206 100644
--- a/arch/s390/kvm/pv.c
+++ b/arch/s390/kvm/pv.c
@@ -17,6 +17,7 @@
#include <linux/sched/mm.h>
#include <linux/mmu_notifier.h>
#include "kvm-s390.h"
+#include "gmap.h"
bool kvm_s390_pv_is_protected(struct kvm *kvm)
{
@@ -638,10 +639,28 @@ static int unpack_one(struct kvm *kvm, unsigned long addr, u64 tweak,
.tweak[1] = offset,
};
int ret = gmap_make_secure(kvm->arch.gmap, addr, &uvcb);
+ unsigned long vmaddr;
+ bool unlocked;
*rc = uvcb.header.rc;
*rrc = uvcb.header.rrc;
+ if (ret == -ENXIO) {
+ mmap_read_lock(kvm->mm);
+ vmaddr = gfn_to_hva(kvm, gpa_to_gfn(addr));
+ if (kvm_is_error_hva(vmaddr)) {
+ ret = -EFAULT;
+ } else {
+ ret = fixup_user_fault(kvm->mm, vmaddr, FAULT_FLAG_WRITE, &unlocked);
+ if (!ret)
+ ret = __gmap_link(kvm->arch.gmap, addr, vmaddr);
+ }
+ mmap_read_unlock(kvm->mm);
+ if (!ret)
+ return -EAGAIN;
+ return ret;
+ }
+
if (ret && ret != -EAGAIN)
KVM_UV_EVENT(kvm, 3, "PROTVIRT VM UNPACK: failed addr %llx with rc %x rrc %x",
uvcb.gaddr, *rc, *rrc);
@@ -660,6 +679,8 @@ int kvm_s390_pv_unpack(struct kvm *kvm, unsigned long addr, unsigned long size,
KVM_UV_EVENT(kvm, 3, "PROTVIRT VM UNPACK: start addr %lx size %lx",
addr, size);
+ guard(srcu)(&kvm->srcu);
+
while (offset < size) {
ret = unpack_one(kvm, addr, tweak, offset, rc, rrc);
if (ret == -EAGAIN) {
diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c
index a687695d8f68..a78df3a4f353 100644
--- a/arch/s390/kvm/vsie.c
+++ b/arch/s390/kvm/vsie.c
@@ -13,6 +13,7 @@
#include <linux/bitmap.h>
#include <linux/sched/signal.h>
#include <linux/io.h>
+#include <linux/mman.h>
#include <asm/gmap.h>
#include <asm/mmu_context.h>
@@ -22,6 +23,11 @@
#include <asm/facility.h>
#include "kvm-s390.h"
#include "gaccess.h"
+#include "gmap.h"
+
+enum vsie_page_flags {
+ VSIE_PAGE_IN_USE = 0,
+};
struct vsie_page {
struct kvm_s390_sie_block scb_s; /* 0x0000 */
@@ -46,7 +52,18 @@ struct vsie_page {
gpa_t gvrd_gpa; /* 0x0240 */
gpa_t riccbd_gpa; /* 0x0248 */
gpa_t sdnx_gpa; /* 0x0250 */
- __u8 reserved[0x0700 - 0x0258]; /* 0x0258 */
+ /*
+ * guest address of the original SCB. Remains set for free vsie
+ * pages, so we can properly look them up in our addr_to_page
+ * radix tree.
+ */
+ gpa_t scb_gpa; /* 0x0258 */
+ /*
+ * Flags: must be set/cleared atomically after the vsie page can be
+ * looked up by other CPUs.
+ */
+ unsigned long flags; /* 0x0260 */
+ __u8 reserved[0x0700 - 0x0268]; /* 0x0268 */
struct kvm_s390_crypto_cb crycb; /* 0x0700 */
__u8 fac[S390_ARCH_FAC_LIST_SIZE_BYTE]; /* 0x0800 */
};
@@ -584,7 +601,6 @@ void kvm_s390_vsie_gmap_notifier(struct gmap *gmap, unsigned long start,
struct kvm *kvm = gmap->private;
struct vsie_page *cur;
unsigned long prefix;
- struct page *page;
int i;
if (!gmap_is_shadow(gmap))
@@ -594,10 +610,9 @@ void kvm_s390_vsie_gmap_notifier(struct gmap *gmap, unsigned long start,
* therefore we can safely reference them all the time.
*/
for (i = 0; i < kvm->arch.vsie.page_count; i++) {
- page = READ_ONCE(kvm->arch.vsie.pages[i]);
- if (!page)
+ cur = READ_ONCE(kvm->arch.vsie.pages[i]);
+ if (!cur)
continue;
- cur = page_to_virt(page);
if (READ_ONCE(cur->gmap) != gmap)
continue;
prefix = cur->scb_s.prefix << GUEST_PREFIX_SHIFT;
@@ -1345,6 +1360,20 @@ static int vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
return rc;
}
+/* Try getting a given vsie page, returning "true" on success. */
+static inline bool try_get_vsie_page(struct vsie_page *vsie_page)
+{
+ if (test_bit(VSIE_PAGE_IN_USE, &vsie_page->flags))
+ return false;
+ return !test_and_set_bit(VSIE_PAGE_IN_USE, &vsie_page->flags);
+}
+
+/* Put a vsie page acquired through get_vsie_page / try_get_vsie_page. */
+static void put_vsie_page(struct vsie_page *vsie_page)
+{
+ clear_bit(VSIE_PAGE_IN_USE, &vsie_page->flags);
+}
+
/*
* Get or create a vsie page for a scb address.
*
@@ -1355,16 +1384,21 @@ static int vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
static struct vsie_page *get_vsie_page(struct kvm *kvm, unsigned long addr)
{
struct vsie_page *vsie_page;
- struct page *page;
int nr_vcpus;
rcu_read_lock();
- page = radix_tree_lookup(&kvm->arch.vsie.addr_to_page, addr >> 9);
+ vsie_page = radix_tree_lookup(&kvm->arch.vsie.addr_to_page, addr >> 9);
rcu_read_unlock();
- if (page) {
- if (page_ref_inc_return(page) == 2)
- return page_to_virt(page);
- page_ref_dec(page);
+ if (vsie_page) {
+ if (try_get_vsie_page(vsie_page)) {
+ if (vsie_page->scb_gpa == addr)
+ return vsie_page;
+ /*
+ * We raced with someone reusing + putting this vsie
+ * page before we grabbed it.
+ */
+ put_vsie_page(vsie_page);
+ }
}
/*
@@ -1375,36 +1409,40 @@ static struct vsie_page *get_vsie_page(struct kvm *kvm, unsigned long addr)
mutex_lock(&kvm->arch.vsie.mutex);
if (kvm->arch.vsie.page_count < nr_vcpus) {
- page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO | GFP_DMA);
- if (!page) {
+ vsie_page = (void *)__get_free_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO | GFP_DMA);
+ if (!vsie_page) {
mutex_unlock(&kvm->arch.vsie.mutex);
return ERR_PTR(-ENOMEM);
}
- page_ref_inc(page);
- kvm->arch.vsie.pages[kvm->arch.vsie.page_count] = page;
+ __set_bit(VSIE_PAGE_IN_USE, &vsie_page->flags);
+ kvm->arch.vsie.pages[kvm->arch.vsie.page_count] = vsie_page;
kvm->arch.vsie.page_count++;
} else {
/* reuse an existing entry that belongs to nobody */
while (true) {
- page = kvm->arch.vsie.pages[kvm->arch.vsie.next];
- if (page_ref_inc_return(page) == 2)
+ vsie_page = kvm->arch.vsie.pages[kvm->arch.vsie.next];
+ if (try_get_vsie_page(vsie_page))
break;
- page_ref_dec(page);
kvm->arch.vsie.next++;
kvm->arch.vsie.next %= nr_vcpus;
}
- radix_tree_delete(&kvm->arch.vsie.addr_to_page, page->index >> 9);
+ if (vsie_page->scb_gpa != ULONG_MAX)
+ radix_tree_delete(&kvm->arch.vsie.addr_to_page,
+ vsie_page->scb_gpa >> 9);
}
- page->index = addr;
- /* double use of the same address */
- if (radix_tree_insert(&kvm->arch.vsie.addr_to_page, addr >> 9, page)) {
- page_ref_dec(page);
+ /* Mark it as invalid until it resides in the tree. */
+ vsie_page->scb_gpa = ULONG_MAX;
+
+ /* Double use of the same address or allocation failure. */
+ if (radix_tree_insert(&kvm->arch.vsie.addr_to_page, addr >> 9,
+ vsie_page)) {
+ put_vsie_page(vsie_page);
mutex_unlock(&kvm->arch.vsie.mutex);
return NULL;
}
+ vsie_page->scb_gpa = addr;
mutex_unlock(&kvm->arch.vsie.mutex);
- vsie_page = page_to_virt(page);
memset(&vsie_page->scb_s, 0, sizeof(struct kvm_s390_sie_block));
release_gmap_shadow(vsie_page);
vsie_page->fault_addr = 0;
@@ -1412,14 +1450,6 @@ static struct vsie_page *get_vsie_page(struct kvm *kvm, unsigned long addr)
return vsie_page;
}
-/* put a vsie page acquired via get_vsie_page */
-static void put_vsie_page(struct kvm *kvm, struct vsie_page *vsie_page)
-{
- struct page *page = pfn_to_page(__pa(vsie_page) >> PAGE_SHIFT);
-
- page_ref_dec(page);
-}
-
int kvm_s390_handle_vsie(struct kvm_vcpu *vcpu)
{
struct vsie_page *vsie_page;
@@ -1470,7 +1500,7 @@ out_unshadow:
out_unpin_scb:
unpin_scb(vcpu, vsie_page, scb_addr);
out_put:
- put_vsie_page(vcpu->kvm, vsie_page);
+ put_vsie_page(vsie_page);
return rc < 0 ? rc : 0;
}
@@ -1486,18 +1516,18 @@ void kvm_s390_vsie_init(struct kvm *kvm)
void kvm_s390_vsie_destroy(struct kvm *kvm)
{
struct vsie_page *vsie_page;
- struct page *page;
int i;
mutex_lock(&kvm->arch.vsie.mutex);
for (i = 0; i < kvm->arch.vsie.page_count; i++) {
- page = kvm->arch.vsie.pages[i];
+ vsie_page = kvm->arch.vsie.pages[i];
kvm->arch.vsie.pages[i] = NULL;
- vsie_page = page_to_virt(page);
release_gmap_shadow(vsie_page);
/* free the radix tree entry */
- radix_tree_delete(&kvm->arch.vsie.addr_to_page, page->index >> 9);
- __free_page(page);
+ if (vsie_page->scb_gpa != ULONG_MAX)
+ radix_tree_delete(&kvm->arch.vsie.addr_to_page,
+ vsie_page->scb_gpa >> 9);
+ free_page((unsigned long)vsie_page);
}
kvm->arch.vsie.page_count = 0;
mutex_unlock(&kvm->arch.vsie.mutex);
diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c
index 16b8a36c56de..94d927785800 100644
--- a/arch/s390/mm/gmap.c
+++ b/arch/s390/mm/gmap.c
@@ -24,6 +24,16 @@
#include <asm/page.h>
#include <asm/tlb.h>
+/*
+ * The address is saved in a radix tree directly; NULL would be ambiguous,
+ * since 0 is a valid address, and NULL is returned when nothing was found.
+ * The lower bits are ignored by all users of the macro, so it can be used
+ * to distinguish a valid address 0 from a NULL.
+ */
+#define VALID_GADDR_FLAG 1
+#define IS_GADDR_VALID(gaddr) ((gaddr) & VALID_GADDR_FLAG)
+#define MAKE_VALID_GADDR(gaddr) (((gaddr) & HPAGE_MASK) | VALID_GADDR_FLAG)
+
#define GMAP_SHADOW_FAKE_TABLE 1ULL
static struct page *gmap_alloc_crst(void)
@@ -43,7 +53,7 @@ static struct page *gmap_alloc_crst(void)
*
* Returns a guest address space structure.
*/
-static struct gmap *gmap_alloc(unsigned long limit)
+struct gmap *gmap_alloc(unsigned long limit)
{
struct gmap *gmap;
struct page *page;
@@ -70,9 +80,7 @@ static struct gmap *gmap_alloc(unsigned long limit)
gmap = kzalloc(sizeof(struct gmap), GFP_KERNEL_ACCOUNT);
if (!gmap)
goto out;
- INIT_LIST_HEAD(&gmap->crst_list);
INIT_LIST_HEAD(&gmap->children);
- INIT_LIST_HEAD(&gmap->pt_list);
INIT_RADIX_TREE(&gmap->guest_to_host, GFP_KERNEL_ACCOUNT);
INIT_RADIX_TREE(&gmap->host_to_guest, GFP_ATOMIC | __GFP_ACCOUNT);
INIT_RADIX_TREE(&gmap->host_to_rmap, GFP_ATOMIC | __GFP_ACCOUNT);
@@ -82,8 +90,6 @@ static struct gmap *gmap_alloc(unsigned long limit)
page = gmap_alloc_crst();
if (!page)
goto out_free;
- page->index = 0;
- list_add(&page->lru, &gmap->crst_list);
table = page_to_virt(page);
crst_table_init(table, etype);
gmap->table = table;
@@ -97,6 +103,7 @@ out_free:
out:
return NULL;
}
+EXPORT_SYMBOL_GPL(gmap_alloc);
/**
* gmap_create - create a guest address space
@@ -185,32 +192,46 @@ static void gmap_rmap_radix_tree_free(struct radix_tree_root *root)
} while (nr > 0);
}
+static void gmap_free_crst(unsigned long *table, bool free_ptes)
+{
+ bool is_segment = (table[0] & _SEGMENT_ENTRY_TYPE_MASK) == 0;
+ int i;
+
+ if (is_segment) {
+ if (!free_ptes)
+ goto out;
+ for (i = 0; i < _CRST_ENTRIES; i++)
+ if (!(table[i] & _SEGMENT_ENTRY_INVALID))
+ page_table_free_pgste(page_ptdesc(phys_to_page(table[i])));
+ } else {
+ for (i = 0; i < _CRST_ENTRIES; i++)
+ if (!(table[i] & _REGION_ENTRY_INVALID))
+ gmap_free_crst(__va(table[i] & PAGE_MASK), free_ptes);
+ }
+
+out:
+ free_pages((unsigned long)table, CRST_ALLOC_ORDER);
+}
+
/**
* gmap_free - free a guest address space
* @gmap: pointer to the guest address space structure
*
* No locks required. There are no references to this gmap anymore.
*/
-static void gmap_free(struct gmap *gmap)
+void gmap_free(struct gmap *gmap)
{
- struct page *page, *next;
-
/* Flush tlb of all gmaps (if not already done for shadows) */
if (!(gmap_is_shadow(gmap) && gmap->removed))
gmap_flush_tlb(gmap);
/* Free all segment & region tables. */
- list_for_each_entry_safe(page, next, &gmap->crst_list, lru)
- __free_pages(page, CRST_ALLOC_ORDER);
+ gmap_free_crst(gmap->table, gmap_is_shadow(gmap));
+
gmap_radix_tree_free(&gmap->guest_to_host);
gmap_radix_tree_free(&gmap->host_to_guest);
/* Free additional data for a shadow gmap */
if (gmap_is_shadow(gmap)) {
- struct ptdesc *ptdesc, *n;
-
- /* Free all page tables. */
- list_for_each_entry_safe(ptdesc, n, &gmap->pt_list, pt_list)
- page_table_free_pgste(ptdesc);
gmap_rmap_radix_tree_free(&gmap->host_to_rmap);
/* Release reference to the parent */
gmap_put(gmap->parent);
@@ -218,6 +239,7 @@ static void gmap_free(struct gmap *gmap)
kfree(gmap);
}
+EXPORT_SYMBOL_GPL(gmap_free);
/**
* gmap_get - increase reference counter for guest address space
@@ -298,10 +320,8 @@ static int gmap_alloc_table(struct gmap *gmap, unsigned long *table,
crst_table_init(new, init);
spin_lock(&gmap->guest_table_lock);
if (*table & _REGION_ENTRY_INVALID) {
- list_add(&page->lru, &gmap->crst_list);
*table = __pa(new) | _REGION_ENTRY_LENGTH |
(*table & _REGION_ENTRY_TYPE_MASK);
- page->index = gaddr;
page = NULL;
}
spin_unlock(&gmap->guest_table_lock);
@@ -310,21 +330,23 @@ static int gmap_alloc_table(struct gmap *gmap, unsigned long *table,
return 0;
}
-/**
- * __gmap_segment_gaddr - find virtual address from segment pointer
- * @entry: pointer to a segment table entry in the guest address space
- *
- * Returns the virtual address in the guest address space for the segment
- */
-static unsigned long __gmap_segment_gaddr(unsigned long *entry)
+static unsigned long host_to_guest_lookup(struct gmap *gmap, unsigned long vmaddr)
{
- struct page *page;
- unsigned long offset;
+ return (unsigned long)radix_tree_lookup(&gmap->host_to_guest, vmaddr >> PMD_SHIFT);
+}
- offset = (unsigned long) entry / sizeof(unsigned long);
- offset = (offset & (PTRS_PER_PMD - 1)) * PMD_SIZE;
- page = pmd_pgtable_page((pmd_t *) entry);
- return page->index + offset;
+static unsigned long host_to_guest_delete(struct gmap *gmap, unsigned long vmaddr)
+{
+ return (unsigned long)radix_tree_delete(&gmap->host_to_guest, vmaddr >> PMD_SHIFT);
+}
+
+static pmd_t *host_to_guest_pmd_delete(struct gmap *gmap, unsigned long vmaddr,
+ unsigned long *gaddr)
+{
+ *gaddr = host_to_guest_delete(gmap, vmaddr);
+ if (IS_GADDR_VALID(*gaddr))
+ return (pmd_t *)gmap_table_walk(gmap, *gaddr, 1);
+ return NULL;
}
/**
@@ -336,16 +358,19 @@ static unsigned long __gmap_segment_gaddr(unsigned long *entry)
*/
static int __gmap_unlink_by_vmaddr(struct gmap *gmap, unsigned long vmaddr)
{
- unsigned long *entry;
+ unsigned long gaddr;
int flush = 0;
+ pmd_t *pmdp;
BUG_ON(gmap_is_shadow(gmap));
spin_lock(&gmap->guest_table_lock);
- entry = radix_tree_delete(&gmap->host_to_guest, vmaddr >> PMD_SHIFT);
- if (entry) {
- flush = (*entry != _SEGMENT_ENTRY_EMPTY);
- *entry = _SEGMENT_ENTRY_EMPTY;
+
+ pmdp = host_to_guest_pmd_delete(gmap, vmaddr, &gaddr);
+ if (pmdp) {
+ flush = (pmd_val(*pmdp) != _SEGMENT_ENTRY_EMPTY);
+ *pmdp = __pmd(_SEGMENT_ENTRY_EMPTY);
}
+
spin_unlock(&gmap->guest_table_lock);
return flush;
}
@@ -464,26 +489,6 @@ unsigned long __gmap_translate(struct gmap *gmap, unsigned long gaddr)
EXPORT_SYMBOL_GPL(__gmap_translate);
/**
- * gmap_translate - translate a guest address to a user space address
- * @gmap: pointer to guest mapping meta data structure
- * @gaddr: guest address
- *
- * Returns user space address which corresponds to the guest address or
- * -EFAULT if no such mapping exists.
- * This function does not establish potentially missing page table entries.
- */
-unsigned long gmap_translate(struct gmap *gmap, unsigned long gaddr)
-{
- unsigned long rc;
-
- mmap_read_lock(gmap->mm);
- rc = __gmap_translate(gmap, gaddr);
- mmap_read_unlock(gmap->mm);
- return rc;
-}
-EXPORT_SYMBOL_GPL(gmap_translate);
-
-/**
* gmap_unlink - disconnect a page table from the gmap shadow tables
* @mm: pointer to the parent mm_struct
* @table: pointer to the host page table
@@ -582,7 +587,8 @@ int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr)
spin_lock(&gmap->guest_table_lock);
if (*table == _SEGMENT_ENTRY_EMPTY) {
rc = radix_tree_insert(&gmap->host_to_guest,
- vmaddr >> PMD_SHIFT, table);
+ vmaddr >> PMD_SHIFT,
+ (void *)MAKE_VALID_GADDR(gaddr));
if (!rc) {
if (pmd_leaf(*pmd)) {
*table = (pmd_val(*pmd) &
@@ -605,130 +611,7 @@ int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr)
radix_tree_preload_end();
return rc;
}
-
-/**
- * fixup_user_fault_nowait - manually resolve a user page fault without waiting
- * @mm: mm_struct of target mm
- * @address: user address
- * @fault_flags:flags to pass down to handle_mm_fault()
- * @unlocked: did we unlock the mmap_lock while retrying
- *
- * This function behaves similarly to fixup_user_fault(), but it guarantees
- * that the fault will be resolved without waiting. The function might drop
- * and re-acquire the mm lock, in which case @unlocked will be set to true.
- *
- * The guarantee is that the fault is handled without waiting, but the
- * function itself might sleep, due to the lock.
- *
- * Context: Needs to be called with mm->mmap_lock held in read mode, and will
- * return with the lock held in read mode; @unlocked will indicate whether
- * the lock has been dropped and re-acquired. This is the same behaviour as
- * fixup_user_fault().
- *
- * Return: 0 on success, -EAGAIN if the fault cannot be resolved without
- * waiting, -EFAULT if the fault cannot be resolved, -ENOMEM if out of
- * memory.
- */
-static int fixup_user_fault_nowait(struct mm_struct *mm, unsigned long address,
- unsigned int fault_flags, bool *unlocked)
-{
- struct vm_area_struct *vma;
- unsigned int test_flags;
- vm_fault_t fault;
- int rc;
-
- fault_flags |= FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_RETRY_NOWAIT;
- test_flags = fault_flags & FAULT_FLAG_WRITE ? VM_WRITE : VM_READ;
-
- vma = find_vma(mm, address);
- if (unlikely(!vma || address < vma->vm_start))
- return -EFAULT;
- if (unlikely(!(vma->vm_flags & test_flags)))
- return -EFAULT;
-
- fault = handle_mm_fault(vma, address, fault_flags, NULL);
- /* the mm lock has been dropped, take it again */
- if (fault & VM_FAULT_COMPLETED) {
- *unlocked = true;
- mmap_read_lock(mm);
- return 0;
- }
- /* the mm lock has not been dropped */
- if (fault & VM_FAULT_ERROR) {
- rc = vm_fault_to_errno(fault, 0);
- BUG_ON(!rc);
- return rc;
- }
- /* the mm lock has not been dropped because of FAULT_FLAG_RETRY_NOWAIT */
- if (fault & VM_FAULT_RETRY)
- return -EAGAIN;
- /* nothing needed to be done and the mm lock has not been dropped */
- return 0;
-}
-
-/**
- * __gmap_fault - resolve a fault on a guest address
- * @gmap: pointer to guest mapping meta data structure
- * @gaddr: guest address
- * @fault_flags: flags to pass down to handle_mm_fault()
- *
- * Context: Needs to be called with mm->mmap_lock held in read mode. Might
- * drop and re-acquire the lock. Will always return with the lock held.
- */
-static int __gmap_fault(struct gmap *gmap, unsigned long gaddr, unsigned int fault_flags)
-{
- unsigned long vmaddr;
- bool unlocked;
- int rc = 0;
-
-retry:
- unlocked = false;
-
- vmaddr = __gmap_translate(gmap, gaddr);
- if (IS_ERR_VALUE(vmaddr))
- return vmaddr;
-
- if (fault_flags & FAULT_FLAG_RETRY_NOWAIT)
- rc = fixup_user_fault_nowait(gmap->mm, vmaddr, fault_flags, &unlocked);
- else
- rc = fixup_user_fault(gmap->mm, vmaddr, fault_flags, &unlocked);
- if (rc)
- return rc;
- /*
- * In the case that fixup_user_fault unlocked the mmap_lock during
- * fault-in, redo __gmap_translate() to avoid racing with a
- * map/unmap_segment.
- * In particular, __gmap_translate(), fixup_user_fault{,_nowait}(),
- * and __gmap_link() must all be called atomically in one go; if the
- * lock had been dropped in between, a retry is needed.
- */
- if (unlocked)
- goto retry;
-
- return __gmap_link(gmap, gaddr, vmaddr);
-}
-
-/**
- * gmap_fault - resolve a fault on a guest address
- * @gmap: pointer to guest mapping meta data structure
- * @gaddr: guest address
- * @fault_flags: flags to pass down to handle_mm_fault()
- *
- * Returns 0 on success, -ENOMEM for out of memory conditions, -EFAULT if the
- * vm address is already mapped to a different guest segment, and -EAGAIN if
- * FAULT_FLAG_RETRY_NOWAIT was specified and the fault could not be processed
- * immediately.
- */
-int gmap_fault(struct gmap *gmap, unsigned long gaddr, unsigned int fault_flags)
-{
- int rc;
-
- mmap_read_lock(gmap->mm);
- rc = __gmap_fault(gmap, gaddr, fault_flags);
- mmap_read_unlock(gmap->mm);
- return rc;
-}
-EXPORT_SYMBOL_GPL(gmap_fault);
+EXPORT_SYMBOL(__gmap_link);
/*
* this function is assumed to be called with mmap_lock held
@@ -853,8 +736,7 @@ static void gmap_call_notifier(struct gmap *gmap, unsigned long start,
*
* Note: Can also be called for shadow gmaps.
*/
-static inline unsigned long *gmap_table_walk(struct gmap *gmap,
- unsigned long gaddr, int level)
+unsigned long *gmap_table_walk(struct gmap *gmap, unsigned long gaddr, int level)
{
const int asce_type = gmap->asce & _ASCE_TYPE_MASK;
unsigned long *table = gmap->table;
@@ -905,6 +787,7 @@ static inline unsigned long *gmap_table_walk(struct gmap *gmap,
}
return table;
}
+EXPORT_SYMBOL(gmap_table_walk);
/**
* gmap_pte_op_walk - walk the gmap page table, get the page table lock
@@ -1101,86 +984,40 @@ static int gmap_protect_pte(struct gmap *gmap, unsigned long gaddr,
* @prot: indicates access rights: PROT_NONE, PROT_READ or PROT_WRITE
* @bits: pgste notification bits to set
*
- * Returns 0 if successfully protected, -ENOMEM if out of memory and
- * -EFAULT if gaddr is invalid (or mapping for shadows is missing).
+ * Returns:
+ * PAGE_SIZE if a small page was successfully protected;
+ * HPAGE_SIZE if a large page was successfully protected;
+ * -ENOMEM if out of memory;
+ * -EFAULT if gaddr is invalid (or mapping for shadows is missing);
+ * -EAGAIN if the guest mapping is missing and should be fixed by the caller.
*
- * Called with sg->mm->mmap_lock in read.
+ * Context: Called with sg->mm->mmap_lock in read.
*/
-static int gmap_protect_range(struct gmap *gmap, unsigned long gaddr,
- unsigned long len, int prot, unsigned long bits)
+int gmap_protect_one(struct gmap *gmap, unsigned long gaddr, int prot, unsigned long bits)
{
- unsigned long vmaddr, dist;
pmd_t *pmdp;
- int rc;
+ int rc = 0;
BUG_ON(gmap_is_shadow(gmap));
- while (len) {
- rc = -EAGAIN;
- pmdp = gmap_pmd_op_walk(gmap, gaddr);
- if (pmdp) {
- if (!pmd_leaf(*pmdp)) {
- rc = gmap_protect_pte(gmap, gaddr, pmdp, prot,
- bits);
- if (!rc) {
- len -= PAGE_SIZE;
- gaddr += PAGE_SIZE;
- }
- } else {
- rc = gmap_protect_pmd(gmap, gaddr, pmdp, prot,
- bits);
- if (!rc) {
- dist = HPAGE_SIZE - (gaddr & ~HPAGE_MASK);
- len = len < dist ? 0 : len - dist;
- gaddr = (gaddr & HPAGE_MASK) + HPAGE_SIZE;
- }
- }
- gmap_pmd_op_end(gmap, pmdp);
- }
- if (rc) {
- if (rc == -EINVAL)
- return rc;
- /* -EAGAIN, fixup of userspace mm and gmap */
- vmaddr = __gmap_translate(gmap, gaddr);
- if (IS_ERR_VALUE(vmaddr))
- return vmaddr;
- rc = gmap_pte_op_fixup(gmap, gaddr, vmaddr, prot);
- if (rc)
- return rc;
- }
- }
- return 0;
-}
+ pmdp = gmap_pmd_op_walk(gmap, gaddr);
+ if (!pmdp)
+ return -EAGAIN;
-/**
- * gmap_mprotect_notify - change access rights for a range of ptes and
- * call the notifier if any pte changes again
- * @gmap: pointer to guest mapping meta data structure
- * @gaddr: virtual address in the guest address space
- * @len: size of area
- * @prot: indicates access rights: PROT_NONE, PROT_READ or PROT_WRITE
- *
- * Returns 0 if for each page in the given range a gmap mapping exists,
- * the new access rights could be set and the notifier could be armed.
- * If the gmap mapping is missing for one or more pages -EFAULT is
- * returned. If no memory could be allocated -ENOMEM is returned.
- * This function establishes missing page table entries.
- */
-int gmap_mprotect_notify(struct gmap *gmap, unsigned long gaddr,
- unsigned long len, int prot)
-{
- int rc;
+ if (!pmd_leaf(*pmdp)) {
+ rc = gmap_protect_pte(gmap, gaddr, pmdp, prot, bits);
+ if (!rc)
+ rc = PAGE_SIZE;
+ } else {
+ rc = gmap_protect_pmd(gmap, gaddr, pmdp, prot, bits);
+ if (!rc)
+ rc = HPAGE_SIZE;
+ }
+ gmap_pmd_op_end(gmap, pmdp);
- if ((gaddr & ~PAGE_MASK) || (len & ~PAGE_MASK) || gmap_is_shadow(gmap))
- return -EINVAL;
- if (!MACHINE_HAS_ESOP && prot == PROT_READ)
- return -EINVAL;
- mmap_read_lock(gmap->mm);
- rc = gmap_protect_range(gmap, gaddr, len, prot, GMAP_NOTIFY_MPROT);
- mmap_read_unlock(gmap->mm);
return rc;
}
-EXPORT_SYMBOL_GPL(gmap_mprotect_notify);
+EXPORT_SYMBOL_GPL(gmap_protect_one);
/**
* gmap_read_table - get an unsigned long value from a guest page table using
@@ -1414,7 +1251,6 @@ static void gmap_unshadow_pgt(struct gmap *sg, unsigned long raddr)
__gmap_unshadow_pgt(sg, raddr, __va(pgt));
/* Free page table */
ptdesc = page_ptdesc(phys_to_page(pgt));
- list_del(&ptdesc->pt_list);
page_table_free_pgste(ptdesc);
}
@@ -1442,7 +1278,6 @@ static void __gmap_unshadow_sgt(struct gmap *sg, unsigned long raddr,
__gmap_unshadow_pgt(sg, raddr, __va(pgt));
/* Free page table */
ptdesc = page_ptdesc(phys_to_page(pgt));
- list_del(&ptdesc->pt_list);
page_table_free_pgste(ptdesc);
}
}
@@ -1472,7 +1307,6 @@ static void gmap_unshadow_sgt(struct gmap *sg, unsigned long raddr)
__gmap_unshadow_sgt(sg, raddr, __va(sgt));
/* Free segment table */
page = phys_to_page(sgt);
- list_del(&page->lru);
__free_pages(page, CRST_ALLOC_ORDER);
}
@@ -1500,7 +1334,6 @@ static void __gmap_unshadow_r3t(struct gmap *sg, unsigned long raddr,
__gmap_unshadow_sgt(sg, raddr, __va(sgt));
/* Free segment table */
page = phys_to_page(sgt);
- list_del(&page->lru);
__free_pages(page, CRST_ALLOC_ORDER);
}
}
@@ -1530,7 +1363,6 @@ static void gmap_unshadow_r3t(struct gmap *sg, unsigned long raddr)
__gmap_unshadow_r3t(sg, raddr, __va(r3t));
/* Free region 3 table */
page = phys_to_page(r3t);
- list_del(&page->lru);
__free_pages(page, CRST_ALLOC_ORDER);
}
@@ -1558,7 +1390,6 @@ static void __gmap_unshadow_r2t(struct gmap *sg, unsigned long raddr,
__gmap_unshadow_r3t(sg, raddr, __va(r3t));
/* Free region 3 table */
page = phys_to_page(r3t);
- list_del(&page->lru);
__free_pages(page, CRST_ALLOC_ORDER);
}
}
@@ -1588,7 +1419,6 @@ static void gmap_unshadow_r2t(struct gmap *sg, unsigned long raddr)
__gmap_unshadow_r2t(sg, raddr, __va(r2t));
/* Free region 2 table */
page = phys_to_page(r2t);
- list_del(&page->lru);
__free_pages(page, CRST_ALLOC_ORDER);
}
@@ -1620,7 +1450,6 @@ static void __gmap_unshadow_r1t(struct gmap *sg, unsigned long raddr,
r1t[i] = _REGION1_ENTRY_EMPTY;
/* Free region 2 table */
page = phys_to_page(r2t);
- list_del(&page->lru);
__free_pages(page, CRST_ALLOC_ORDER);
}
}
@@ -1631,7 +1460,7 @@ static void __gmap_unshadow_r1t(struct gmap *sg, unsigned long raddr,
*
* Called with sg->guest_table_lock
*/
-static void gmap_unshadow(struct gmap *sg)
+void gmap_unshadow(struct gmap *sg)
{
unsigned long *table;
@@ -1657,143 +1486,7 @@ static void gmap_unshadow(struct gmap *sg)
break;
}
}
-
-/**
- * gmap_find_shadow - find a specific asce in the list of shadow tables
- * @parent: pointer to the parent gmap
- * @asce: ASCE for which the shadow table is created
- * @edat_level: edat level to be used for the shadow translation
- *
- * Returns the pointer to a gmap if a shadow table with the given asce is
- * already available, ERR_PTR(-EAGAIN) if another one is just being created,
- * otherwise NULL
- */
-static struct gmap *gmap_find_shadow(struct gmap *parent, unsigned long asce,
- int edat_level)
-{
- struct gmap *sg;
-
- list_for_each_entry(sg, &parent->children, list) {
- if (sg->orig_asce != asce || sg->edat_level != edat_level ||
- sg->removed)
- continue;
- if (!sg->initialized)
- return ERR_PTR(-EAGAIN);
- refcount_inc(&sg->ref_count);
- return sg;
- }
- return NULL;
-}
-
-/**
- * gmap_shadow_valid - check if a shadow guest address space matches the
- * given properties and is still valid
- * @sg: pointer to the shadow guest address space structure
- * @asce: ASCE for which the shadow table is requested
- * @edat_level: edat level to be used for the shadow translation
- *
- * Returns 1 if the gmap shadow is still valid and matches the given
- * properties, the caller can continue using it. Returns 0 otherwise, the
- * caller has to request a new shadow gmap in this case.
- *
- */
-int gmap_shadow_valid(struct gmap *sg, unsigned long asce, int edat_level)
-{
- if (sg->removed)
- return 0;
- return sg->orig_asce == asce && sg->edat_level == edat_level;
-}
-EXPORT_SYMBOL_GPL(gmap_shadow_valid);
-
-/**
- * gmap_shadow - create/find a shadow guest address space
- * @parent: pointer to the parent gmap
- * @asce: ASCE for which the shadow table is created
- * @edat_level: edat level to be used for the shadow translation
- *
- * The pages of the top level page table referred by the asce parameter
- * will be set to read-only and marked in the PGSTEs of the kvm process.
- * The shadow table will be removed automatically on any change to the
- * PTE mapping for the source table.
- *
- * Returns a guest address space structure, ERR_PTR(-ENOMEM) if out of memory,
- * ERR_PTR(-EAGAIN) if the caller has to retry and ERR_PTR(-EFAULT) if the
- * parent gmap table could not be protected.
- */
-struct gmap *gmap_shadow(struct gmap *parent, unsigned long asce,
- int edat_level)
-{
- struct gmap *sg, *new;
- unsigned long limit;
- int rc;
-
- BUG_ON(parent->mm->context.allow_gmap_hpage_1m);
- BUG_ON(gmap_is_shadow(parent));
- spin_lock(&parent->shadow_lock);
- sg = gmap_find_shadow(parent, asce, edat_level);
- spin_unlock(&parent->shadow_lock);
- if (sg)
- return sg;
- /* Create a new shadow gmap */
- limit = -1UL >> (33 - (((asce & _ASCE_TYPE_MASK) >> 2) * 11));
- if (asce & _ASCE_REAL_SPACE)
- limit = -1UL;
- new = gmap_alloc(limit);
- if (!new)
- return ERR_PTR(-ENOMEM);
- new->mm = parent->mm;
- new->parent = gmap_get(parent);
- new->private = parent->private;
- new->orig_asce = asce;
- new->edat_level = edat_level;
- new->initialized = false;
- spin_lock(&parent->shadow_lock);
- /* Recheck if another CPU created the same shadow */
- sg = gmap_find_shadow(parent, asce, edat_level);
- if (sg) {
- spin_unlock(&parent->shadow_lock);
- gmap_free(new);
- return sg;
- }
- if (asce & _ASCE_REAL_SPACE) {
- /* only allow one real-space gmap shadow */
- list_for_each_entry(sg, &parent->children, list) {
- if (sg->orig_asce & _ASCE_REAL_SPACE) {
- spin_lock(&sg->guest_table_lock);
- gmap_unshadow(sg);
- spin_unlock(&sg->guest_table_lock);
- list_del(&sg->list);
- gmap_put(sg);
- break;
- }
- }
- }
- refcount_set(&new->ref_count, 2);
- list_add(&new->list, &parent->children);
- if (asce & _ASCE_REAL_SPACE) {
- /* nothing to protect, return right away */
- new->initialized = true;
- spin_unlock(&parent->shadow_lock);
- return new;
- }
- spin_unlock(&parent->shadow_lock);
- /* protect after insertion, so it will get properly invalidated */
- mmap_read_lock(parent->mm);
- rc = gmap_protect_range(parent, asce & _ASCE_ORIGIN,
- ((asce & _ASCE_TABLE_LENGTH) + 1) * PAGE_SIZE,
- PROT_READ, GMAP_NOTIFY_SHADOW);
- mmap_read_unlock(parent->mm);
- spin_lock(&parent->shadow_lock);
- new->initialized = true;
- if (rc) {
- list_del(&new->list);
- gmap_free(new);
- new = ERR_PTR(rc);
- }
- spin_unlock(&parent->shadow_lock);
- return new;
-}
-EXPORT_SYMBOL_GPL(gmap_shadow);
+EXPORT_SYMBOL(gmap_unshadow);
/**
* gmap_shadow_r2t - create an empty shadow region 2 table
@@ -1827,9 +1520,6 @@ int gmap_shadow_r2t(struct gmap *sg, unsigned long saddr, unsigned long r2t,
page = gmap_alloc_crst();
if (!page)
return -ENOMEM;
- page->index = r2t & _REGION_ENTRY_ORIGIN;
- if (fake)
- page->index |= GMAP_SHADOW_FAKE_TABLE;
s_r2t = page_to_phys(page);
/* Install shadow region second table */
spin_lock(&sg->guest_table_lock);
@@ -1851,7 +1541,6 @@ int gmap_shadow_r2t(struct gmap *sg, unsigned long saddr, unsigned long r2t,
_REGION_ENTRY_TYPE_R1 | _REGION_ENTRY_INVALID;
if (sg->edat_level >= 1)
*table |= (r2t & _REGION_ENTRY_PROTECT);
- list_add(&page->lru, &sg->crst_list);
if (fake) {
/* nothing to protect for fake tables */
*table &= ~_REGION_ENTRY_INVALID;
@@ -1911,9 +1600,6 @@ int gmap_shadow_r3t(struct gmap *sg, unsigned long saddr, unsigned long r3t,
page = gmap_alloc_crst();
if (!page)
return -ENOMEM;
- page->index = r3t & _REGION_ENTRY_ORIGIN;
- if (fake)
- page->index |= GMAP_SHADOW_FAKE_TABLE;
s_r3t = page_to_phys(page);
/* Install shadow region second table */
spin_lock(&sg->guest_table_lock);
@@ -1935,7 +1621,6 @@ int gmap_shadow_r3t(struct gmap *sg, unsigned long saddr, unsigned long r3t,
_REGION_ENTRY_TYPE_R2 | _REGION_ENTRY_INVALID;
if (sg->edat_level >= 1)
*table |= (r3t & _REGION_ENTRY_PROTECT);
- list_add(&page->lru, &sg->crst_list);
if (fake) {
/* nothing to protect for fake tables */
*table &= ~_REGION_ENTRY_INVALID;
@@ -1995,9 +1680,6 @@ int gmap_shadow_sgt(struct gmap *sg, unsigned long saddr, unsigned long sgt,
page = gmap_alloc_crst();
if (!page)
return -ENOMEM;
- page->index = sgt & _REGION_ENTRY_ORIGIN;
- if (fake)
- page->index |= GMAP_SHADOW_FAKE_TABLE;
s_sgt = page_to_phys(page);
/* Install shadow region second table */
spin_lock(&sg->guest_table_lock);
@@ -2019,7 +1701,6 @@ int gmap_shadow_sgt(struct gmap *sg, unsigned long saddr, unsigned long sgt,
_REGION_ENTRY_TYPE_R3 | _REGION_ENTRY_INVALID;
if (sg->edat_level >= 1)
*table |= sgt & _REGION_ENTRY_PROTECT;
- list_add(&page->lru, &sg->crst_list);
if (fake) {
/* nothing to protect for fake tables */
*table &= ~_REGION_ENTRY_INVALID;
@@ -2052,45 +1733,22 @@ out_free:
}
EXPORT_SYMBOL_GPL(gmap_shadow_sgt);
-/**
- * gmap_shadow_pgt_lookup - find a shadow page table
- * @sg: pointer to the shadow guest address space structure
- * @saddr: the address in the shadow aguest address space
- * @pgt: parent gmap address of the page table to get shadowed
- * @dat_protection: if the pgtable is marked as protected by dat
- * @fake: pgt references contiguous guest memory block, not a pgtable
- *
- * Returns 0 if the shadow page table was found and -EAGAIN if the page
- * table was not found.
- *
- * Called with sg->mm->mmap_lock in read.
- */
-int gmap_shadow_pgt_lookup(struct gmap *sg, unsigned long saddr,
- unsigned long *pgt, int *dat_protection,
- int *fake)
+static void gmap_pgste_set_pgt_addr(struct ptdesc *ptdesc, unsigned long pgt_addr)
{
- unsigned long *table;
- struct page *page;
- int rc;
+ unsigned long *pgstes = page_to_virt(ptdesc_page(ptdesc));
- BUG_ON(!gmap_is_shadow(sg));
- spin_lock(&sg->guest_table_lock);
- table = gmap_table_walk(sg, saddr, 1); /* get segment pointer */
- if (table && !(*table & _SEGMENT_ENTRY_INVALID)) {
- /* Shadow page tables are full pages (pte+pgste) */
- page = pfn_to_page(*table >> PAGE_SHIFT);
- *pgt = page->index & ~GMAP_SHADOW_FAKE_TABLE;
- *dat_protection = !!(*table & _SEGMENT_ENTRY_PROTECT);
- *fake = !!(page->index & GMAP_SHADOW_FAKE_TABLE);
- rc = 0;
- } else {
- rc = -EAGAIN;
- }
- spin_unlock(&sg->guest_table_lock);
- return rc;
+ pgstes += _PAGE_ENTRIES;
+
+ pgstes[0] &= ~PGSTE_ST2_MASK;
+ pgstes[1] &= ~PGSTE_ST2_MASK;
+ pgstes[2] &= ~PGSTE_ST2_MASK;
+ pgstes[3] &= ~PGSTE_ST2_MASK;
+ pgstes[0] |= (pgt_addr >> 16) & PGSTE_ST2_MASK;
+ pgstes[1] |= pgt_addr & PGSTE_ST2_MASK;
+ pgstes[2] |= (pgt_addr << 16) & PGSTE_ST2_MASK;
+ pgstes[3] |= (pgt_addr << 32) & PGSTE_ST2_MASK;
}
-EXPORT_SYMBOL_GPL(gmap_shadow_pgt_lookup);
/**
* gmap_shadow_pgt - instantiate a shadow page table
@@ -2119,9 +1777,10 @@ int gmap_shadow_pgt(struct gmap *sg, unsigned long saddr, unsigned long pgt,
ptdesc = page_table_alloc_pgste(sg->mm);
if (!ptdesc)
return -ENOMEM;
- ptdesc->pt_index = pgt & _SEGMENT_ENTRY_ORIGIN;
+ origin = pgt & _SEGMENT_ENTRY_ORIGIN;
if (fake)
- ptdesc->pt_index |= GMAP_SHADOW_FAKE_TABLE;
+ origin |= GMAP_SHADOW_FAKE_TABLE;
+ gmap_pgste_set_pgt_addr(ptdesc, origin);
s_pgt = page_to_phys(ptdesc_page(ptdesc));
/* Install shadow page table */
spin_lock(&sg->guest_table_lock);
@@ -2140,7 +1799,6 @@ int gmap_shadow_pgt(struct gmap *sg, unsigned long saddr, unsigned long pgt,
/* mark as invalid as long as the parent table is not protected */
*table = (unsigned long) s_pgt | _SEGMENT_ENTRY |
(pgt & _SEGMENT_ENTRY_PROTECT) | _SEGMENT_ENTRY_INVALID;
- list_add(&ptdesc->pt_list, &sg->pt_list);
if (fake) {
/* nothing to protect for fake tables */
*table &= ~_SEGMENT_ENTRY_INVALID;
@@ -2318,7 +1976,6 @@ void ptep_notify(struct mm_struct *mm, unsigned long vmaddr,
pte_t *pte, unsigned long bits)
{
unsigned long offset, gaddr = 0;
- unsigned long *table;
struct gmap *gmap, *sg, *next;
offset = ((unsigned long) pte) & (255 * sizeof(pte_t));
@@ -2326,12 +1983,9 @@ void ptep_notify(struct mm_struct *mm, unsigned long vmaddr,
rcu_read_lock();
list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) {
spin_lock(&gmap->guest_table_lock);
- table = radix_tree_lookup(&gmap->host_to_guest,
- vmaddr >> PMD_SHIFT);
- if (table)
- gaddr = __gmap_segment_gaddr(table) + offset;
+ gaddr = host_to_guest_lookup(gmap, vmaddr) + offset;
spin_unlock(&gmap->guest_table_lock);
- if (!table)
+ if (!IS_GADDR_VALID(gaddr))
continue;
if (!list_empty(&gmap->children) && (bits & PGSTE_VSIE_BIT)) {
@@ -2391,10 +2045,8 @@ static void gmap_pmdp_clear(struct mm_struct *mm, unsigned long vmaddr,
rcu_read_lock();
list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) {
spin_lock(&gmap->guest_table_lock);
- pmdp = (pmd_t *)radix_tree_delete(&gmap->host_to_guest,
- vmaddr >> PMD_SHIFT);
+ pmdp = host_to_guest_pmd_delete(gmap, vmaddr, &gaddr);
if (pmdp) {
- gaddr = __gmap_segment_gaddr((unsigned long *)pmdp);
pmdp_notify_gmap(gmap, pmdp, gaddr);
WARN_ON(pmd_val(*pmdp) & ~(_SEGMENT_ENTRY_HARDWARE_BITS_LARGE |
_SEGMENT_ENTRY_GMAP_UC |
@@ -2438,28 +2090,25 @@ EXPORT_SYMBOL_GPL(gmap_pmdp_csp);
*/
void gmap_pmdp_idte_local(struct mm_struct *mm, unsigned long vmaddr)
{
- unsigned long *entry, gaddr;
+ unsigned long gaddr;
struct gmap *gmap;
pmd_t *pmdp;
rcu_read_lock();
list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) {
spin_lock(&gmap->guest_table_lock);
- entry = radix_tree_delete(&gmap->host_to_guest,
- vmaddr >> PMD_SHIFT);
- if (entry) {
- pmdp = (pmd_t *)entry;
- gaddr = __gmap_segment_gaddr(entry);
+ pmdp = host_to_guest_pmd_delete(gmap, vmaddr, &gaddr);
+ if (pmdp) {
pmdp_notify_gmap(gmap, pmdp, gaddr);
- WARN_ON(*entry & ~(_SEGMENT_ENTRY_HARDWARE_BITS_LARGE |
- _SEGMENT_ENTRY_GMAP_UC |
- _SEGMENT_ENTRY));
+ WARN_ON(pmd_val(*pmdp) & ~(_SEGMENT_ENTRY_HARDWARE_BITS_LARGE |
+ _SEGMENT_ENTRY_GMAP_UC |
+ _SEGMENT_ENTRY));
if (MACHINE_HAS_TLB_GUEST)
__pmdp_idte(gaddr, pmdp, IDTE_GUEST_ASCE,
gmap->asce, IDTE_LOCAL);
else if (MACHINE_HAS_IDTE)
__pmdp_idte(gaddr, pmdp, 0, 0, IDTE_LOCAL);
- *entry = _SEGMENT_ENTRY_EMPTY;
+ *pmdp = __pmd(_SEGMENT_ENTRY_EMPTY);
}
spin_unlock(&gmap->guest_table_lock);
}
@@ -2474,22 +2123,19 @@ EXPORT_SYMBOL_GPL(gmap_pmdp_idte_local);
*/
void gmap_pmdp_idte_global(struct mm_struct *mm, unsigned long vmaddr)
{
- unsigned long *entry, gaddr;
+ unsigned long gaddr;
struct gmap *gmap;
pmd_t *pmdp;
rcu_read_lock();
list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) {
spin_lock(&gmap->guest_table_lock);
- entry = radix_tree_delete(&gmap->host_to_guest,
- vmaddr >> PMD_SHIFT);
- if (entry) {
- pmdp = (pmd_t *)entry;
- gaddr = __gmap_segment_gaddr(entry);
+ pmdp = host_to_guest_pmd_delete(gmap, vmaddr, &gaddr);
+ if (pmdp) {
pmdp_notify_gmap(gmap, pmdp, gaddr);
- WARN_ON(*entry & ~(_SEGMENT_ENTRY_HARDWARE_BITS_LARGE |
- _SEGMENT_ENTRY_GMAP_UC |
- _SEGMENT_ENTRY));
+ WARN_ON(pmd_val(*pmdp) & ~(_SEGMENT_ENTRY_HARDWARE_BITS_LARGE |
+ _SEGMENT_ENTRY_GMAP_UC |
+ _SEGMENT_ENTRY));
if (MACHINE_HAS_TLB_GUEST)
__pmdp_idte(gaddr, pmdp, IDTE_GUEST_ASCE,
gmap->asce, IDTE_GLOBAL);
@@ -2497,7 +2143,7 @@ void gmap_pmdp_idte_global(struct mm_struct *mm, unsigned long vmaddr)
__pmdp_idte(gaddr, pmdp, 0, 0, IDTE_GLOBAL);
else
__pmdp_csp(pmdp);
- *entry = _SEGMENT_ENTRY_EMPTY;
+ *pmdp = __pmd(_SEGMENT_ENTRY_EMPTY);
}
spin_unlock(&gmap->guest_table_lock);
}
@@ -2943,49 +2589,6 @@ int __s390_uv_destroy_range(struct mm_struct *mm, unsigned long start,
EXPORT_SYMBOL_GPL(__s390_uv_destroy_range);
/**
- * s390_unlist_old_asce - Remove the topmost level of page tables from the
- * list of page tables of the gmap.
- * @gmap: the gmap whose table is to be removed
- *
- * On s390x, KVM keeps a list of all pages containing the page tables of the
- * gmap (the CRST list). This list is used at tear down time to free all
- * pages that are now not needed anymore.
- *
- * This function removes the topmost page of the tree (the one pointed to by
- * the ASCE) from the CRST list.
- *
- * This means that it will not be freed when the VM is torn down, and needs
- * to be handled separately by the caller, unless a leak is actually
- * intended. Notice that this function will only remove the page from the
- * list, the page will still be used as a top level page table (and ASCE).
- */
-void s390_unlist_old_asce(struct gmap *gmap)
-{
- struct page *old;
-
- old = virt_to_page(gmap->table);
- spin_lock(&gmap->guest_table_lock);
- list_del(&old->lru);
- /*
- * Sometimes the topmost page might need to be "removed" multiple
- * times, for example if the VM is rebooted into secure mode several
- * times concurrently, or if s390_replace_asce fails after calling
- * s390_remove_old_asce and is attempted again later. In that case
- * the old asce has been removed from the list, and therefore it
- * will not be freed when the VM terminates, but the ASCE is still
- * in use and still pointed to.
- * A subsequent call to replace_asce will follow the pointer and try
- * to remove the same page from the list again.
- * Therefore it's necessary that the page of the ASCE has valid
- * pointers, so list_del can work (and do nothing) without
- * dereferencing stale or invalid pointers.
- */
- INIT_LIST_HEAD(&old->lru);
- spin_unlock(&gmap->guest_table_lock);
-}
-EXPORT_SYMBOL_GPL(s390_unlist_old_asce);
-
-/**
* s390_replace_asce - Try to replace the current ASCE of a gmap with a copy
* @gmap: the gmap whose ASCE needs to be replaced
*
@@ -3004,8 +2607,6 @@ int s390_replace_asce(struct gmap *gmap)
struct page *page;
void *table;
- s390_unlist_old_asce(gmap);
-
/* Replacing segment type ASCEs would cause serious issues */
if ((gmap->asce & _ASCE_TYPE_MASK) == _ASCE_TYPE_SEGMENT)
return -EINVAL;
@@ -3013,19 +2614,9 @@ int s390_replace_asce(struct gmap *gmap)
page = gmap_alloc_crst();
if (!page)
return -ENOMEM;
- page->index = 0;
table = page_to_virt(page);
memcpy(table, gmap->table, 1UL << (CRST_ALLOC_ORDER + PAGE_SHIFT));
- /*
- * The caller has to deal with the old ASCE, but here we make sure
- * the new one is properly added to the CRST list, so that
- * it will be freed when the VM is torn down.
- */
- spin_lock(&gmap->guest_table_lock);
- list_add(&page->lru, &gmap->crst_list);
- spin_unlock(&gmap->guest_table_lock);
-
/* Set new table origin while preserving existing ASCE control bits */
asce = (gmap->asce & ~_ASCE_ORIGIN) | __pa(table);
WRITE_ONCE(gmap->asce, asce);
@@ -3035,3 +2626,31 @@ int s390_replace_asce(struct gmap *gmap)
return 0;
}
EXPORT_SYMBOL_GPL(s390_replace_asce);
+
+/**
+ * kvm_s390_wiggle_split_folio() - try to drain extra references to a folio and optionally split
+ * @mm: the mm containing the folio to work on
+ * @folio: the folio
+ * @split: whether to split a large folio
+ *
+ * Context: Must be called while holding an extra reference to the folio;
+ * the mm lock should not be held.
+ */
+int kvm_s390_wiggle_split_folio(struct mm_struct *mm, struct folio *folio, bool split)
+{
+ int rc;
+
+ lockdep_assert_not_held(&mm->mmap_lock);
+ folio_wait_writeback(folio);
+ lru_add_drain_all();
+ if (split) {
+ folio_lock(folio);
+ rc = split_folio(folio);
+ folio_unlock(folio);
+
+ if (rc != -EBUSY)
+ return rc;
+ }
+ return -EAGAIN;
+}
+EXPORT_SYMBOL_GPL(kvm_s390_wiggle_split_folio);
diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c
index cd2fef79ad2c..30387a6e98ff 100644
--- a/arch/s390/mm/pgalloc.c
+++ b/arch/s390/mm/pgalloc.c
@@ -176,8 +176,6 @@ unsigned long *page_table_alloc(struct mm_struct *mm)
}
table = ptdesc_to_virt(ptdesc);
__arch_set_page_dat(table, 1);
- /* pt_list is used by gmap only */
- INIT_LIST_HEAD(&ptdesc->pt_list);
memset64((u64 *)table, _PAGE_INVALID, PTRS_PER_PTE);
memset64((u64 *)table + PTRS_PER_PTE, 0, PTRS_PER_PTE);
return table;
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 87198d957e2f..be2c311f5118 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -2599,7 +2599,8 @@ config MITIGATION_IBPB_ENTRY
depends on CPU_SUP_AMD && X86_64
default y
help
- Compile the kernel with support for the retbleed=ibpb mitigation.
+ Compile the kernel with support for the retbleed=ibpb and
+ spec_rstack_overflow={ibpb,ibpb-vmexit} mitigations.
config MITIGATION_IBRS_ENTRY
bool "Enable IBRS on kernel entry"
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index f2051644de94..606c74f27459 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -25,6 +25,7 @@ targets := vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 vmlinux.bin.lzma \
# avoid errors with '-march=i386', and future flags may depend on the target to
# be valid.
KBUILD_CFLAGS := -m$(BITS) -O2 $(CLANG_FLAGS)
+KBUILD_CFLAGS += -std=gnu11
KBUILD_CFLAGS += -fno-strict-aliasing -fPIE
KBUILD_CFLAGS += -Wundef
KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 5a505aa65489..a5d0998d7604 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -1115,6 +1115,8 @@ do_cmd_auto:
case RETBLEED_MITIGATION_IBPB:
setup_force_cpu_cap(X86_FEATURE_ENTRY_IBPB);
+ setup_force_cpu_cap(X86_FEATURE_IBPB_ON_VMEXIT);
+ mitigate_smt = true;
/*
* IBPB on entry already obviates the need for
@@ -1124,9 +1126,6 @@ do_cmd_auto:
setup_clear_cpu_cap(X86_FEATURE_UNRET);
setup_clear_cpu_cap(X86_FEATURE_RETHUNK);
- setup_force_cpu_cap(X86_FEATURE_IBPB_ON_VMEXIT);
- mitigate_smt = true;
-
/*
* There is no need for RSB filling: entry_ibpb() ensures
* all predictions, including the RSB, are invalidated,
@@ -2646,6 +2645,7 @@ static void __init srso_select_mitigation(void)
if (IS_ENABLED(CONFIG_MITIGATION_IBPB_ENTRY)) {
if (has_microcode) {
setup_force_cpu_cap(X86_FEATURE_ENTRY_IBPB);
+ setup_force_cpu_cap(X86_FEATURE_IBPB_ON_VMEXIT);
srso_mitigation = SRSO_MITIGATION_IBPB;
/*
@@ -2655,6 +2655,13 @@ static void __init srso_select_mitigation(void)
*/
setup_clear_cpu_cap(X86_FEATURE_UNRET);
setup_clear_cpu_cap(X86_FEATURE_RETHUNK);
+
+ /*
+ * There is no need for RSB filling: entry_ibpb() ensures
+ * all predictions, including the RSB, are invalidated,
+ * regardless of IBPB implementation.
+ */
+ setup_clear_cpu_cap(X86_FEATURE_RSB_VMEXIT);
}
} else {
pr_err("WARNING: kernel not compiled with MITIGATION_IBPB_ENTRY.\n");
@@ -2663,8 +2670,8 @@ static void __init srso_select_mitigation(void)
ibpb_on_vmexit:
case SRSO_CMD_IBPB_ON_VMEXIT:
- if (IS_ENABLED(CONFIG_MITIGATION_SRSO)) {
- if (!boot_cpu_has(X86_FEATURE_ENTRY_IBPB) && has_microcode) {
+ if (IS_ENABLED(CONFIG_MITIGATION_IBPB_ENTRY)) {
+ if (has_microcode) {
setup_force_cpu_cap(X86_FEATURE_IBPB_ON_VMEXIT);
srso_mitigation = SRSO_MITIGATION_IBPB_ON_VMEXIT;
@@ -2676,8 +2683,8 @@ ibpb_on_vmexit:
setup_clear_cpu_cap(X86_FEATURE_RSB_VMEXIT);
}
} else {
- pr_err("WARNING: kernel not compiled with MITIGATION_SRSO.\n");
- }
+ pr_err("WARNING: kernel not compiled with MITIGATION_IBPB_ENTRY.\n");
+ }
break;
default:
break;
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 2cbb3874ad39..8eb3a88707f2 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -1180,7 +1180,7 @@ void kvm_set_cpu_caps(void)
SYNTHESIZED_F(SBPB),
SYNTHESIZED_F(IBPB_BRTYPE),
SYNTHESIZED_F(SRSO_NO),
- SYNTHESIZED_F(SRSO_USER_KERNEL_NO),
+ F(SRSO_USER_KERNEL_NO),
);
kvm_cpu_cap_init(CPUID_8000_0022_EAX,
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index a45ae60e84ab..74c20dbb92da 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -7120,6 +7120,19 @@ static void mmu_destroy_caches(void)
kmem_cache_destroy(mmu_page_header_cache);
}
+static void kvm_wake_nx_recovery_thread(struct kvm *kvm)
+{
+ /*
+ * The NX recovery thread is spawned on-demand at the first KVM_RUN and
+ * may not be valid even though the VM is globally visible. Do nothing,
+ * as such a VM can't have any possible NX huge pages.
+ */
+ struct vhost_task *nx_thread = READ_ONCE(kvm->arch.nx_huge_page_recovery_thread);
+
+ if (nx_thread)
+ vhost_task_wake(nx_thread);
+}
+
static int get_nx_huge_pages(char *buffer, const struct kernel_param *kp)
{
if (nx_hugepage_mitigation_hard_disabled)
@@ -7180,7 +7193,7 @@ static int set_nx_huge_pages(const char *val, const struct kernel_param *kp)
kvm_mmu_zap_all_fast(kvm);
mutex_unlock(&kvm->slots_lock);
- vhost_task_wake(kvm->arch.nx_huge_page_recovery_thread);
+ kvm_wake_nx_recovery_thread(kvm);
}
mutex_unlock(&kvm_lock);
}
@@ -7315,7 +7328,7 @@ static int set_nx_huge_pages_recovery_param(const char *val, const struct kernel
mutex_lock(&kvm_lock);
list_for_each_entry(kvm, &vm_list, vm_list)
- vhost_task_wake(kvm->arch.nx_huge_page_recovery_thread);
+ kvm_wake_nx_recovery_thread(kvm);
mutex_unlock(&kvm_lock);
}
@@ -7451,14 +7464,20 @@ static void kvm_mmu_start_lpage_recovery(struct once *once)
{
struct kvm_arch *ka = container_of(once, struct kvm_arch, nx_once);
struct kvm *kvm = container_of(ka, struct kvm, arch);
+ struct vhost_task *nx_thread;
kvm->arch.nx_huge_page_last = get_jiffies_64();
- kvm->arch.nx_huge_page_recovery_thread = vhost_task_create(
- kvm_nx_huge_page_recovery_worker, kvm_nx_huge_page_recovery_worker_kill,
- kvm, "kvm-nx-lpage-recovery");
+ nx_thread = vhost_task_create(kvm_nx_huge_page_recovery_worker,
+ kvm_nx_huge_page_recovery_worker_kill,
+ kvm, "kvm-nx-lpage-recovery");
- if (kvm->arch.nx_huge_page_recovery_thread)
- vhost_task_start(kvm->arch.nx_huge_page_recovery_thread);
+ if (!nx_thread)
+ return;
+
+ vhost_task_start(nx_thread);
+
+ /* Make the task visible only once it is fully started. */
+ WRITE_ONCE(kvm->arch.nx_huge_page_recovery_thread, nx_thread);
}
int kvm_mmu_post_init_vm(struct kvm *kvm)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 6d4a6734b2d6..8e77e61d4fbd 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -12741,6 +12741,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
"does not run without ignore_msrs=1, please report it to kvm@vger.kernel.org.\n");
}
+ once_init(&kvm->arch.nx_once);
return 0;
out_uninit_mmu:
@@ -12750,12 +12751,6 @@ out:
return ret;
}
-int kvm_arch_post_init_vm(struct kvm *kvm)
-{
- once_init(&kvm->arch.nx_once);
- return 0;
-}
-
static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu)
{
vcpu_load(vcpu);
diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S
index 9252652afe59..894edf8d6d62 100644
--- a/arch/x86/xen/xen-head.S
+++ b/arch/x86/xen/xen-head.S
@@ -100,9 +100,6 @@ SYM_FUNC_START(xen_hypercall_hvm)
push %r10
push %r9
push %r8
-#ifdef CONFIG_FRAME_POINTER
- pushq $0 /* Dummy push for stack alignment. */
-#endif
#endif
/* Set the vendor specific function. */
call __xen_hypercall_setfunc
@@ -117,11 +114,8 @@ SYM_FUNC_START(xen_hypercall_hvm)
pop %ebx
pop %eax
#else
- lea xen_hypercall_amd(%rip), %rbx
- cmp %rax, %rbx
-#ifdef CONFIG_FRAME_POINTER
- pop %rax /* Dummy pop. */
-#endif
+ lea xen_hypercall_amd(%rip), %rcx
+ cmp %rax, %rcx
pop %r8
pop %r9
pop %r10
@@ -132,6 +126,7 @@ SYM_FUNC_START(xen_hypercall_hvm)
pop %rcx
pop %rax
#endif
+ FRAME_END
/* Use correct hypercall function. */
jz xen_hypercall_amd
jmp xen_hypercall_intel
diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.c b/drivers/accel/amdxdna/amdxdna_pci_drv.c
index 97d4a032171f..f5b8497cf5ad 100644
--- a/drivers/accel/amdxdna/amdxdna_pci_drv.c
+++ b/drivers/accel/amdxdna/amdxdna_pci_drv.c
@@ -21,6 +21,11 @@
#define AMDXDNA_AUTOSUSPEND_DELAY 5000 /* milliseconds */
+MODULE_FIRMWARE("amdnpu/1502_00/npu.sbin");
+MODULE_FIRMWARE("amdnpu/17f0_10/npu.sbin");
+MODULE_FIRMWARE("amdnpu/17f0_11/npu.sbin");
+MODULE_FIRMWARE("amdnpu/17f0_20/npu.sbin");
+
/*
* Bind the driver base on (vendor_id, device_id) pair and later use the
* (device_id, rev_id) pair as a key to select the devices. The devices with
diff --git a/drivers/accel/ivpu/ivpu_drv.c b/drivers/accel/ivpu/ivpu_drv.c
index 1e8ffbe25eee..38cf1c342c72 100644
--- a/drivers/accel/ivpu/ivpu_drv.c
+++ b/drivers/accel/ivpu/ivpu_drv.c
@@ -397,15 +397,19 @@ int ivpu_boot(struct ivpu_device *vdev)
if (ivpu_fw_is_cold_boot(vdev)) {
ret = ivpu_pm_dct_init(vdev);
if (ret)
- goto err_diagnose_failure;
+ goto err_disable_ipc;
ret = ivpu_hw_sched_init(vdev);
if (ret)
- goto err_diagnose_failure;
+ goto err_disable_ipc;
}
return 0;
+err_disable_ipc:
+ ivpu_ipc_disable(vdev);
+ ivpu_hw_irq_disable(vdev);
+ disable_irq(vdev->irq);
err_diagnose_failure:
ivpu_hw_diagnose_failure(vdev);
ivpu_mmu_evtq_dump(vdev);
diff --git a/drivers/accel/ivpu/ivpu_pm.c b/drivers/accel/ivpu/ivpu_pm.c
index 87d7411ae059..5060c5dd40d1 100644
--- a/drivers/accel/ivpu/ivpu_pm.c
+++ b/drivers/accel/ivpu/ivpu_pm.c
@@ -115,41 +115,57 @@ err_power_down:
return ret;
}
-static void ivpu_pm_recovery_work(struct work_struct *work)
+static void ivpu_pm_reset_begin(struct ivpu_device *vdev)
{
- struct ivpu_pm_info *pm = container_of(work, struct ivpu_pm_info, recovery_work);
- struct ivpu_device *vdev = pm->vdev;
- char *evt[2] = {"IVPU_PM_EVENT=IVPU_RECOVER", NULL};
- int ret;
-
- ivpu_err(vdev, "Recovering the NPU (reset #%d)\n", atomic_read(&vdev->pm->reset_counter));
-
- ret = pm_runtime_resume_and_get(vdev->drm.dev);
- if (ret)
- ivpu_err(vdev, "Failed to resume NPU: %d\n", ret);
-
- ivpu_jsm_state_dump(vdev);
- ivpu_dev_coredump(vdev);
+ pm_runtime_disable(vdev->drm.dev);
atomic_inc(&vdev->pm->reset_counter);
atomic_set(&vdev->pm->reset_pending, 1);
down_write(&vdev->pm->reset_lock);
+}
+
+static void ivpu_pm_reset_complete(struct ivpu_device *vdev)
+{
+ int ret;
- ivpu_suspend(vdev);
ivpu_pm_prepare_cold_boot(vdev);
ivpu_jobs_abort_all(vdev);
ivpu_ms_cleanup_all(vdev);
ret = ivpu_resume(vdev);
- if (ret)
+ if (ret) {
ivpu_err(vdev, "Failed to resume NPU: %d\n", ret);
+ pm_runtime_set_suspended(vdev->drm.dev);
+ } else {
+ pm_runtime_set_active(vdev->drm.dev);
+ }
up_write(&vdev->pm->reset_lock);
atomic_set(&vdev->pm->reset_pending, 0);
- kobject_uevent_env(&vdev->drm.dev->kobj, KOBJ_CHANGE, evt);
pm_runtime_mark_last_busy(vdev->drm.dev);
- pm_runtime_put_autosuspend(vdev->drm.dev);
+ pm_runtime_enable(vdev->drm.dev);
+}
+
+static void ivpu_pm_recovery_work(struct work_struct *work)
+{
+ struct ivpu_pm_info *pm = container_of(work, struct ivpu_pm_info, recovery_work);
+ struct ivpu_device *vdev = pm->vdev;
+ char *evt[2] = {"IVPU_PM_EVENT=IVPU_RECOVER", NULL};
+
+ ivpu_err(vdev, "Recovering the NPU (reset #%d)\n", atomic_read(&vdev->pm->reset_counter));
+
+ ivpu_pm_reset_begin(vdev);
+
+ if (!pm_runtime_status_suspended(vdev->drm.dev)) {
+ ivpu_jsm_state_dump(vdev);
+ ivpu_dev_coredump(vdev);
+ ivpu_suspend(vdev);
+ }
+
+ ivpu_pm_reset_complete(vdev);
+
+ kobject_uevent_env(&vdev->drm.dev->kobj, KOBJ_CHANGE, evt);
}
void ivpu_pm_trigger_recovery(struct ivpu_device *vdev, const char *reason)
@@ -309,7 +325,10 @@ int ivpu_rpm_get(struct ivpu_device *vdev)
int ret;
ret = pm_runtime_resume_and_get(vdev->drm.dev);
- drm_WARN_ON(&vdev->drm, ret < 0);
+ if (ret < 0) {
+ ivpu_err(vdev, "Failed to resume NPU: %d\n", ret);
+ pm_runtime_set_suspended(vdev->drm.dev);
+ }
return ret;
}
@@ -325,16 +344,13 @@ void ivpu_pm_reset_prepare_cb(struct pci_dev *pdev)
struct ivpu_device *vdev = pci_get_drvdata(pdev);
ivpu_dbg(vdev, PM, "Pre-reset..\n");
- atomic_inc(&vdev->pm->reset_counter);
- atomic_set(&vdev->pm->reset_pending, 1);
- pm_runtime_get_sync(vdev->drm.dev);
- down_write(&vdev->pm->reset_lock);
- ivpu_prepare_for_reset(vdev);
- ivpu_hw_reset(vdev);
- ivpu_pm_prepare_cold_boot(vdev);
- ivpu_jobs_abort_all(vdev);
- ivpu_ms_cleanup_all(vdev);
+ ivpu_pm_reset_begin(vdev);
+
+ if (!pm_runtime_status_suspended(vdev->drm.dev)) {
+ ivpu_prepare_for_reset(vdev);
+ ivpu_hw_reset(vdev);
+ }
ivpu_dbg(vdev, PM, "Pre-reset done.\n");
}
@@ -342,18 +358,12 @@ void ivpu_pm_reset_prepare_cb(struct pci_dev *pdev)
void ivpu_pm_reset_done_cb(struct pci_dev *pdev)
{
struct ivpu_device *vdev = pci_get_drvdata(pdev);
- int ret;
ivpu_dbg(vdev, PM, "Post-reset..\n");
- ret = ivpu_resume(vdev);
- if (ret)
- ivpu_err(vdev, "Failed to set RESUME state: %d\n", ret);
- up_write(&vdev->pm->reset_lock);
- atomic_set(&vdev->pm->reset_pending, 0);
- ivpu_dbg(vdev, PM, "Post-reset done.\n");
- pm_runtime_mark_last_busy(vdev->drm.dev);
- pm_runtime_put_autosuspend(vdev->drm.dev);
+ ivpu_pm_reset_complete(vdev);
+
+ ivpu_dbg(vdev, PM, "Post-reset done.\n");
}
void ivpu_pm_init(struct ivpu_device *vdev)
diff --git a/drivers/acpi/prmt.c b/drivers/acpi/prmt.c
index 747f83f7114d..e549914a636c 100644
--- a/drivers/acpi/prmt.c
+++ b/drivers/acpi/prmt.c
@@ -287,9 +287,7 @@ static acpi_status acpi_platformrt_space_handler(u32 function,
if (!handler || !module)
goto invalid_guid;
- if (!handler->handler_addr ||
- !handler->static_data_buffer_addr ||
- !handler->acpi_param_buffer_addr) {
+ if (!handler->handler_addr) {
buffer->prm_status = PRM_HANDLER_ERROR;
return AE_OK;
}
diff --git a/drivers/acpi/property.c b/drivers/acpi/property.c
index 98d93ed58315..436019d96027 100644
--- a/drivers/acpi/property.c
+++ b/drivers/acpi/property.c
@@ -1187,8 +1187,6 @@ static int acpi_data_prop_read(const struct acpi_device_data *data,
}
break;
}
- if (nval == 0)
- return -EINVAL;
if (obj->type == ACPI_TYPE_BUFFER) {
if (proptype != DEV_PROP_U8)
@@ -1212,9 +1210,11 @@ static int acpi_data_prop_read(const struct acpi_device_data *data,
ret = acpi_copy_property_array_uint(items, (u64 *)val, nval);
break;
case DEV_PROP_STRING:
- ret = acpi_copy_property_array_string(
- items, (char **)val,
- min_t(u32, nval, obj->package.count));
+ nval = min_t(u32, nval, obj->package.count);
+ if (nval == 0)
+ return -ENODATA;
+
+ ret = acpi_copy_property_array_string(items, (char **)val, nval);
break;
default:
ret = -EINVAL;
diff --git a/drivers/acpi/resource.c b/drivers/acpi/resource.c
index 90aaec923889..b4cd14e7fa76 100644
--- a/drivers/acpi/resource.c
+++ b/drivers/acpi/resource.c
@@ -564,6 +564,12 @@ static const struct dmi_system_id irq1_edge_low_force_override[] = {
},
},
{
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Eluktronics Inc."),
+ DMI_MATCH(DMI_BOARD_NAME, "MECH-17"),
+ },
+ },
+ {
/* TongFang GM6XGxX/TUXEDO Stellaris 16 Gen5 AMD */
.matches = {
DMI_MATCH(DMI_BOARD_NAME, "GM6XGxX"),
diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c
index d497d448e4b2..40e1d8d8a589 100644
--- a/drivers/base/power/main.c
+++ b/drivers/base/power/main.c
@@ -1191,24 +1191,18 @@ static pm_message_t resume_event(pm_message_t sleep_state)
return PMSG_ON;
}
-static void dpm_superior_set_must_resume(struct device *dev, bool set_active)
+static void dpm_superior_set_must_resume(struct device *dev)
{
struct device_link *link;
int idx;
- if (dev->parent) {
+ if (dev->parent)
dev->parent->power.must_resume = true;
- if (set_active)
- dev->parent->power.set_active = true;
- }
idx = device_links_read_lock();
- list_for_each_entry_rcu_locked(link, &dev->links.suppliers, c_node) {
+ list_for_each_entry_rcu_locked(link, &dev->links.suppliers, c_node)
link->supplier->power.must_resume = true;
- if (set_active)
- link->supplier->power.set_active = true;
- }
device_links_read_unlock(idx);
}
@@ -1287,9 +1281,12 @@ Skip:
dev->power.must_resume = true;
if (dev->power.must_resume) {
- dev->power.set_active = dev->power.set_active ||
- dev_pm_test_driver_flags(dev, DPM_FLAG_SMART_SUSPEND);
- dpm_superior_set_must_resume(dev, dev->power.set_active);
+ if (dev_pm_test_driver_flags(dev, DPM_FLAG_SMART_SUSPEND)) {
+ dev->power.set_active = true;
+ if (dev->parent && !dev->parent->power.ignore_children)
+ dev->parent->power.set_active = true;
+ }
+ dpm_superior_set_must_resume(dev);
}
Complete:
diff --git a/drivers/block/sunvdc.c b/drivers/block/sunvdc.c
index 33b3bc99d532..282f81616a78 100644
--- a/drivers/block/sunvdc.c
+++ b/drivers/block/sunvdc.c
@@ -1127,8 +1127,8 @@ static void vdc_queue_drain(struct vdc_port *port)
spin_lock_irq(&port->vio.lock);
port->drain = 0;
- blk_mq_unquiesce_queue(q, memflags);
- blk_mq_unfreeze_queue(q);
+ blk_mq_unquiesce_queue(q);
+ blk_mq_unfreeze_queue(q, memflags);
}
static void vdc_ldc_reset_timer_work(struct work_struct *work)
diff --git a/drivers/bus/moxtet.c b/drivers/bus/moxtet.c
index 6276551d7968..1e57ebfb7622 100644
--- a/drivers/bus/moxtet.c
+++ b/drivers/bus/moxtet.c
@@ -657,7 +657,7 @@ static void moxtet_irq_print_chip(struct irq_data *d, struct seq_file *p)
id = moxtet->modules[pos->idx];
- seq_printf(p, " moxtet-%s.%i#%i", mox_module_name(id), pos->idx,
+ seq_printf(p, "moxtet-%s.%i#%i", mox_module_name(id), pos->idx,
pos->bit);
}
diff --git a/drivers/cpufreq/Kconfig.arm b/drivers/cpufreq/Kconfig.arm
index 0ee5c691fb36..9e46960f6a86 100644
--- a/drivers/cpufreq/Kconfig.arm
+++ b/drivers/cpufreq/Kconfig.arm
@@ -17,7 +17,8 @@ config ARM_ALLWINNER_SUN50I_CPUFREQ_NVMEM
config ARM_AIROHA_SOC_CPUFREQ
tristate "Airoha EN7581 SoC CPUFreq support"
- depends on (ARCH_AIROHA && OF) || COMPILE_TEST
+ depends on ARCH_AIROHA || COMPILE_TEST
+ depends on OF
select PM_OPP
default ARCH_AIROHA
help
diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c
index dd9b8d6993d6..313550fa62d4 100644
--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -699,7 +699,7 @@ static void amd_pstate_adjust_perf(unsigned int cpu,
if (min_perf < lowest_nonlinear_perf)
min_perf = lowest_nonlinear_perf;
- max_perf = cap_perf;
+ max_perf = cpudata->max_limit_perf;
if (max_perf < min_perf)
max_perf = min_perf;
@@ -747,7 +747,6 @@ static int amd_pstate_set_boost(struct cpufreq_policy *policy, int state)
guard(mutex)(&amd_pstate_driver_lock);
ret = amd_pstate_cpu_boost_update(policy, state);
- policy->boost_enabled = !ret ? state : false;
refresh_frequency_limits(policy);
return ret;
@@ -822,25 +821,28 @@ static void amd_pstate_init_prefcore(struct amd_cpudata *cpudata)
static void amd_pstate_update_limits(unsigned int cpu)
{
- struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
+ struct cpufreq_policy *policy = NULL;
struct amd_cpudata *cpudata;
u32 prev_high = 0, cur_high = 0;
int ret;
bool highest_perf_changed = false;
+ if (!amd_pstate_prefcore)
+ return;
+
+ policy = cpufreq_cpu_get(cpu);
if (!policy)
return;
cpudata = policy->driver_data;
- if (!amd_pstate_prefcore)
- return;
-
guard(mutex)(&amd_pstate_driver_lock);
ret = amd_get_highest_perf(cpu, &cur_high);
- if (ret)
- goto free_cpufreq_put;
+ if (ret) {
+ cpufreq_cpu_put(policy);
+ return;
+ }
prev_high = READ_ONCE(cpudata->prefcore_ranking);
highest_perf_changed = (prev_high != cur_high);
@@ -850,8 +852,6 @@ static void amd_pstate_update_limits(unsigned int cpu)
if (cur_high < CPPC_MAX_PERF)
sched_set_itmt_core_prio((int)cur_high, cpu);
}
-
-free_cpufreq_put:
cpufreq_cpu_put(policy);
if (!highest_perf_changed)
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index e0048856ecee..30ffbddc7ece 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -1571,7 +1571,8 @@ static int cpufreq_online(unsigned int cpu)
policy->cdev = of_cpufreq_cooling_register(policy);
/* Let the per-policy boost flag mirror the cpufreq_driver boost during init */
- if (policy->boost_enabled != cpufreq_boost_enabled()) {
+ if (cpufreq_driver->set_boost &&
+ policy->boost_enabled != cpufreq_boost_enabled()) {
policy->boost_enabled = cpufreq_boost_enabled();
ret = cpufreq_driver->set_boost(policy, policy->boost_enabled);
if (ret) {
diff --git a/drivers/firmware/Kconfig b/drivers/firmware/Kconfig
index 71d8b26c4103..9f35f69e0f9e 100644
--- a/drivers/firmware/Kconfig
+++ b/drivers/firmware/Kconfig
@@ -106,7 +106,7 @@ config ISCSI_IBFT
select ISCSI_BOOT_SYSFS
select ISCSI_IBFT_FIND if X86
depends on ACPI && SCSI && SCSI_LOWLEVEL
- default n
+ default n
help
This option enables support for detection and exposing of iSCSI
Boot Firmware Table (iBFT) via sysfs to userspace. If you wish to
diff --git a/drivers/firmware/iscsi_ibft.c b/drivers/firmware/iscsi_ibft.c
index 6e9788324fea..371f24569b3b 100644
--- a/drivers/firmware/iscsi_ibft.c
+++ b/drivers/firmware/iscsi_ibft.c
@@ -310,7 +310,10 @@ static ssize_t ibft_attr_show_nic(void *data, int type, char *buf)
str += sprintf_ipaddr(str, nic->ip_addr);
break;
case ISCSI_BOOT_ETH_SUBNET_MASK:
- val = cpu_to_be32(~((1 << (32-nic->subnet_mask_prefix))-1));
+ if (nic->subnet_mask_prefix > 32)
+ val = cpu_to_be32(~0);
+ else
+ val = cpu_to_be32(~((1 << (32-nic->subnet_mask_prefix))-1));
str += sprintf(str, "%pI4", &val);
break;
case ISCSI_BOOT_ETH_PREFIX_LEN:
diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig
index add5ad29a673..98b4d1633b25 100644
--- a/drivers/gpio/Kconfig
+++ b/drivers/gpio/Kconfig
@@ -338,6 +338,7 @@ config GPIO_GRANITERAPIDS
config GPIO_GRGPIO
tristate "Aeroflex Gaisler GRGPIO support"
+ depends on OF || COMPILE_TEST
select GPIO_GENERIC
select IRQ_DOMAIN
help
diff --git a/drivers/gpio/gpio-pca953x.c b/drivers/gpio/gpio-pca953x.c
index be4c9981ebc4..d63c1030e6ac 100644
--- a/drivers/gpio/gpio-pca953x.c
+++ b/drivers/gpio/gpio-pca953x.c
@@ -841,25 +841,6 @@ static bool pca953x_irq_pending(struct pca953x_chip *chip, unsigned long *pendin
DECLARE_BITMAP(trigger, MAX_LINE);
int ret;
- if (chip->driver_data & PCA_PCAL) {
- /* Read the current interrupt status from the device */
- ret = pca953x_read_regs(chip, PCAL953X_INT_STAT, trigger);
- if (ret)
- return false;
-
- /* Check latched inputs and clear interrupt status */
- ret = pca953x_read_regs(chip, chip->regs->input, cur_stat);
- if (ret)
- return false;
-
- /* Apply filter for rising/falling edge selection */
- bitmap_replace(new_stat, chip->irq_trig_fall, chip->irq_trig_raise, cur_stat, gc->ngpio);
-
- bitmap_and(pending, new_stat, trigger, gc->ngpio);
-
- return !bitmap_empty(pending, gc->ngpio);
- }
-
ret = pca953x_read_regs(chip, chip->regs->input, cur_stat);
if (ret)
return false;
diff --git a/drivers/gpio/gpio-sim.c b/drivers/gpio/gpio-sim.c
index a086087ada17..b6c230fab840 100644
--- a/drivers/gpio/gpio-sim.c
+++ b/drivers/gpio/gpio-sim.c
@@ -1028,20 +1028,23 @@ gpio_sim_device_lockup_configfs(struct gpio_sim_device *dev, bool lock)
struct configfs_subsystem *subsys = dev->group.cg_subsys;
struct gpio_sim_bank *bank;
struct gpio_sim_line *line;
+ struct config_item *item;
/*
- * The device only needs to depend on leaf line entries. This is
+ * The device only needs to depend on leaf entries. This is
* sufficient to lock up all the configfs entries that the
* instantiated, alive device depends on.
*/
list_for_each_entry(bank, &dev->bank_list, siblings) {
list_for_each_entry(line, &bank->line_list, siblings) {
+ item = line->hog ? &line->hog->item
+ : &line->group.cg_item;
+
if (lock)
- WARN_ON(configfs_depend_item_unlocked(
- subsys, &line->group.cg_item));
+ WARN_ON(configfs_depend_item_unlocked(subsys,
+ item));
else
- configfs_undepend_item_unlocked(
- &line->group.cg_item);
+ configfs_undepend_item_unlocked(item);
}
}
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 817116e53d44..dce9323fb410 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -119,9 +119,10 @@
* - 3.57.0 - Compute tunneling on GFX10+
* - 3.58.0 - Add GFX12 DCC support
* - 3.59.0 - Cleared VRAM
+ * - 3.60.0 - Add AMDGPU_TILING_GFX12_DCC_WRITE_COMPRESS_DISABLE (Vulkan requirement)
*/
#define KMS_DRIVER_MAJOR 3
-#define KMS_DRIVER_MINOR 59
+#define KMS_DRIVER_MINOR 60
#define KMS_DRIVER_PATCHLEVEL 0
/*
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index ff286940ab43..01ae2f88dec8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -309,7 +309,7 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
mutex_lock(&adev->mman.gtt_window_lock);
while (src_mm.remaining) {
uint64_t from, to, cur_size, tiling_flags;
- uint32_t num_type, data_format, max_com;
+ uint32_t num_type, data_format, max_com, write_compress_disable;
struct dma_fence *next;
/* Never copy more than 256MiB at once to avoid a timeout */
@@ -340,9 +340,13 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
max_com = AMDGPU_TILING_GET(tiling_flags, GFX12_DCC_MAX_COMPRESSED_BLOCK);
num_type = AMDGPU_TILING_GET(tiling_flags, GFX12_DCC_NUMBER_TYPE);
data_format = AMDGPU_TILING_GET(tiling_flags, GFX12_DCC_DATA_FORMAT);
+ write_compress_disable =
+ AMDGPU_TILING_GET(tiling_flags, GFX12_DCC_WRITE_COMPRESS_DISABLE);
copy_flags |= (AMDGPU_COPY_FLAGS_SET(MAX_COMPRESSED, max_com) |
AMDGPU_COPY_FLAGS_SET(NUMBER_TYPE, num_type) |
- AMDGPU_COPY_FLAGS_SET(DATA_FORMAT, data_format));
+ AMDGPU_COPY_FLAGS_SET(DATA_FORMAT, data_format) |
+ AMDGPU_COPY_FLAGS_SET(WRITE_COMPRESS_DISABLE,
+ write_compress_disable));
}
r = amdgpu_copy_buffer(ring, from, to, cur_size, resv,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
index 461fb8090ae0..208b7d1d8a27 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
@@ -119,6 +119,8 @@ struct amdgpu_copy_mem {
#define AMDGPU_COPY_FLAGS_NUMBER_TYPE_MASK 0x07
#define AMDGPU_COPY_FLAGS_DATA_FORMAT_SHIFT 8
#define AMDGPU_COPY_FLAGS_DATA_FORMAT_MASK 0x3f
+#define AMDGPU_COPY_FLAGS_WRITE_COMPRESS_DISABLE_SHIFT 14
+#define AMDGPU_COPY_FLAGS_WRITE_COMPRESS_DISABLE_MASK 0x1
#define AMDGPU_COPY_FLAGS_SET(field, value) \
(((__u32)(value) & AMDGPU_COPY_FLAGS_##field##_MASK) << AMDGPU_COPY_FLAGS_##field##_SHIFT)
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c
index 9c17df2cf37b..7e10e94624e3 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c
@@ -1741,11 +1741,12 @@ static void sdma_v7_0_emit_copy_buffer(struct amdgpu_ib *ib,
uint32_t byte_count,
uint32_t copy_flags)
{
- uint32_t num_type, data_format, max_com;
+ uint32_t num_type, data_format, max_com, write_cm;
max_com = AMDGPU_COPY_FLAGS_GET(copy_flags, MAX_COMPRESSED);
data_format = AMDGPU_COPY_FLAGS_GET(copy_flags, DATA_FORMAT);
num_type = AMDGPU_COPY_FLAGS_GET(copy_flags, NUMBER_TYPE);
+ write_cm = AMDGPU_COPY_FLAGS_GET(copy_flags, WRITE_COMPRESS_DISABLE) ? 2 : 1;
ib->ptr[ib->length_dw++] = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_COPY) |
SDMA_PKT_COPY_LINEAR_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR) |
@@ -1762,7 +1763,7 @@ static void sdma_v7_0_emit_copy_buffer(struct amdgpu_ib *ib,
if ((copy_flags & (AMDGPU_COPY_FLAGS_READ_DECOMPRESSED | AMDGPU_COPY_FLAGS_WRITE_COMPRESSED)))
ib->ptr[ib->length_dw++] = SDMA_DCC_DATA_FORMAT(data_format) | SDMA_DCC_NUM_TYPE(num_type) |
((copy_flags & AMDGPU_COPY_FLAGS_READ_DECOMPRESSED) ? SDMA_DCC_READ_CM(2) : 0) |
- ((copy_flags & AMDGPU_COPY_FLAGS_WRITE_COMPRESSED) ? SDMA_DCC_WRITE_CM(1) : 0) |
+ ((copy_flags & AMDGPU_COPY_FLAGS_WRITE_COMPRESSED) ? SDMA_DCC_WRITE_CM(write_cm) : 0) |
SDMA_DCC_MAX_COM(max_com) | SDMA_DCC_MAX_UCOM(1);
else
ib->ptr[ib->length_dw++] = 0;
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c
index cecaadf741ad..f84e795e35f5 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc.c
@@ -2133,7 +2133,7 @@ static enum dc_status dc_commit_state_no_check(struct dc *dc, struct dc_state *c
dc_enable_stereo(dc, context, dc_streams, context->stream_count);
- if (context->stream_count > get_seamless_boot_stream_count(context) ||
+ if (get_seamless_boot_stream_count(context) == 0 ||
context->stream_count == 0) {
/* Must wait for no flips to be pending before doing optimize bw */
hwss_wait_for_no_pipes_pending(dc, context);
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_hw_lock_mgr.c b/drivers/gpu/drm/amd/display/dc/dce/dmub_hw_lock_mgr.c
index 5bb8b78bf250..bf636b28e3e1 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dmub_hw_lock_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_hw_lock_mgr.c
@@ -63,8 +63,7 @@ void dmub_hw_lock_mgr_inbox0_cmd(struct dc_dmub_srv *dmub_srv,
bool should_use_dmub_lock(struct dc_link *link)
{
- if (link->psr_settings.psr_version == DC_PSR_VERSION_SU_1 ||
- link->psr_settings.psr_version == DC_PSR_VERSION_1)
+ if (link->psr_settings.psr_version == DC_PSR_VERSION_SU_1)
return true;
if (link->replay_settings.replay_feature_enabled)
diff --git a/drivers/gpu/drm/amd/display/dc/dml/Makefile b/drivers/gpu/drm/amd/display/dc/dml/Makefile
index 46f9c05de16e..e1d500633dfa 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dml/Makefile
@@ -29,11 +29,15 @@ dml_ccflags := $(CC_FLAGS_FPU)
dml_rcflags := $(CC_FLAGS_NO_FPU)
ifneq ($(CONFIG_FRAME_WARN),0)
-ifeq ($(filter y,$(CONFIG_KASAN)$(CONFIG_KCSAN)),y)
-frame_warn_flag := -Wframe-larger-than=3072
-else
-frame_warn_flag := -Wframe-larger-than=2048
-endif
+ ifeq ($(filter y,$(CONFIG_KASAN)$(CONFIG_KCSAN)),y)
+ frame_warn_limit := 3072
+ else
+ frame_warn_limit := 2048
+ endif
+
+ ifeq ($(call test-lt, $(CONFIG_FRAME_WARN), $(frame_warn_limit)),y)
+ frame_warn_flag := -Wframe-larger-than=$(frame_warn_limit)
+ endif
endif
CFLAGS_$(AMDDALPATH)/dc/dml/display_mode_lib.o := $(dml_ccflags)
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/Makefile b/drivers/gpu/drm/amd/display/dc/dml2/Makefile
index 91c4f3b4bd5f..21fd466dba26 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dml2/Makefile
@@ -28,15 +28,19 @@ dml2_ccflags := $(CC_FLAGS_FPU)
dml2_rcflags := $(CC_FLAGS_NO_FPU)
ifneq ($(CONFIG_FRAME_WARN),0)
-ifeq ($(filter y,$(CONFIG_KASAN)$(CONFIG_KCSAN)),y)
-ifeq ($(CONFIG_CC_IS_CLANG)$(CONFIG_COMPILE_TEST),yy)
-frame_warn_flag := -Wframe-larger-than=4096
-else
-frame_warn_flag := -Wframe-larger-than=3072
-endif
-else
-frame_warn_flag := -Wframe-larger-than=2048
-endif
+ ifeq ($(filter y,$(CONFIG_KASAN)$(CONFIG_KCSAN)),y)
+ ifeq ($(CONFIG_CC_IS_CLANG)$(CONFIG_COMPILE_TEST),yy)
+ frame_warn_limit := 4096
+ else
+ frame_warn_limit := 3072
+ endif
+ else
+ frame_warn_limit := 2048
+ endif
+
+ ifeq ($(call test-lt, $(CONFIG_FRAME_WARN), $(frame_warn_limit)),y)
+ frame_warn_flag := -Wframe-larger-than=$(frame_warn_limit)
+ endif
endif
subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/dml2
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c
index b9c6b45f6872..0c8ec30ea672 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c
@@ -1017,7 +1017,7 @@ bool dml21_map_dc_state_into_dml_display_cfg(const struct dc *in_dc, struct dc_s
if (disp_cfg_stream_location < 0)
disp_cfg_stream_location = dml_dispcfg->num_streams++;
- ASSERT(disp_cfg_stream_location >= 0 && disp_cfg_stream_location <= __DML2_WRAPPER_MAX_STREAMS_PLANES__);
+ ASSERT(disp_cfg_stream_location >= 0 && disp_cfg_stream_location < __DML2_WRAPPER_MAX_STREAMS_PLANES__);
populate_dml21_timing_config_from_stream_state(&dml_dispcfg->stream_descriptors[disp_cfg_stream_location].timing, context->streams[stream_index], dml_ctx);
adjust_dml21_hblank_timing_config_from_pipe_ctx(&dml_dispcfg->stream_descriptors[disp_cfg_stream_location].timing, &context->res_ctx.pipe_ctx[stream_index]);
populate_dml21_output_config_from_stream_state(&dml_dispcfg->stream_descriptors[disp_cfg_stream_location].output, context->streams[stream_index], &context->res_ctx.pipe_ctx[stream_index]);
@@ -1042,7 +1042,7 @@ bool dml21_map_dc_state_into_dml_display_cfg(const struct dc *in_dc, struct dc_s
if (disp_cfg_plane_location < 0)
disp_cfg_plane_location = dml_dispcfg->num_planes++;
- ASSERT(disp_cfg_plane_location >= 0 && disp_cfg_plane_location <= __DML2_WRAPPER_MAX_STREAMS_PLANES__);
+ ASSERT(disp_cfg_plane_location >= 0 && disp_cfg_plane_location < __DML2_WRAPPER_MAX_STREAMS_PLANES__);
populate_dml21_surface_config_from_plane_state(in_dc, &dml_dispcfg->plane_descriptors[disp_cfg_plane_location].surface, context->stream_status[stream_index].plane_states[plane_index]);
populate_dml21_plane_config_from_plane_state(dml_ctx, &dml_dispcfg->plane_descriptors[disp_cfg_plane_location], context->stream_status[stream_index].plane_states[plane_index], context, stream_index);
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c
index b416320873e1..b8a34abaf519 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c
@@ -786,7 +786,7 @@ static void populate_dml_output_cfg_from_stream_state(struct dml_output_cfg_st *
case SIGNAL_TYPE_DISPLAY_PORT_MST:
case SIGNAL_TYPE_DISPLAY_PORT:
out->OutputEncoder[location] = dml_dp;
- if (dml2->v20.scratch.hpo_stream_to_link_encoder_mapping[location] != -1)
+ if (location < MAX_HPO_DP2_ENCODERS && dml2->v20.scratch.hpo_stream_to_link_encoder_mapping[location] != -1)
out->OutputEncoder[dml2->v20.scratch.hpo_stream_to_link_encoder_mapping[location]] = dml_dp2p0;
break;
case SIGNAL_TYPE_EDP:
@@ -1343,7 +1343,7 @@ void map_dc_state_into_dml_display_cfg(struct dml2_context *dml2, struct dc_stat
if (disp_cfg_stream_location < 0)
disp_cfg_stream_location = dml_dispcfg->num_timings++;
- ASSERT(disp_cfg_stream_location >= 0 && disp_cfg_stream_location <= __DML2_WRAPPER_MAX_STREAMS_PLANES__);
+ ASSERT(disp_cfg_stream_location >= 0 && disp_cfg_stream_location < __DML2_WRAPPER_MAX_STREAMS_PLANES__);
populate_dml_timing_cfg_from_stream_state(&dml_dispcfg->timing, disp_cfg_stream_location, context->streams[i]);
populate_dml_output_cfg_from_stream_state(&dml_dispcfg->output, disp_cfg_stream_location, context->streams[i], current_pipe_context, dml2);
@@ -1383,7 +1383,7 @@ void map_dc_state_into_dml_display_cfg(struct dml2_context *dml2, struct dc_stat
if (disp_cfg_plane_location < 0)
disp_cfg_plane_location = dml_dispcfg->num_surfaces++;
- ASSERT(disp_cfg_plane_location >= 0 && disp_cfg_plane_location <= __DML2_WRAPPER_MAX_STREAMS_PLANES__);
+ ASSERT(disp_cfg_plane_location >= 0 && disp_cfg_plane_location < __DML2_WRAPPER_MAX_STREAMS_PLANES__);
populate_dml_surface_cfg_from_plane_state(dml2->v20.dml_core_ctx.project, &dml_dispcfg->surface, disp_cfg_plane_location, context->stream_status[i].plane_states[j]);
populate_dml_plane_cfg_from_plane_state(
diff --git a/drivers/gpu/drm/amd/display/dc/hubbub/dcn30/dcn30_hubbub.c b/drivers/gpu/drm/amd/display/dc/hubbub/dcn30/dcn30_hubbub.c
index fe741100c0f8..d347bb06577a 100644
--- a/drivers/gpu/drm/amd/display/dc/hubbub/dcn30/dcn30_hubbub.c
+++ b/drivers/gpu/drm/amd/display/dc/hubbub/dcn30/dcn30_hubbub.c
@@ -129,7 +129,8 @@ bool hubbub3_program_watermarks(
REG_UPDATE(DCHUBBUB_ARB_DF_REQ_OUTSTAND,
DCHUBBUB_ARB_MIN_REQ_OUTSTAND, 0x1FF);
- hubbub1_allow_self_refresh_control(hubbub, !hubbub->ctx->dc->debug.disable_stutter);
+ if (safe_to_lower || hubbub->ctx->dc->debug.disable_stutter)
+ hubbub1_allow_self_refresh_control(hubbub, !hubbub->ctx->dc->debug.disable_stutter);
return wm_pending;
}
diff --git a/drivers/gpu/drm/amd/display/dc/hubbub/dcn31/dcn31_hubbub.c b/drivers/gpu/drm/amd/display/dc/hubbub/dcn31/dcn31_hubbub.c
index 7fb5523f9722..b98505b240a7 100644
--- a/drivers/gpu/drm/amd/display/dc/hubbub/dcn31/dcn31_hubbub.c
+++ b/drivers/gpu/drm/amd/display/dc/hubbub/dcn31/dcn31_hubbub.c
@@ -750,7 +750,8 @@ static bool hubbub31_program_watermarks(
REG_UPDATE(DCHUBBUB_ARB_DF_REQ_OUTSTAND,
DCHUBBUB_ARB_MIN_REQ_OUTSTAND, 0x1FF);*/
- hubbub1_allow_self_refresh_control(hubbub, !hubbub->ctx->dc->debug.disable_stutter);
+ if (safe_to_lower || hubbub->ctx->dc->debug.disable_stutter)
+ hubbub1_allow_self_refresh_control(hubbub, !hubbub->ctx->dc->debug.disable_stutter);
return wm_pending;
}
diff --git a/drivers/gpu/drm/amd/display/dc/hubbub/dcn32/dcn32_hubbub.c b/drivers/gpu/drm/amd/display/dc/hubbub/dcn32/dcn32_hubbub.c
index 5264dc26cce1..32a6be543105 100644
--- a/drivers/gpu/drm/amd/display/dc/hubbub/dcn32/dcn32_hubbub.c
+++ b/drivers/gpu/drm/amd/display/dc/hubbub/dcn32/dcn32_hubbub.c
@@ -786,7 +786,8 @@ static bool hubbub32_program_watermarks(
REG_UPDATE(DCHUBBUB_ARB_DF_REQ_OUTSTAND,
DCHUBBUB_ARB_MIN_REQ_OUTSTAND, 0x1FF);*/
- hubbub1_allow_self_refresh_control(hubbub, !hubbub->ctx->dc->debug.disable_stutter);
+ if (safe_to_lower || hubbub->ctx->dc->debug.disable_stutter)
+ hubbub1_allow_self_refresh_control(hubbub, !hubbub->ctx->dc->debug.disable_stutter);
hubbub32_force_usr_retraining_allow(hubbub, hubbub->ctx->dc->debug.force_usr_allow);
diff --git a/drivers/gpu/drm/amd/display/dc/hubbub/dcn35/dcn35_hubbub.c b/drivers/gpu/drm/amd/display/dc/hubbub/dcn35/dcn35_hubbub.c
index 5eb3da8d5206..dce7269959ce 100644
--- a/drivers/gpu/drm/amd/display/dc/hubbub/dcn35/dcn35_hubbub.c
+++ b/drivers/gpu/drm/amd/display/dc/hubbub/dcn35/dcn35_hubbub.c
@@ -326,7 +326,8 @@ static bool hubbub35_program_watermarks(
DCHUBBUB_ARB_MIN_REQ_OUTSTAND_COMMIT_THRESHOLD, 0xA);/*hw delta*/
REG_UPDATE(DCHUBBUB_ARB_HOSTVM_CNTL, DCHUBBUB_ARB_MAX_QOS_COMMIT_THRESHOLD, 0xF);
- hubbub1_allow_self_refresh_control(hubbub, !hubbub->ctx->dc->debug.disable_stutter);
+ if (safe_to_lower || hubbub->ctx->dc->debug.disable_stutter)
+ hubbub1_allow_self_refresh_control(hubbub, !hubbub->ctx->dc->debug.disable_stutter);
hubbub32_force_usr_retraining_allow(hubbub, hubbub->ctx->dc->debug.force_usr_allow);
diff --git a/drivers/gpu/drm/amd/display/dc/hubp/dcn30/dcn30_hubp.c b/drivers/gpu/drm/amd/display/dc/hubp/dcn30/dcn30_hubp.c
index be0ac613675a..0da70b50e86d 100644
--- a/drivers/gpu/drm/amd/display/dc/hubp/dcn30/dcn30_hubp.c
+++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn30/dcn30_hubp.c
@@ -500,6 +500,8 @@ void hubp3_init(struct hubp *hubp)
//hubp[i].HUBPREQ_DEBUG.HUBPREQ_DEBUG[26] = 1;
REG_WRITE(HUBPREQ_DEBUG, 1 << 26);
+ REG_UPDATE(DCHUBP_CNTL, HUBP_TTU_DISABLE, 0);
+
hubp_reset(hubp);
}
diff --git a/drivers/gpu/drm/amd/display/dc/hubp/dcn32/dcn32_hubp.c b/drivers/gpu/drm/amd/display/dc/hubp/dcn32/dcn32_hubp.c
index edd37898d550..f3a21c623f44 100644
--- a/drivers/gpu/drm/amd/display/dc/hubp/dcn32/dcn32_hubp.c
+++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn32/dcn32_hubp.c
@@ -168,6 +168,8 @@ void hubp32_init(struct hubp *hubp)
{
struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp);
REG_WRITE(HUBPREQ_DEBUG_DB, 1 << 8);
+
+ REG_UPDATE(DCHUBP_CNTL, HUBP_TTU_DISABLE, 0);
}
static struct hubp_funcs dcn32_hubp_funcs = {
.hubp_enable_tripleBuffer = hubp2_enable_triplebuffer,
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c
index 623cde76debf..b907ad1acedd 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c
@@ -236,7 +236,8 @@ void dcn35_init_hw(struct dc *dc)
}
hws->funcs.init_pipes(dc, dc->current_state);
- if (dc->res_pool->hubbub->funcs->allow_self_refresh_control)
+ if (dc->res_pool->hubbub->funcs->allow_self_refresh_control &&
+ !dc->res_pool->hubbub->ctx->dc->debug.disable_stutter)
dc->res_pool->hubbub->funcs->allow_self_refresh_control(dc->res_pool->hubbub,
!dc->res_pool->hubbub->ctx->dc->debug.disable_stutter);
}
diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_wb_connector.c b/drivers/gpu/drm/arm/display/komeda/komeda_wb_connector.c
index ebccb74306a7..f30b3d5eeca5 100644
--- a/drivers/gpu/drm/arm/display/komeda/komeda_wb_connector.c
+++ b/drivers/gpu/drm/arm/display/komeda/komeda_wb_connector.c
@@ -160,6 +160,10 @@ static int komeda_wb_connector_add(struct komeda_kms_dev *kms,
formats = komeda_get_layer_fourcc_list(&mdev->fmt_tbl,
kwb_conn->wb_layer->layer_type,
&n_formats);
+ if (!formats) {
+ kfree(kwb_conn);
+ return -ENOMEM;
+ }
err = drm_writeback_connector_init(&kms->base, wb_conn,
&komeda_wb_connector_funcs,
diff --git a/drivers/gpu/drm/ast/ast_dp.c b/drivers/gpu/drm/ast/ast_dp.c
index 0e282b7b167c..b9eb67e3fa90 100644
--- a/drivers/gpu/drm/ast/ast_dp.c
+++ b/drivers/gpu/drm/ast/ast_dp.c
@@ -195,7 +195,7 @@ static bool __ast_dp_wait_enable(struct ast_device *ast, bool enabled)
if (enabled)
vgacrdf_test |= AST_IO_VGACRDF_DP_VIDEO_ENABLE;
- for (i = 0; i < 200; ++i) {
+ for (i = 0; i < 1000; ++i) {
if (i)
mdelay(1);
vgacrdf = ast_get_index_reg_mask(ast, AST_IO_VGACRI, 0xdf,
diff --git a/drivers/gpu/drm/display/drm_dp_cec.c b/drivers/gpu/drm/display/drm_dp_cec.c
index 007ceb281d00..56a4965e518c 100644
--- a/drivers/gpu/drm/display/drm_dp_cec.c
+++ b/drivers/gpu/drm/display/drm_dp_cec.c
@@ -311,16 +311,6 @@ void drm_dp_cec_attach(struct drm_dp_aux *aux, u16 source_physical_address)
if (!aux->transfer)
return;
-#ifndef CONFIG_MEDIA_CEC_RC
- /*
- * CEC_CAP_RC is part of CEC_CAP_DEFAULTS, but it is stripped by
- * cec_allocate_adapter() if CONFIG_MEDIA_CEC_RC is undefined.
- *
- * Do this here as well to ensure the tests against cec_caps are
- * correct.
- */
- cec_caps &= ~CEC_CAP_RC;
-#endif
cancel_delayed_work_sync(&aux->cec.unregister_work);
mutex_lock(&aux->cec.lock);
@@ -337,7 +327,9 @@ void drm_dp_cec_attach(struct drm_dp_aux *aux, u16 source_physical_address)
num_las = CEC_MAX_LOG_ADDRS;
if (aux->cec.adap) {
- if (aux->cec.adap->capabilities == cec_caps &&
+ /* Check if the adapter properties have changed */
+ if ((aux->cec.adap->capabilities & CEC_CAP_MONITOR_ALL) ==
+ (cec_caps & CEC_CAP_MONITOR_ALL) &&
aux->cec.adap->available_log_addrs == num_las) {
/* Unchanged, so just set the phys addr */
cec_s_phys_addr(aux->cec.adap, source_physical_address, false);
diff --git a/drivers/gpu/drm/i915/display/intel_backlight.c b/drivers/gpu/drm/i915/display/intel_backlight.c
index fc1e517e074a..7e6ce905bdaf 100644
--- a/drivers/gpu/drm/i915/display/intel_backlight.c
+++ b/drivers/gpu/drm/i915/display/intel_backlight.c
@@ -41,8 +41,9 @@ static u32 scale(u32 source_val,
{
u64 target_val;
- WARN_ON(source_min > source_max);
- WARN_ON(target_min > target_max);
+ if (WARN_ON(source_min >= source_max) ||
+ WARN_ON(target_min > target_max))
+ return target_min;
/* defensive */
source_val = clamp(source_val, source_min, source_max);
diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c
index f1f3b1bb1e89..aa77ddcee42c 100644
--- a/drivers/gpu/drm/i915/display/intel_dp.c
+++ b/drivers/gpu/drm/i915/display/intel_dp.c
@@ -1791,7 +1791,7 @@ int intel_dp_dsc_max_src_input_bpc(struct intel_display *display)
if (DISPLAY_VER(display) == 11)
return 10;
- return 0;
+ return intel_dp_dsc_min_src_input_bpc();
}
int intel_dp_dsc_compute_max_bpp(const struct intel_connector *connector,
@@ -2072,11 +2072,10 @@ icl_dsc_compute_link_config(struct intel_dp *intel_dp,
/* Compressed BPP should be less than the Input DSC bpp */
dsc_max_bpp = min(dsc_max_bpp, pipe_bpp - 1);
- for (i = 0; i < ARRAY_SIZE(valid_dsc_bpp); i++) {
- if (valid_dsc_bpp[i] < dsc_min_bpp)
+ for (i = ARRAY_SIZE(valid_dsc_bpp) - 1; i >= 0; i--) {
+ if (valid_dsc_bpp[i] < dsc_min_bpp ||
+ valid_dsc_bpp[i] > dsc_max_bpp)
continue;
- if (valid_dsc_bpp[i] > dsc_max_bpp)
- break;
ret = dsc_compute_link_config(intel_dp,
pipe_config,
@@ -2829,7 +2828,6 @@ static void intel_dp_compute_as_sdp(struct intel_dp *intel_dp,
crtc_state->infoframes.enable |= intel_hdmi_infoframe_enable(DP_SDP_ADAPTIVE_SYNC);
- /* Currently only DP_AS_SDP_AVT_FIXED_VTOTAL mode supported */
as_sdp->sdp_type = DP_SDP_ADAPTIVE_SYNC;
as_sdp->length = 0x9;
as_sdp->duration_incr_ms = 0;
@@ -2840,7 +2838,7 @@ static void intel_dp_compute_as_sdp(struct intel_dp *intel_dp,
as_sdp->target_rr = drm_mode_vrefresh(adjusted_mode);
as_sdp->target_rr_divider = true;
} else {
- as_sdp->mode = DP_AS_SDP_AVT_FIXED_VTOTAL;
+ as_sdp->mode = DP_AS_SDP_AVT_DYNAMIC_VTOTAL;
as_sdp->vtotal = adjusted_mode->vtotal;
as_sdp->target_rr = 0;
}
diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c
index 0c44fc7dd86c..a65cf97ad12d 100644
--- a/drivers/gpu/drm/i915/display/intel_dp_mst.c
+++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c
@@ -341,6 +341,10 @@ int intel_dp_mtp_tu_compute_config(struct intel_dp *intel_dp,
break;
}
+
+ /* Allow using zero step to indicate one try */
+ if (!step)
+ break;
}
if (slots < 0) {
diff --git a/drivers/gpu/drm/i915/display/intel_hdcp.c b/drivers/gpu/drm/i915/display/intel_hdcp.c
index 7464b44c8bb3..1bab7c34a794 100644
--- a/drivers/gpu/drm/i915/display/intel_hdcp.c
+++ b/drivers/gpu/drm/i915/display/intel_hdcp.c
@@ -41,7 +41,7 @@ intel_hdcp_adjust_hdcp_line_rekeying(struct intel_encoder *encoder,
u32 rekey_bit = 0;
/* Here we assume HDMI is in TMDS mode of operation */
- if (encoder->type != INTEL_OUTPUT_HDMI)
+ if (!intel_encoder_is_hdmi(encoder))
return;
if (DISPLAY_VER(display) >= 30) {
@@ -2188,6 +2188,19 @@ static int intel_hdcp2_check_link(struct intel_connector *connector)
drm_dbg_kms(display->drm,
"HDCP2.2 Downstream topology change\n");
+
+ ret = hdcp2_authenticate_repeater_topology(connector);
+ if (!ret) {
+ intel_hdcp_update_value(connector,
+ DRM_MODE_CONTENT_PROTECTION_ENABLED,
+ true);
+ goto out;
+ }
+
+ drm_dbg_kms(display->drm,
+ "[CONNECTOR:%d:%s] Repeater topology auth failed.(%d)\n",
+ connector->base.base.id, connector->base.name,
+ ret);
} else {
drm_dbg_kms(display->drm,
"[CONNECTOR:%d:%s] HDCP2.2 link failed, retrying auth\n",
diff --git a/drivers/gpu/drm/i915/display/skl_universal_plane.c b/drivers/gpu/drm/i915/display/skl_universal_plane.c
index ff9764cac1e7..80e558042d97 100644
--- a/drivers/gpu/drm/i915/display/skl_universal_plane.c
+++ b/drivers/gpu/drm/i915/display/skl_universal_plane.c
@@ -106,8 +106,6 @@ static const u32 icl_sdr_y_plane_formats[] = {
DRM_FORMAT_Y216,
DRM_FORMAT_XYUV8888,
DRM_FORMAT_XVYU2101010,
- DRM_FORMAT_XVYU12_16161616,
- DRM_FORMAT_XVYU16161616,
};
static const u32 icl_sdr_uv_plane_formats[] = {
@@ -134,8 +132,6 @@ static const u32 icl_sdr_uv_plane_formats[] = {
DRM_FORMAT_Y216,
DRM_FORMAT_XYUV8888,
DRM_FORMAT_XVYU2101010,
- DRM_FORMAT_XVYU12_16161616,
- DRM_FORMAT_XVYU16161616,
};
static const u32 icl_hdr_plane_formats[] = {
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
index fe69f2c8527d..ae3343c81a64 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
@@ -209,8 +209,6 @@ static int shmem_get_pages(struct drm_i915_gem_object *obj)
struct address_space *mapping = obj->base.filp->f_mapping;
unsigned int max_segment = i915_sg_segment_size(i915->drm.dev);
struct sg_table *st;
- struct sgt_iter sgt_iter;
- struct page *page;
int ret;
/*
@@ -239,9 +237,7 @@ rebuild_st:
* for PAGE_SIZE chunks instead may be helpful.
*/
if (max_segment > PAGE_SIZE) {
- for_each_sgt_page(page, sgt_iter, st)
- put_page(page);
- sg_free_table(st);
+ shmem_sg_free_table(st, mapping, false, false);
kfree(st);
max_segment = PAGE_SIZE;
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index 12f1ba7ca9c1..cc05bd9e43b4 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -1469,6 +1469,19 @@ static void __reset_guc_busyness_stats(struct intel_guc *guc)
spin_unlock_irqrestore(&guc->timestamp.lock, flags);
}
+static void __update_guc_busyness_running_state(struct intel_guc *guc)
+{
+ struct intel_gt *gt = guc_to_gt(guc);
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+ unsigned long flags;
+
+ spin_lock_irqsave(&guc->timestamp.lock, flags);
+ for_each_engine(engine, gt, id)
+ engine->stats.guc.running = false;
+ spin_unlock_irqrestore(&guc->timestamp.lock, flags);
+}
+
static void __update_guc_busyness_stats(struct intel_guc *guc)
{
struct intel_gt *gt = guc_to_gt(guc);
@@ -1619,6 +1632,9 @@ void intel_guc_busyness_park(struct intel_gt *gt)
if (!guc_submission_initialized(guc))
return;
+ /* Assume no engines are running and set running state to false */
+ __update_guc_busyness_running_state(guc);
+
/*
* There is a race with suspend flow where the worker runs after suspend
* and causes an unclaimed register access warning. Cancel the worker
@@ -5519,12 +5535,20 @@ static inline void guc_log_context(struct drm_printer *p,
{
drm_printf(p, "GuC lrc descriptor %u:\n", ce->guc_id.id);
drm_printf(p, "\tHW Context Desc: 0x%08x\n", ce->lrc.lrca);
- drm_printf(p, "\t\tLRC Head: Internal %u, Memory %u\n",
- ce->ring->head,
- ce->lrc_reg_state[CTX_RING_HEAD]);
- drm_printf(p, "\t\tLRC Tail: Internal %u, Memory %u\n",
- ce->ring->tail,
- ce->lrc_reg_state[CTX_RING_TAIL]);
+ if (intel_context_pin_if_active(ce)) {
+ drm_printf(p, "\t\tLRC Head: Internal %u, Memory %u\n",
+ ce->ring->head,
+ ce->lrc_reg_state[CTX_RING_HEAD]);
+ drm_printf(p, "\t\tLRC Tail: Internal %u, Memory %u\n",
+ ce->ring->tail,
+ ce->lrc_reg_state[CTX_RING_TAIL]);
+ intel_context_unpin(ce);
+ } else {
+ drm_printf(p, "\t\tLRC Head: Internal %u, Memory not pinned\n",
+ ce->ring->head);
+ drm_printf(p, "\t\tLRC Tail: Internal %u, Memory not pinned\n",
+ ce->ring->tail);
+ }
drm_printf(p, "\t\tContext Pin Count: %u\n",
atomic_read(&ce->pin_count));
drm_printf(p, "\t\tGuC ID Ref Count: %u\n",
diff --git a/drivers/gpu/drm/xe/regs/xe_oa_regs.h b/drivers/gpu/drm/xe/regs/xe_oa_regs.h
index a49561e9f3c3..a79ad2da070c 100644
--- a/drivers/gpu/drm/xe/regs/xe_oa_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_oa_regs.h
@@ -51,6 +51,10 @@
/* Common to all OA units */
#define OA_OACONTROL_REPORT_BC_MASK REG_GENMASK(9, 9)
#define OA_OACONTROL_COUNTER_SIZE_MASK REG_GENMASK(8, 8)
+#define OAG_OACONTROL_USED_BITS \
+ (OAG_OACONTROL_OA_PES_DISAG_EN | OAG_OACONTROL_OA_CCS_SELECT_MASK | \
+ OAG_OACONTROL_OA_COUNTER_SEL_MASK | OAG_OACONTROL_OA_COUNTER_ENABLE | \
+ OA_OACONTROL_REPORT_BC_MASK | OA_OACONTROL_COUNTER_SIZE_MASK)
#define OAG_OA_DEBUG XE_REG(0xdaf8, XE_REG_OPTION_MASKED)
#define OAG_OA_DEBUG_DISABLE_MMIO_TRG REG_BIT(14)
@@ -78,6 +82,8 @@
#define OAM_CONTEXT_CONTROL_OFFSET (0x1bc)
#define OAM_CONTROL_OFFSET (0x194)
#define OAM_CONTROL_COUNTER_SEL_MASK REG_GENMASK(3, 1)
+#define OAM_OACONTROL_USED_BITS \
+ (OAM_CONTROL_COUNTER_SEL_MASK | OAG_OACONTROL_OA_COUNTER_ENABLE)
#define OAM_DEBUG_OFFSET (0x198)
#define OAM_STATUS_OFFSET (0x19c)
#define OAM_MMIO_TRG_OFFSET (0x1d0)
diff --git a/drivers/gpu/drm/xe/xe_devcoredump.c b/drivers/gpu/drm/xe/xe_devcoredump.c
index 81dc7795c065..39fe485d2085 100644
--- a/drivers/gpu/drm/xe/xe_devcoredump.c
+++ b/drivers/gpu/drm/xe/xe_devcoredump.c
@@ -119,11 +119,7 @@ static ssize_t __xe_devcoredump_read(char *buffer, size_t count,
drm_puts(&p, "\n**** GuC CT ****\n");
xe_guc_ct_snapshot_print(ss->guc.ct, &p);
- /*
- * Don't add a new section header here because the mesa debug decoder
- * tool expects the context information to be in the 'GuC CT' section.
- */
- /* drm_puts(&p, "\n**** Contexts ****\n"); */
+ drm_puts(&p, "\n**** Contexts ****\n");
xe_guc_exec_queue_snapshot_print(ss->ge, &p);
drm_puts(&p, "\n**** Job ****\n");
@@ -395,42 +391,34 @@ int xe_devcoredump_init(struct xe_device *xe)
/**
* xe_print_blob_ascii85 - print a BLOB to some useful location in ASCII85
*
- * The output is split to multiple lines because some print targets, e.g. dmesg
- * cannot handle arbitrarily long lines. Note also that printing to dmesg in
- * piece-meal fashion is not possible, each separate call to drm_puts() has a
- * line-feed automatically added! Therefore, the entire output line must be
- * constructed in a local buffer first, then printed in one atomic output call.
+ * The output is split into multiple calls to drm_puts() because some print
+ * targets, e.g. dmesg, cannot handle arbitrarily long lines. These targets may
+ * add newlines, as is the case with dmesg: each drm_puts() call creates a
+ * separate line.
*
* There is also a scheduler yield call to prevent the 'task has been stuck for
* 120s' kernel hang check feature from firing when printing to a slow target
* such as dmesg over a serial port.
*
- * TODO: Add compression prior to the ASCII85 encoding to shrink huge buffers down.
- *
* @p: the printer object to output to
* @prefix: optional prefix to add to output string
+ * @suffix: optional suffix to add at the end. 0 disables it and is
+ * not added to the output, which is useful when using multiple calls
+ * to dump data to @p
* @blob: the Binary Large OBject to dump out
* @offset: offset in bytes to skip from the front of the BLOB, must be a multiple of sizeof(u32)
* @size: the size in bytes of the BLOB, must be a multiple of sizeof(u32)
*/
-void xe_print_blob_ascii85(struct drm_printer *p, const char *prefix,
+void xe_print_blob_ascii85(struct drm_printer *p, const char *prefix, char suffix,
const void *blob, size_t offset, size_t size)
{
const u32 *blob32 = (const u32 *)blob;
char buff[ASCII85_BUFSZ], *line_buff;
size_t line_pos = 0;
- /*
- * Splitting blobs across multiple lines is not compatible with the mesa
- * debug decoder tool. Note that even dropping the explicit '\n' below
- * doesn't help because the GuC log is so big some underlying implementation
- * still splits the lines at 512K characters. So just bail completely for
- * the moment.
- */
- return;
-
#define DMESG_MAX_LINE_LEN 800
-#define MIN_SPACE (ASCII85_BUFSZ + 2) /* 85 + "\n\0" */
+ /* Always leave space for the suffix char and the \0 */
+#define MIN_SPACE (ASCII85_BUFSZ + 2) /* 85 + "<suffix>\0" */
if (size & 3)
drm_printf(p, "Size not word aligned: %zu", size);
@@ -462,7 +450,6 @@ void xe_print_blob_ascii85(struct drm_printer *p, const char *prefix,
line_pos += strlen(line_buff + line_pos);
if ((line_pos + MIN_SPACE) >= DMESG_MAX_LINE_LEN) {
- line_buff[line_pos++] = '\n';
line_buff[line_pos++] = 0;
drm_puts(p, line_buff);
@@ -474,10 +461,11 @@ void xe_print_blob_ascii85(struct drm_printer *p, const char *prefix,
}
}
+ if (suffix)
+ line_buff[line_pos++] = suffix;
+
if (line_pos) {
- line_buff[line_pos++] = '\n';
line_buff[line_pos++] = 0;
-
drm_puts(p, line_buff);
}
diff --git a/drivers/gpu/drm/xe/xe_devcoredump.h b/drivers/gpu/drm/xe/xe_devcoredump.h
index 6a17e6d60102..5391a80a4d1b 100644
--- a/drivers/gpu/drm/xe/xe_devcoredump.h
+++ b/drivers/gpu/drm/xe/xe_devcoredump.h
@@ -29,7 +29,7 @@ static inline int xe_devcoredump_init(struct xe_device *xe)
}
#endif
-void xe_print_blob_ascii85(struct drm_printer *p, const char *prefix,
+void xe_print_blob_ascii85(struct drm_printer *p, const char *prefix, char suffix,
const void *blob, size_t offset, size_t size);
#endif
diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
index 26e64530ada2..5d6fb79957b6 100644
--- a/drivers/gpu/drm/xe/xe_gt.c
+++ b/drivers/gpu/drm/xe/xe_gt.c
@@ -532,8 +532,10 @@ static int all_fw_domain_init(struct xe_gt *gt)
if (IS_SRIOV_PF(gt_to_xe(gt)) && !xe_gt_is_media_type(gt))
xe_lmtt_init_hw(&gt_to_tile(gt)->sriov.pf.lmtt);
- if (IS_SRIOV_PF(gt_to_xe(gt)))
+ if (IS_SRIOV_PF(gt_to_xe(gt))) {
+ xe_gt_sriov_pf_init(gt);
xe_gt_sriov_pf_init_hw(gt);
+ }
xe_force_wake_put(gt_to_fw(gt), fw_ref);
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf.c
index e71fc3d2bda2..6f906c8e8108 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_pf.c
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf.c
@@ -68,6 +68,19 @@ int xe_gt_sriov_pf_init_early(struct xe_gt *gt)
return 0;
}
+/**
+ * xe_gt_sriov_pf_init - Prepare SR-IOV PF data structures on PF.
+ * @gt: the &xe_gt to initialize
+ *
+ * Late one-time initialization of the PF data.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_gt_sriov_pf_init(struct xe_gt *gt)
+{
+ return xe_gt_sriov_pf_migration_init(gt);
+}
+
static bool pf_needs_enable_ggtt_guest_update(struct xe_device *xe)
{
return GRAPHICS_VERx100(xe) == 1200;
@@ -90,7 +103,6 @@ void xe_gt_sriov_pf_init_hw(struct xe_gt *gt)
pf_enable_ggtt_guest_update(gt);
xe_gt_sriov_pf_service_update(gt);
- xe_gt_sriov_pf_migration_init(gt);
}
static u32 pf_get_vf_regs_stride(struct xe_device *xe)
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf.h
index 96fab779a906..f474509411c0 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_pf.h
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf.h
@@ -10,6 +10,7 @@ struct xe_gt;
#ifdef CONFIG_PCI_IOV
int xe_gt_sriov_pf_init_early(struct xe_gt *gt);
+int xe_gt_sriov_pf_init(struct xe_gt *gt);
void xe_gt_sriov_pf_init_hw(struct xe_gt *gt);
void xe_gt_sriov_pf_sanitize_hw(struct xe_gt *gt, unsigned int vfid);
void xe_gt_sriov_pf_restart(struct xe_gt *gt);
@@ -19,6 +20,11 @@ static inline int xe_gt_sriov_pf_init_early(struct xe_gt *gt)
return 0;
}
+static inline int xe_gt_sriov_pf_init(struct xe_gt *gt)
+{
+ return 0;
+}
+
static inline void xe_gt_sriov_pf_init_hw(struct xe_gt *gt)
{
}
diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c
index 8b65c5e959cc..50c8076b5158 100644
--- a/drivers/gpu/drm/xe/xe_guc_ct.c
+++ b/drivers/gpu/drm/xe/xe_guc_ct.c
@@ -1724,7 +1724,8 @@ void xe_guc_ct_snapshot_print(struct xe_guc_ct_snapshot *snapshot,
snapshot->g2h_outstanding);
if (snapshot->ctb)
- xe_print_blob_ascii85(p, "CTB data", snapshot->ctb, 0, snapshot->ctb_size);
+ xe_print_blob_ascii85(p, "CTB data", '\n',
+ snapshot->ctb, 0, snapshot->ctb_size);
} else {
drm_puts(p, "CT disabled\n");
}
diff --git a/drivers/gpu/drm/xe/xe_guc_log.c b/drivers/gpu/drm/xe/xe_guc_log.c
index df4cfb698cdb..2baa4d95571f 100644
--- a/drivers/gpu/drm/xe/xe_guc_log.c
+++ b/drivers/gpu/drm/xe/xe_guc_log.c
@@ -211,8 +211,10 @@ void xe_guc_log_snapshot_print(struct xe_guc_log_snapshot *snapshot, struct drm_
remain = snapshot->size;
for (i = 0; i < snapshot->num_chunks; i++) {
size_t size = min(GUC_LOG_CHUNK_SIZE, remain);
+ const char *prefix = i ? NULL : "Log data";
+ char suffix = i == snapshot->num_chunks - 1 ? '\n' : 0;
- xe_print_blob_ascii85(p, i ? NULL : "Log data", snapshot->copy[i], 0, size);
+ xe_print_blob_ascii85(p, prefix, suffix, snapshot->copy[i], 0, size);
remain -= size;
}
}
diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c
index eeb96b5f49e2..fa873f3d0a9d 100644
--- a/drivers/gpu/drm/xe/xe_oa.c
+++ b/drivers/gpu/drm/xe/xe_oa.c
@@ -237,7 +237,6 @@ static bool xe_oa_buffer_check_unlocked(struct xe_oa_stream *stream)
u32 tail, hw_tail, partial_report_size, available;
int report_size = stream->oa_buffer.format->size;
unsigned long flags;
- bool pollin;
spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags);
@@ -282,11 +281,11 @@ static bool xe_oa_buffer_check_unlocked(struct xe_oa_stream *stream)
stream->oa_buffer.tail = tail;
available = xe_oa_circ_diff(stream, stream->oa_buffer.tail, stream->oa_buffer.head);
- pollin = available >= stream->wait_num_reports * report_size;
+ stream->pollin = available >= stream->wait_num_reports * report_size;
spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
- return pollin;
+ return stream->pollin;
}
static enum hrtimer_restart xe_oa_poll_check_timer_cb(struct hrtimer *hrtimer)
@@ -294,10 +293,8 @@ static enum hrtimer_restart xe_oa_poll_check_timer_cb(struct hrtimer *hrtimer)
struct xe_oa_stream *stream =
container_of(hrtimer, typeof(*stream), poll_check_timer);
- if (xe_oa_buffer_check_unlocked(stream)) {
- stream->pollin = true;
+ if (xe_oa_buffer_check_unlocked(stream))
wake_up(&stream->poll_wq);
- }
hrtimer_forward_now(hrtimer, ns_to_ktime(stream->poll_period_ns));
@@ -452,6 +449,12 @@ static u32 __oa_ccs_select(struct xe_oa_stream *stream)
return val;
}
+static u32 __oactrl_used_bits(struct xe_oa_stream *stream)
+{
+ return stream->hwe->oa_unit->type == DRM_XE_OA_UNIT_TYPE_OAG ?
+ OAG_OACONTROL_USED_BITS : OAM_OACONTROL_USED_BITS;
+}
+
static void xe_oa_enable(struct xe_oa_stream *stream)
{
const struct xe_oa_format *format = stream->oa_buffer.format;
@@ -472,14 +475,14 @@ static void xe_oa_enable(struct xe_oa_stream *stream)
stream->hwe->oa_unit->type == DRM_XE_OA_UNIT_TYPE_OAG)
val |= OAG_OACONTROL_OA_PES_DISAG_EN;
- xe_mmio_write32(&stream->gt->mmio, regs->oa_ctrl, val);
+ xe_mmio_rmw32(&stream->gt->mmio, regs->oa_ctrl, __oactrl_used_bits(stream), val);
}
static void xe_oa_disable(struct xe_oa_stream *stream)
{
struct xe_mmio *mmio = &stream->gt->mmio;
- xe_mmio_write32(mmio, __oa_regs(stream)->oa_ctrl, 0);
+ xe_mmio_rmw32(mmio, __oa_regs(stream)->oa_ctrl, __oactrl_used_bits(stream), 0);
if (xe_mmio_wait32(mmio, __oa_regs(stream)->oa_ctrl,
OAG_OACONTROL_OA_COUNTER_ENABLE, 0, 50000, NULL, false))
drm_err(&stream->oa->xe->drm,
@@ -2534,6 +2537,8 @@ static void __xe_oa_init_oa_units(struct xe_gt *gt)
u->type = DRM_XE_OA_UNIT_TYPE_OAM;
}
+ xe_mmio_write32(&gt->mmio, u->regs.oa_ctrl, 0);
+
/* Ensure MMIO trigger remains disabled till there is a stream */
xe_mmio_write32(&gt->mmio, u->regs.oa_debug,
oag_configure_mmio_trigger(NULL, false));
diff --git a/drivers/hid/Kconfig b/drivers/hid/Kconfig
index b53eb569bd49..dfc245867a46 100644
--- a/drivers/hid/Kconfig
+++ b/drivers/hid/Kconfig
@@ -570,6 +570,8 @@ config HID_LED
config HID_LENOVO
tristate "Lenovo / Thinkpad devices"
+ depends on ACPI
+ select ACPI_PLATFORM_PROFILE
select NEW_LEDS
select LEDS_CLASS
help
@@ -1167,7 +1169,8 @@ config HID_TOPRE
tristate "Topre REALFORCE keyboards"
depends on HID
help
- Say Y for N-key rollover support on Topre REALFORCE R2 108/87 key keyboards.
+ Say Y for N-key rollover support on Topre REALFORCE R2 108/87 key and
+ Topre REALFORCE R3S 87 key keyboards.
config HID_THINGM
tristate "ThingM blink(1) USB RGB LED"
@@ -1374,10 +1377,6 @@ endmenu
source "drivers/hid/bpf/Kconfig"
-endif # HID
-
-source "drivers/hid/usbhid/Kconfig"
-
source "drivers/hid/i2c-hid/Kconfig"
source "drivers/hid/intel-ish-hid/Kconfig"
@@ -1388,4 +1387,10 @@ source "drivers/hid/surface-hid/Kconfig"
source "drivers/hid/intel-thc-hid/Kconfig"
+endif # HID
+
+# USB support may be used with HID disabled
+
+source "drivers/hid/usbhid/Kconfig"
+
endif # HID_SUPPORT
diff --git a/drivers/hid/amd-sfh-hid/Kconfig b/drivers/hid/amd-sfh-hid/Kconfig
index 329de5e12c1a..3291786a5ee6 100644
--- a/drivers/hid/amd-sfh-hid/Kconfig
+++ b/drivers/hid/amd-sfh-hid/Kconfig
@@ -5,7 +5,6 @@ menu "AMD SFH HID Support"
config AMD_SFH_HID
tristate "AMD Sensor Fusion Hub"
- depends on HID
depends on X86
help
If you say yes to this option, support will be included for the
diff --git a/drivers/hid/hid-apple.c b/drivers/hid/hid-apple.c
index 7e1ae2a2bcc2..49812a76b7ed 100644
--- a/drivers/hid/hid-apple.c
+++ b/drivers/hid/hid-apple.c
@@ -474,6 +474,7 @@ static int hidinput_apple_event(struct hid_device *hid, struct input_dev *input,
hid->product == USB_DEVICE_ID_APPLE_MAGIC_KEYBOARD_NUMPAD_2015)
table = magic_keyboard_2015_fn_keys;
else if (hid->product == USB_DEVICE_ID_APPLE_MAGIC_KEYBOARD_2021 ||
+ hid->product == USB_DEVICE_ID_APPLE_MAGIC_KEYBOARD_2024 ||
hid->product == USB_DEVICE_ID_APPLE_MAGIC_KEYBOARD_FINGERPRINT_2021 ||
hid->product == USB_DEVICE_ID_APPLE_MAGIC_KEYBOARD_NUMPAD_2021)
table = apple2021_fn_keys;
@@ -545,6 +546,9 @@ static int hidinput_apple_event(struct hid_device *hid, struct input_dev *input,
}
}
+ if (usage->hid == 0xc0301) /* Omoton KB066 quirk */
+ code = KEY_F6;
+
if (usage->code != code) {
input_event_with_scancode(input, usage->type, code, usage->hid, value);
@@ -1150,6 +1154,10 @@ static const struct hid_device_id apple_devices[] = {
.driver_data = APPLE_HAS_FN | APPLE_ISO_TILDE_QUIRK | APPLE_RDESC_BATTERY },
{ HID_BLUETOOTH_DEVICE(BT_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MAGIC_KEYBOARD_2021),
.driver_data = APPLE_HAS_FN | APPLE_ISO_TILDE_QUIRK },
+ { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MAGIC_KEYBOARD_2024),
+ .driver_data = APPLE_HAS_FN | APPLE_ISO_TILDE_QUIRK | APPLE_RDESC_BATTERY },
+ { HID_BLUETOOTH_DEVICE(BT_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MAGIC_KEYBOARD_2024),
+ .driver_data = APPLE_HAS_FN | APPLE_ISO_TILDE_QUIRK },
{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MAGIC_KEYBOARD_FINGERPRINT_2021),
.driver_data = APPLE_HAS_FN | APPLE_ISO_TILDE_QUIRK | APPLE_RDESC_BATTERY },
{ HID_BLUETOOTH_DEVICE(BT_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MAGIC_KEYBOARD_FINGERPRINT_2021),
diff --git a/drivers/hid/hid-corsair-void.c b/drivers/hid/hid-corsair-void.c
index 6ece56b850fc..56e858066c3c 100644
--- a/drivers/hid/hid-corsair-void.c
+++ b/drivers/hid/hid-corsair-void.c
@@ -553,7 +553,7 @@ static void corsair_void_battery_remove_work_handler(struct work_struct *work)
static void corsair_void_battery_add_work_handler(struct work_struct *work)
{
struct corsair_void_drvdata *drvdata;
- struct power_supply_config psy_cfg;
+ struct power_supply_config psy_cfg = {};
struct power_supply *new_supply;
drvdata = container_of(work, struct corsair_void_drvdata,
@@ -726,6 +726,7 @@ static void corsair_void_remove(struct hid_device *hid_dev)
if (drvdata->battery)
power_supply_unregister(drvdata->battery);
+ cancel_delayed_work_sync(&drvdata->delayed_status_work);
cancel_delayed_work_sync(&drvdata->delayed_firmware_work);
sysfs_remove_group(&hid_dev->dev.kobj, &corsair_void_attr_group);
}
diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h
index c448de53bf91..7e400624908e 100644
--- a/drivers/hid/hid-ids.h
+++ b/drivers/hid/hid-ids.h
@@ -184,6 +184,7 @@
#define USB_DEVICE_ID_APPLE_IRCONTROL4 0x8242
#define USB_DEVICE_ID_APPLE_IRCONTROL5 0x8243
#define USB_DEVICE_ID_APPLE_MAGIC_KEYBOARD_2021 0x029c
+#define USB_DEVICE_ID_APPLE_MAGIC_KEYBOARD_2024 0x0320
#define USB_DEVICE_ID_APPLE_MAGIC_KEYBOARD_FINGERPRINT_2021 0x029a
#define USB_DEVICE_ID_APPLE_MAGIC_KEYBOARD_NUMPAD_2021 0x029f
#define USB_DEVICE_ID_APPLE_TOUCHBAR_BACKLIGHT 0x8102
@@ -1095,6 +1096,7 @@
#define USB_DEVICE_ID_QUANTA_OPTICAL_TOUCH_3001 0x3001
#define USB_DEVICE_ID_QUANTA_OPTICAL_TOUCH_3003 0x3003
#define USB_DEVICE_ID_QUANTA_OPTICAL_TOUCH_3008 0x3008
+#define USB_DEVICE_ID_QUANTA_HP_5MP_CAMERA_5473 0x5473
#define I2C_VENDOR_ID_RAYDIUM 0x2386
#define I2C_PRODUCT_ID_RAYDIUM_4B33 0x4b33
@@ -1301,6 +1303,7 @@
#define USB_VENDOR_ID_TOPRE 0x0853
#define USB_DEVICE_ID_TOPRE_REALFORCE_R2_108 0x0148
#define USB_DEVICE_ID_TOPRE_REALFORCE_R2_87 0x0146
+#define USB_DEVICE_ID_TOPRE_REALFORCE_R3S_87 0x0313
#define USB_VENDOR_ID_TOPSEED 0x0766
#define USB_DEVICE_ID_TOPSEED_CYBERLINK 0x0204
diff --git a/drivers/hid/hid-lenovo.c b/drivers/hid/hid-lenovo.c
index 4d00bc4d656e..a7d9ca02779e 100644
--- a/drivers/hid/hid-lenovo.c
+++ b/drivers/hid/hid-lenovo.c
@@ -32,9 +32,7 @@
#include <linux/leds.h>
#include <linux/workqueue.h>
-#if IS_ENABLED(CONFIG_ACPI_PLATFORM_PROFILE)
#include <linux/platform_profile.h>
-#endif /* CONFIG_ACPI_PLATFORM_PROFILE */
#include "hid-ids.h"
@@ -730,13 +728,10 @@ static int lenovo_raw_event_TP_X12_tab(struct hid_device *hdev, u32 raw_data)
if (hdev->product == USB_DEVICE_ID_LENOVO_X12_TAB) {
report_key_event(input, KEY_RFKILL);
return 1;
- }
-#if IS_ENABLED(CONFIG_ACPI_PLATFORM_PROFILE)
- else {
+ } else {
platform_profile_cycle();
return 1;
}
-#endif /* CONFIG_ACPI_PLATFORM_PROFILE */
return 0;
case TP_X12_RAW_HOTKEY_FN_F10:
/* TAB1 has PICKUP Phone and TAB2 use Snipping tool*/
diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c
index 82900857bfd8..e50887a6d22c 100644
--- a/drivers/hid/hid-multitouch.c
+++ b/drivers/hid/hid-multitouch.c
@@ -1679,9 +1679,12 @@ static int mt_input_configured(struct hid_device *hdev, struct hid_input *hi)
break;
}
- if (suffix)
+ if (suffix) {
hi->input->name = devm_kasprintf(&hdev->dev, GFP_KERNEL,
"%s %s", hdev->name, suffix);
+ if (!hi->input->name)
+ return -ENOMEM;
+ }
return 0;
}
diff --git a/drivers/hid/hid-quirks.c b/drivers/hid/hid-quirks.c
index e0bbf0c6345d..5d7a418ccdbe 100644
--- a/drivers/hid/hid-quirks.c
+++ b/drivers/hid/hid-quirks.c
@@ -891,6 +891,7 @@ static const struct hid_device_id hid_ignore_list[] = {
{ HID_USB_DEVICE(USB_VENDOR_ID_SYNAPTICS, USB_DEVICE_ID_SYNAPTICS_DPAD) },
#endif
{ HID_USB_DEVICE(USB_VENDOR_ID_YEALINK, USB_DEVICE_ID_YEALINK_P1K_P4K_B2K) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_QUANTA, USB_DEVICE_ID_QUANTA_HP_5MP_CAMERA_5473) },
{ }
};
diff --git a/drivers/hid/hid-steam.c b/drivers/hid/hid-steam.c
index af38fc8eb34f..c9e65e9088b3 100644
--- a/drivers/hid/hid-steam.c
+++ b/drivers/hid/hid-steam.c
@@ -313,6 +313,7 @@ struct steam_device {
u16 rumble_left;
u16 rumble_right;
unsigned int sensor_timestamp_us;
+ struct work_struct unregister_work;
};
static int steam_recv_report(struct steam_device *steam,
@@ -1050,10 +1051,10 @@ static void steam_mode_switch_cb(struct work_struct *work)
struct steam_device, mode_switch);
unsigned long flags;
bool client_opened;
- steam->gamepad_mode = !steam->gamepad_mode;
if (!lizard_mode)
return;
+ steam->gamepad_mode = !steam->gamepad_mode;
if (steam->gamepad_mode)
steam_set_lizard_mode(steam, false);
else {
@@ -1072,6 +1073,31 @@ static void steam_mode_switch_cb(struct work_struct *work)
}
}
+static void steam_work_unregister_cb(struct work_struct *work)
+{
+ struct steam_device *steam = container_of(work, struct steam_device,
+ unregister_work);
+ unsigned long flags;
+ bool connected;
+ bool opened;
+
+ spin_lock_irqsave(&steam->lock, flags);
+ opened = steam->client_opened;
+ connected = steam->connected;
+ spin_unlock_irqrestore(&steam->lock, flags);
+
+ if (connected) {
+ if (opened) {
+ steam_sensors_unregister(steam);
+ steam_input_unregister(steam);
+ } else {
+ steam_set_lizard_mode(steam, lizard_mode);
+ steam_input_register(steam);
+ steam_sensors_register(steam);
+ }
+ }
+}
+
static bool steam_is_valve_interface(struct hid_device *hdev)
{
struct hid_report_enum *rep_enum;
@@ -1117,8 +1143,7 @@ static int steam_client_ll_open(struct hid_device *hdev)
steam->client_opened++;
spin_unlock_irqrestore(&steam->lock, flags);
- steam_sensors_unregister(steam);
- steam_input_unregister(steam);
+ schedule_work(&steam->unregister_work);
return 0;
}
@@ -1135,11 +1160,7 @@ static void steam_client_ll_close(struct hid_device *hdev)
connected = steam->connected && !steam->client_opened;
spin_unlock_irqrestore(&steam->lock, flags);
- if (connected) {
- steam_set_lizard_mode(steam, lizard_mode);
- steam_input_register(steam);
- steam_sensors_register(steam);
- }
+ schedule_work(&steam->unregister_work);
}
static int steam_client_ll_raw_request(struct hid_device *hdev,
@@ -1231,6 +1252,7 @@ static int steam_probe(struct hid_device *hdev,
INIT_LIST_HEAD(&steam->list);
INIT_WORK(&steam->rumble_work, steam_haptic_rumble_cb);
steam->sensor_timestamp_us = 0;
+ INIT_WORK(&steam->unregister_work, steam_work_unregister_cb);
/*
* With the real steam controller interface, do not connect hidraw.
@@ -1291,6 +1313,7 @@ err_cancel_work:
cancel_work_sync(&steam->work_connect);
cancel_delayed_work_sync(&steam->mode_switch);
cancel_work_sync(&steam->rumble_work);
+ cancel_work_sync(&steam->unregister_work);
return ret;
}
@@ -1307,6 +1330,7 @@ static void steam_remove(struct hid_device *hdev)
cancel_delayed_work_sync(&steam->mode_switch);
cancel_work_sync(&steam->work_connect);
cancel_work_sync(&steam->rumble_work);
+ cancel_work_sync(&steam->unregister_work);
hid_destroy_device(steam->client_hdev);
steam->client_hdev = NULL;
steam->client_opened = 0;
@@ -1593,13 +1617,13 @@ static void steam_do_deck_input_event(struct steam_device *steam,
if (!(b9 & BIT(6)) && steam->did_mode_switch) {
steam->did_mode_switch = false;
- cancel_delayed_work_sync(&steam->mode_switch);
+ cancel_delayed_work(&steam->mode_switch);
} else if (!steam->client_opened && (b9 & BIT(6)) && !steam->did_mode_switch) {
steam->did_mode_switch = true;
schedule_delayed_work(&steam->mode_switch, 45 * HZ / 100);
}
- if (!steam->gamepad_mode)
+ if (!steam->gamepad_mode && lizard_mode)
return;
lpad_touched = b10 & BIT(3);
@@ -1669,7 +1693,7 @@ static void steam_do_deck_sensors_event(struct steam_device *steam,
*/
steam->sensor_timestamp_us += 4000;
- if (!steam->gamepad_mode)
+ if (!steam->gamepad_mode && lizard_mode)
return;
input_event(sensors, EV_MSC, MSC_TIMESTAMP, steam->sensor_timestamp_us);
diff --git a/drivers/hid/hid-thrustmaster.c b/drivers/hid/hid-thrustmaster.c
index 6c3e758bbb09..3b81468a1df2 100644
--- a/drivers/hid/hid-thrustmaster.c
+++ b/drivers/hid/hid-thrustmaster.c
@@ -171,7 +171,7 @@ static void thrustmaster_interrupts(struct hid_device *hdev)
b_ep = ep->desc.bEndpointAddress;
/* Are the expected endpoints present? */
- u8 ep_addr[1] = {b_ep};
+ u8 ep_addr[2] = {b_ep, 0};
if (!usb_check_int_endpoints(usbif, ep_addr)) {
hid_err(hdev, "Unexpected non-int endpoint\n");
diff --git a/drivers/hid/hid-topre.c b/drivers/hid/hid-topre.c
index 848361f6225d..ccedf8721722 100644
--- a/drivers/hid/hid-topre.c
+++ b/drivers/hid/hid-topre.c
@@ -29,6 +29,11 @@ static const __u8 *topre_report_fixup(struct hid_device *hdev, __u8 *rdesc,
hid_info(hdev,
"fixing up Topre REALFORCE keyboard report descriptor\n");
rdesc[72] = 0x02;
+ } else if (*rsize >= 106 && rdesc[28] == 0x29 && rdesc[29] == 0xe7 &&
+ rdesc[30] == 0x81 && rdesc[31] == 0x00) {
+ hid_info(hdev,
+ "fixing up Topre REALFORCE keyboard report descriptor\n");
+ rdesc[31] = 0x02;
}
return rdesc;
}
@@ -38,6 +43,8 @@ static const struct hid_device_id topre_id_table[] = {
USB_DEVICE_ID_TOPRE_REALFORCE_R2_108) },
{ HID_USB_DEVICE(USB_VENDOR_ID_TOPRE,
USB_DEVICE_ID_TOPRE_REALFORCE_R2_87) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_TOPRE,
+ USB_DEVICE_ID_TOPRE_REALFORCE_R3S_87) },
{ }
};
MODULE_DEVICE_TABLE(hid, topre_id_table);
diff --git a/drivers/hid/hid-winwing.c b/drivers/hid/hid-winwing.c
index 831b760c66ea..d4afbbd27807 100644
--- a/drivers/hid/hid-winwing.c
+++ b/drivers/hid/hid-winwing.c
@@ -106,6 +106,8 @@ static int winwing_init_led(struct hid_device *hdev,
"%s::%s",
dev_name(&input->dev),
info->led_name);
+ if (!led->cdev.name)
+ return -ENOMEM;
ret = devm_led_classdev_register(&hdev->dev, &led->cdev);
if (ret)
diff --git a/drivers/hid/i2c-hid/Kconfig b/drivers/hid/i2c-hid/Kconfig
index ef7c595c9403..e8d51f410cc1 100644
--- a/drivers/hid/i2c-hid/Kconfig
+++ b/drivers/hid/i2c-hid/Kconfig
@@ -2,7 +2,7 @@
menuconfig I2C_HID
tristate "I2C HID support"
default y
- depends on I2C && INPUT && HID
+ depends on I2C
if I2C_HID
diff --git a/drivers/hid/intel-ish-hid/Kconfig b/drivers/hid/intel-ish-hid/Kconfig
index 253dc10d35ef..568c8688784e 100644
--- a/drivers/hid/intel-ish-hid/Kconfig
+++ b/drivers/hid/intel-ish-hid/Kconfig
@@ -6,7 +6,6 @@ config INTEL_ISH_HID
tristate "Intel Integrated Sensor Hub"
default n
depends on X86
- depends on HID
help
The Integrated Sensor Hub (ISH) enables the ability to offload
sensor polling and algorithm processing to a dedicated low power
diff --git a/drivers/hid/intel-ish-hid/ipc/hw-ish.h b/drivers/hid/intel-ish-hid/ipc/hw-ish.h
index cdd80c653918..07e90d51f073 100644
--- a/drivers/hid/intel-ish-hid/ipc/hw-ish.h
+++ b/drivers/hid/intel-ish-hid/ipc/hw-ish.h
@@ -36,6 +36,8 @@
#define PCI_DEVICE_ID_INTEL_ISH_ARL_H 0x7745
#define PCI_DEVICE_ID_INTEL_ISH_ARL_S 0x7F78
#define PCI_DEVICE_ID_INTEL_ISH_LNL_M 0xA845
+#define PCI_DEVICE_ID_INTEL_ISH_PTL_H 0xE345
+#define PCI_DEVICE_ID_INTEL_ISH_PTL_P 0xE445
#define REVISION_ID_CHT_A0 0x6
#define REVISION_ID_CHT_Ax_SI 0x0
diff --git a/drivers/hid/intel-ish-hid/ipc/ipc.c b/drivers/hid/intel-ish-hid/ipc/ipc.c
index 3cd53fc80634..4c861119e97a 100644
--- a/drivers/hid/intel-ish-hid/ipc/ipc.c
+++ b/drivers/hid/intel-ish-hid/ipc/ipc.c
@@ -517,6 +517,10 @@ static int ish_fw_reset_handler(struct ishtp_device *dev)
/* ISH FW is dead */
if (!ish_is_input_ready(dev))
return -EPIPE;
+
+ /* Send clock sync at once after reset */
+ ishtp_dev->prev_sync = 0;
+
/*
* Set HOST2ISH.ILUP. Apparently we need this BEFORE sending
* RESET_NOTIFY_ACK - FW will be checking for it
@@ -577,15 +581,14 @@ static void fw_reset_work_fn(struct work_struct *work)
*/
static void _ish_sync_fw_clock(struct ishtp_device *dev)
{
- static unsigned long prev_sync;
- uint64_t usec;
+ struct ipc_time_update_msg time = {};
- if (prev_sync && time_before(jiffies, prev_sync + 20 * HZ))
+ if (dev->prev_sync && time_before(jiffies, dev->prev_sync + 20 * HZ))
return;
- prev_sync = jiffies;
- usec = ktime_to_us(ktime_get_boottime());
- ipc_send_mng_msg(dev, MNG_SYNC_FW_CLOCK, &usec, sizeof(uint64_t));
+ dev->prev_sync = jiffies;
+ /* The fields of time would be updated while sending message */
+ ipc_send_mng_msg(dev, MNG_SYNC_FW_CLOCK, &time, sizeof(time));
}
/**
diff --git a/drivers/hid/intel-ish-hid/ipc/pci-ish.c b/drivers/hid/intel-ish-hid/ipc/pci-ish.c
index 9e2401291a2f..ff0fc8010072 100644
--- a/drivers/hid/intel-ish-hid/ipc/pci-ish.c
+++ b/drivers/hid/intel-ish-hid/ipc/pci-ish.c
@@ -26,9 +26,11 @@
enum ishtp_driver_data_index {
ISHTP_DRIVER_DATA_NONE,
ISHTP_DRIVER_DATA_LNL_M,
+ ISHTP_DRIVER_DATA_PTL,
};
#define ISH_FW_GEN_LNL_M "lnlm"
+#define ISH_FW_GEN_PTL "ptl"
#define ISH_FIRMWARE_PATH(gen) "intel/ish/ish_" gen ".bin"
#define ISH_FIRMWARE_PATH_ALL "intel/ish/ish_*.bin"
@@ -37,6 +39,9 @@ static struct ishtp_driver_data ishtp_driver_data[] = {
[ISHTP_DRIVER_DATA_LNL_M] = {
.fw_generation = ISH_FW_GEN_LNL_M,
},
+ [ISHTP_DRIVER_DATA_PTL] = {
+ .fw_generation = ISH_FW_GEN_PTL,
+ },
};
static const struct pci_device_id ish_pci_tbl[] = {
@@ -63,6 +68,8 @@ static const struct pci_device_id ish_pci_tbl[] = {
{PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_ISH_ARL_H)},
{PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_ISH_ARL_S)},
{PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_ISH_LNL_M), .driver_data = ISHTP_DRIVER_DATA_LNL_M},
+ {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_ISH_PTL_H), .driver_data = ISHTP_DRIVER_DATA_PTL},
+ {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_ISH_PTL_P), .driver_data = ISHTP_DRIVER_DATA_PTL},
{}
};
MODULE_DEVICE_TABLE(pci, ish_pci_tbl);
diff --git a/drivers/hid/intel-ish-hid/ishtp/ishtp-dev.h b/drivers/hid/intel-ish-hid/ishtp/ishtp-dev.h
index 44eddc411e97..ec9f6e87aaf2 100644
--- a/drivers/hid/intel-ish-hid/ishtp/ishtp-dev.h
+++ b/drivers/hid/intel-ish-hid/ishtp/ishtp-dev.h
@@ -253,6 +253,8 @@ struct ishtp_device {
unsigned int ipc_tx_cnt;
unsigned long long ipc_tx_bytes_cnt;
+ /* Time of the last clock sync */
+ unsigned long prev_sync;
const struct ishtp_hw_ops *ops;
size_t mtu;
uint32_t ishtp_msg_hdr;
diff --git a/drivers/hid/intel-thc-hid/Kconfig b/drivers/hid/intel-thc-hid/Kconfig
index 91ec84902db8..0351d1137607 100644
--- a/drivers/hid/intel-thc-hid/Kconfig
+++ b/drivers/hid/intel-thc-hid/Kconfig
@@ -7,7 +7,6 @@ menu "Intel THC HID Support"
config INTEL_THC_HID
tristate "Intel Touch Host Controller"
depends on ACPI
- select HID
help
THC (Touch Host Controller) is the name of the IP block in PCH that
interfaces with Touch Devices (ex: touchscreen, touchpad etc.). It
diff --git a/drivers/hid/surface-hid/Kconfig b/drivers/hid/surface-hid/Kconfig
index 7ce9b5d641eb..d0cfd0d29926 100644
--- a/drivers/hid/surface-hid/Kconfig
+++ b/drivers/hid/surface-hid/Kconfig
@@ -1,7 +1,6 @@
# SPDX-License-Identifier: GPL-2.0+
menu "Surface System Aggregator Module HID support"
depends on SURFACE_AGGREGATOR
- depends on INPUT
config SURFACE_HID
tristate "HID transport driver for Surface System Aggregator Module"
@@ -39,4 +38,3 @@ endmenu
config SURFACE_HID_CORE
tristate
- select HID
diff --git a/drivers/hid/usbhid/Kconfig b/drivers/hid/usbhid/Kconfig
index 7c2032f7f44d..f3194767a45e 100644
--- a/drivers/hid/usbhid/Kconfig
+++ b/drivers/hid/usbhid/Kconfig
@@ -5,8 +5,7 @@ menu "USB HID support"
config USB_HID
tristate "USB HID transport layer"
default y
- depends on USB && INPUT
- select HID
+ depends on HID
help
Say Y here if you want to connect USB keyboards,
mice, joysticks, graphic tablets, or any other HID based devices
diff --git a/drivers/i2c/i2c-core-base.c b/drivers/i2c/i2c-core-base.c
index 5546184df05f..35a221e2c11c 100644
--- a/drivers/i2c/i2c-core-base.c
+++ b/drivers/i2c/i2c-core-base.c
@@ -1300,12 +1300,14 @@ new_device_store(struct device *dev, struct device_attribute *attr,
info.flags |= I2C_CLIENT_SLAVE;
}
- info.flags |= I2C_CLIENT_USER;
-
client = i2c_new_client_device(adap, &info);
if (IS_ERR(client))
return PTR_ERR(client);
+ /* Keep track of the added device */
+ mutex_lock(&adap->userspace_clients_lock);
+ list_add_tail(&client->detected, &adap->userspace_clients);
+ mutex_unlock(&adap->userspace_clients_lock);
dev_info(dev, "%s: Instantiated device %s at 0x%02hx\n", "new_device",
info.type, info.addr);
@@ -1313,15 +1315,6 @@ new_device_store(struct device *dev, struct device_attribute *attr,
}
static DEVICE_ATTR_WO(new_device);
-static int __i2c_find_user_addr(struct device *dev, const void *addrp)
-{
- struct i2c_client *client = i2c_verify_client(dev);
- unsigned short addr = *(unsigned short *)addrp;
-
- return client && client->flags & I2C_CLIENT_USER &&
- i2c_encode_flags_to_addr(client) == addr;
-}
-
/*
* And of course let the users delete the devices they instantiated, if
* they got it wrong. This interface can only be used to delete devices
@@ -1336,7 +1329,7 @@ delete_device_store(struct device *dev, struct device_attribute *attr,
const char *buf, size_t count)
{
struct i2c_adapter *adap = to_i2c_adapter(dev);
- struct device *child_dev;
+ struct i2c_client *client, *next;
unsigned short addr;
char end;
int res;
@@ -1352,19 +1345,28 @@ delete_device_store(struct device *dev, struct device_attribute *attr,
return -EINVAL;
}
- mutex_lock(&core_lock);
/* Make sure the device was added through sysfs */
- child_dev = device_find_child(&adap->dev, &addr, __i2c_find_user_addr);
- if (child_dev) {
- i2c_unregister_device(i2c_verify_client(child_dev));
- put_device(child_dev);
- } else {
- dev_err(dev, "Can't find userspace-created device at %#x\n", addr);
- count = -ENOENT;
+ res = -ENOENT;
+ mutex_lock_nested(&adap->userspace_clients_lock,
+ i2c_adapter_depth(adap));
+ list_for_each_entry_safe(client, next, &adap->userspace_clients,
+ detected) {
+ if (i2c_encode_flags_to_addr(client) == addr) {
+ dev_info(dev, "%s: Deleting device %s at 0x%02hx\n",
+ "delete_device", client->name, client->addr);
+
+ list_del(&client->detected);
+ i2c_unregister_device(client);
+ res = count;
+ break;
+ }
}
- mutex_unlock(&core_lock);
+ mutex_unlock(&adap->userspace_clients_lock);
- return count;
+ if (res < 0)
+ dev_err(dev, "%s: Can't find device in list\n",
+ "delete_device");
+ return res;
}
static DEVICE_ATTR_IGNORE_LOCKDEP(delete_device, S_IWUSR, NULL,
delete_device_store);
@@ -1535,6 +1537,8 @@ static int i2c_register_adapter(struct i2c_adapter *adap)
adap->locked_flags = 0;
rt_mutex_init(&adap->bus_lock);
rt_mutex_init(&adap->mux_lock);
+ mutex_init(&adap->userspace_clients_lock);
+ INIT_LIST_HEAD(&adap->userspace_clients);
/* Set default timeout to 1 second if not already set */
if (adap->timeout == 0)
@@ -1700,6 +1704,23 @@ int i2c_add_numbered_adapter(struct i2c_adapter *adap)
}
EXPORT_SYMBOL_GPL(i2c_add_numbered_adapter);
+static void i2c_do_del_adapter(struct i2c_driver *driver,
+ struct i2c_adapter *adapter)
+{
+ struct i2c_client *client, *_n;
+
+ /* Remove the devices we created ourselves as the result of hardware
+ * probing (using a driver's detect method) */
+ list_for_each_entry_safe(client, _n, &driver->clients, detected) {
+ if (client->adapter == adapter) {
+ dev_dbg(&adapter->dev, "Removing %s at 0x%x\n",
+ client->name, client->addr);
+ list_del(&client->detected);
+ i2c_unregister_device(client);
+ }
+ }
+}
+
static int __unregister_client(struct device *dev, void *dummy)
{
struct i2c_client *client = i2c_verify_client(dev);
@@ -1715,6 +1736,12 @@ static int __unregister_dummy(struct device *dev, void *dummy)
return 0;
}
+static int __process_removed_adapter(struct device_driver *d, void *data)
+{
+ i2c_do_del_adapter(to_i2c_driver(d), data);
+ return 0;
+}
+
/**
* i2c_del_adapter - unregister I2C adapter
* @adap: the adapter being unregistered
@@ -1726,6 +1753,7 @@ static int __unregister_dummy(struct device *dev, void *dummy)
void i2c_del_adapter(struct i2c_adapter *adap)
{
struct i2c_adapter *found;
+ struct i2c_client *client, *next;
/* First make sure that this adapter was ever added */
mutex_lock(&core_lock);
@@ -1737,16 +1765,31 @@ void i2c_del_adapter(struct i2c_adapter *adap)
}
i2c_acpi_remove_space_handler(adap);
+ /* Tell drivers about this removal */
+ mutex_lock(&core_lock);
+ bus_for_each_drv(&i2c_bus_type, NULL, adap,
+ __process_removed_adapter);
+ mutex_unlock(&core_lock);
+
+ /* Remove devices instantiated from sysfs */
+ mutex_lock_nested(&adap->userspace_clients_lock,
+ i2c_adapter_depth(adap));
+ list_for_each_entry_safe(client, next, &adap->userspace_clients,
+ detected) {
+ dev_dbg(&adap->dev, "Removing %s at 0x%x\n", client->name,
+ client->addr);
+ list_del(&client->detected);
+ i2c_unregister_device(client);
+ }
+ mutex_unlock(&adap->userspace_clients_lock);
/* Detach any active clients. This can't fail, thus we do not
* check the returned value. This is a two-pass process, because
* we can't remove the dummy devices during the first pass: they
* could have been instantiated by real devices wishing to clean
* them up properly, so we give them a chance to do that first. */
- mutex_lock(&core_lock);
device_for_each_child(&adap->dev, NULL, __unregister_client);
device_for_each_child(&adap->dev, NULL, __unregister_dummy);
- mutex_unlock(&core_lock);
/* device name is gone after device_unregister */
dev_dbg(&adap->dev, "adapter [%s] unregistered\n", adap->name);
@@ -1966,6 +2009,7 @@ int i2c_register_driver(struct module *owner, struct i2c_driver *driver)
/* add the driver to the list of i2c drivers in the driver core */
driver->driver.owner = owner;
driver->driver.bus = &i2c_bus_type;
+ INIT_LIST_HEAD(&driver->clients);
/* When registration returns, the driver core
* will have called probe() for all matching-but-unbound devices.
@@ -1983,13 +2027,10 @@ int i2c_register_driver(struct module *owner, struct i2c_driver *driver)
}
EXPORT_SYMBOL(i2c_register_driver);
-static int __i2c_unregister_detected_client(struct device *dev, void *argp)
+static int __process_removed_driver(struct device *dev, void *data)
{
- struct i2c_client *client = i2c_verify_client(dev);
-
- if (client && client->flags & I2C_CLIENT_AUTO)
- i2c_unregister_device(client);
-
+ if (dev->type == &i2c_adapter_type)
+ i2c_do_del_adapter(data, to_i2c_adapter(dev));
return 0;
}
@@ -2000,12 +2041,7 @@ static int __i2c_unregister_detected_client(struct device *dev, void *argp)
*/
void i2c_del_driver(struct i2c_driver *driver)
{
- mutex_lock(&core_lock);
- /* Satisfy __must_check, function can't fail */
- if (driver_for_each_device(&driver->driver, NULL, NULL,
- __i2c_unregister_detected_client)) {
- }
- mutex_unlock(&core_lock);
+ i2c_for_each_dev(driver, __process_removed_driver);
driver_unregister(&driver->driver);
pr_debug("driver [%s] unregistered\n", driver->driver.name);
@@ -2432,7 +2468,6 @@ static int i2c_detect_address(struct i2c_client *temp_client,
/* Finally call the custom detection function */
memset(&info, 0, sizeof(struct i2c_board_info));
info.addr = addr;
- info.flags = I2C_CLIENT_AUTO;
err = driver->detect(temp_client, &info);
if (err) {
/* -ENODEV is returned if the detection fails. We catch it
@@ -2459,7 +2494,9 @@ static int i2c_detect_address(struct i2c_client *temp_client,
dev_dbg(&adapter->dev, "Creating %s at 0x%02x\n",
info.type, info.addr);
client = i2c_new_client_device(adapter, &info);
- if (IS_ERR(client))
+ if (!IS_ERR(client))
+ list_add_tail(&client->detected, &driver->clients);
+ else
dev_err(&adapter->dev, "Failed creating %s at 0x%02x\n",
info.type, info.addr);
}
diff --git a/drivers/irqchip/irq-partition-percpu.c b/drivers/irqchip/irq-partition-percpu.c
index 8e76d2913e6b..4441ffe149ea 100644
--- a/drivers/irqchip/irq-partition-percpu.c
+++ b/drivers/irqchip/irq-partition-percpu.c
@@ -98,7 +98,7 @@ static void partition_irq_print_chip(struct irq_data *d, struct seq_file *p)
struct irq_chip *chip = irq_desc_get_chip(part->chained_desc);
struct irq_data *data = irq_desc_get_irq_data(part->chained_desc);
- seq_printf(p, " %5s-%lu", chip->name, data->hwirq);
+ seq_printf(p, "%5s-%lu", chip->name, data->hwirq);
}
static struct irq_chip partition_irq_chip = {
diff --git a/drivers/md/md-linear.c b/drivers/md/md-linear.c
index a382929ce7ba..369aed044b40 100644
--- a/drivers/md/md-linear.c
+++ b/drivers/md/md-linear.c
@@ -76,10 +76,8 @@ static int linear_set_limits(struct mddev *mddev)
lim.max_write_zeroes_sectors = mddev->chunk_sectors;
lim.io_min = mddev->chunk_sectors << 9;
err = mddev_stack_rdev_limits(mddev, &lim, MDDEV_STACK_INTEGRITY);
- if (err) {
- queue_limits_cancel_update(mddev->gendisk->queue);
+ if (err)
return err;
- }
return queue_limits_set(mddev->gendisk->queue, &lim);
}
diff --git a/drivers/mfd/syscon.c b/drivers/mfd/syscon.c
index 226915ca3c93..aa4a9940b569 100644
--- a/drivers/mfd/syscon.c
+++ b/drivers/mfd/syscon.c
@@ -159,6 +159,7 @@ err_regmap:
}
static struct regmap *device_node_get_regmap(struct device_node *np,
+ bool create_regmap,
bool check_res)
{
struct syscon *entry, *syscon = NULL;
@@ -172,7 +173,7 @@ static struct regmap *device_node_get_regmap(struct device_node *np,
}
if (!syscon) {
- if (of_device_is_compatible(np, "syscon"))
+ if (create_regmap)
syscon = of_syscon_register(np, check_res);
else
syscon = ERR_PTR(-EINVAL);
@@ -233,15 +234,37 @@ err_unlock:
}
EXPORT_SYMBOL_GPL(of_syscon_register_regmap);
+/**
+ * device_node_to_regmap() - Get or create a regmap for specified device node
+ * @np: Device tree node
+ *
+ * Get a regmap for the specified device node. If there's not an existing
+ * regmap, then one is instantiated. This function should not be used if the
+ * device node has a custom regmap driver or has resources (clocks, resets) to
+ * be managed. Use syscon_node_to_regmap() instead for those cases.
+ *
+ * Return: regmap ptr on success, negative error code on failure.
+ */
struct regmap *device_node_to_regmap(struct device_node *np)
{
- return device_node_get_regmap(np, false);
+ return device_node_get_regmap(np, true, false);
}
EXPORT_SYMBOL_GPL(device_node_to_regmap);
+/**
+ * syscon_node_to_regmap() - Get or create a regmap for specified syscon device node
+ * @np: Device tree node
+ *
+ * Get a regmap for the specified device node. If there's not an existing
+ * regmap, then one is instantiated if the node is a generic "syscon". This
+ * function is safe to use for a syscon registered with
+ * of_syscon_register_regmap().
+ *
+ * Return: regmap ptr on success, negative error code on failure.
+ */
struct regmap *syscon_node_to_regmap(struct device_node *np)
{
- return device_node_get_regmap(np, true);
+ return device_node_get_regmap(np, of_device_is_compatible(np, "syscon"), true);
}
EXPORT_SYMBOL_GPL(syscon_node_to_regmap);
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 40046770f1bf..818d4e49aab5 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -1700,7 +1700,13 @@ int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count)
status = nvme_set_features(ctrl, NVME_FEAT_NUM_QUEUES, q_count, NULL, 0,
&result);
- if (status < 0)
+
+ /*
+ * It's either a kernel error or the host observed a connection
+ * lost. In either case it's not possible communicate with the
+ * controller and thus enter the error code path.
+ */
+ if (status < 0 || status == NVME_SC_HOST_PATH_ERROR)
return status;
/*
diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index 094be164ffdc..f4f1866fbd5b 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -781,11 +781,19 @@ restart:
static void
nvme_fc_ctrl_connectivity_loss(struct nvme_fc_ctrl *ctrl)
{
+ enum nvme_ctrl_state state;
+ unsigned long flags;
+
dev_info(ctrl->ctrl.device,
"NVME-FC{%d}: controller connectivity lost. Awaiting "
"Reconnect", ctrl->cnum);
- switch (nvme_ctrl_state(&ctrl->ctrl)) {
+ spin_lock_irqsave(&ctrl->lock, flags);
+ set_bit(ASSOC_FAILED, &ctrl->flags);
+ state = nvme_ctrl_state(&ctrl->ctrl);
+ spin_unlock_irqrestore(&ctrl->lock, flags);
+
+ switch (state) {
case NVME_CTRL_NEW:
case NVME_CTRL_LIVE:
/*
@@ -2079,7 +2087,8 @@ done:
nvme_fc_complete_rq(rq);
check_error:
- if (terminate_assoc && ctrl->ctrl.state != NVME_CTRL_RESETTING)
+ if (terminate_assoc &&
+ nvme_ctrl_state(&ctrl->ctrl) != NVME_CTRL_RESETTING)
queue_work(nvme_reset_wq, &ctrl->ioerr_work);
}
@@ -2533,6 +2542,8 @@ __nvme_fc_abort_outstanding_ios(struct nvme_fc_ctrl *ctrl, bool start_queues)
static void
nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg)
{
+ enum nvme_ctrl_state state = nvme_ctrl_state(&ctrl->ctrl);
+
/*
* if an error (io timeout, etc) while (re)connecting, the remote
* port requested terminating of the association (disconnect_ls)
@@ -2540,9 +2551,8 @@ nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg)
* the controller. Abort any ios on the association and let the
* create_association error path resolve things.
*/
- if (ctrl->ctrl.state == NVME_CTRL_CONNECTING) {
+ if (state == NVME_CTRL_CONNECTING) {
__nvme_fc_abort_outstanding_ios(ctrl, true);
- set_bit(ASSOC_FAILED, &ctrl->flags);
dev_warn(ctrl->ctrl.device,
"NVME-FC{%d}: transport error during (re)connect\n",
ctrl->cnum);
@@ -2550,7 +2560,7 @@ nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg)
}
/* Otherwise, only proceed if in LIVE state - e.g. on first error */
- if (ctrl->ctrl.state != NVME_CTRL_LIVE)
+ if (state != NVME_CTRL_LIVE)
return;
dev_warn(ctrl->ctrl.device,
@@ -3167,12 +3177,18 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
else
ret = nvme_fc_recreate_io_queues(ctrl);
}
- if (!ret && test_bit(ASSOC_FAILED, &ctrl->flags))
- ret = -EIO;
if (ret)
goto out_term_aen_ops;
- changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
+ spin_lock_irqsave(&ctrl->lock, flags);
+ if (!test_bit(ASSOC_FAILED, &ctrl->flags))
+ changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
+ else
+ ret = -EIO;
+ spin_unlock_irqrestore(&ctrl->lock, flags);
+
+ if (ret)
+ goto out_term_aen_ops;
ctrl->ctrl.nr_reconnects = 0;
@@ -3578,8 +3594,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
list_add_tail(&ctrl->ctrl_list, &rport->ctrl_list);
spin_unlock_irqrestore(&rport->lock, flags);
- if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RESETTING) ||
- !nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING)) {
+ if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING)) {
dev_err(ctrl->ctrl.device,
"NVME-FC{%d}: failed to init ctrl state\n", ctrl->cnum);
goto fail_ctrl;
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 278bed4e35bb..9197a5b173fd 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -2153,14 +2153,6 @@ static int nvme_alloc_host_mem_multi(struct nvme_dev *dev, u64 preferred,
return 0;
out_free_bufs:
- while (--i >= 0) {
- size_t size = le32_to_cpu(descs[i].size) * NVME_CTRL_PAGE_SIZE;
-
- dma_free_attrs(dev->dev, size, bufs[i],
- le64_to_cpu(descs[i].addr),
- DMA_ATTR_NO_KERNEL_MAPPING | DMA_ATTR_NO_WARN);
- }
-
kfree(bufs);
out_free_descs:
dma_free_coherent(dev->dev, descs_size, descs, descs_dma);
@@ -3147,7 +3139,9 @@ static unsigned long check_vendor_combination_bug(struct pci_dev *pdev)
* because of high power consumption (> 2 Watt) in s2idle
* sleep. Only some boards with Intel CPU are affected.
*/
- if (dmi_match(DMI_BOARD_NAME, "GMxPXxx") ||
+ if (dmi_match(DMI_BOARD_NAME, "DN50Z-140HC-YD") ||
+ dmi_match(DMI_BOARD_NAME, "GMxPXxx") ||
+ dmi_match(DMI_BOARD_NAME, "GXxMRXx") ||
dmi_match(DMI_BOARD_NAME, "PH4PG31") ||
dmi_match(DMI_BOARD_NAME, "PH4PRX1_PH6PRX1") ||
dmi_match(DMI_BOARD_NAME, "PH6PG01_PH6PG71"))
diff --git a/drivers/nvme/host/sysfs.c b/drivers/nvme/host/sysfs.c
index b68a9e5f1ea3..3a41b9ab0f13 100644
--- a/drivers/nvme/host/sysfs.c
+++ b/drivers/nvme/host/sysfs.c
@@ -792,7 +792,7 @@ static umode_t nvme_tls_attrs_are_visible(struct kobject *kobj,
return a->mode;
}
-const struct attribute_group nvme_tls_attrs_group = {
+static const struct attribute_group nvme_tls_attrs_group = {
.attrs = nvme_tls_attrs,
.is_visible = nvme_tls_attrs_are_visible,
};
diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c
index e670dc185a96..acc138bbf8f2 100644
--- a/drivers/nvme/target/admin-cmd.c
+++ b/drivers/nvme/target/admin-cmd.c
@@ -1068,6 +1068,7 @@ static void nvme_execute_identify_ns_nvm(struct nvmet_req *req)
goto out;
}
status = nvmet_copy_to_sgl(req, 0, id, sizeof(*id));
+ kfree(id);
out:
nvmet_req_complete(req, status);
}
diff --git a/drivers/nvme/target/fabrics-cmd.c b/drivers/nvme/target/fabrics-cmd.c
index a7ff05b3be29..eb406c90c167 100644
--- a/drivers/nvme/target/fabrics-cmd.c
+++ b/drivers/nvme/target/fabrics-cmd.c
@@ -287,7 +287,7 @@ static void nvmet_execute_admin_connect(struct nvmet_req *req)
args.subsysnqn = d->subsysnqn;
args.hostnqn = d->hostnqn;
args.hostid = &d->hostid;
- args.kato = c->kato;
+ args.kato = le32_to_cpu(c->kato);
ctrl = nvmet_alloc_ctrl(&args);
if (!ctrl)
diff --git a/drivers/nvme/target/io-cmd-bdev.c b/drivers/nvme/target/io-cmd-bdev.c
index c1f574fe3280..83be0657e6df 100644
--- a/drivers/nvme/target/io-cmd-bdev.c
+++ b/drivers/nvme/target/io-cmd-bdev.c
@@ -272,7 +272,7 @@ static void nvmet_bdev_execute_rw(struct nvmet_req *req)
iter_flags = SG_MITER_FROM_SG;
}
- if (req->cmd->rw.control & NVME_RW_LR)
+ if (req->cmd->rw.control & cpu_to_le16(NVME_RW_LR))
opf |= REQ_FAILFAST_DEV;
if (is_pci_p2pdma_page(sg_page(req->sg)))
diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h
index b540216c0c9a..4be8d22d2d8d 100644
--- a/drivers/nvme/target/nvmet.h
+++ b/drivers/nvme/target/nvmet.h
@@ -589,7 +589,7 @@ struct nvmet_alloc_ctrl_args {
const struct nvmet_fabrics_ops *ops;
struct device *p2p_client;
u32 kato;
- u32 result;
+ __le32 result;
u16 error_loc;
u16 status;
};
diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c
index e0bc90597dca..da3e7edcf49d 100644
--- a/drivers/pci/pcie/aspm.c
+++ b/drivers/pci/pcie/aspm.c
@@ -108,9 +108,6 @@ void pci_save_aspm_l1ss_state(struct pci_dev *pdev)
pci_read_config_dword(pdev, pdev->l1ss + PCI_L1SS_CTL2, cap++);
pci_read_config_dword(pdev, pdev->l1ss + PCI_L1SS_CTL1, cap++);
- if (parent->state_saved)
- return;
-
/*
* Save parent's L1 substate configuration so we have it for
* pci_restore_aspm_l1ss_state(pdev) to restore.
diff --git a/drivers/pci/tph.c b/drivers/pci/tph.c
index 1e604fbbda65..07de59ca2ebf 100644
--- a/drivers/pci/tph.c
+++ b/drivers/pci/tph.c
@@ -360,7 +360,7 @@ int pcie_tph_set_st_entry(struct pci_dev *pdev, unsigned int index, u16 tag)
return err;
}
- set_ctrl_reg_req_en(pdev, pdev->tph_mode);
+ set_ctrl_reg_req_en(pdev, pdev->tph_req_type);
pci_dbg(pdev, "set steering tag: %s table, index=%d, tag=%#04x\n",
(loc == PCI_TPH_LOC_MSIX) ? "MSI-X" : "ST", index, tag);
diff --git a/drivers/pinctrl/pinconf-generic.c b/drivers/pinctrl/pinconf-generic.c
index 0b13d7f17b32..42547f64453e 100644
--- a/drivers/pinctrl/pinconf-generic.c
+++ b/drivers/pinctrl/pinconf-generic.c
@@ -89,12 +89,12 @@ static void pinconf_generic_dump_one(struct pinctrl_dev *pctldev,
seq_puts(s, items[i].display);
/* Print unit if available */
if (items[i].has_arg) {
- seq_printf(s, " (0x%x",
- pinconf_to_config_argument(config));
+ u32 val = pinconf_to_config_argument(config);
+
if (items[i].format)
- seq_printf(s, " %s)", items[i].format);
+ seq_printf(s, " (%u %s)", val, items[i].format);
else
- seq_puts(s, ")");
+ seq_printf(s, " (0x%x)", val);
}
}
}
diff --git a/drivers/pinctrl/pinctrl-cy8c95x0.c b/drivers/pinctrl/pinctrl-cy8c95x0.c
index 0d6c2027d4c1..d73004b4a45e 100644
--- a/drivers/pinctrl/pinctrl-cy8c95x0.c
+++ b/drivers/pinctrl/pinctrl-cy8c95x0.c
@@ -42,7 +42,7 @@
#define CY8C95X0_PORTSEL 0x18
/* Port settings, write PORTSEL first */
#define CY8C95X0_INTMASK 0x19
-#define CY8C95X0_PWMSEL 0x1A
+#define CY8C95X0_SELPWM 0x1A
#define CY8C95X0_INVERT 0x1B
#define CY8C95X0_DIRECTION 0x1C
/* Drive mode register change state on writing '1' */
@@ -328,14 +328,14 @@ static int cypress_get_pin_mask(struct cy8c95x0_pinctrl *chip, unsigned int pin)
static bool cy8c95x0_readable_register(struct device *dev, unsigned int reg)
{
/*
- * Only 12 registers are present per port (see Table 6 in the
- * datasheet).
+ * Only 12 registers are present per port (see Table 6 in the datasheet).
*/
- if (reg >= CY8C95X0_VIRTUAL && (reg % MUXED_STRIDE) < 12)
- return true;
+ if (reg >= CY8C95X0_VIRTUAL && (reg % MUXED_STRIDE) >= 12)
+ return false;
switch (reg) {
case 0x24 ... 0x27:
+ case 0x31 ... 0x3f:
return false;
default:
return true;
@@ -344,8 +344,11 @@ static bool cy8c95x0_readable_register(struct device *dev, unsigned int reg)
static bool cy8c95x0_writeable_register(struct device *dev, unsigned int reg)
{
- if (reg >= CY8C95X0_VIRTUAL)
- return true;
+ /*
+ * Only 12 registers are present per port (see Table 6 in the datasheet).
+ */
+ if (reg >= CY8C95X0_VIRTUAL && (reg % MUXED_STRIDE) >= 12)
+ return false;
switch (reg) {
case CY8C95X0_INPUT_(0) ... CY8C95X0_INPUT_(7):
@@ -353,6 +356,7 @@ static bool cy8c95x0_writeable_register(struct device *dev, unsigned int reg)
case CY8C95X0_DEVID:
return false;
case 0x24 ... 0x27:
+ case 0x31 ... 0x3f:
return false;
default:
return true;
@@ -365,8 +369,8 @@ static bool cy8c95x0_volatile_register(struct device *dev, unsigned int reg)
case CY8C95X0_INPUT_(0) ... CY8C95X0_INPUT_(7):
case CY8C95X0_INTSTATUS_(0) ... CY8C95X0_INTSTATUS_(7):
case CY8C95X0_INTMASK:
+ case CY8C95X0_SELPWM:
case CY8C95X0_INVERT:
- case CY8C95X0_PWMSEL:
case CY8C95X0_DIRECTION:
case CY8C95X0_DRV_PU:
case CY8C95X0_DRV_PD:
@@ -395,7 +399,7 @@ static bool cy8c95x0_muxed_register(unsigned int reg)
{
switch (reg) {
case CY8C95X0_INTMASK:
- case CY8C95X0_PWMSEL:
+ case CY8C95X0_SELPWM:
case CY8C95X0_INVERT:
case CY8C95X0_DIRECTION:
case CY8C95X0_DRV_PU:
@@ -466,7 +470,11 @@ static const struct regmap_config cy8c9520_i2c_regmap = {
.max_register = 0, /* Updated at runtime */
.num_reg_defaults_raw = 0, /* Updated at runtime */
.use_single_read = true, /* Workaround for regcache bug */
+#if IS_ENABLED(CONFIG_DEBUG_PINCTRL)
+ .disable_locking = false,
+#else
.disable_locking = true,
+#endif
};
static inline int cy8c95x0_regmap_update_bits_base(struct cy8c95x0_pinctrl *chip,
@@ -789,7 +797,7 @@ static int cy8c95x0_gpio_get_pincfg(struct cy8c95x0_pinctrl *chip,
reg = CY8C95X0_DIRECTION;
break;
case PIN_CONFIG_MODE_PWM:
- reg = CY8C95X0_PWMSEL;
+ reg = CY8C95X0_SELPWM;
break;
case PIN_CONFIG_OUTPUT:
reg = CY8C95X0_OUTPUT;
@@ -868,7 +876,7 @@ static int cy8c95x0_gpio_set_pincfg(struct cy8c95x0_pinctrl *chip,
reg = CY8C95X0_DRV_PP_FAST;
break;
case PIN_CONFIG_MODE_PWM:
- reg = CY8C95X0_PWMSEL;
+ reg = CY8C95X0_SELPWM;
break;
case PIN_CONFIG_OUTPUT_ENABLE:
return cy8c95x0_pinmux_direction(chip, off, !arg);
@@ -1153,7 +1161,7 @@ static void cy8c95x0_pin_dbg_show(struct pinctrl_dev *pctldev, struct seq_file *
bitmap_zero(mask, MAX_LINE);
__set_bit(pin, mask);
- if (cy8c95x0_read_regs_mask(chip, CY8C95X0_PWMSEL, pwm, mask)) {
+ if (cy8c95x0_read_regs_mask(chip, CY8C95X0_SELPWM, pwm, mask)) {
seq_puts(s, "not available");
return;
}
@@ -1198,7 +1206,7 @@ static int cy8c95x0_set_mode(struct cy8c95x0_pinctrl *chip, unsigned int off, bo
u8 port = cypress_get_port(chip, off);
u8 bit = cypress_get_pin_mask(chip, off);
- return cy8c95x0_regmap_write_bits(chip, CY8C95X0_PWMSEL, port, bit, mode ? bit : 0);
+ return cy8c95x0_regmap_write_bits(chip, CY8C95X0_SELPWM, port, bit, mode ? bit : 0);
}
static int cy8c95x0_pinmux_mode(struct cy8c95x0_pinctrl *chip,
@@ -1347,7 +1355,7 @@ static int cy8c95x0_irq_setup(struct cy8c95x0_pinctrl *chip, int irq)
ret = devm_request_threaded_irq(chip->dev, irq,
NULL, cy8c95x0_irq_handler,
- IRQF_ONESHOT | IRQF_SHARED | IRQF_TRIGGER_HIGH,
+ IRQF_ONESHOT | IRQF_SHARED,
dev_name(chip->dev), chip);
if (ret) {
dev_err(chip->dev, "failed to request irq %d\n", irq);
@@ -1438,15 +1446,15 @@ static int cy8c95x0_probe(struct i2c_client *client)
switch (chip->tpin) {
case 20:
strscpy(chip->name, cy8c95x0_id[0].name);
- regmap_range_conf.range_max = CY8C95X0_VIRTUAL + 3 * MUXED_STRIDE;
+ regmap_range_conf.range_max = CY8C95X0_VIRTUAL + 3 * MUXED_STRIDE - 1;
break;
case 40:
strscpy(chip->name, cy8c95x0_id[1].name);
- regmap_range_conf.range_max = CY8C95X0_VIRTUAL + 6 * MUXED_STRIDE;
+ regmap_range_conf.range_max = CY8C95X0_VIRTUAL + 6 * MUXED_STRIDE - 1;
break;
case 60:
strscpy(chip->name, cy8c95x0_id[2].name);
- regmap_range_conf.range_max = CY8C95X0_VIRTUAL + 8 * MUXED_STRIDE;
+ regmap_range_conf.range_max = CY8C95X0_VIRTUAL + 8 * MUXED_STRIDE - 1;
break;
default:
return -ENODEV;
diff --git a/drivers/platform/x86/intel/int3472/discrete.c b/drivers/platform/x86/intel/int3472/discrete.c
index 31015ebe20d8..092252eb95a8 100644
--- a/drivers/platform/x86/intel/int3472/discrete.c
+++ b/drivers/platform/x86/intel/int3472/discrete.c
@@ -2,6 +2,7 @@
/* Author: Dan Scally <djrscally@gmail.com> */
#include <linux/acpi.h>
+#include <linux/array_size.h>
#include <linux/bitfield.h>
#include <linux/device.h>
#include <linux/gpio/consumer.h>
@@ -55,7 +56,7 @@ static void skl_int3472_log_sensor_module_name(struct int3472_discrete_device *i
static int skl_int3472_fill_gpiod_lookup(struct gpiod_lookup *table_entry,
struct acpi_resource_gpio *agpio,
- const char *func, u32 polarity)
+ const char *func, unsigned long gpio_flags)
{
char *path = agpio->resource_source.string_ptr;
struct acpi_device *adev;
@@ -70,14 +71,14 @@ static int skl_int3472_fill_gpiod_lookup(struct gpiod_lookup *table_entry,
if (!adev)
return -ENODEV;
- *table_entry = GPIO_LOOKUP(acpi_dev_name(adev), agpio->pin_table[0], func, polarity);
+ *table_entry = GPIO_LOOKUP(acpi_dev_name(adev), agpio->pin_table[0], func, gpio_flags);
return 0;
}
static int skl_int3472_map_gpio_to_sensor(struct int3472_discrete_device *int3472,
struct acpi_resource_gpio *agpio,
- const char *func, u32 polarity)
+ const char *func, unsigned long gpio_flags)
{
int ret;
@@ -87,7 +88,7 @@ static int skl_int3472_map_gpio_to_sensor(struct int3472_discrete_device *int347
}
ret = skl_int3472_fill_gpiod_lookup(&int3472->gpios.table[int3472->n_sensor_gpios],
- agpio, func, polarity);
+ agpio, func, gpio_flags);
if (ret)
return ret;
@@ -100,7 +101,7 @@ static int skl_int3472_map_gpio_to_sensor(struct int3472_discrete_device *int347
static struct gpio_desc *
skl_int3472_gpiod_get_from_temp_lookup(struct int3472_discrete_device *int3472,
struct acpi_resource_gpio *agpio,
- const char *func, u32 polarity)
+ const char *func, unsigned long gpio_flags)
{
struct gpio_desc *desc;
int ret;
@@ -111,7 +112,7 @@ skl_int3472_gpiod_get_from_temp_lookup(struct int3472_discrete_device *int3472,
return ERR_PTR(-ENOMEM);
lookup->dev_id = dev_name(int3472->dev);
- ret = skl_int3472_fill_gpiod_lookup(&lookup->table[0], agpio, func, polarity);
+ ret = skl_int3472_fill_gpiod_lookup(&lookup->table[0], agpio, func, gpio_flags);
if (ret)
return ERR_PTR(ret);
@@ -122,32 +123,76 @@ skl_int3472_gpiod_get_from_temp_lookup(struct int3472_discrete_device *int3472,
return desc;
}
-static void int3472_get_func_and_polarity(u8 type, const char **func, u32 *polarity)
+/**
+ * struct int3472_gpio_map - Map GPIOs to whatever is expected by the
+ * sensor driver (as in DT bindings)
+ * @hid: The ACPI HID of the device without the instance number e.g. INT347E
+ * @type_from: The GPIO type from ACPI ?SDT
+ * @type_to: The assigned GPIO type, typically same as @type_from
+ * @func: The function, e.g. "enable"
+ * @polarity_low: GPIO_ACTIVE_LOW true if the @polarity_low is true,
+ * GPIO_ACTIVE_HIGH otherwise
+ */
+struct int3472_gpio_map {
+ const char *hid;
+ u8 type_from;
+ u8 type_to;
+ bool polarity_low;
+ const char *func;
+};
+
+static const struct int3472_gpio_map int3472_gpio_map[] = {
+ { "INT347E", INT3472_GPIO_TYPE_RESET, INT3472_GPIO_TYPE_RESET, false, "enable" },
+};
+
+static void int3472_get_func_and_polarity(struct acpi_device *adev, u8 *type,
+ const char **func, unsigned long *gpio_flags)
{
- switch (type) {
+ unsigned int i;
+
+ for (i = 0; i < ARRAY_SIZE(int3472_gpio_map); i++) {
+ /*
+ * Map the firmware-provided GPIO to whatever a driver expects
+ * (as in DT bindings). First check if the type matches with the
+ * GPIO map, then further check that the device _HID matches.
+ */
+ if (*type != int3472_gpio_map[i].type_from)
+ continue;
+
+ if (!acpi_dev_hid_uid_match(adev, int3472_gpio_map[i].hid, NULL))
+ continue;
+
+ *type = int3472_gpio_map[i].type_to;
+ *gpio_flags = int3472_gpio_map[i].polarity_low ?
+ GPIO_ACTIVE_LOW : GPIO_ACTIVE_HIGH;
+ *func = int3472_gpio_map[i].func;
+ return;
+ }
+
+ switch (*type) {
case INT3472_GPIO_TYPE_RESET:
*func = "reset";
- *polarity = GPIO_ACTIVE_LOW;
+ *gpio_flags = GPIO_ACTIVE_LOW;
break;
case INT3472_GPIO_TYPE_POWERDOWN:
*func = "powerdown";
- *polarity = GPIO_ACTIVE_LOW;
+ *gpio_flags = GPIO_ACTIVE_LOW;
break;
case INT3472_GPIO_TYPE_CLK_ENABLE:
*func = "clk-enable";
- *polarity = GPIO_ACTIVE_HIGH;
+ *gpio_flags = GPIO_ACTIVE_HIGH;
break;
case INT3472_GPIO_TYPE_PRIVACY_LED:
*func = "privacy-led";
- *polarity = GPIO_ACTIVE_HIGH;
+ *gpio_flags = GPIO_ACTIVE_HIGH;
break;
case INT3472_GPIO_TYPE_POWER_ENABLE:
*func = "power-enable";
- *polarity = GPIO_ACTIVE_HIGH;
+ *gpio_flags = GPIO_ACTIVE_HIGH;
break;
default:
*func = "unknown";
- *polarity = GPIO_ACTIVE_HIGH;
+ *gpio_flags = GPIO_ACTIVE_HIGH;
break;
}
}
@@ -194,7 +239,7 @@ static int skl_int3472_handle_gpio_resources(struct acpi_resource *ares,
struct gpio_desc *gpio;
const char *err_msg;
const char *func;
- u32 polarity;
+ unsigned long gpio_flags;
int ret;
if (!acpi_gpio_get_io_resource(ares, &agpio))
@@ -217,7 +262,7 @@ static int skl_int3472_handle_gpio_resources(struct acpi_resource *ares,
type = FIELD_GET(INT3472_GPIO_DSM_TYPE, obj->integer.value);
- int3472_get_func_and_polarity(type, &func, &polarity);
+ int3472_get_func_and_polarity(int3472->sensor, &type, &func, &gpio_flags);
pin = FIELD_GET(INT3472_GPIO_DSM_PIN, obj->integer.value);
/* Pin field is not really used under Windows and wraps around at 8 bits */
@@ -227,16 +272,16 @@ static int skl_int3472_handle_gpio_resources(struct acpi_resource *ares,
active_value = FIELD_GET(INT3472_GPIO_DSM_SENSOR_ON_VAL, obj->integer.value);
if (!active_value)
- polarity ^= GPIO_ACTIVE_LOW;
+ gpio_flags ^= GPIO_ACTIVE_LOW;
dev_dbg(int3472->dev, "%s %s pin %d active-%s\n", func,
agpio->resource_source.string_ptr, agpio->pin_table[0],
- str_high_low(polarity == GPIO_ACTIVE_HIGH));
+ str_high_low(gpio_flags == GPIO_ACTIVE_HIGH));
switch (type) {
case INT3472_GPIO_TYPE_RESET:
case INT3472_GPIO_TYPE_POWERDOWN:
- ret = skl_int3472_map_gpio_to_sensor(int3472, agpio, func, polarity);
+ ret = skl_int3472_map_gpio_to_sensor(int3472, agpio, func, gpio_flags);
if (ret)
err_msg = "Failed to map GPIO pin to sensor\n";
@@ -244,7 +289,7 @@ static int skl_int3472_handle_gpio_resources(struct acpi_resource *ares,
case INT3472_GPIO_TYPE_CLK_ENABLE:
case INT3472_GPIO_TYPE_PRIVACY_LED:
case INT3472_GPIO_TYPE_POWER_ENABLE:
- gpio = skl_int3472_gpiod_get_from_temp_lookup(int3472, agpio, func, polarity);
+ gpio = skl_int3472_gpiod_get_from_temp_lookup(int3472, agpio, func, gpio_flags);
if (IS_ERR(gpio)) {
ret = PTR_ERR(gpio);
err_msg = "Failed to get GPIO\n";
diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c
index 1fcb0f99695a..72a10ed2017c 100644
--- a/drivers/platform/x86/thinkpad_acpi.c
+++ b/drivers/platform/x86/thinkpad_acpi.c
@@ -7885,6 +7885,7 @@ static struct ibm_struct volume_driver_data = {
#define FAN_NS_CTRL_STATUS BIT(2) /* Bit which determines control is enabled or not */
#define FAN_NS_CTRL BIT(4) /* Bit which determines control is by host or EC */
+#define FAN_CLOCK_TPM (22500*60) /* Ticks per minute for a 22.5 kHz clock */
enum { /* Fan control constants */
fan_status_offset = 0x2f, /* EC register 0x2f */
@@ -7940,6 +7941,7 @@ static int fan_watchdog_maxinterval;
static bool fan_with_ns_addr;
static bool ecfw_with_fan_dec_rpm;
+static bool fan_speed_in_tpr;
static struct mutex fan_mutex;
@@ -8142,8 +8144,11 @@ static int fan_get_speed(unsigned int *speed)
!acpi_ec_read(fan_rpm_offset + 1, &hi)))
return -EIO;
- if (likely(speed))
+ if (likely(speed)) {
*speed = (hi << 8) | lo;
+ if (fan_speed_in_tpr && *speed != 0)
+ *speed = FAN_CLOCK_TPM / *speed;
+ }
break;
case TPACPI_FAN_RD_TPEC_NS:
if (!acpi_ec_read(fan_rpm_status_ns, &lo))
@@ -8176,8 +8181,11 @@ static int fan2_get_speed(unsigned int *speed)
if (rc)
return -EIO;
- if (likely(speed))
+ if (likely(speed)) {
*speed = (hi << 8) | lo;
+ if (fan_speed_in_tpr && *speed != 0)
+ *speed = FAN_CLOCK_TPM / *speed;
+ }
break;
case TPACPI_FAN_RD_TPEC_NS:
@@ -8788,6 +8796,7 @@ static const struct attribute_group fan_driver_attr_group = {
#define TPACPI_FAN_NOFAN 0x0008 /* no fan available */
#define TPACPI_FAN_NS 0x0010 /* For EC with non-Standard register addresses */
#define TPACPI_FAN_DECRPM 0x0020 /* For ECFW's with RPM in register as decimal */
+#define TPACPI_FAN_TPR 0x0040 /* Fan speed is in Ticks Per Revolution */
static const struct tpacpi_quirk fan_quirk_table[] __initconst = {
TPACPI_QEC_IBM('1', 'Y', TPACPI_FAN_Q1),
@@ -8817,6 +8826,7 @@ static const struct tpacpi_quirk fan_quirk_table[] __initconst = {
TPACPI_Q_LNV3('R', '0', 'V', TPACPI_FAN_NS), /* 11e Gen5 KL-Y */
TPACPI_Q_LNV3('N', '1', 'O', TPACPI_FAN_NOFAN), /* X1 Tablet (2nd gen) */
TPACPI_Q_LNV3('R', '0', 'Q', TPACPI_FAN_DECRPM),/* L480 */
+ TPACPI_Q_LNV('8', 'F', TPACPI_FAN_TPR), /* ThinkPad x120e */
};
static int __init fan_init(struct ibm_init_struct *iibm)
@@ -8887,6 +8897,8 @@ static int __init fan_init(struct ibm_init_struct *iibm)
if (quirks & TPACPI_FAN_Q1)
fan_quirk1_setup();
+ if (quirks & TPACPI_FAN_TPR)
+ fan_speed_in_tpr = true;
/* Try and probe the 2nd fan */
tp_features.second_fan = 1; /* needed for get_speed to work */
res = fan2_get_speed(&speed);
@@ -10319,6 +10331,10 @@ static struct ibm_struct proxsensor_driver_data = {
#define DYTC_MODE_PSC_BALANCE 5 /* Default mode aka balanced */
#define DYTC_MODE_PSC_PERFORM 7 /* High power mode aka performance */
+#define DYTC_MODE_PSCV9_LOWPOWER 1 /* Low power mode */
+#define DYTC_MODE_PSCV9_BALANCE 3 /* Default mode aka balanced */
+#define DYTC_MODE_PSCV9_PERFORM 4 /* High power mode aka performance */
+
#define DYTC_ERR_MASK 0xF /* Bits 0-3 in cmd result are the error result */
#define DYTC_ERR_SUCCESS 1 /* CMD completed successful */
@@ -10339,6 +10355,10 @@ static int dytc_capabilities;
static bool dytc_mmc_get_available;
static int profile_force;
+static int platform_psc_profile_lowpower = DYTC_MODE_PSC_LOWPOWER;
+static int platform_psc_profile_balanced = DYTC_MODE_PSC_BALANCE;
+static int platform_psc_profile_performance = DYTC_MODE_PSC_PERFORM;
+
static int convert_dytc_to_profile(int funcmode, int dytcmode,
enum platform_profile_option *profile)
{
@@ -10360,19 +10380,15 @@ static int convert_dytc_to_profile(int funcmode, int dytcmode,
}
return 0;
case DYTC_FUNCTION_PSC:
- switch (dytcmode) {
- case DYTC_MODE_PSC_LOWPOWER:
+ if (dytcmode == platform_psc_profile_lowpower)
*profile = PLATFORM_PROFILE_LOW_POWER;
- break;
- case DYTC_MODE_PSC_BALANCE:
+ else if (dytcmode == platform_psc_profile_balanced)
*profile = PLATFORM_PROFILE_BALANCED;
- break;
- case DYTC_MODE_PSC_PERFORM:
+ else if (dytcmode == platform_psc_profile_performance)
*profile = PLATFORM_PROFILE_PERFORMANCE;
- break;
- default: /* Unknown mode */
+ else
return -EINVAL;
- }
+
return 0;
case DYTC_FUNCTION_AMT:
/* For now return balanced. It's the closest we have to 'auto' */
@@ -10393,19 +10409,19 @@ static int convert_profile_to_dytc(enum platform_profile_option profile, int *pe
if (dytc_capabilities & BIT(DYTC_FC_MMC))
*perfmode = DYTC_MODE_MMC_LOWPOWER;
else if (dytc_capabilities & BIT(DYTC_FC_PSC))
- *perfmode = DYTC_MODE_PSC_LOWPOWER;
+ *perfmode = platform_psc_profile_lowpower;
break;
case PLATFORM_PROFILE_BALANCED:
if (dytc_capabilities & BIT(DYTC_FC_MMC))
*perfmode = DYTC_MODE_MMC_BALANCE;
else if (dytc_capabilities & BIT(DYTC_FC_PSC))
- *perfmode = DYTC_MODE_PSC_BALANCE;
+ *perfmode = platform_psc_profile_balanced;
break;
case PLATFORM_PROFILE_PERFORMANCE:
if (dytc_capabilities & BIT(DYTC_FC_MMC))
*perfmode = DYTC_MODE_MMC_PERFORM;
else if (dytc_capabilities & BIT(DYTC_FC_PSC))
- *perfmode = DYTC_MODE_PSC_PERFORM;
+ *perfmode = platform_psc_profile_performance;
break;
default: /* Unknown profile */
return -EOPNOTSUPP;
@@ -10599,6 +10615,7 @@ static int tpacpi_dytc_profile_init(struct ibm_init_struct *iibm)
if (output & BIT(DYTC_QUERY_ENABLE_BIT))
dytc_version = (output >> DYTC_QUERY_REV_BIT) & 0xF;
+ dbg_printk(TPACPI_DBG_INIT, "DYTC version %d\n", dytc_version);
/* Check DYTC is enabled and supports mode setting */
if (dytc_version < 5)
return -ENODEV;
@@ -10637,6 +10654,11 @@ static int tpacpi_dytc_profile_init(struct ibm_init_struct *iibm)
}
} else if (dytc_capabilities & BIT(DYTC_FC_PSC)) { /* PSC MODE */
pr_debug("PSC is supported\n");
+ if (dytc_version >= 9) { /* update profiles for DYTC 9 and up */
+ platform_psc_profile_lowpower = DYTC_MODE_PSCV9_LOWPOWER;
+ platform_psc_profile_balanced = DYTC_MODE_PSCV9_BALANCE;
+ platform_psc_profile_performance = DYTC_MODE_PSCV9_PERFORM;
+ }
} else {
dbg_printk(TPACPI_DBG_INIT, "No DYTC support available\n");
return -ENODEV;
@@ -10646,8 +10668,8 @@ static int tpacpi_dytc_profile_init(struct ibm_init_struct *iibm)
"DYTC version %d: thermal mode available\n", dytc_version);
/* Create platform_profile structure and register */
- tpacpi_pprof = devm_platform_profile_register(&tpacpi_pdev->dev, "thinkpad-acpi",
- NULL, &dytc_profile_ops);
+ tpacpi_pprof = platform_profile_register(&tpacpi_pdev->dev, "thinkpad-acpi-profile",
+ NULL, &dytc_profile_ops);
/*
* If for some reason platform_profiles aren't enabled
* don't quit terminally.
@@ -10665,8 +10687,15 @@ static int tpacpi_dytc_profile_init(struct ibm_init_struct *iibm)
return 0;
}
+static void dytc_profile_exit(void)
+{
+ if (!IS_ERR_OR_NULL(tpacpi_pprof))
+ platform_profile_remove(tpacpi_pprof);
+}
+
static struct ibm_struct dytc_profile_driver_data = {
.name = "dytc-profile",
+ .exit = dytc_profile_exit,
};
/*************************************************************************
diff --git a/drivers/powercap/powercap_sys.c b/drivers/powercap/powercap_sys.c
index 52c32dcbf7d8..4112a0097338 100644
--- a/drivers/powercap/powercap_sys.c
+++ b/drivers/powercap/powercap_sys.c
@@ -627,8 +627,7 @@ struct powercap_control_type *powercap_register_control_type(
dev_set_name(&control_type->dev, "%s", name);
result = device_register(&control_type->dev);
if (result) {
- if (control_type->allocated)
- kfree(control_type);
+ put_device(&control_type->dev);
return ERR_PTR(result);
}
idr_init(&control_type->idr);
diff --git a/drivers/scsi/qla1280.c b/drivers/scsi/qla1280.c
index 1fd2da0264e3..47d74f881948 100644
--- a/drivers/scsi/qla1280.c
+++ b/drivers/scsi/qla1280.c
@@ -2867,7 +2867,7 @@ qla1280_64bit_start_scsi(struct scsi_qla_host *ha, struct srb * sp)
dprintk(3, "S/G Segment phys_addr=%x %x, len=0x%x\n",
cpu_to_le32(upper_32_bits(dma_handle)),
cpu_to_le32(lower_32_bits(dma_handle)),
- cpu_to_le32(sg_dma_len(sg_next(s))));
+ cpu_to_le32(sg_dma_len(s)));
remseg--;
}
dprintk(5, "qla1280_64bit_start_scsi: Scatter/gather "
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index d776f13cd160..be0890e4e706 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -872,13 +872,18 @@ static void scsi_io_completion_action(struct scsi_cmnd *cmd, int result)
case 0x1a: /* start stop unit in progress */
case 0x1b: /* sanitize in progress */
case 0x1d: /* configuration in progress */
- case 0x24: /* depopulation in progress */
- case 0x25: /* depopulation restore in progress */
action = ACTION_DELAYED_RETRY;
break;
case 0x0a: /* ALUA state transition */
action = ACTION_DELAYED_REPREP;
break;
+ /*
+ * Depopulation might take many hours,
+ * thus it is not worthwhile to retry.
+ */
+ case 0x24: /* depopulation in progress */
+ case 0x25: /* depopulation restore in progress */
+ fallthrough;
default:
action = ACTION_FAIL;
break;
diff --git a/drivers/scsi/scsi_lib_test.c b/drivers/scsi/scsi_lib_test.c
index 99834426a100..ae8af0e0047a 100644
--- a/drivers/scsi/scsi_lib_test.c
+++ b/drivers/scsi/scsi_lib_test.c
@@ -67,6 +67,13 @@ static void scsi_lib_test_multiple_sense(struct kunit *test)
};
int i;
+ /* Success */
+ sc.result = 0;
+ KUNIT_EXPECT_EQ(test, 0, scsi_check_passthrough(&sc, &failures));
+ KUNIT_EXPECT_EQ(test, 0, scsi_check_passthrough(&sc, NULL));
+ /* Command failed but caller did not pass in a failures array */
+ scsi_build_sense(&sc, 0, ILLEGAL_REQUEST, 0x91, 0x36);
+ KUNIT_EXPECT_EQ(test, 0, scsi_check_passthrough(&sc, NULL));
/* Match end of array */
scsi_build_sense(&sc, 0, ILLEGAL_REQUEST, 0x91, 0x36);
KUNIT_EXPECT_EQ(test, -EAGAIN, scsi_check_passthrough(&sc, &failures));
diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c
index 087fcbfc9aaa..96d7e1a9a7c7 100644
--- a/drivers/scsi/scsi_scan.c
+++ b/drivers/scsi/scsi_scan.c
@@ -246,7 +246,7 @@ static int scsi_realloc_sdev_budget_map(struct scsi_device *sdev,
}
ret = sbitmap_init_node(&sdev->budget_map,
scsi_device_max_queue_depth(sdev),
- new_shift, GFP_KERNEL,
+ new_shift, GFP_NOIO,
sdev->request_queue->node, false, true);
if (!ret)
sbitmap_resize(&sdev->budget_map, depth);
diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c
index 5a101ac06c47..a8614e54544e 100644
--- a/drivers/scsi/storvsc_drv.c
+++ b/drivers/scsi/storvsc_drv.c
@@ -1800,6 +1800,7 @@ static int storvsc_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *scmnd)
length = scsi_bufflen(scmnd);
payload = (struct vmbus_packet_mpb_array *)&cmd_request->mpb;
+ payload->range.len = 0;
payload_sz = 0;
if (scsi_sg_count(scmnd)) {
diff --git a/drivers/soc/qcom/smp2p.c b/drivers/soc/qcom/smp2p.c
index 4783ab1adb8d..a3e88ced328a 100644
--- a/drivers/soc/qcom/smp2p.c
+++ b/drivers/soc/qcom/smp2p.c
@@ -365,7 +365,7 @@ static void smp2p_irq_print_chip(struct irq_data *irqd, struct seq_file *p)
{
struct smp2p_entry *entry = irq_data_get_irq_chip_data(irqd);
- seq_printf(p, " %8s", dev_name(entry->smp2p->dev));
+ seq_printf(p, "%8s", dev_name(entry->smp2p->dev));
}
static struct irq_chip smp2p_irq_chip = {
diff --git a/drivers/target/target_core_stat.c b/drivers/target/target_core_stat.c
index c42cbde8a31b..210648a0092e 100644
--- a/drivers/target/target_core_stat.c
+++ b/drivers/target/target_core_stat.c
@@ -117,9 +117,9 @@ static ssize_t target_stat_tgt_status_show(struct config_item *item,
char *page)
{
if (to_stat_tgt_dev(item)->export_count)
- return snprintf(page, PAGE_SIZE, "activated");
+ return snprintf(page, PAGE_SIZE, "activated\n");
else
- return snprintf(page, PAGE_SIZE, "deactivated");
+ return snprintf(page, PAGE_SIZE, "deactivated\n");
}
static ssize_t target_stat_tgt_non_access_lus_show(struct config_item *item,
diff --git a/drivers/tty/pty.c b/drivers/tty/pty.c
index df08f13052ff..8bb1a01fef2a 100644
--- a/drivers/tty/pty.c
+++ b/drivers/tty/pty.c
@@ -798,7 +798,7 @@ static int ptmx_open(struct inode *inode, struct file *filp)
nonseekable_open(inode, filp);
/* We refuse fsnotify events on ptmx, since it's a shared resource */
- filp->f_mode |= FMODE_NONOTIFY;
+ file_set_fsnotify_mode(filp, FMODE_NONOTIFY);
retval = tty_alloc_file(filp);
if (retval)
diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c
index cd404ade48dc..1893a7ad9531 100644
--- a/drivers/ufs/core/ufshcd.c
+++ b/drivers/ufs/core/ufshcd.c
@@ -2120,8 +2120,6 @@ static void ufshcd_init_clk_gating(struct ufs_hba *hba)
INIT_DELAYED_WORK(&hba->clk_gating.gate_work, ufshcd_gate_work);
INIT_WORK(&hba->clk_gating.ungate_work, ufshcd_ungate_work);
- spin_lock_init(&hba->clk_gating.lock);
-
hba->clk_gating.clk_gating_workq = alloc_ordered_workqueue(
"ufs_clk_gating_%d", WQ_MEM_RECLAIM | WQ_HIGHPRI,
hba->host->host_no);
@@ -3106,8 +3104,13 @@ ufshcd_dev_cmd_completion(struct ufs_hba *hba, struct ufshcd_lrb *lrbp)
case UPIU_TRANSACTION_QUERY_RSP: {
u8 response = lrbp->ucd_rsp_ptr->header.response;
- if (response == 0)
+ if (response == 0) {
err = ufshcd_copy_query_response(hba, lrbp);
+ } else {
+ err = -EINVAL;
+ dev_err(hba->dev, "%s: unexpected response in Query RSP: %x\n",
+ __func__, response);
+ }
break;
}
case UPIU_TRANSACTION_REJECT_UPIU:
@@ -5976,24 +5979,6 @@ out:
__func__, err);
}
-static void ufshcd_temp_exception_event_handler(struct ufs_hba *hba, u16 status)
-{
- u32 value;
-
- if (ufshcd_query_attr_retry(hba, UPIU_QUERY_OPCODE_READ_ATTR,
- QUERY_ATTR_IDN_CASE_ROUGH_TEMP, 0, 0, &value))
- return;
-
- dev_info(hba->dev, "exception Tcase %d\n", value - 80);
-
- ufs_hwmon_notify_event(hba, status & MASK_EE_URGENT_TEMP);
-
- /*
- * A placeholder for the platform vendors to add whatever additional
- * steps required
- */
-}
-
static int __ufshcd_wb_toggle(struct ufs_hba *hba, bool set, enum flag_idn idn)
{
u8 index;
@@ -6214,7 +6199,7 @@ static void ufshcd_exception_event_handler(struct work_struct *work)
ufshcd_bkops_exception_event_handler(hba);
if (status & hba->ee_drv_mask & MASK_EE_URGENT_TEMP)
- ufshcd_temp_exception_event_handler(hba, status);
+ ufs_hwmon_notify_event(hba, status & MASK_EE_URGENT_TEMP);
ufs_debugfs_exception_event(hba, status);
}
@@ -9160,7 +9145,7 @@ out:
if (!IS_ERR_OR_NULL(clki->clk) && clki->enabled)
clk_disable_unprepare(clki->clk);
}
- } else if (!ret && on) {
+ } else if (!ret && on && hba->clk_gating.is_initialized) {
scoped_guard(spinlock_irqsave, &hba->clk_gating.lock)
hba->clk_gating.state = CLKS_ON;
trace_ufshcd_clk_gating(dev_name(hba->dev),
@@ -10247,16 +10232,6 @@ EXPORT_SYMBOL_GPL(ufshcd_system_thaw);
#endif /* CONFIG_PM_SLEEP */
/**
- * ufshcd_dealloc_host - deallocate Host Bus Adapter (HBA)
- * @hba: pointer to Host Bus Adapter (HBA)
- */
-void ufshcd_dealloc_host(struct ufs_hba *hba)
-{
- scsi_host_put(hba->host);
-}
-EXPORT_SYMBOL_GPL(ufshcd_dealloc_host);
-
-/**
* ufshcd_set_dma_mask - Set dma mask based on the controller
* addressing capability
* @hba: per adapter instance
@@ -10275,11 +10250,25 @@ static int ufshcd_set_dma_mask(struct ufs_hba *hba)
}
/**
+ * ufshcd_devres_release - devres cleanup handler, invoked during release of
+ * hba->dev
+ * @host: pointer to SCSI host
+ */
+static void ufshcd_devres_release(void *host)
+{
+ scsi_host_put(host);
+}
+
+/**
* ufshcd_alloc_host - allocate Host Bus Adapter (HBA)
* @dev: pointer to device handle
* @hba_handle: driver private handle
*
* Return: 0 on success, non-zero value on failure.
+ *
+ * NOTE: There is no corresponding ufshcd_dealloc_host() because this function
+ * keeps track of its allocations using devres and deallocates everything on
+ * device removal automatically.
*/
int ufshcd_alloc_host(struct device *dev, struct ufs_hba **hba_handle)
{
@@ -10301,6 +10290,13 @@ int ufshcd_alloc_host(struct device *dev, struct ufs_hba **hba_handle)
err = -ENOMEM;
goto out_error;
}
+
+ err = devm_add_action_or_reset(dev, ufshcd_devres_release,
+ host);
+ if (err)
+ return dev_err_probe(dev, err,
+ "failed to add ufshcd dealloc action\n");
+
host->nr_maps = HCTX_TYPE_POLL + 1;
hba = shost_priv(host);
hba->host = host;
@@ -10429,6 +10425,12 @@ int ufshcd_init(struct ufs_hba *hba, void __iomem *mmio_base, unsigned int irq)
hba->irq = irq;
hba->vps = &ufs_hba_vps;
+ /*
+ * Initialize clk_gating.lock early since it is being used in
+ * ufshcd_setup_clocks()
+ */
+ spin_lock_init(&hba->clk_gating.lock);
+
err = ufshcd_hba_init(hba);
if (err)
goto out_error;
diff --git a/drivers/ufs/host/ufshcd-pci.c b/drivers/ufs/host/ufshcd-pci.c
index ea39c5d5b8cf..9cfcaad23cf9 100644
--- a/drivers/ufs/host/ufshcd-pci.c
+++ b/drivers/ufs/host/ufshcd-pci.c
@@ -562,7 +562,6 @@ static void ufshcd_pci_remove(struct pci_dev *pdev)
pm_runtime_forbid(&pdev->dev);
pm_runtime_get_noresume(&pdev->dev);
ufshcd_remove(hba);
- ufshcd_dealloc_host(hba);
}
/**
@@ -605,7 +604,6 @@ ufshcd_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
err = ufshcd_init(hba, mmio_base, pdev->irq);
if (err) {
dev_err(&pdev->dev, "Initialization failed\n");
- ufshcd_dealloc_host(hba);
return err;
}
diff --git a/drivers/ufs/host/ufshcd-pltfrm.c b/drivers/ufs/host/ufshcd-pltfrm.c
index 505572d4fa87..ffe5d1d2b215 100644
--- a/drivers/ufs/host/ufshcd-pltfrm.c
+++ b/drivers/ufs/host/ufshcd-pltfrm.c
@@ -465,21 +465,17 @@ int ufshcd_pltfrm_init(struct platform_device *pdev,
struct device *dev = &pdev->dev;
mmio_base = devm_platform_ioremap_resource(pdev, 0);
- if (IS_ERR(mmio_base)) {
- err = PTR_ERR(mmio_base);
- goto out;
- }
+ if (IS_ERR(mmio_base))
+ return PTR_ERR(mmio_base);
irq = platform_get_irq(pdev, 0);
- if (irq < 0) {
- err = irq;
- goto out;
- }
+ if (irq < 0)
+ return irq;
err = ufshcd_alloc_host(dev, &hba);
if (err) {
dev_err(dev, "Allocation failed\n");
- goto out;
+ return err;
}
hba->vops = vops;
@@ -488,13 +484,13 @@ int ufshcd_pltfrm_init(struct platform_device *pdev,
if (err) {
dev_err(dev, "%s: clock parse failed %d\n",
__func__, err);
- goto dealloc_host;
+ return err;
}
err = ufshcd_parse_regulator_info(hba);
if (err) {
dev_err(dev, "%s: regulator init failed %d\n",
__func__, err);
- goto dealloc_host;
+ return err;
}
ufshcd_init_lanes_per_dir(hba);
@@ -502,25 +498,20 @@ int ufshcd_pltfrm_init(struct platform_device *pdev,
err = ufshcd_parse_operating_points(hba);
if (err) {
dev_err(dev, "%s: OPP parse failed %d\n", __func__, err);
- goto dealloc_host;
+ return err;
}
err = ufshcd_init(hba, mmio_base, irq);
if (err) {
dev_err_probe(dev, err, "Initialization failed with error %d\n",
err);
- goto dealloc_host;
+ return err;
}
pm_runtime_set_active(dev);
pm_runtime_enable(dev);
return 0;
-
-dealloc_host:
- ufshcd_dealloc_host(hba);
-out:
- return err;
}
EXPORT_SYMBOL_GPL(ufshcd_pltfrm_init);
@@ -534,7 +525,6 @@ void ufshcd_pltfrm_remove(struct platform_device *pdev)
pm_runtime_get_sync(&pdev->dev);
ufshcd_remove(hba);
- ufshcd_dealloc_host(hba);
pm_runtime_disable(&pdev->dev);
pm_runtime_put_noidle(&pdev->dev);
}
diff --git a/fs/bcachefs/Kconfig b/fs/bcachefs/Kconfig
index 85eea7a4dea3..fc7efd0a7525 100644
--- a/fs/bcachefs/Kconfig
+++ b/fs/bcachefs/Kconfig
@@ -61,6 +61,13 @@ config BCACHEFS_DEBUG
The resulting code will be significantly slower than normal; you
probably shouldn't select this option unless you're a developer.
+config BCACHEFS_INJECT_TRANSACTION_RESTARTS
+ bool "Randomly inject transaction restarts"
+ depends on BCACHEFS_DEBUG
+ help
+ Randomly inject transaction restarts in a few core paths - may have a
+ significant performance penalty
+
config BCACHEFS_TESTS
bool "bcachefs unit and performance tests"
depends on BCACHEFS_FS
diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c
index fc2ef33b67b3..3ea809990ef1 100644
--- a/fs/bcachefs/alloc_background.c
+++ b/fs/bcachefs/alloc_background.c
@@ -1803,7 +1803,6 @@ struct discard_buckets_state {
u64 open;
u64 need_journal_commit;
u64 discarded;
- u64 need_journal_commit_this_dev;
};
static int bch2_discard_one_bucket(struct btree_trans *trans,
@@ -1827,11 +1826,11 @@ static int bch2_discard_one_bucket(struct btree_trans *trans,
goto out;
}
- if (bch2_bucket_needs_journal_commit(&c->buckets_waiting_for_journal,
- c->journal.flushed_seq_ondisk,
- pos.inode, pos.offset)) {
- s->need_journal_commit++;
- s->need_journal_commit_this_dev++;
+ u64 seq_ready = bch2_bucket_journal_seq_ready(&c->buckets_waiting_for_journal,
+ pos.inode, pos.offset);
+ if (seq_ready > c->journal.flushed_seq_ondisk) {
+ if (seq_ready > c->journal.flushing_seq)
+ s->need_journal_commit++;
goto out;
}
@@ -1865,23 +1864,24 @@ static int bch2_discard_one_bucket(struct btree_trans *trans,
discard_locked = true;
}
- if (!bkey_eq(*discard_pos_done, iter.pos) &&
- ca->mi.discard && !c->opts.nochanges) {
- /*
- * This works without any other locks because this is the only
- * thread that removes items from the need_discard tree
- */
- bch2_trans_unlock_long(trans);
- blkdev_issue_discard(ca->disk_sb.bdev,
- k.k->p.offset * ca->mi.bucket_size,
- ca->mi.bucket_size,
- GFP_KERNEL);
- *discard_pos_done = iter.pos;
+ if (!bkey_eq(*discard_pos_done, iter.pos)) {
s->discarded++;
+ *discard_pos_done = iter.pos;
- ret = bch2_trans_relock_notrace(trans);
- if (ret)
- goto out;
+ if (ca->mi.discard && !c->opts.nochanges) {
+ /*
+ * This works without any other locks because this is the only
+ * thread that removes items from the need_discard tree
+ */
+ bch2_trans_unlock_long(trans);
+ blkdev_issue_discard(ca->disk_sb.bdev,
+ k.k->p.offset * ca->mi.bucket_size,
+ ca->mi.bucket_size,
+ GFP_KERNEL);
+ ret = bch2_trans_relock_notrace(trans);
+ if (ret)
+ goto out;
+ }
}
SET_BCH_ALLOC_V4_NEED_DISCARD(&a->v, false);
@@ -1929,6 +1929,9 @@ static void bch2_do_discards_work(struct work_struct *work)
POS(ca->dev_idx, U64_MAX), 0, k,
bch2_discard_one_bucket(trans, ca, &iter, &discard_pos_done, &s, false)));
+ if (s.need_journal_commit > dev_buckets_available(ca, BCH_WATERMARK_normal))
+ bch2_journal_flush_async(&c->journal, NULL);
+
trace_discard_buckets(c, s.seen, s.open, s.need_journal_commit, s.discarded,
bch2_err_str(ret));
@@ -2024,7 +2027,7 @@ static void bch2_do_discards_fast_work(struct work_struct *work)
break;
}
- trace_discard_buckets(c, s.seen, s.open, s.need_journal_commit, s.discarded, bch2_err_str(ret));
+ trace_discard_buckets_fast(c, s.seen, s.open, s.need_journal_commit, s.discarded, bch2_err_str(ret));
bch2_trans_put(trans);
percpu_ref_put(&ca->io_ref);
diff --git a/fs/bcachefs/alloc_foreground.c b/fs/bcachefs/alloc_foreground.c
index 6df41c331a52..5a781fb4c794 100644
--- a/fs/bcachefs/alloc_foreground.c
+++ b/fs/bcachefs/alloc_foreground.c
@@ -205,8 +205,12 @@ static inline bool may_alloc_bucket(struct bch_fs *c,
return false;
}
- if (bch2_bucket_needs_journal_commit(&c->buckets_waiting_for_journal,
- c->journal.flushed_seq_ondisk, bucket.inode, bucket.offset)) {
+ u64 journal_seq_ready =
+ bch2_bucket_journal_seq_ready(&c->buckets_waiting_for_journal,
+ bucket.inode, bucket.offset);
+ if (journal_seq_ready > c->journal.flushed_seq_ondisk) {
+ if (journal_seq_ready > c->journal.flushing_seq)
+ s->need_journal_commit++;
s->skipped_need_journal_commit++;
return false;
}
@@ -570,7 +574,7 @@ alloc:
? bch2_bucket_alloc_freelist(trans, ca, watermark, &s, cl)
: bch2_bucket_alloc_early(trans, ca, watermark, &s, cl);
- if (s.skipped_need_journal_commit * 2 > avail)
+ if (s.need_journal_commit * 2 > avail)
bch2_journal_flush_async(&c->journal, NULL);
if (!ob && s.btree_bitmap != BTREE_BITMAP_ANY) {
diff --git a/fs/bcachefs/alloc_types.h b/fs/bcachefs/alloc_types.h
index 9bbb28e90b93..4aa8ee026cb8 100644
--- a/fs/bcachefs/alloc_types.h
+++ b/fs/bcachefs/alloc_types.h
@@ -18,6 +18,7 @@ struct bucket_alloc_state {
u64 buckets_seen;
u64 skipped_open;
u64 skipped_need_journal_commit;
+ u64 need_journal_commit;
u64 skipped_nocow;
u64 skipped_nouse;
u64 skipped_mi_btree_bitmap;
diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c
index 5988219c6908..e32fce4fd258 100644
--- a/fs/bcachefs/btree_iter.c
+++ b/fs/bcachefs/btree_iter.c
@@ -2357,6 +2357,12 @@ struct bkey_s_c bch2_btree_iter_peek_max(struct btree_iter *iter, struct bpos en
bch2_btree_iter_verify_entry_exit(iter);
EBUG_ON((iter->flags & BTREE_ITER_filter_snapshots) && bkey_eq(end, POS_MAX));
+ ret = trans_maybe_inject_restart(trans, _RET_IP_);
+ if (unlikely(ret)) {
+ k = bkey_s_c_err(ret);
+ goto out_no_locked;
+ }
+
if (iter->update_path) {
bch2_path_put_nokeep(trans, iter->update_path,
iter->flags & BTREE_ITER_intent);
@@ -2622,6 +2628,12 @@ struct bkey_s_c bch2_btree_iter_peek_prev_min(struct btree_iter *iter, struct bp
bch2_btree_iter_verify_entry_exit(iter);
EBUG_ON((iter->flags & BTREE_ITER_filter_snapshots) && bpos_eq(end, POS_MIN));
+ int ret = trans_maybe_inject_restart(trans, _RET_IP_);
+ if (unlikely(ret)) {
+ k = bkey_s_c_err(ret);
+ goto out_no_locked;
+ }
+
while (1) {
k = __bch2_btree_iter_peek_prev(iter, search_key);
if (unlikely(!k.k))
@@ -2749,6 +2761,12 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter)
bch2_btree_iter_verify_entry_exit(iter);
EBUG_ON(btree_iter_path(trans, iter)->level && (iter->flags & BTREE_ITER_with_key_cache));
+ ret = trans_maybe_inject_restart(trans, _RET_IP_);
+ if (unlikely(ret)) {
+ k = bkey_s_c_err(ret);
+ goto out_no_locked;
+ }
+
/* extents can't span inode numbers: */
if ((iter->flags & BTREE_ITER_is_extents) &&
unlikely(iter->pos.offset == KEY_OFFSET_MAX)) {
@@ -3106,6 +3124,10 @@ void *__bch2_trans_kmalloc(struct btree_trans *trans, size_t size)
WARN_ON_ONCE(new_bytes > BTREE_TRANS_MEM_MAX);
+ ret = trans_maybe_inject_restart(trans, _RET_IP_);
+ if (ret)
+ return ERR_PTR(ret);
+
struct btree_transaction_stats *s = btree_trans_stats(trans);
s->max_mem = max(s->max_mem, new_bytes);
@@ -3163,7 +3185,8 @@ out_new_mem:
if (old_bytes) {
trace_and_count(c, trans_restart_mem_realloced, trans, _RET_IP_, new_bytes);
- return ERR_PTR(btree_trans_restart(trans, BCH_ERR_transaction_restart_mem_realloced));
+ return ERR_PTR(btree_trans_restart_ip(trans,
+ BCH_ERR_transaction_restart_mem_realloced, _RET_IP_));
}
out_change_top:
p = trans->mem + trans->mem_top;
@@ -3271,6 +3294,14 @@ u32 bch2_trans_begin(struct btree_trans *trans)
trans->last_begin_ip = _RET_IP_;
+#ifdef CONFIG_BCACHEFS_INJECT_TRANSACTION_RESTARTS
+ if (trans->restarted) {
+ trans->restart_count_this_trans++;
+ } else {
+ trans->restart_count_this_trans = 0;
+ }
+#endif
+
trans_set_locked(trans, false);
if (trans->restarted) {
diff --git a/fs/bcachefs/btree_iter.h b/fs/bcachefs/btree_iter.h
index b9538e6e6d65..b96157f3dc9c 100644
--- a/fs/bcachefs/btree_iter.h
+++ b/fs/bcachefs/btree_iter.h
@@ -355,6 +355,18 @@ static int btree_trans_restart(struct btree_trans *trans, int err)
return btree_trans_restart_ip(trans, err, _THIS_IP_);
}
+static inline int trans_maybe_inject_restart(struct btree_trans *trans, unsigned long ip)
+{
+#ifdef CONFIG_BCACHEFS_INJECT_TRANSACTION_RESTARTS
+ if (!(ktime_get_ns() & ~(~0ULL << min(63, (10 + trans->restart_count_this_trans))))) {
+ trace_and_count(trans->c, trans_restart_injected, trans, ip);
+ return btree_trans_restart_ip(trans,
+ BCH_ERR_transaction_restart_fault_inject, ip);
+ }
+#endif
+ return 0;
+}
+
bool bch2_btree_node_upgrade(struct btree_trans *,
struct btree_path *, unsigned);
@@ -739,7 +751,7 @@ transaction_restart: \
if (!_ret2) \
bch2_trans_verify_not_restarted(_trans, _restart_count);\
\
- _ret2 ?: trans_was_restarted(_trans, _restart_count); \
+ _ret2 ?: trans_was_restarted(_trans, _orig_restart_count); \
})
#define for_each_btree_key_max_continue(_trans, _iter, \
diff --git a/fs/bcachefs/btree_key_cache.c b/fs/bcachefs/btree_key_cache.c
index c378b97ebeca..1821f40c161a 100644
--- a/fs/bcachefs/btree_key_cache.c
+++ b/fs/bcachefs/btree_key_cache.c
@@ -748,7 +748,6 @@ void bch2_fs_btree_key_cache_exit(struct btree_key_cache *bc)
rcu_read_unlock();
mutex_lock(&bc->table.mutex);
mutex_unlock(&bc->table.mutex);
- rcu_read_lock();
continue;
}
for (i = 0; i < tbl->size; i++)
diff --git a/fs/bcachefs/btree_trans_commit.c b/fs/bcachefs/btree_trans_commit.c
index 2760dd9569ed..c4f524b2ca9a 100644
--- a/fs/bcachefs/btree_trans_commit.c
+++ b/fs/bcachefs/btree_trans_commit.c
@@ -999,6 +999,10 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags)
bch2_trans_verify_not_unlocked_or_in_restart(trans);
+ ret = trans_maybe_inject_restart(trans, _RET_IP_);
+ if (unlikely(ret))
+ goto out_reset;
+
if (!trans->nr_updates &&
!trans->journal_entries_u64s)
goto out_reset;
diff --git a/fs/bcachefs/btree_types.h b/fs/bcachefs/btree_types.h
index a6f251eb4164..a09cbe9cd94f 100644
--- a/fs/bcachefs/btree_types.h
+++ b/fs/bcachefs/btree_types.h
@@ -509,6 +509,9 @@ struct btree_trans {
bool notrace_relock_fail:1;
enum bch_errcode restarted:16;
u32 restart_count;
+#ifdef CONFIG_BCACHEFS_INJECT_TRANSACTION_RESTARTS
+ u32 restart_count_this_trans;
+#endif
u64 last_begin_time;
unsigned long last_begin_ip;
diff --git a/fs/bcachefs/btree_update_interior.h b/fs/bcachefs/btree_update_interior.h
index 7930ffea3075..26d646e1275c 100644
--- a/fs/bcachefs/btree_update_interior.h
+++ b/fs/bcachefs/btree_update_interior.h
@@ -278,12 +278,12 @@ static inline struct btree_node_entry *want_new_bset(struct bch_fs *c, struct bt
{
struct bset_tree *t = bset_tree_last(b);
struct btree_node_entry *bne = max(write_block(b),
- (void *) btree_bkey_last(b, bset_tree_last(b)));
+ (void *) btree_bkey_last(b, t));
ssize_t remaining_space =
__bch2_btree_u64s_remaining(b, bne->keys.start);
if (unlikely(bset_written(b, bset(b, t)))) {
- if (remaining_space > (ssize_t) (block_bytes(c) >> 3))
+ if (b->written + block_sectors(c) <= btree_sectors(c))
return bne;
} else {
if (unlikely(bset_u64s(t) * sizeof(u64) > btree_write_set_buffer(b)) &&
diff --git a/fs/bcachefs/buckets_waiting_for_journal.c b/fs/bcachefs/buckets_waiting_for_journal.c
index f9fb150eda70..c8a488e6b7b8 100644
--- a/fs/bcachefs/buckets_waiting_for_journal.c
+++ b/fs/bcachefs/buckets_waiting_for_journal.c
@@ -22,23 +22,21 @@ static void bucket_table_init(struct buckets_waiting_for_journal_table *t, size_
memset(t->d, 0, sizeof(t->d[0]) << t->bits);
}
-bool bch2_bucket_needs_journal_commit(struct buckets_waiting_for_journal *b,
- u64 flushed_seq,
- unsigned dev, u64 bucket)
+u64 bch2_bucket_journal_seq_ready(struct buckets_waiting_for_journal *b,
+ unsigned dev, u64 bucket)
{
struct buckets_waiting_for_journal_table *t;
u64 dev_bucket = (u64) dev << 56 | bucket;
- bool ret = false;
- unsigned i;
+ u64 ret = 0;
mutex_lock(&b->lock);
t = b->t;
- for (i = 0; i < ARRAY_SIZE(t->hash_seeds); i++) {
+ for (unsigned i = 0; i < ARRAY_SIZE(t->hash_seeds); i++) {
struct bucket_hashed *h = bucket_hash(t, i, dev_bucket);
if (h->dev_bucket == dev_bucket) {
- ret = h->journal_seq > flushed_seq;
+ ret = h->journal_seq;
break;
}
}
diff --git a/fs/bcachefs/buckets_waiting_for_journal.h b/fs/bcachefs/buckets_waiting_for_journal.h
index d2ae19cbe18c..365619ca44c8 100644
--- a/fs/bcachefs/buckets_waiting_for_journal.h
+++ b/fs/bcachefs/buckets_waiting_for_journal.h
@@ -4,8 +4,8 @@
#include "buckets_waiting_for_journal_types.h"
-bool bch2_bucket_needs_journal_commit(struct buckets_waiting_for_journal *,
- u64, unsigned, u64);
+u64 bch2_bucket_journal_seq_ready(struct buckets_waiting_for_journal *,
+ unsigned, u64);
int bch2_set_bucket_needs_journal_commit(struct buckets_waiting_for_journal *,
u64, unsigned, u64, u64);
diff --git a/fs/bcachefs/disk_accounting.h b/fs/bcachefs/disk_accounting.h
index 5360cbb3ec29..f4372cafea2e 100644
--- a/fs/bcachefs/disk_accounting.h
+++ b/fs/bcachefs/disk_accounting.h
@@ -210,11 +210,13 @@ static inline void bch2_accounting_mem_read_counters(struct bch_accounting_mem *
static inline void bch2_accounting_mem_read(struct bch_fs *c, struct bpos p,
u64 *v, unsigned nr)
{
+ percpu_down_read(&c->mark_lock);
struct bch_accounting_mem *acc = &c->accounting;
unsigned idx = eytzinger0_find(acc->k.data, acc->k.nr, sizeof(acc->k.data[0]),
accounting_pos_cmp, &p);
bch2_accounting_mem_read_counters(acc, idx, v, nr, false);
+ percpu_up_read(&c->mark_lock);
}
static inline struct bversion journal_pos_to_bversion(struct journal_res *res, unsigned offset)
diff --git a/fs/bcachefs/inode.h b/fs/bcachefs/inode.h
index d2e134528f0e..428b9be6af34 100644
--- a/fs/bcachefs/inode.h
+++ b/fs/bcachefs/inode.h
@@ -285,12 +285,14 @@ void bch2_inode_opts_get(struct bch_io_opts *, struct bch_fs *,
struct bch_inode_unpacked *);
int bch2_inum_opts_get(struct btree_trans*, subvol_inum, struct bch_io_opts *);
+#include "rebalance.h"
+
static inline struct bch_extent_rebalance
bch2_inode_rebalance_opts_get(struct bch_fs *c, struct bch_inode_unpacked *inode)
{
struct bch_io_opts io_opts;
bch2_inode_opts_get(&io_opts, c, inode);
- return io_opts_to_rebalance_opts(&io_opts);
+ return io_opts_to_rebalance_opts(c, &io_opts);
}
int bch2_inode_rm_snapshot(struct btree_trans *, u64, u32);
diff --git a/fs/bcachefs/io_write.c b/fs/bcachefs/io_write.c
index dd508d93e9fc..03892388832b 100644
--- a/fs/bcachefs/io_write.c
+++ b/fs/bcachefs/io_write.c
@@ -411,6 +411,16 @@ void bch2_write_op_error(struct printbuf *out, struct bch_write_op *op)
__bch2_write_op_error(out, op, op->pos.offset);
}
+static void bch2_write_op_error_trans(struct btree_trans *trans, struct printbuf *out,
+ struct bch_write_op *op, u64 offset)
+{
+ bch2_inum_offset_err_msg_trans(trans, out,
+ (subvol_inum) { op->subvol, op->pos.inode, },
+ offset << 9);
+ prt_printf(out, "write error%s: ",
+ op->flags & BCH_WRITE_MOVE ? "(internal move)" : "");
+}
+
void bch2_submit_wbio_replicas(struct bch_write_bio *wbio, struct bch_fs *c,
enum bch_data_type type,
const struct bkey_i *k,
@@ -1193,7 +1203,7 @@ static void bch2_nocow_write_convert_unwritten(struct bch_write_op *op)
struct bkey_i *insert = bch2_keylist_front(&op->insert_keys);
struct printbuf buf = PRINTBUF;
- __bch2_write_op_error(&buf, op, bkey_start_offset(&insert->k));
+ bch2_write_op_error_trans(trans, &buf, op, bkey_start_offset(&insert->k));
prt_printf(&buf, "btree update error: %s", bch2_err_str(ret));
bch_err_ratelimited(c, "%s", buf.buf);
printbuf_exit(&buf);
diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c
index cb2c3722f674..24c294d4634e 100644
--- a/fs/bcachefs/journal.c
+++ b/fs/bcachefs/journal.c
@@ -319,6 +319,16 @@ void bch2_journal_halt(struct journal *j)
spin_unlock(&j->lock);
}
+void bch2_journal_halt_locked(struct journal *j)
+{
+ lockdep_assert_held(&j->lock);
+
+ __journal_entry_close(j, JOURNAL_ENTRY_ERROR_VAL, true);
+ if (!j->err_seq)
+ j->err_seq = journal_cur_seq(j);
+ journal_wake(j);
+}
+
static bool journal_entry_want_write(struct journal *j)
{
bool ret = !journal_entry_is_open(j) ||
@@ -381,9 +391,12 @@ static int journal_entry_open(struct journal *j)
if (nr_unwritten_journal_entries(j) == ARRAY_SIZE(j->buf))
return JOURNAL_ERR_max_in_flight;
- if (bch2_fs_fatal_err_on(journal_cur_seq(j) >= JOURNAL_SEQ_MAX,
- c, "cannot start: journal seq overflow"))
+ if (journal_cur_seq(j) >= JOURNAL_SEQ_MAX) {
+ bch_err(c, "cannot start: journal seq overflow");
+ if (bch2_fs_emergency_read_only_locked(c))
+ bch_err(c, "fatal error - emergency read only");
return JOURNAL_ERR_insufficient_devices; /* -EROFS */
+ }
BUG_ON(!j->cur_entry_sectors);
@@ -783,6 +796,7 @@ recheck_need_open:
}
buf->must_flush = true;
+ j->flushing_seq = max(j->flushing_seq, seq);
if (parent && !closure_wait(&buf->wait, parent))
BUG();
diff --git a/fs/bcachefs/journal.h b/fs/bcachefs/journal.h
index dccddd5420ad..107f7f901cd9 100644
--- a/fs/bcachefs/journal.h
+++ b/fs/bcachefs/journal.h
@@ -409,6 +409,7 @@ bool bch2_journal_noflush_seq(struct journal *, u64, u64);
int bch2_journal_meta(struct journal *);
void bch2_journal_halt(struct journal *);
+void bch2_journal_halt_locked(struct journal *);
static inline int bch2_journal_error(struct journal *j)
{
diff --git a/fs/bcachefs/journal_reclaim.c b/fs/bcachefs/journal_reclaim.c
index 6a9cefb635d6..d373cd181a7f 100644
--- a/fs/bcachefs/journal_reclaim.c
+++ b/fs/bcachefs/journal_reclaim.c
@@ -384,12 +384,16 @@ void bch2_journal_pin_drop(struct journal *j,
spin_unlock(&j->lock);
}
-static enum journal_pin_type journal_pin_type(journal_pin_flush_fn fn)
+static enum journal_pin_type journal_pin_type(struct journal_entry_pin *pin,
+ journal_pin_flush_fn fn)
{
if (fn == bch2_btree_node_flush0 ||
- fn == bch2_btree_node_flush1)
- return JOURNAL_PIN_TYPE_btree;
- else if (fn == bch2_btree_key_cache_journal_flush)
+ fn == bch2_btree_node_flush1) {
+ unsigned idx = fn == bch2_btree_node_flush1;
+ struct btree *b = container_of(pin, struct btree, writes[idx].journal);
+
+ return JOURNAL_PIN_TYPE_btree0 - b->c.level;
+ } else if (fn == bch2_btree_key_cache_journal_flush)
return JOURNAL_PIN_TYPE_key_cache;
else
return JOURNAL_PIN_TYPE_other;
@@ -441,7 +445,7 @@ void bch2_journal_pin_copy(struct journal *j,
bool reclaim = __journal_pin_drop(j, dst);
- bch2_journal_pin_set_locked(j, seq, dst, flush_fn, journal_pin_type(flush_fn));
+ bch2_journal_pin_set_locked(j, seq, dst, flush_fn, journal_pin_type(dst, flush_fn));
if (reclaim)
bch2_journal_reclaim_fast(j);
@@ -465,7 +469,7 @@ void bch2_journal_pin_set(struct journal *j, u64 seq,
bool reclaim = __journal_pin_drop(j, pin);
- bch2_journal_pin_set_locked(j, seq, pin, flush_fn, journal_pin_type(flush_fn));
+ bch2_journal_pin_set_locked(j, seq, pin, flush_fn, journal_pin_type(pin, flush_fn));
if (reclaim)
bch2_journal_reclaim_fast(j);
@@ -587,7 +591,7 @@ static size_t journal_flush_pins(struct journal *j,
spin_lock(&j->lock);
/* Pin might have been dropped or rearmed: */
if (likely(!err && !j->flush_in_progress_dropped))
- list_move(&pin->list, &journal_seq_pin(j, seq)->flushed[journal_pin_type(flush_fn)]);
+ list_move(&pin->list, &journal_seq_pin(j, seq)->flushed[journal_pin_type(pin, flush_fn)]);
j->flush_in_progress = NULL;
j->flush_in_progress_dropped = false;
spin_unlock(&j->lock);
@@ -869,18 +873,13 @@ static int journal_flush_done(struct journal *j, u64 seq_to_flush,
mutex_lock(&j->reclaim_lock);
- if (journal_flush_pins_or_still_flushing(j, seq_to_flush,
- BIT(JOURNAL_PIN_TYPE_key_cache)|
- BIT(JOURNAL_PIN_TYPE_other))) {
- *did_work = true;
- goto unlock;
- }
-
- if (journal_flush_pins_or_still_flushing(j, seq_to_flush,
- BIT(JOURNAL_PIN_TYPE_btree))) {
- *did_work = true;
- goto unlock;
- }
+ for (int type = JOURNAL_PIN_TYPE_NR - 1;
+ type >= 0;
+ --type)
+ if (journal_flush_pins_or_still_flushing(j, seq_to_flush, BIT(type))) {
+ *did_work = true;
+ goto unlock;
+ }
if (seq_to_flush > journal_cur_seq(j))
bch2_journal_entry_close(j);
diff --git a/fs/bcachefs/journal_types.h b/fs/bcachefs/journal_types.h
index 3ba433a48eb8..1ef3a28ed6ab 100644
--- a/fs/bcachefs/journal_types.h
+++ b/fs/bcachefs/journal_types.h
@@ -53,7 +53,10 @@ struct journal_buf {
*/
enum journal_pin_type {
- JOURNAL_PIN_TYPE_btree,
+ JOURNAL_PIN_TYPE_btree3,
+ JOURNAL_PIN_TYPE_btree2,
+ JOURNAL_PIN_TYPE_btree1,
+ JOURNAL_PIN_TYPE_btree0,
JOURNAL_PIN_TYPE_key_cache,
JOURNAL_PIN_TYPE_other,
JOURNAL_PIN_TYPE_NR,
@@ -237,6 +240,7 @@ struct journal {
/* seq, last_seq from the most recent journal entry successfully written */
u64 seq_ondisk;
u64 flushed_seq_ondisk;
+ u64 flushing_seq;
u64 last_seq_ondisk;
u64 err_seq;
u64 last_empty_seq;
diff --git a/fs/bcachefs/opts.h b/fs/bcachefs/opts.h
index a182b5d454ba..9d397fc2a1f0 100644
--- a/fs/bcachefs/opts.h
+++ b/fs/bcachefs/opts.h
@@ -659,18 +659,4 @@ static inline void bch2_io_opts_fixups(struct bch_io_opts *opts)
struct bch_io_opts bch2_opts_to_inode_opts(struct bch_opts);
bool bch2_opt_is_inode_opt(enum bch_opt_id);
-/* rebalance opts: */
-
-static inline struct bch_extent_rebalance io_opts_to_rebalance_opts(struct bch_io_opts *opts)
-{
- return (struct bch_extent_rebalance) {
- .type = BIT(BCH_EXTENT_ENTRY_rebalance),
-#define x(_name) \
- ._name = opts->_name, \
- ._name##_from_inode = opts->_name##_from_inode,
- BCH_REBALANCE_OPTS()
-#undef x
- };
-};
-
#endif /* _BCACHEFS_OPTS_H */
diff --git a/fs/bcachefs/rebalance.c b/fs/bcachefs/rebalance.c
index 4adc74cd3f70..d0a1f5cd5c2b 100644
--- a/fs/bcachefs/rebalance.c
+++ b/fs/bcachefs/rebalance.c
@@ -121,12 +121,10 @@ u64 bch2_bkey_sectors_need_rebalance(struct bch_fs *c, struct bkey_s_c k)
}
}
incompressible:
- if (opts->background_target &&
- bch2_target_accepts_data(c, BCH_DATA_user, opts->background_target)) {
+ if (opts->background_target)
bkey_for_each_ptr_decode(k.k, ptrs, p, entry)
if (!p.ptr.cached && !bch2_dev_in_target(c, p.ptr.dev, opts->background_target))
sectors += p.crc.compressed_size;
- }
return sectors;
}
@@ -140,7 +138,7 @@ static bool bch2_bkey_rebalance_needs_update(struct bch_fs *c, struct bch_io_opt
const struct bch_extent_rebalance *old = bch2_bkey_rebalance_opts(k);
if (k.k->type == KEY_TYPE_reflink_v || bch2_bkey_ptrs_need_rebalance(c, opts, k)) {
- struct bch_extent_rebalance new = io_opts_to_rebalance_opts(opts);
+ struct bch_extent_rebalance new = io_opts_to_rebalance_opts(c, opts);
return old == NULL || memcmp(old, &new, sizeof(new));
} else {
return old != NULL;
@@ -163,7 +161,7 @@ int bch2_bkey_set_needs_rebalance(struct bch_fs *c, struct bch_io_opts *opts,
k.k->u64s += sizeof(*old) / sizeof(u64);
}
- *old = io_opts_to_rebalance_opts(opts);
+ *old = io_opts_to_rebalance_opts(c, opts);
} else {
if (old)
extent_entry_drop(k, (union bch_extent_entry *) old);
diff --git a/fs/bcachefs/rebalance.h b/fs/bcachefs/rebalance.h
index 0a0821ab895d..62a3859d3823 100644
--- a/fs/bcachefs/rebalance.h
+++ b/fs/bcachefs/rebalance.h
@@ -4,8 +4,28 @@
#include "compress.h"
#include "disk_groups.h"
+#include "opts.h"
#include "rebalance_types.h"
+static inline struct bch_extent_rebalance io_opts_to_rebalance_opts(struct bch_fs *c,
+ struct bch_io_opts *opts)
+{
+ struct bch_extent_rebalance r = {
+ .type = BIT(BCH_EXTENT_ENTRY_rebalance),
+#define x(_name) \
+ ._name = opts->_name, \
+ ._name##_from_inode = opts->_name##_from_inode,
+ BCH_REBALANCE_OPTS()
+#undef x
+ };
+
+ if (r.background_target &&
+ !bch2_target_accepts_data(c, BCH_DATA_user, r.background_target))
+ r.background_target = 0;
+
+ return r;
+};
+
u64 bch2_bkey_sectors_need_rebalance(struct bch_fs *, struct bkey_s_c);
int bch2_bkey_set_needs_rebalance(struct bch_fs *, struct bch_io_opts *, struct bkey_i *);
int bch2_get_update_rebalance_opts(struct btree_trans *,
diff --git a/fs/bcachefs/reflink.c b/fs/bcachefs/reflink.c
index 93ba4f4e47ca..376fd0a6e868 100644
--- a/fs/bcachefs/reflink.c
+++ b/fs/bcachefs/reflink.c
@@ -381,6 +381,8 @@ static s64 gc_trigger_reflink_p_segment(struct btree_trans *trans,
not_found:
if (flags & BTREE_TRIGGER_check_repair) {
ret = bch2_indirect_extent_missing_error(trans, p, *idx, next_idx, false);
+ if (ret == -BCH_ERR_missing_indirect_extent)
+ ret = 0;
if (ret)
goto err;
}
diff --git a/fs/bcachefs/sb-errors_format.h b/fs/bcachefs/sb-errors_format.h
index ea0a18364751..b86ec013d7d7 100644
--- a/fs/bcachefs/sb-errors_format.h
+++ b/fs/bcachefs/sb-errors_format.h
@@ -180,9 +180,9 @@ enum bch_fsck_flags {
x(ptr_crc_nonce_mismatch, 162, 0) \
x(ptr_stripe_redundant, 163, 0) \
x(reservation_key_nr_replicas_invalid, 164, 0) \
- x(reflink_v_refcount_wrong, 165, 0) \
+ x(reflink_v_refcount_wrong, 165, FSCK_AUTOFIX) \
x(reflink_v_pos_bad, 292, 0) \
- x(reflink_p_to_missing_reflink_v, 166, 0) \
+ x(reflink_p_to_missing_reflink_v, 166, FSCK_AUTOFIX) \
x(reflink_refcount_underflow, 293, 0) \
x(stripe_pos_bad, 167, 0) \
x(stripe_val_size_bad, 168, 0) \
diff --git a/fs/bcachefs/subvolume.c b/fs/bcachefs/subvolume.c
index e3d0475232e5..b7b96283c316 100644
--- a/fs/bcachefs/subvolume.c
+++ b/fs/bcachefs/subvolume.c
@@ -428,7 +428,7 @@ static int __bch2_subvolume_delete(struct btree_trans *trans, u32 subvolid)
bch2_bkey_get_iter_typed(trans, &snapshot_iter,
BTREE_ID_snapshots, POS(0, snapid),
0, snapshot);
- ret = bkey_err(subvol);
+ ret = bkey_err(snapshot);
bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), trans->c,
"missing snapshot %u", snapid);
if (ret)
@@ -440,6 +440,11 @@ static int __bch2_subvolume_delete(struct btree_trans *trans, u32 subvolid)
bch2_bkey_get_iter_typed(trans, &snapshot_tree_iter,
BTREE_ID_snapshot_trees, POS(0, treeid),
0, snapshot_tree);
+ ret = bkey_err(snapshot_tree);
+ bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), trans->c,
+ "missing snapshot tree %u", treeid);
+ if (ret)
+ goto err;
if (le32_to_cpu(snapshot_tree.v->master_subvol) == subvolid) {
struct bkey_i_snapshot_tree *snapshot_tree_mut =
diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c
index d97ea7bd1171..6d97d412fed9 100644
--- a/fs/bcachefs/super.c
+++ b/fs/bcachefs/super.c
@@ -411,6 +411,17 @@ bool bch2_fs_emergency_read_only(struct bch_fs *c)
return ret;
}
+bool bch2_fs_emergency_read_only_locked(struct bch_fs *c)
+{
+ bool ret = !test_and_set_bit(BCH_FS_emergency_ro, &c->flags);
+
+ bch2_journal_halt_locked(&c->journal);
+ bch2_fs_read_only_async(c);
+
+ wake_up(&bch2_read_only_wait);
+ return ret;
+}
+
static int bch2_fs_read_write_late(struct bch_fs *c)
{
int ret;
diff --git a/fs/bcachefs/super.h b/fs/bcachefs/super.h
index fa6d52216510..04f8287eff5c 100644
--- a/fs/bcachefs/super.h
+++ b/fs/bcachefs/super.h
@@ -29,6 +29,7 @@ int bch2_dev_resize(struct bch_fs *, struct bch_dev *, u64);
struct bch_dev *bch2_dev_lookup(struct bch_fs *, const char *);
bool bch2_fs_emergency_read_only(struct bch_fs *);
+bool bch2_fs_emergency_read_only_locked(struct bch_fs *);
void bch2_fs_read_only(struct bch_fs *);
int bch2_fs_read_write(struct bch_fs *);
diff --git a/fs/bcachefs/trace.h b/fs/bcachefs/trace.h
index 56a5a7fbc0fd..c1b51009edf6 100644
--- a/fs/bcachefs/trace.h
+++ b/fs/bcachefs/trace.h
@@ -727,7 +727,7 @@ DEFINE_EVENT(fs_str, bucket_alloc_fail,
TP_ARGS(c, str)
);
-TRACE_EVENT(discard_buckets,
+DECLARE_EVENT_CLASS(discard_buckets_class,
TP_PROTO(struct bch_fs *c, u64 seen, u64 open,
u64 need_journal_commit, u64 discarded, const char *err),
TP_ARGS(c, seen, open, need_journal_commit, discarded, err),
@@ -759,6 +759,18 @@ TRACE_EVENT(discard_buckets,
__entry->err)
);
+DEFINE_EVENT(discard_buckets_class, discard_buckets,
+ TP_PROTO(struct bch_fs *c, u64 seen, u64 open,
+ u64 need_journal_commit, u64 discarded, const char *err),
+ TP_ARGS(c, seen, open, need_journal_commit, discarded, err)
+);
+
+DEFINE_EVENT(discard_buckets_class, discard_buckets_fast,
+ TP_PROTO(struct bch_fs *c, u64 seen, u64 open,
+ u64 need_journal_commit, u64 discarded, const char *err),
+ TP_ARGS(c, seen, open, need_journal_commit, discarded, err)
+);
+
TRACE_EVENT(bucket_invalidate,
TP_PROTO(struct bch_fs *c, unsigned dev, u64 bucket, u32 sectors),
TP_ARGS(c, dev, bucket, sectors),
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index a2cac9d0a1a9..b2fae67f8fa3 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -523,8 +523,6 @@ static void end_bbio_data_read(struct btrfs_bio *bbio)
u64 end;
u32 len;
- /* For now only order 0 folios are supported for data. */
- ASSERT(folio_order(folio) == 0);
btrfs_debug(fs_info,
"%s: bi_sector=%llu, err=%d, mirror=%u",
__func__, bio->bi_iter.bi_sector, bio->bi_status,
@@ -552,7 +550,6 @@ static void end_bbio_data_read(struct btrfs_bio *bbio)
if (likely(uptodate)) {
loff_t i_size = i_size_read(inode);
- pgoff_t end_index = i_size >> folio_shift(folio);
/*
* Zero out the remaining part if this range straddles
@@ -561,9 +558,11 @@ static void end_bbio_data_read(struct btrfs_bio *bbio)
* Here we should only zero the range inside the folio,
* not touch anything else.
*
- * NOTE: i_size is exclusive while end is inclusive.
+ * NOTE: i_size is exclusive while end is inclusive and
+ * folio_contains() takes PAGE_SIZE units.
*/
- if (folio_index(folio) == end_index && i_size <= end) {
+ if (folio_contains(folio, i_size >> PAGE_SHIFT) &&
+ i_size <= end) {
u32 zero_start = max(offset_in_folio(folio, i_size),
offset_in_folio(folio, start));
u32 zero_len = offset_in_folio(folio, end) + 1 -
@@ -899,7 +898,6 @@ static struct extent_map *get_extent_map(struct btrfs_inode *inode,
u64 len, struct extent_map **em_cached)
{
struct extent_map *em;
- struct extent_state *cached_state = NULL;
ASSERT(em_cached);
@@ -915,14 +913,12 @@ static struct extent_map *get_extent_map(struct btrfs_inode *inode,
*em_cached = NULL;
}
- btrfs_lock_and_flush_ordered_range(inode, start, start + len - 1, &cached_state);
em = btrfs_get_extent(inode, folio, start, len);
if (!IS_ERR(em)) {
BUG_ON(*em_cached);
refcount_inc(&em->refs);
*em_cached = em;
}
- unlock_extent(&inode->io_tree, start, start + len - 1, &cached_state);
return em;
}
@@ -956,7 +952,7 @@ static int btrfs_do_readpage(struct folio *folio, struct extent_map **em_cached,
return ret;
}
- if (folio->index == last_byte >> folio_shift(folio)) {
+ if (folio_contains(folio, last_byte >> PAGE_SHIFT)) {
size_t zero_offset = offset_in_folio(folio, last_byte);
if (zero_offset) {
@@ -1079,11 +1075,18 @@ static int btrfs_do_readpage(struct folio *folio, struct extent_map **em_cached,
int btrfs_read_folio(struct file *file, struct folio *folio)
{
+ struct btrfs_inode *inode = folio_to_inode(folio);
+ const u64 start = folio_pos(folio);
+ const u64 end = start + folio_size(folio) - 1;
+ struct extent_state *cached_state = NULL;
struct btrfs_bio_ctrl bio_ctrl = { .opf = REQ_OP_READ };
struct extent_map *em_cached = NULL;
int ret;
+ btrfs_lock_and_flush_ordered_range(inode, start, end, &cached_state);
ret = btrfs_do_readpage(folio, &em_cached, &bio_ctrl, NULL);
+ unlock_extent(&inode->io_tree, start, end, &cached_state);
+
free_extent_map(em_cached);
/*
@@ -2380,12 +2383,20 @@ void btrfs_readahead(struct readahead_control *rac)
{
struct btrfs_bio_ctrl bio_ctrl = { .opf = REQ_OP_READ | REQ_RAHEAD };
struct folio *folio;
+ struct btrfs_inode *inode = BTRFS_I(rac->mapping->host);
+ const u64 start = readahead_pos(rac);
+ const u64 end = start + readahead_length(rac) - 1;
+ struct extent_state *cached_state = NULL;
struct extent_map *em_cached = NULL;
u64 prev_em_start = (u64)-1;
+ btrfs_lock_and_flush_ordered_range(inode, start, end, &cached_state);
+
while ((folio = readahead_folio(rac)) != NULL)
btrfs_do_readpage(folio, &em_cached, &bio_ctrl, &prev_em_start);
+ unlock_extent(&inode->io_tree, start, end, &cached_state);
+
if (em_cached)
free_extent_map(em_cached);
submit_one_bio(&bio_ctrl);
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 36f51c311bb1..ed3c0d6546c5 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1039,7 +1039,6 @@ int btrfs_write_check(struct kiocb *iocb, size_t count)
loff_t pos = iocb->ki_pos;
int ret;
loff_t oldsize;
- loff_t start_pos;
/*
* Quickly bail out on NOWAIT writes if we don't have the nodatacow or
@@ -1066,9 +1065,8 @@ int btrfs_write_check(struct kiocb *iocb, size_t count)
inode_inc_iversion(inode);
}
- start_pos = round_down(pos, fs_info->sectorsize);
oldsize = i_size_read(inode);
- if (start_pos > oldsize) {
+ if (pos > oldsize) {
/* Expand hole size to cover write data, preventing empty gap */
loff_t end_pos = round_up(pos + count, fs_info->sectorsize);
diff --git a/fs/dcache.c b/fs/dcache.c
index 96b21a47312e..e3634916ffb9 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1700,7 +1700,7 @@ static struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name)
smp_store_release(&dentry->d_name.name, dname); /* ^^^ */
dentry->d_flags = 0;
- lockref_init(&dentry->d_lockref, 1);
+ lockref_init(&dentry->d_lockref);
seqcount_spinlock_init(&dentry->d_seq, &dentry->d_lock);
dentry->d_inode = NULL;
dentry->d_parent = dentry;
diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c
index 29f8963bb523..d771e06db738 100644
--- a/fs/erofs/zdata.c
+++ b/fs/erofs/zdata.c
@@ -726,7 +726,7 @@ static int z_erofs_register_pcluster(struct z_erofs_frontend *fe)
if (IS_ERR(pcl))
return PTR_ERR(pcl);
- lockref_init(&pcl->lockref, 1); /* one ref for this request */
+ lockref_init(&pcl->lockref); /* one ref for this request */
pcl->algorithmformat = map->m_algorithmformat;
pcl->length = 0;
pcl->partial = true;
diff --git a/fs/file_table.c b/fs/file_table.c
index f0291a66f9db..5c00dc38558d 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -194,6 +194,11 @@ static int init_file(struct file *f, int flags, const struct cred *cred)
* refcount bumps we should reinitialize the reused file first.
*/
file_ref_init(&f->f_ref, 1);
+ /*
+ * Disable permission and pre-content events for all files by default.
+ * They may be enabled later by file_set_fsnotify_mode_from_watchers().
+ */
+ file_set_fsnotify_mode(f, FMODE_NONOTIFY_PERM);
return 0;
}
@@ -375,7 +380,13 @@ struct file *alloc_file_pseudo(struct inode *inode, struct vfsmount *mnt,
if (IS_ERR(file)) {
ihold(inode);
path_put(&path);
+ return file;
}
+ /*
+ * Disable all fsnotify events for pseudo files by default.
+ * They may be enabled by caller with file_set_fsnotify_mode().
+ */
+ file_set_fsnotify_mode(file, FMODE_NONOTIFY);
return file;
}
EXPORT_SYMBOL(alloc_file_pseudo);
@@ -400,6 +411,11 @@ struct file *alloc_file_pseudo_noaccount(struct inode *inode,
return file;
}
file_init_path(file, &path, fops);
+ /*
+ * Disable all fsnotify events for pseudo files by default.
+ * They may be enabled by caller with file_set_fsnotify_mode().
+ */
+ file_set_fsnotify_mode(file, FMODE_NONOTIFY);
return file;
}
EXPORT_SYMBOL_GPL(alloc_file_pseudo_noaccount);
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 8c4c1f871a88..65c07aa95718 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -1201,8 +1201,8 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
if (glops->go_instantiate)
gl->gl_flags |= BIT(GLF_INSTANTIATE_NEEDED);
gl->gl_name = name;
+ lockref_init(&gl->gl_lockref);
lockdep_set_subclass(&gl->gl_lockref.lock, glops->go_subclass);
- gl->gl_lockref.count = 1;
gl->gl_state = LM_ST_UNLOCKED;
gl->gl_target = LM_ST_UNLOCKED;
gl->gl_demote_state = LM_ST_EXCLUSIVE;
diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c
index 04cadc02e5a6..0727f60ad028 100644
--- a/fs/gfs2/main.c
+++ b/fs/gfs2/main.c
@@ -51,7 +51,6 @@ static void gfs2_init_glock_once(void *foo)
{
struct gfs2_glock *gl = foo;
- spin_lock_init(&gl->gl_lockref.lock);
INIT_LIST_HEAD(&gl->gl_holders);
INIT_LIST_HEAD(&gl->gl_lru);
INIT_LIST_HEAD(&gl->gl_ail_list);
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index 58bc5013ca49..2298e06797ac 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -236,7 +236,7 @@ static struct gfs2_quota_data *qd_alloc(unsigned hash, struct gfs2_sbd *sdp, str
return NULL;
qd->qd_sbd = sdp;
- lockref_init(&qd->qd_lockref, 0);
+ lockref_init(&qd->qd_lockref);
qd->qd_id = qid;
qd->qd_slot = -1;
INIT_LIST_HEAD(&qd->qd_lru);
@@ -297,7 +297,6 @@ static int qd_get(struct gfs2_sbd *sdp, struct kqid qid,
spin_lock_bucket(hash);
*qdp = qd = gfs2_qd_search_bucket(hash, sdp, qid);
if (qd == NULL) {
- new_qd->qd_lockref.count++;
*qdp = new_qd;
list_add(&new_qd->qd_list, &sdp->sd_quota_list);
hlist_bl_add_head_rcu(&new_qd->qd_hlist, &qd_hash_table[hash]);
@@ -1450,6 +1449,7 @@ int gfs2_quota_init(struct gfs2_sbd *sdp)
if (qd == NULL)
goto fail_brelse;
+ qd->qd_lockref.count = 0;
set_bit(QDF_CHANGE, &qd->qd_flags);
qd->qd_change = qc_change;
qd->qd_slot = slot;
diff --git a/fs/namespace.c b/fs/namespace.c
index a3ed3f2980cb..8f1000f9f3df 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -5087,30 +5087,29 @@ static int statmount_mnt_opts(struct kstatmount *s, struct seq_file *seq)
{
struct vfsmount *mnt = s->mnt;
struct super_block *sb = mnt->mnt_sb;
+ size_t start = seq->count;
int err;
- if (sb->s_op->show_options) {
- size_t start = seq->count;
-
- err = security_sb_show_options(seq, sb);
- if (err)
- return err;
+ err = security_sb_show_options(seq, sb);
+ if (err)
+ return err;
+ if (sb->s_op->show_options) {
err = sb->s_op->show_options(seq, mnt->mnt_root);
if (err)
return err;
+ }
- if (unlikely(seq_has_overflowed(seq)))
- return -EAGAIN;
+ if (unlikely(seq_has_overflowed(seq)))
+ return -EAGAIN;
- if (seq->count == start)
- return 0;
+ if (seq->count == start)
+ return 0;
- /* skip leading comma */
- memmove(seq->buf + start, seq->buf + start + 1,
- seq->count - start - 1);
- seq->count--;
- }
+ /* skip leading comma */
+ memmove(seq->buf + start, seq->buf + start + 1,
+ seq->count - start - 1);
+ seq->count--;
return 0;
}
@@ -5191,39 +5190,45 @@ static int statmount_string(struct kstatmount *s, u64 flag)
size_t kbufsize;
struct seq_file *seq = &s->seq;
struct statmount *sm = &s->sm;
- u32 start = seq->count;
+ u32 start, *offp;
+
+ /* Reserve an empty string at the beginning for any unset offsets */
+ if (!seq->count)
+ seq_putc(seq, 0);
+
+ start = seq->count;
switch (flag) {
case STATMOUNT_FS_TYPE:
- sm->fs_type = start;
+ offp = &sm->fs_type;
ret = statmount_fs_type(s, seq);
break;
case STATMOUNT_MNT_ROOT:
- sm->mnt_root = start;
+ offp = &sm->mnt_root;
ret = statmount_mnt_root(s, seq);
break;
case STATMOUNT_MNT_POINT:
- sm->mnt_point = start;
+ offp = &sm->mnt_point;
ret = statmount_mnt_point(s, seq);
break;
case STATMOUNT_MNT_OPTS:
- sm->mnt_opts = start;
+ offp = &sm->mnt_opts;
ret = statmount_mnt_opts(s, seq);
break;
case STATMOUNT_OPT_ARRAY:
- sm->opt_array = start;
+ offp = &sm->opt_array;
ret = statmount_opt_array(s, seq);
break;
case STATMOUNT_OPT_SEC_ARRAY:
- sm->opt_sec_array = start;
+ offp = &sm->opt_sec_array;
ret = statmount_opt_sec_array(s, seq);
break;
case STATMOUNT_FS_SUBTYPE:
- sm->fs_subtype = start;
+ offp = &sm->fs_subtype;
statmount_fs_subtype(s, seq);
break;
case STATMOUNT_SB_SOURCE:
- sm->sb_source = start;
+ offp = &sm->sb_source;
ret = statmount_sb_source(s, seq);
break;
default:
@@ -5251,6 +5256,7 @@ static int statmount_string(struct kstatmount *s, u64 flag)
seq->buf[seq->count++] = '\0';
sm->mask |= flag;
+ *offp = start;
return 0;
}
diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
index 0e552d873eaa..fb9b1656a287 100644
--- a/fs/nfsd/filecache.c
+++ b/fs/nfsd/filecache.c
@@ -446,11 +446,20 @@ nfsd_file_dispose_list_delayed(struct list_head *dispose)
struct nfsd_file, nf_gc);
struct nfsd_net *nn = net_generic(nf->nf_net, nfsd_net_id);
struct nfsd_fcache_disposal *l = nn->fcache_disposal;
+ struct svc_serv *serv;
spin_lock(&l->lock);
list_move_tail(&nf->nf_gc, &l->freeme);
spin_unlock(&l->lock);
- svc_wake_up(nn->nfsd_serv);
+
+ /*
+ * The filecache laundrette is shut down after the
+ * nn->nfsd_serv pointer is cleared, but before the
+ * svc_serv is freed.
+ */
+ serv = nn->nfsd_serv;
+ if (serv)
+ svc_wake_up(serv);
}
}
diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c
index 4e3be7201b1c..5fb202acb0fd 100644
--- a/fs/nfsd/nfs2acl.c
+++ b/fs/nfsd/nfs2acl.c
@@ -84,6 +84,8 @@ out:
fail:
posix_acl_release(resp->acl_access);
posix_acl_release(resp->acl_default);
+ resp->acl_access = NULL;
+ resp->acl_default = NULL;
goto out;
}
diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c
index 5e34e98db969..7b5433bd3019 100644
--- a/fs/nfsd/nfs3acl.c
+++ b/fs/nfsd/nfs3acl.c
@@ -76,6 +76,8 @@ out:
fail:
posix_acl_release(resp->acl_access);
posix_acl_release(resp->acl_default);
+ resp->acl_access = NULL;
+ resp->acl_default = NULL;
goto out;
}
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 50e468bdb8d4..484077200c5d 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -679,7 +679,7 @@ static int nfs4_xdr_dec_cb_getattr(struct rpc_rqst *rqstp,
return status;
status = decode_cb_op_status(xdr, OP_CB_GETATTR, &cb->cb_status);
- if (unlikely(status || cb->cb_seq_status))
+ if (unlikely(status || cb->cb_status))
return status;
if (xdr_stream_decode_uint32_array(xdr, bitmap, 3) < 0)
return -NFSERR_BAD_XDR;
@@ -1583,8 +1583,11 @@ nfsd4_run_cb_work(struct work_struct *work)
nfsd4_process_cb_update(cb);
clnt = clp->cl_cb_client;
- if (!clnt) {
- /* Callback channel broken, or client killed; give up: */
+ if (!clnt || clp->cl_state == NFSD4_COURTESY) {
+ /*
+ * Callback channel broken, client killed or
+ * nfs4_client in courtesy state; give up.
+ */
nfsd41_destroy_cb(cb);
return;
}
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index b7a0cfd05401..153eeea2c7c9 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -4459,10 +4459,11 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
}
} while (slot && --cnt > 0);
}
+
+out:
seq->maxslots = max(session->se_target_maxslots, seq->maxslots);
seq->target_maxslots = session->se_target_maxslots;
-out:
switch (clp->cl_cb_state) {
case NFSD4_CB_DOWN:
seq->status_flags = SEQ4_STATUS_CB_PATH_DOWN;
diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c
index 32019751a41e..aef474f1b84b 100644
--- a/fs/nfsd/nfsfh.c
+++ b/fs/nfsd/nfsfh.c
@@ -380,8 +380,9 @@ __fh_verify(struct svc_rqst *rqstp,
error = check_nfsd_access(exp, rqstp, may_bypass_gss);
if (error)
goto out;
-
- svc_xprt_set_valid(rqstp->rq_xprt);
+ /* During LOCALIO call to fh_verify will be called with a NULL rqstp */
+ if (rqstp)
+ svc_xprt_set_valid(rqstp->rq_xprt);
/* Finally, check access permissions. */
error = nfsd_permission(cred, exp, dentry, access);
diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
index 8ee495a58d0a..fae1b6d397ea 100644
--- a/fs/notify/fsnotify.c
+++ b/fs/notify/fsnotify.c
@@ -648,7 +648,7 @@ EXPORT_SYMBOL_GPL(fsnotify);
* Later, fsnotify permission hooks do not check if there are permission event
* watches, but that there were permission event watches at open time.
*/
-void file_set_fsnotify_mode(struct file *file)
+void file_set_fsnotify_mode_from_watchers(struct file *file)
{
struct dentry *dentry = file->f_path.dentry, *parent;
struct super_block *sb = dentry->d_sb;
@@ -665,7 +665,7 @@ void file_set_fsnotify_mode(struct file *file)
*/
if (likely(!fsnotify_sb_has_priority_watchers(sb,
FSNOTIFY_PRIO_CONTENT))) {
- file->f_mode |= FMODE_NONOTIFY_PERM;
+ file_set_fsnotify_mode(file, FMODE_NONOTIFY_PERM);
return;
}
@@ -676,7 +676,7 @@ void file_set_fsnotify_mode(struct file *file)
if ((!d_is_dir(dentry) && !d_is_reg(dentry)) ||
likely(!fsnotify_sb_has_priority_watchers(sb,
FSNOTIFY_PRIO_PRE_CONTENT))) {
- file->f_mode |= FMODE_NONOTIFY | FMODE_NONOTIFY_PERM;
+ file_set_fsnotify_mode(file, FMODE_NONOTIFY | FMODE_NONOTIFY_PERM);
return;
}
@@ -686,19 +686,25 @@ void file_set_fsnotify_mode(struct file *file)
*/
mnt_mask = READ_ONCE(real_mount(file->f_path.mnt)->mnt_fsnotify_mask);
if (unlikely(fsnotify_object_watched(d_inode(dentry), mnt_mask,
- FSNOTIFY_PRE_CONTENT_EVENTS)))
+ FSNOTIFY_PRE_CONTENT_EVENTS))) {
+ /* Enable pre-content events */
+ file_set_fsnotify_mode(file, 0);
return;
+ }
/* Is parent watching for pre-content events on this file? */
if (dentry->d_flags & DCACHE_FSNOTIFY_PARENT_WATCHED) {
parent = dget_parent(dentry);
p_mask = fsnotify_inode_watches_children(d_inode(parent));
dput(parent);
- if (p_mask & FSNOTIFY_PRE_CONTENT_EVENTS)
+ if (p_mask & FSNOTIFY_PRE_CONTENT_EVENTS) {
+ /* Enable pre-content events */
+ file_set_fsnotify_mode(file, 0);
return;
+ }
}
/* Nobody watching for pre-content events from this file */
- file->f_mode |= FMODE_NONOTIFY | FMODE_NONOTIFY_PERM;
+ file_set_fsnotify_mode(file, FMODE_NONOTIFY | FMODE_NONOTIFY_PERM);
}
#endif
diff --git a/fs/open.c b/fs/open.c
index 932e5a6de63b..1be20de9f283 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -905,7 +905,8 @@ static int do_dentry_open(struct file *f,
f->f_sb_err = file_sample_sb_err(f);
if (unlikely(f->f_flags & O_PATH)) {
- f->f_mode = FMODE_PATH | FMODE_OPENED | FMODE_NONOTIFY;
+ f->f_mode = FMODE_PATH | FMODE_OPENED;
+ file_set_fsnotify_mode(f, FMODE_NONOTIFY);
f->f_op = &empty_fops;
return 0;
}
@@ -935,10 +936,10 @@ static int do_dentry_open(struct file *f,
/*
* Set FMODE_NONOTIFY_* bits according to existing permission watches.
- * If FMODE_NONOTIFY was already set for an fanotify fd, this doesn't
- * change anything.
+ * If FMODE_NONOTIFY mode was already set for an fanotify fd or for a
+ * pseudo file, this call will not change the mode.
*/
- file_set_fsnotify_mode(f);
+ file_set_fsnotify_mode_from_watchers(f);
error = fsnotify_open_perm(f);
if (error)
goto cleanup_all;
@@ -1122,7 +1123,7 @@ struct file *dentry_open_nonotify(const struct path *path, int flags,
if (!IS_ERR(f)) {
int error;
- f->f_mode |= FMODE_NONOTIFY;
+ file_set_fsnotify_mode(f, FMODE_NONOTIFY);
error = vfs_open(path, f);
if (error) {
fput(f);
diff --git a/fs/pidfs.c b/fs/pidfs.c
index 049352f973de..63f9699ebac3 100644
--- a/fs/pidfs.c
+++ b/fs/pidfs.c
@@ -287,7 +287,6 @@ static bool pidfs_ioctl_valid(unsigned int cmd)
switch (cmd) {
case FS_IOC_GETVERSION:
case PIDFD_GET_CGROUP_NAMESPACE:
- case PIDFD_GET_INFO:
case PIDFD_GET_IPC_NAMESPACE:
case PIDFD_GET_MNT_NAMESPACE:
case PIDFD_GET_NET_NAMESPACE:
@@ -300,6 +299,17 @@ static bool pidfs_ioctl_valid(unsigned int cmd)
return true;
}
+ /* Extensible ioctls require some more careful checks. */
+ switch (_IOC_NR(cmd)) {
+ case _IOC_NR(PIDFD_GET_INFO):
+ /*
+ * Try to prevent performing a pidfd ioctl when someone
+ * erronously mistook the file descriptor for a pidfd.
+ * This is not perfect but will catch most cases.
+ */
+ return (_IOC_TYPE(cmd) == _IOC_TYPE(PIDFD_GET_INFO));
+ }
+
return false;
}
diff --git a/fs/pipe.c b/fs/pipe.c
index 94b59045ab44..ce1af7592780 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -960,6 +960,12 @@ int create_pipe_files(struct file **res, int flags)
res[1] = f;
stream_open(inode, res[0]);
stream_open(inode, res[1]);
+ /*
+ * Disable permission and pre-content events, but enable legacy
+ * inotify events for legacy users.
+ */
+ file_set_fsnotify_mode(res[0], FMODE_NONOTIFY_PERM);
+ file_set_fsnotify_mode(res[1], FMODE_NONOTIFY_PERM);
return 0;
}
diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h
index a68434ad744a..ac1f890a0d54 100644
--- a/fs/smb/client/cifsglob.h
+++ b/fs/smb/client/cifsglob.h
@@ -357,7 +357,7 @@ struct smb_version_operations {
int (*handle_cancelled_mid)(struct mid_q_entry *, struct TCP_Server_Info *);
void (*downgrade_oplock)(struct TCP_Server_Info *server,
struct cifsInodeInfo *cinode, __u32 oplock,
- unsigned int epoch, bool *purge_cache);
+ __u16 epoch, bool *purge_cache);
/* process transaction2 response */
bool (*check_trans2)(struct mid_q_entry *, struct TCP_Server_Info *,
char *, int);
@@ -552,12 +552,12 @@ struct smb_version_operations {
/* if we can do cache read operations */
bool (*is_read_op)(__u32);
/* set oplock level for the inode */
- void (*set_oplock_level)(struct cifsInodeInfo *, __u32, unsigned int,
- bool *);
+ void (*set_oplock_level)(struct cifsInodeInfo *cinode, __u32 oplock, __u16 epoch,
+ bool *purge_cache);
/* create lease context buffer for CREATE request */
char * (*create_lease_buf)(u8 *lease_key, u8 oplock);
/* parse lease context buffer and return oplock/epoch info */
- __u8 (*parse_lease_buf)(void *buf, unsigned int *epoch, char *lkey);
+ __u8 (*parse_lease_buf)(void *buf, __u16 *epoch, char *lkey);
ssize_t (*copychunk_range)(const unsigned int,
struct cifsFileInfo *src_file,
struct cifsFileInfo *target_file,
@@ -1447,7 +1447,7 @@ struct cifs_fid {
__u8 create_guid[16];
__u32 access;
struct cifs_pending_open *pending_open;
- unsigned int epoch;
+ __u16 epoch;
#ifdef CONFIG_CIFS_DEBUG2
__u64 mid;
#endif /* CIFS_DEBUG2 */
@@ -1480,7 +1480,7 @@ struct cifsFileInfo {
bool oplock_break_cancelled:1;
bool status_file_deleted:1; /* file has been deleted */
bool offload:1; /* offload final part of _put to a wq */
- unsigned int oplock_epoch; /* epoch from the lease break */
+ __u16 oplock_epoch; /* epoch from the lease break */
__u32 oplock_level; /* oplock/lease level from the lease break */
int count;
spinlock_t file_info_lock; /* protects four flag/count fields above */
@@ -1577,7 +1577,7 @@ struct cifsInodeInfo {
spinlock_t open_file_lock; /* protects openFileList */
__u32 cifsAttrs; /* e.g. DOS archive bit, sparse, compressed, system */
unsigned int oplock; /* oplock/lease level we have */
- unsigned int epoch; /* used to track lease state changes */
+ __u16 epoch; /* used to track lease state changes */
#define CIFS_INODE_PENDING_OPLOCK_BREAK (0) /* oplock break in progress */
#define CIFS_INODE_PENDING_WRITERS (1) /* Writes in progress */
#define CIFS_INODE_FLAG_UNUSED (2) /* Unused flag */
diff --git a/fs/smb/client/dfs.c b/fs/smb/client/dfs.c
index dad521336b5e..f65a8a90ba27 100644
--- a/fs/smb/client/dfs.c
+++ b/fs/smb/client/dfs.c
@@ -150,25 +150,27 @@ again:
if (rc)
continue;
- if (tgt.flags & DFSREF_STORAGE_SERVER) {
- rc = cifs_mount_get_tcon(mnt_ctx);
- if (!rc)
- rc = cifs_is_path_remote(mnt_ctx);
+ rc = cifs_mount_get_tcon(mnt_ctx);
+ if (rc) {
+ if (tgt.server_type == DFS_TYPE_LINK &&
+ DFS_INTERLINK(tgt.flags))
+ rc = -EREMOTE;
+ } else {
+ rc = cifs_is_path_remote(mnt_ctx);
if (!rc) {
ref_walk_set_tgt_hint(rw);
break;
}
- if (rc != -EREMOTE)
- continue;
}
-
- rc = ref_walk_advance(rw);
- if (!rc) {
- rc = setup_dfs_ref(&tgt, rw);
- if (rc)
- break;
- ref_walk_mark_end(rw);
- goto again;
+ if (rc == -EREMOTE) {
+ rc = ref_walk_advance(rw);
+ if (!rc) {
+ rc = setup_dfs_ref(&tgt, rw);
+ if (rc)
+ break;
+ ref_walk_mark_end(rw);
+ goto again;
+ }
}
}
} while (rc && ref_walk_descend(rw));
diff --git a/fs/smb/client/dfs.h b/fs/smb/client/dfs.h
index ed4cd7cf1ec6..e60f0a24a8a1 100644
--- a/fs/smb/client/dfs.h
+++ b/fs/smb/client/dfs.h
@@ -188,4 +188,11 @@ static inline void dfs_put_root_smb_sessions(struct list_head *head)
}
}
+static inline const char *dfs_ses_refpath(struct cifs_ses *ses)
+{
+ const char *path = ses->server->leaf_fullpath;
+
+ return path ? path + 1 : ERR_PTR(-ENOENT);
+}
+
#endif /* _CIFS_DFS_H */
diff --git a/fs/smb/client/dfs_cache.c b/fs/smb/client/dfs_cache.c
index 5022bb1f122a..4dada26d56b5 100644
--- a/fs/smb/client/dfs_cache.c
+++ b/fs/smb/client/dfs_cache.c
@@ -1136,33 +1136,19 @@ static bool is_ses_good(struct cifs_ses *ses)
return ret;
}
-static char *get_ses_refpath(struct cifs_ses *ses)
-{
- struct TCP_Server_Info *server = ses->server;
- char *path = ERR_PTR(-ENOENT);
-
- if (server->leaf_fullpath) {
- path = kstrdup(server->leaf_fullpath + 1, GFP_KERNEL);
- if (!path)
- path = ERR_PTR(-ENOMEM);
- }
- return path;
-}
-
/* Refresh dfs referral of @ses */
static void refresh_ses_referral(struct cifs_ses *ses)
{
struct cache_entry *ce;
unsigned int xid;
- char *path;
+ const char *path;
int rc = 0;
xid = get_xid();
- path = get_ses_refpath(ses);
+ path = dfs_ses_refpath(ses);
if (IS_ERR(path)) {
rc = PTR_ERR(path);
- path = NULL;
goto out;
}
@@ -1181,7 +1167,6 @@ static void refresh_ses_referral(struct cifs_ses *ses)
out:
free_xid(xid);
- kfree(path);
}
static int __refresh_tcon_referral(struct cifs_tcon *tcon,
@@ -1231,19 +1216,18 @@ static void refresh_tcon_referral(struct cifs_tcon *tcon, bool force_refresh)
struct dfs_info3_param *refs = NULL;
struct cache_entry *ce;
struct cifs_ses *ses;
- unsigned int xid;
bool needs_refresh;
- char *path;
+ const char *path;
+ unsigned int xid;
int numrefs = 0;
int rc = 0;
xid = get_xid();
ses = tcon->ses;
- path = get_ses_refpath(ses);
+ path = dfs_ses_refpath(ses);
if (IS_ERR(path)) {
rc = PTR_ERR(path);
- path = NULL;
goto out;
}
@@ -1271,7 +1255,6 @@ static void refresh_tcon_referral(struct cifs_tcon *tcon, bool force_refresh)
out:
free_xid(xid);
- kfree(path);
free_dfs_info_array(refs, numrefs);
}
diff --git a/fs/smb/client/smb1ops.c b/fs/smb/client/smb1ops.c
index 9756b876a75e..d6e2fb669c40 100644
--- a/fs/smb/client/smb1ops.c
+++ b/fs/smb/client/smb1ops.c
@@ -377,7 +377,7 @@ coalesce_t2(char *second_buf, struct smb_hdr *target_hdr)
static void
cifs_downgrade_oplock(struct TCP_Server_Info *server,
struct cifsInodeInfo *cinode, __u32 oplock,
- unsigned int epoch, bool *purge_cache)
+ __u16 epoch, bool *purge_cache)
{
cifs_set_oplock_level(cinode, oplock);
}
diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c
index 77309217dab4..ec36bed54b0b 100644
--- a/fs/smb/client/smb2ops.c
+++ b/fs/smb/client/smb2ops.c
@@ -3904,22 +3904,22 @@ static long smb3_fallocate(struct file *file, struct cifs_tcon *tcon, int mode,
static void
smb2_downgrade_oplock(struct TCP_Server_Info *server,
struct cifsInodeInfo *cinode, __u32 oplock,
- unsigned int epoch, bool *purge_cache)
+ __u16 epoch, bool *purge_cache)
{
server->ops->set_oplock_level(cinode, oplock, 0, NULL);
}
static void
smb21_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock,
- unsigned int epoch, bool *purge_cache);
+ __u16 epoch, bool *purge_cache);
static void
smb3_downgrade_oplock(struct TCP_Server_Info *server,
struct cifsInodeInfo *cinode, __u32 oplock,
- unsigned int epoch, bool *purge_cache)
+ __u16 epoch, bool *purge_cache)
{
unsigned int old_state = cinode->oplock;
- unsigned int old_epoch = cinode->epoch;
+ __u16 old_epoch = cinode->epoch;
unsigned int new_state;
if (epoch > old_epoch) {
@@ -3939,7 +3939,7 @@ smb3_downgrade_oplock(struct TCP_Server_Info *server,
static void
smb2_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock,
- unsigned int epoch, bool *purge_cache)
+ __u16 epoch, bool *purge_cache)
{
oplock &= 0xFF;
cinode->lease_granted = false;
@@ -3963,7 +3963,7 @@ smb2_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock,
static void
smb21_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock,
- unsigned int epoch, bool *purge_cache)
+ __u16 epoch, bool *purge_cache)
{
char message[5] = {0};
unsigned int new_oplock = 0;
@@ -4000,7 +4000,7 @@ smb21_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock,
static void
smb3_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock,
- unsigned int epoch, bool *purge_cache)
+ __u16 epoch, bool *purge_cache)
{
unsigned int old_oplock = cinode->oplock;
@@ -4114,7 +4114,7 @@ smb3_create_lease_buf(u8 *lease_key, u8 oplock)
}
static __u8
-smb2_parse_lease_buf(void *buf, unsigned int *epoch, char *lease_key)
+smb2_parse_lease_buf(void *buf, __u16 *epoch, char *lease_key)
{
struct create_lease *lc = (struct create_lease *)buf;
@@ -4125,7 +4125,7 @@ smb2_parse_lease_buf(void *buf, unsigned int *epoch, char *lease_key)
}
static __u8
-smb3_parse_lease_buf(void *buf, unsigned int *epoch, char *lease_key)
+smb3_parse_lease_buf(void *buf, __u16 *epoch, char *lease_key)
{
struct create_lease_v2 *lc = (struct create_lease_v2 *)buf;
diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c
index 40ad9e79437a..ed7812247ebc 100644
--- a/fs/smb/client/smb2pdu.c
+++ b/fs/smb/client/smb2pdu.c
@@ -2169,7 +2169,7 @@ tcon_exit:
tcon_error_exit:
if (rsp && rsp->hdr.Status == STATUS_BAD_NETWORK_NAME)
- cifs_tcon_dbg(VFS, "BAD_NETWORK_NAME: %s\n", tree);
+ cifs_dbg(VFS | ONCE, "BAD_NETWORK_NAME: %s\n", tree);
goto tcon_exit;
}
@@ -2329,7 +2329,7 @@ parse_posix_ctxt(struct create_context *cc, struct smb2_file_all_info *info,
int smb2_parse_contexts(struct TCP_Server_Info *server,
struct kvec *rsp_iov,
- unsigned int *epoch,
+ __u16 *epoch,
char *lease_key, __u8 *oplock,
struct smb2_file_all_info *buf,
struct create_posix_rsp *posix)
diff --git a/fs/smb/client/smb2proto.h b/fs/smb/client/smb2proto.h
index 2336dfb23f36..4662c7e2d259 100644
--- a/fs/smb/client/smb2proto.h
+++ b/fs/smb/client/smb2proto.h
@@ -283,7 +283,7 @@ extern enum securityEnum smb2_select_sectype(struct TCP_Server_Info *,
enum securityEnum);
int smb2_parse_contexts(struct TCP_Server_Info *server,
struct kvec *rsp_iov,
- unsigned int *epoch,
+ __u16 *epoch,
char *lease_key, __u8 *oplock,
struct smb2_file_all_info *buf,
struct create_posix_rsp *posix);
diff --git a/fs/stat.c b/fs/stat.c
index 2c0e111a098a..f13308bfdc98 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -281,6 +281,8 @@ static int vfs_statx_path(struct path *path, int flags, struct kstat *stat,
u32 request_mask)
{
int error = vfs_getattr(path, stat, request_mask, flags);
+ if (error)
+ return error;
if (request_mask & STATX_MNT_ID_UNIQUE) {
stat->mnt_id = real_mount(path->mnt)->mnt_id_unique;
@@ -302,7 +304,7 @@ static int vfs_statx_path(struct path *path, int flags, struct kstat *stat,
if (S_ISBLK(stat->mode))
bdev_statx(path, stat, request_mask);
- return error;
+ return 0;
}
static int vfs_statx_fd(int fd, int flags, struct kstat *stat,
diff --git a/fs/vboxsf/super.c b/fs/vboxsf/super.c
index e95b8a48d8a0..1d94bb784108 100644
--- a/fs/vboxsf/super.c
+++ b/fs/vboxsf/super.c
@@ -21,7 +21,8 @@
#define VBOXSF_SUPER_MAGIC 0x786f4256 /* 'VBox' little endian */
-static const unsigned char VBSF_MOUNT_SIGNATURE[4] = "\000\377\376\375";
+static const unsigned char VBSF_MOUNT_SIGNATURE[4] = { '\000', '\377', '\376',
+ '\375' };
static int follow_symlinks;
module_param(follow_symlinks, int, 0444);
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index 54504013c749..02a4adb4a999 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -1038,6 +1038,7 @@ defined(CONFIG_AUTOFDO_CLANG) || defined(CONFIG_PROPELLER_CLANG)
*(.discard) \
*(.discard.*) \
*(.export_symbol) \
+ *(.no_trim_symbol) \
*(.modinfo) \
/* ld.bfd warns about .gnu.version* even when not emitted */ \
*(.gnu.version*) \
diff --git a/include/drm/drm_print.h b/include/drm/drm_print.h
index f77fe1531cf8..9732f514566d 100644
--- a/include/drm/drm_print.h
+++ b/include/drm/drm_print.h
@@ -32,6 +32,7 @@
#include <linux/dynamic_debug.h>
#include <drm/drm.h>
+#include <drm/drm_device.h>
struct debugfs_regset32;
struct drm_device;
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index b087de2f3e94..200fd3c5bc70 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -191,6 +191,25 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val,
__v; \
})
+#ifdef __CHECKER__
+#define __BUILD_BUG_ON_ZERO_MSG(e, msg) (0)
+#else /* __CHECKER__ */
+#define __BUILD_BUG_ON_ZERO_MSG(e, msg) ((int)sizeof(struct {_Static_assert(!(e), msg);}))
+#endif /* __CHECKER__ */
+
+/* &a[0] degrades to a pointer: a different type from an array */
+#define __is_array(a) (!__same_type((a), &(a)[0]))
+#define __must_be_array(a) __BUILD_BUG_ON_ZERO_MSG(!__is_array(a), \
+ "must be array")
+
+#define __is_byte_array(a) (__is_array(a) && sizeof((a)[0]) == 1)
+#define __must_be_byte_array(a) __BUILD_BUG_ON_ZERO_MSG(!__is_byte_array(a), \
+ "must be byte array")
+
+/* Require C Strings (i.e. NUL-terminated) lack the "nonstring" attribute. */
+#define __must_be_cstr(p) \
+ __BUILD_BUG_ON_ZERO_MSG(__annotated(p, nonstring), "must be cstr (NUL-terminated)")
+
#endif /* __KERNEL__ */
/**
@@ -231,19 +250,6 @@ static inline void *offset_to_ptr(const int *off)
#define __ADDRESSABLE_ASM_STR(sym) __stringify(__ADDRESSABLE_ASM(sym))
-#ifdef __CHECKER__
-#define __BUILD_BUG_ON_ZERO_MSG(e, msg) (0)
-#else /* __CHECKER__ */
-#define __BUILD_BUG_ON_ZERO_MSG(e, msg) ((int)sizeof(struct {_Static_assert(!(e), msg);}))
-#endif /* __CHECKER__ */
-
-/* &a[0] degrades to a pointer: a different type from an array */
-#define __must_be_array(a) __BUILD_BUG_ON_ZERO_MSG(__same_type((a), &(a)[0]), "must be array")
-
-/* Require C Strings (i.e. NUL-terminated) lack the "nonstring" attribute. */
-#define __must_be_cstr(p) \
- __BUILD_BUG_ON_ZERO_MSG(__annotated(p, nonstring), "must be cstr (NUL-terminated)")
-
/*
* This returns a constant expression while determining if an argument is
* a constant expression, most importantly without evaluating the argument.
diff --git a/include/linux/fs.h b/include/linux/fs.h
index be3ad155ec9f..2c3b2f8a621f 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -222,7 +222,6 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
#define FMODE_FSNOTIFY_HSM(mode) 0
#endif
-
/*
* Attribute flags. These should be or-ed together to figure out what
* has been changed!
@@ -791,6 +790,19 @@ struct inode {
static inline void inode_set_cached_link(struct inode *inode, char *link, int linklen)
{
+ int testlen;
+
+ /*
+ * TODO: patch it into a debug-only check if relevant macros show up.
+ * In the meantime, since we are suffering strlen even on production kernels
+ * to find the right length, do a fixup if the wrong value got passed.
+ */
+ testlen = strlen(link);
+ if (testlen != linklen) {
+ WARN_ONCE(1, "bad length passed for symlink [%s] (got %d, expected %d)",
+ link, linklen, testlen);
+ linklen = testlen;
+ }
inode->i_link = link;
inode->i_linklen = linklen;
inode->i_opflags |= IOP_CACHED_LINK;
@@ -3140,6 +3152,12 @@ static inline void exe_file_allow_write_access(struct file *exe_file)
allow_write_access(exe_file);
}
+static inline void file_set_fsnotify_mode(struct file *file, fmode_t mode)
+{
+ file->f_mode &= ~FMODE_FSNOTIFY_MASK;
+ file->f_mode |= mode;
+}
+
static inline bool inode_is_open_for_write(const struct inode *inode)
{
return atomic_read(&inode->i_writecount) > 0;
diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h
index 1a9ef8f6784d..6a33288bd6a1 100644
--- a/include/linux/fsnotify.h
+++ b/include/linux/fsnotify.h
@@ -129,7 +129,7 @@ static inline int fsnotify_file(struct file *file, __u32 mask)
#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
-void file_set_fsnotify_mode(struct file *file);
+void file_set_fsnotify_mode_from_watchers(struct file *file);
/*
* fsnotify_file_area_perm - permission hook before access to file range
@@ -213,7 +213,7 @@ static inline int fsnotify_open_perm(struct file *file)
}
#else
-static inline void file_set_fsnotify_mode(struct file *file)
+static inline void file_set_fsnotify_mode_from_watchers(struct file *file)
{
}
diff --git a/include/linux/i2c.h b/include/linux/i2c.h
index c31fd1dba3bd..2b2af24d2a43 100644
--- a/include/linux/i2c.h
+++ b/include/linux/i2c.h
@@ -244,6 +244,7 @@ enum i2c_driver_flags {
* @id_table: List of I2C devices supported by this driver
* @detect: Callback for device detection
* @address_list: The I2C addresses to probe (for detect)
+ * @clients: List of detected clients we created (for i2c-core use only)
* @flags: A bitmask of flags defined in &enum i2c_driver_flags
*
* The driver.owner field should be set to the module owner of this driver.
@@ -298,6 +299,7 @@ struct i2c_driver {
/* Device detection callback for automatic device creation */
int (*detect)(struct i2c_client *client, struct i2c_board_info *info);
const unsigned short *address_list;
+ struct list_head clients;
u32 flags;
};
@@ -313,6 +315,8 @@ struct i2c_driver {
* @dev: Driver model device node for the slave.
* @init_irq: IRQ that was set at initialization
* @irq: indicates the IRQ generated by this device (if any)
+ * @detected: member of an i2c_driver.clients list or i2c-core's
+ * userspace_devices list
* @slave_cb: Callback when I2C slave mode of an adapter is used. The adapter
* calls it to pass on slave events to the slave driver.
* @devres_group_id: id of the devres group that will be created for resources
@@ -332,8 +336,6 @@ struct i2c_client {
#define I2C_CLIENT_SLAVE 0x20 /* we are the slave */
#define I2C_CLIENT_HOST_NOTIFY 0x40 /* We want to use I2C host notify */
#define I2C_CLIENT_WAKE 0x80 /* for board_info; true iff can wake */
-#define I2C_CLIENT_AUTO 0x100 /* client was auto-detected */
-#define I2C_CLIENT_USER 0x200 /* client was userspace-created */
#define I2C_CLIENT_SCCB 0x9000 /* Use Omnivision SCCB protocol */
/* Must match I2C_M_STOP|IGNORE_NAK */
@@ -345,6 +347,7 @@ struct i2c_client {
struct device dev; /* the device structure */
int init_irq; /* irq set at initialization */
int irq; /* irq issued by device */
+ struct list_head detected;
#if IS_ENABLED(CONFIG_I2C_SLAVE)
i2c_slave_cb_t slave_cb; /* callback for slave mode */
#endif
@@ -751,6 +754,9 @@ struct i2c_adapter {
char name[48];
struct completion dev_released;
+ struct mutex userspace_clients_lock;
+ struct list_head userspace_clients;
+
struct i2c_bus_recovery_info *bus_recovery_info;
const struct i2c_adapter_quirks *quirks;
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 3cb9a32a6330..f34f4cfaa513 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1615,7 +1615,6 @@ int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu);
bool kvm_arch_dy_runnable(struct kvm_vcpu *vcpu);
bool kvm_arch_dy_has_pending_interrupt(struct kvm_vcpu *vcpu);
bool kvm_arch_vcpu_preempted_in_kernel(struct kvm_vcpu *vcpu);
-int kvm_arch_post_init_vm(struct kvm *kvm);
void kvm_arch_pre_destroy_vm(struct kvm *kvm);
void kvm_arch_create_vm_debugfs(struct kvm *kvm);
diff --git a/include/linux/lockref.h b/include/linux/lockref.h
index c39f119659ba..676721ee878d 100644
--- a/include/linux/lockref.h
+++ b/include/linux/lockref.h
@@ -37,12 +37,13 @@ struct lockref {
/**
* lockref_init - Initialize a lockref
* @lockref: pointer to lockref structure
- * @count: initial count
+ *
+ * Initializes @lockref->count to 1.
*/
-static inline void lockref_init(struct lockref *lockref, unsigned int count)
+static inline void lockref_init(struct lockref *lockref)
{
spin_lock_init(&lockref->lock);
- lockref->count = count;
+ lockref->count = 1;
}
void lockref_get(struct lockref *lockref);
diff --git a/include/linux/module.h b/include/linux/module.h
index 23792d5d7b74..30e5b19bafa9 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -306,7 +306,10 @@ extern int modules_disabled; /* for sysctl */
/* Get/put a kernel symbol (calls must be symmetric) */
void *__symbol_get(const char *symbol);
void *__symbol_get_gpl(const char *symbol);
-#define symbol_get(x) ((typeof(&x))(__symbol_get(__stringify(x))))
+#define symbol_get(x) ({ \
+ static const char __notrim[] \
+ __used __section(".no_trim_symbol") = __stringify(x); \
+ (typeof(&x))(__symbol_get(__stringify(x))); })
/* modules using other modules: kdb wants to see this. */
struct module_use {
diff --git a/include/linux/string.h b/include/linux/string.h
index 86d5d352068b..f8e21e80942f 100644
--- a/include/linux/string.h
+++ b/include/linux/string.h
@@ -414,7 +414,8 @@ void memcpy_and_pad(void *dest, size_t dest_len, const void *src, size_t count,
* must be discoverable by the compiler.
*/
#define strtomem_pad(dest, src, pad) do { \
- const size_t _dest_len = __builtin_object_size(dest, 1); \
+ const size_t _dest_len = __must_be_byte_array(dest) + \
+ ARRAY_SIZE(dest); \
const size_t _src_len = __builtin_object_size(src, 1); \
\
BUILD_BUG_ON(!__builtin_constant_p(_dest_len) || \
@@ -437,7 +438,8 @@ void memcpy_and_pad(void *dest, size_t dest_len, const void *src, size_t count,
* must be discoverable by the compiler.
*/
#define strtomem(dest, src) do { \
- const size_t _dest_len = __builtin_object_size(dest, 1); \
+ const size_t _dest_len = __must_be_byte_array(dest) + \
+ ARRAY_SIZE(dest); \
const size_t _src_len = __builtin_object_size(src, 1); \
\
BUILD_BUG_ON(!__builtin_constant_p(_dest_len) || \
@@ -456,7 +458,8 @@ void memcpy_and_pad(void *dest, size_t dest_len, const void *src, size_t count,
* Note that sizes of @dest and @src must be known at compile-time.
*/
#define memtostr(dest, src) do { \
- const size_t _dest_len = __builtin_object_size(dest, 1); \
+ const size_t _dest_len = __must_be_byte_array(dest) + \
+ ARRAY_SIZE(dest); \
const size_t _src_len = __builtin_object_size(src, 1); \
const size_t _src_chars = strnlen(src, _src_len); \
const size_t _copy_len = min(_dest_len - 1, _src_chars); \
@@ -481,7 +484,8 @@ void memcpy_and_pad(void *dest, size_t dest_len, const void *src, size_t count,
* Note that sizes of @dest and @src must be known at compile-time.
*/
#define memtostr_pad(dest, src) do { \
- const size_t _dest_len = __builtin_object_size(dest, 1); \
+ const size_t _dest_len = __must_be_byte_array(dest) + \
+ ARRAY_SIZE(dest); \
const size_t _src_len = __builtin_object_size(src, 1); \
const size_t _src_chars = strnlen(src, _src_len); \
const size_t _copy_len = min(_dest_len - 1, _src_chars); \
diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index efe5de6ce208..aaa4f3bc688b 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -411,13 +411,20 @@ struct drm_amdgpu_gem_userptr {
/* GFX12 and later: */
#define AMDGPU_TILING_GFX12_SWIZZLE_MODE_SHIFT 0
#define AMDGPU_TILING_GFX12_SWIZZLE_MODE_MASK 0x7
-/* These are DCC recompression setting for memory management: */
+/* These are DCC recompression settings for memory management: */
#define AMDGPU_TILING_GFX12_DCC_MAX_COMPRESSED_BLOCK_SHIFT 3
#define AMDGPU_TILING_GFX12_DCC_MAX_COMPRESSED_BLOCK_MASK 0x3 /* 0:64B, 1:128B, 2:256B */
#define AMDGPU_TILING_GFX12_DCC_NUMBER_TYPE_SHIFT 5
#define AMDGPU_TILING_GFX12_DCC_NUMBER_TYPE_MASK 0x7 /* CB_COLOR0_INFO.NUMBER_TYPE */
#define AMDGPU_TILING_GFX12_DCC_DATA_FORMAT_SHIFT 8
#define AMDGPU_TILING_GFX12_DCC_DATA_FORMAT_MASK 0x3f /* [0:4]:CB_COLOR0_INFO.FORMAT, [5]:MM */
+/* When clearing the buffer or moving it from VRAM to GTT, don't compress and set DCC metadata
+ * to uncompressed. Set when parts of an allocation bypass DCC and read raw data. */
+#define AMDGPU_TILING_GFX12_DCC_WRITE_COMPRESS_DISABLE_SHIFT 14
+#define AMDGPU_TILING_GFX12_DCC_WRITE_COMPRESS_DISABLE_MASK 0x1
+/* bit gap */
+#define AMDGPU_TILING_GFX12_SCANOUT_SHIFT 63
+#define AMDGPU_TILING_GFX12_SCANOUT_MASK 0x1
/* Set/Get helpers for tiling flags. */
#define AMDGPU_TILING_SET(field, value) \
diff --git a/include/ufs/ufs.h b/include/ufs/ufs.h
index 89672ad8c3bb..f151feb0ca8c 100644
--- a/include/ufs/ufs.h
+++ b/include/ufs/ufs.h
@@ -385,8 +385,8 @@ enum {
/* Possible values for dExtendedUFSFeaturesSupport */
enum {
- UFS_DEV_LOW_TEMP_NOTIF = BIT(4),
- UFS_DEV_HIGH_TEMP_NOTIF = BIT(5),
+ UFS_DEV_HIGH_TEMP_NOTIF = BIT(4),
+ UFS_DEV_LOW_TEMP_NOTIF = BIT(5),
UFS_DEV_EXT_TEMP_NOTIF = BIT(6),
UFS_DEV_HPB_SUPPORT = BIT(7),
UFS_DEV_WRITE_BOOSTER_SUP = BIT(8),
diff --git a/include/ufs/ufshcd.h b/include/ufs/ufshcd.h
index 650ff238cd74..8bf31e6ca4e5 100644
--- a/include/ufs/ufshcd.h
+++ b/include/ufs/ufshcd.h
@@ -1309,7 +1309,6 @@ static inline void ufshcd_rmwl(struct ufs_hba *hba, u32 mask, u32 val, u32 reg)
void ufshcd_enable_irq(struct ufs_hba *hba);
void ufshcd_disable_irq(struct ufs_hba *hba);
int ufshcd_alloc_host(struct device *, struct ufs_hba **);
-void ufshcd_dealloc_host(struct ufs_hba *);
int ufshcd_hba_enable(struct ufs_hba *hba);
int ufshcd_init(struct ufs_hba *, void __iomem *, unsigned int);
int ufshcd_link_recovery(struct ufs_hba *hba);
diff --git a/io_uring/futex.c b/io_uring/futex.c
index 3159a2b7eeca..43e2143255f5 100644
--- a/io_uring/futex.c
+++ b/io_uring/futex.c
@@ -338,7 +338,7 @@ int io_futex_wait(struct io_kiocb *req, unsigned int issue_flags)
hlist_add_head(&req->hash_node, &ctx->futex_list);
io_ring_submit_unlock(ctx, issue_flags);
- futex_queue(&ifd->q, hb);
+ futex_queue(&ifd->q, hb, NULL);
return IOU_ISSUE_SKIP_COMPLETE;
}
diff --git a/kernel/futex/core.c b/kernel/futex/core.c
index ebdd76b4ecbb..3db8567f5a44 100644
--- a/kernel/futex/core.c
+++ b/kernel/futex/core.c
@@ -532,7 +532,8 @@ void futex_q_unlock(struct futex_hash_bucket *hb)
futex_hb_waiters_dec(hb);
}
-void __futex_queue(struct futex_q *q, struct futex_hash_bucket *hb)
+void __futex_queue(struct futex_q *q, struct futex_hash_bucket *hb,
+ struct task_struct *task)
{
int prio;
@@ -548,7 +549,7 @@ void __futex_queue(struct futex_q *q, struct futex_hash_bucket *hb)
plist_node_init(&q->list, prio);
plist_add(&q->list, &hb->chain);
- q->task = current;
+ q->task = task;
}
/**
diff --git a/kernel/futex/futex.h b/kernel/futex/futex.h
index 99b32e728c4a..6b2f4c7eb720 100644
--- a/kernel/futex/futex.h
+++ b/kernel/futex/futex.h
@@ -285,13 +285,15 @@ static inline int futex_get_value_locked(u32 *dest, u32 __user *from)
}
extern void __futex_unqueue(struct futex_q *q);
-extern void __futex_queue(struct futex_q *q, struct futex_hash_bucket *hb);
+extern void __futex_queue(struct futex_q *q, struct futex_hash_bucket *hb,
+ struct task_struct *task);
extern int futex_unqueue(struct futex_q *q);
/**
* futex_queue() - Enqueue the futex_q on the futex_hash_bucket
* @q: The futex_q to enqueue
* @hb: The destination hash bucket
+ * @task: Task queueing this futex
*
* The hb->lock must be held by the caller, and is released here. A call to
* futex_queue() is typically paired with exactly one call to futex_unqueue(). The
@@ -299,11 +301,14 @@ extern int futex_unqueue(struct futex_q *q);
* or nothing if the unqueue is done as part of the wake process and the unqueue
* state is implicit in the state of woken task (see futex_wait_requeue_pi() for
* an example).
+ *
+ * Note that @task may be NULL, for async usage of futexes.
*/
-static inline void futex_queue(struct futex_q *q, struct futex_hash_bucket *hb)
+static inline void futex_queue(struct futex_q *q, struct futex_hash_bucket *hb,
+ struct task_struct *task)
__releases(&hb->lock)
{
- __futex_queue(q, hb);
+ __futex_queue(q, hb, task);
spin_unlock(&hb->lock);
}
diff --git a/kernel/futex/pi.c b/kernel/futex/pi.c
index daea650b16f5..7a941845f7ee 100644
--- a/kernel/futex/pi.c
+++ b/kernel/futex/pi.c
@@ -982,7 +982,7 @@ retry_private:
/*
* Only actually queue now that the atomic ops are done:
*/
- __futex_queue(&q, hb);
+ __futex_queue(&q, hb, current);
if (trylock) {
ret = rt_mutex_futex_trylock(&q.pi_state->pi_mutex);
diff --git a/kernel/futex/waitwake.c b/kernel/futex/waitwake.c
index eb86a7ade06a..25877d4f2f8f 100644
--- a/kernel/futex/waitwake.c
+++ b/kernel/futex/waitwake.c
@@ -349,7 +349,7 @@ void futex_wait_queue(struct futex_hash_bucket *hb, struct futex_q *q,
* access to the hash list and forcing another memory barrier.
*/
set_current_state(TASK_INTERRUPTIBLE|TASK_FREEZABLE);
- futex_queue(q, hb);
+ futex_queue(q, hb, current);
/* Arm the timer */
if (timeout)
@@ -460,7 +460,7 @@ retry:
* next futex. Queue each futex at this moment so hb can
* be unlocked.
*/
- futex_queue(q, hb);
+ futex_queue(q, hb, current);
continue;
}
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index fd7e85220715..ef047add7f9e 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -1262,6 +1262,8 @@ void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns,
if (task_has_dl_policy(p)) {
P(dl.runtime);
P(dl.deadline);
+ } else if (fair_policy(p->policy)) {
+ P(se.slice);
}
#ifdef CONFIG_SCHED_CLASS_EXT
__PS("ext.enabled", task_on_scx(p));
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index ce2e94ccad0c..1c0ef435a7aa 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -5385,6 +5385,15 @@ static __always_inline void return_cfs_rq_runtime(struct cfs_rq *cfs_rq);
static void set_delayed(struct sched_entity *se)
{
se->sched_delayed = 1;
+
+ /*
+ * Delayed se of cfs_rq have no tasks queued on them.
+ * Do not adjust h_nr_runnable since dequeue_entities()
+ * will account it for blocked tasks.
+ */
+ if (!entity_is_task(se))
+ return;
+
for_each_sched_entity(se) {
struct cfs_rq *cfs_rq = cfs_rq_of(se);
@@ -5397,6 +5406,16 @@ static void set_delayed(struct sched_entity *se)
static void clear_delayed(struct sched_entity *se)
{
se->sched_delayed = 0;
+
+ /*
+ * Delayed se of cfs_rq have no tasks queued on them.
+ * Do not adjust h_nr_runnable since a dequeue has
+ * already accounted for it or an enqueue of a task
+ * below it will account for it in enqueue_task_fair().
+ */
+ if (!entity_is_task(se))
+ return;
+
for_each_sched_entity(se) {
struct cfs_rq *cfs_rq = cfs_rq_of(se);
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index f59381c4a2ff..7bbb408431eb 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -749,6 +749,15 @@ static bool seccomp_is_const_allow(struct sock_fprog_kern *fprog,
if (WARN_ON_ONCE(!fprog))
return false;
+ /* Our single exception to filtering. */
+#ifdef __NR_uretprobe
+#ifdef SECCOMP_ARCH_COMPAT
+ if (sd->arch == SECCOMP_ARCH_NATIVE)
+#endif
+ if (sd->nr == __NR_uretprobe)
+ return true;
+#endif
+
for (pc = 0; pc < fprog->len; pc++) {
struct sock_filter *insn = &fprog->filter[pc];
u16 code = insn->code;
@@ -1023,6 +1032,9 @@ static inline void seccomp_log(unsigned long syscall, long signr, u32 action,
*/
static const int mode1_syscalls[] = {
__NR_seccomp_read, __NR_seccomp_write, __NR_seccomp_exit, __NR_seccomp_sigreturn,
+#ifdef __NR_uretprobe
+ __NR_uretprobe,
+#endif
-1, /* negative terminated */
};
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 77d9566d3aa6..2a7802ec480c 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -373,10 +373,10 @@ void clocksource_verify_percpu(struct clocksource *cs)
cpumask_clear(&cpus_ahead);
cpumask_clear(&cpus_behind);
cpus_read_lock();
- preempt_disable();
+ migrate_disable();
clocksource_verify_choose_cpus();
if (cpumask_empty(&cpus_chosen)) {
- preempt_enable();
+ migrate_enable();
cpus_read_unlock();
pr_warn("Not enough CPUs to check clocksource '%s'.\n", cs->name);
return;
@@ -384,6 +384,7 @@ void clocksource_verify_percpu(struct clocksource *cs)
testcpu = smp_processor_id();
pr_info("Checking clocksource %s synchronization from CPU %d to CPUs %*pbl.\n",
cs->name, testcpu, cpumask_pr_args(&cpus_chosen));
+ preempt_disable();
for_each_cpu(cpu, &cpus_chosen) {
if (cpu == testcpu)
continue;
@@ -403,6 +404,7 @@ void clocksource_verify_percpu(struct clocksource *cs)
cs_nsec_min = cs_nsec;
}
preempt_enable();
+ migrate_enable();
cpus_read_unlock();
if (!cpumask_empty(&cpus_ahead))
pr_warn(" CPUs %*pbl ahead of CPU %d for clocksource %s.\n",
diff --git a/kernel/time/timer_migration.c b/kernel/time/timer_migration.c
index 9cb9b6584ea1..2f6330831f08 100644
--- a/kernel/time/timer_migration.c
+++ b/kernel/time/timer_migration.c
@@ -1675,6 +1675,9 @@ static int tmigr_setup_groups(unsigned int cpu, unsigned int node)
} while (i < tmigr_hierarchy_levels);
+ /* Assert single root */
+ WARN_ON_ONCE(!err && !group->parent && !list_is_singular(&tmigr_level_list[top]));
+
while (i > 0) {
group = stack[--i];
@@ -1716,7 +1719,12 @@ static int tmigr_setup_groups(unsigned int cpu, unsigned int node)
WARN_ON_ONCE(top == 0);
lvllist = &tmigr_level_list[top];
- if (group->num_children == 1 && list_is_singular(lvllist)) {
+
+ /*
+ * Newly created root level should have accounted the upcoming
+ * CPU's child group and pre-accounted the old root.
+ */
+ if (group->num_children == 2 && list_is_singular(lvllist)) {
/*
* The target CPU must never do the prepare work, except
* on early boot when the boot CPU is the target. Otherwise
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index 54d850997c0a..136c750b0b4d 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -198,7 +198,7 @@ int trace_graph_entry(struct ftrace_graph_ent *trace,
* returning from the function.
*/
if (ftrace_graph_notrace_addr(trace->func)) {
- *task_var |= TRACE_GRAPH_NOTRACE_BIT;
+ *task_var |= TRACE_GRAPH_NOTRACE;
/*
* Need to return 1 to have the return called
* that will clear the NOTRACE bit.
diff --git a/lib/stackinit_kunit.c b/lib/stackinit_kunit.c
index fbe910c9c825..135322592faf 100644
--- a/lib/stackinit_kunit.c
+++ b/lib/stackinit_kunit.c
@@ -75,8 +75,10 @@ static bool stackinit_range_contains(char *haystack_start, size_t haystack_size,
*/
#ifdef CONFIG_M68K
#define FILL_SIZE_STRING 8
+#define FILL_SIZE_ARRAY 2
#else
#define FILL_SIZE_STRING 16
+#define FILL_SIZE_ARRAY 8
#endif
#define INIT_CLONE_SCALAR /**/
@@ -345,11 +347,11 @@ union test_small_start {
short three;
unsigned long four;
struct big_struct {
- unsigned long array[8];
+ unsigned long array[FILL_SIZE_ARRAY];
} big;
};
-/* Mismatched sizes, with one and two being small */
+/* Mismatched sizes, with three and four being small */
union test_small_end {
short one;
unsigned long two;
diff --git a/net/bluetooth/hidp/Kconfig b/net/bluetooth/hidp/Kconfig
index 6746be07e222..e08aae35351a 100644
--- a/net/bluetooth/hidp/Kconfig
+++ b/net/bluetooth/hidp/Kconfig
@@ -1,8 +1,7 @@
# SPDX-License-Identifier: GPL-2.0-only
config BT_HIDP
tristate "HIDP protocol support"
- depends on BT_BREDR && INPUT && HID_SUPPORT
- select HID
+ depends on BT_BREDR && HID
help
HIDP (Human Interface Device Protocol) is a transport layer
for HID reports. HIDP is required for the Bluetooth Human
diff --git a/net/socket.c b/net/socket.c
index 262a28b59c7f..28bae5a94234 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -479,6 +479,11 @@ struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname)
sock->file = file;
file->private_data = sock;
stream_open(SOCK_INODE(sock), file);
+ /*
+ * Disable permission and pre-content events, but enable legacy
+ * inotify events for legacy users.
+ */
+ file_set_fsnotify_mode(file, FMODE_NONOTIFY_PERM);
return file;
}
EXPORT_SYMBOL(sock_alloc_file);
diff --git a/rust/Makefile b/rust/Makefile
index 8fcfd60447bc..ea3849eb78f6 100644
--- a/rust/Makefile
+++ b/rust/Makefile
@@ -144,7 +144,7 @@ rusttestlib-kernel: private rustc_target_flags = --extern ffi \
--extern bindings --extern uapi
rusttestlib-kernel: $(src)/kernel/lib.rs \
rusttestlib-bindings rusttestlib-uapi rusttestlib-build_error \
- $(obj)/libmacros.so $(obj)/bindings.o FORCE
+ $(obj)/$(libmacros_name) $(obj)/bindings.o FORCE
+$(call if_changed,rustc_test_library)
rusttestlib-bindings: private rustc_target_flags = --extern ffi
@@ -240,6 +240,7 @@ bindgen_skip_c_flags := -mno-fp-ret-in-387 -mpreferred-stack-boundary=% \
-fzero-call-used-regs=% -fno-stack-clash-protection \
-fno-inline-functions-called-once -fsanitize=bounds-strict \
-fstrict-flex-arrays=% -fmin-function-alignment=% \
+ -fzero-init-padding-bits=% \
--param=% --param asan-%
# Derived from `scripts/Makefile.clang`.
@@ -331,7 +332,7 @@ $(obj)/bindings/bindings_helpers_generated.rs: private bindgen_target_extra = ;
$(obj)/bindings/bindings_helpers_generated.rs: $(src)/helpers/helpers.c FORCE
$(call if_changed_dep,bindgen)
-rust_exports = $(NM) -p --defined-only $(1) | awk '$$2~/(T|R|D|B)/ && $$3!~/__cfi/ { printf $(2),$$3 }'
+rust_exports = $(NM) -p --defined-only $(1) | awk '$$2~/(T|R|D|B)/ && $$3!~/__cfi/ && $$3!~/__odr_asan/ { printf $(2),$$3 }'
quiet_cmd_exports = EXPORTS $@
cmd_exports = \
diff --git a/rust/kernel/init.rs b/rust/kernel/init.rs
index 3f9236c1c9d5..7fd1ea8265a5 100644
--- a/rust/kernel/init.rs
+++ b/rust/kernel/init.rs
@@ -870,7 +870,7 @@ pub unsafe trait PinInit<T: ?Sized, E = Infallible>: Sized {
/// use kernel::{types::Opaque, init::pin_init_from_closure};
/// #[repr(C)]
/// struct RawFoo([u8; 16]);
- /// extern {
+ /// extern "C" {
/// fn init_foo(_: *mut RawFoo);
/// }
///
diff --git a/samples/hid/Makefile b/samples/hid/Makefile
index 8ea59e9631a3..db5a077c77fc 100644
--- a/samples/hid/Makefile
+++ b/samples/hid/Makefile
@@ -40,16 +40,17 @@ BPF_EXTRA_CFLAGS += -I$(srctree)/arch/mips/include/asm/mach-generic
endif
endif
-TPROGS_CFLAGS += -Wall -O2
-TPROGS_CFLAGS += -Wmissing-prototypes
-TPROGS_CFLAGS += -Wstrict-prototypes
+COMMON_CFLAGS += -Wall -O2
+COMMON_CFLAGS += -Wmissing-prototypes
+COMMON_CFLAGS += -Wstrict-prototypes
+TPROGS_CFLAGS += $(COMMON_CFLAGS)
TPROGS_CFLAGS += -I$(objtree)/usr/include
TPROGS_CFLAGS += -I$(LIBBPF_INCLUDE)
TPROGS_CFLAGS += -I$(srctree)/tools/include
ifdef SYSROOT
-TPROGS_CFLAGS += --sysroot=$(SYSROOT)
+COMMON_CFLAGS += --sysroot=$(SYSROOT)
TPROGS_LDFLAGS := -L$(SYSROOT)/usr/lib
endif
@@ -112,7 +113,7 @@ clean:
$(LIBBPF): $(wildcard $(LIBBPF_SRC)/*.[ch] $(LIBBPF_SRC)/Makefile) | $(LIBBPF_OUTPUT)
# Fix up variables inherited from Kbuild that tools/ build system won't like
- $(MAKE) -C $(LIBBPF_SRC) RM='rm -rf' EXTRA_CFLAGS="$(TPROGS_CFLAGS)" \
+ $(MAKE) -C $(LIBBPF_SRC) RM='rm -rf' EXTRA_CFLAGS="$(COMMON_CFLAGS)" \
LDFLAGS=$(TPROGS_LDFLAGS) srctree=$(HID_SAMPLES_PATH)/../../ \
O= OUTPUT=$(LIBBPF_OUTPUT)/ DESTDIR=$(LIBBPF_DESTDIR) prefix= \
$@ install_headers
@@ -163,7 +164,7 @@ $(obj)/hid_surface_dial.o: $(obj)/hid_surface_dial.skel.h
VMLINUX_BTF_PATHS ?= $(abspath $(if $(O),$(O)/vmlinux)) \
$(abspath $(if $(KBUILD_OUTPUT),$(KBUILD_OUTPUT)/vmlinux)) \
- $(abspath ./vmlinux)
+ $(abspath $(objtree)/vmlinux)
VMLINUX_BTF ?= $(abspath $(firstword $(wildcard $(VMLINUX_BTF_PATHS))))
$(obj)/vmlinux.h: $(VMLINUX_BTF) $(BPFTOOL)
diff --git a/scripts/Makefile.extrawarn b/scripts/Makefile.extrawarn
index eb719f6d8d53..dc081cf46d21 100644
--- a/scripts/Makefile.extrawarn
+++ b/scripts/Makefile.extrawarn
@@ -31,6 +31,11 @@ KBUILD_CFLAGS-$(CONFIG_CC_NO_ARRAY_BOUNDS) += -Wno-array-bounds
ifdef CONFIG_CC_IS_CLANG
# The kernel builds with '-std=gnu11' so use of GNU extensions is acceptable.
KBUILD_CFLAGS += -Wno-gnu
+
+# Clang checks for overflow/truncation with '%p', while GCC does not:
+# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=111219
+KBUILD_CFLAGS += $(call cc-disable-warning, format-overflow-non-kprintf)
+KBUILD_CFLAGS += $(call cc-disable-warning, format-truncation-non-kprintf)
else
# gcc inanely warns about local variables called 'main'
@@ -105,11 +110,6 @@ KBUILD_CFLAGS += $(call cc-disable-warning, packed-not-aligned)
KBUILD_CFLAGS += $(call cc-disable-warning, format-overflow)
ifdef CONFIG_CC_IS_GCC
KBUILD_CFLAGS += $(call cc-disable-warning, format-truncation)
-else
-# Clang checks for overflow/truncation with '%p', while GCC does not:
-# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=111219
-KBUILD_CFLAGS += $(call cc-disable-warning, format-overflow-non-kprintf)
-KBUILD_CFLAGS += $(call cc-disable-warning, format-truncation-non-kprintf)
endif
KBUILD_CFLAGS += $(call cc-disable-warning, stringop-truncation)
@@ -133,7 +133,6 @@ KBUILD_CFLAGS += $(call cc-disable-warning, pointer-to-enum-cast)
KBUILD_CFLAGS += -Wno-tautological-constant-out-of-range-compare
KBUILD_CFLAGS += $(call cc-disable-warning, unaligned-access)
KBUILD_CFLAGS += -Wno-enum-compare-conditional
-KBUILD_CFLAGS += -Wno-enum-enum-conversion
endif
endif
@@ -157,6 +156,10 @@ KBUILD_CFLAGS += -Wno-missing-field-initializers
KBUILD_CFLAGS += -Wno-type-limits
KBUILD_CFLAGS += -Wno-shift-negative-value
+ifdef CONFIG_CC_IS_CLANG
+KBUILD_CFLAGS += -Wno-enum-enum-conversion
+endif
+
ifdef CONFIG_CC_IS_GCC
KBUILD_CFLAGS += -Wno-maybe-uninitialized
endif
diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib
index ad55ef201aac..cad20f0e66ee 100644
--- a/scripts/Makefile.lib
+++ b/scripts/Makefile.lib
@@ -305,7 +305,7 @@ endef
# These are shared by some Makefile.* files.
ifdef CONFIG_LTO_CLANG
-# Run $(LD) here to covert LLVM IR to ELF in the following cases:
+# Run $(LD) here to convert LLVM IR to ELF in the following cases:
# - when this object needs objtool processing, as objtool cannot process LLVM IR
# - when this is a single-object module, as modpost cannot process LLVM IR
cmd_ld_single = $(if $(objtool-enabled)$(is-single-obj-m), ; $(LD) $(ld_flags) -r -o $(tmp-target) $@; mv $(tmp-target) $@)
diff --git a/scripts/generate_rust_target.rs b/scripts/generate_rust_target.rs
index 0d00ac3723b5..4fd6b6ab3e32 100644
--- a/scripts/generate_rust_target.rs
+++ b/scripts/generate_rust_target.rs
@@ -165,6 +165,18 @@ impl KernelConfig {
let option = "CONFIG_".to_owned() + option;
self.0.contains_key(&option)
}
+
+ /// Is the rustc version at least `major.minor.patch`?
+ fn rustc_version_atleast(&self, major: u32, minor: u32, patch: u32) -> bool {
+ let check_version = 100000 * major + 100 * minor + patch;
+ let actual_version = self
+ .0
+ .get("CONFIG_RUSTC_VERSION")
+ .unwrap()
+ .parse::<u32>()
+ .unwrap();
+ check_version <= actual_version
+ }
}
fn main() {
@@ -182,6 +194,9 @@ fn main() {
}
} else if cfg.has("X86_64") {
ts.push("arch", "x86_64");
+ if cfg.rustc_version_atleast(1, 86, 0) {
+ ts.push("rustc-abi", "x86-softfloat");
+ }
ts.push(
"data-layout",
"e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128",
@@ -215,6 +230,9 @@ fn main() {
panic!("32-bit x86 only works under UML");
}
ts.push("arch", "x86");
+ if cfg.rustc_version_atleast(1, 86, 0) {
+ ts.push("rustc-abi", "x86-softfloat");
+ }
ts.push(
"data-layout",
"e-m:e-p:32:32-p270:32:32-p271:32:32-p272:64:64-i128:128-f64:32:64-f80:32-n8:16:32-S128",
diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c
index e18ae7dc8140..36b28987a2f0 100644
--- a/scripts/mod/modpost.c
+++ b/scripts/mod/modpost.c
@@ -507,6 +507,9 @@ static int parse_elf(struct elf_info *info, const char *filename)
info->modinfo_len = sechdrs[i].sh_size;
} else if (!strcmp(secname, ".export_symbol")) {
info->export_symbol_secndx = i;
+ } else if (!strcmp(secname, ".no_trim_symbol")) {
+ info->no_trim_symbol = (void *)hdr + sechdrs[i].sh_offset;
+ info->no_trim_symbol_len = sechdrs[i].sh_size;
}
if (sechdrs[i].sh_type == SHT_SYMTAB) {
@@ -1566,6 +1569,14 @@ static void read_symbols(const char *modname)
/* strip trailing .o */
mod = new_module(modname, strlen(modname) - strlen(".o"));
+ /* save .no_trim_symbol section for later use */
+ if (info.no_trim_symbol_len) {
+ mod->no_trim_symbol = xmalloc(info.no_trim_symbol_len);
+ memcpy(mod->no_trim_symbol, info.no_trim_symbol,
+ info.no_trim_symbol_len);
+ mod->no_trim_symbol_len = info.no_trim_symbol_len;
+ }
+
if (!mod->is_vmlinux) {
license = get_modinfo(&info, "license");
if (!license)
@@ -1728,6 +1739,28 @@ static void handle_white_list_exports(const char *white_list)
free(buf);
}
+/*
+ * Keep symbols recorded in the .no_trim_symbol section. This is necessary to
+ * prevent CONFIG_TRIM_UNUSED_KSYMS from dropping EXPORT_SYMBOL because
+ * symbol_get() relies on the symbol being present in the ksymtab for lookups.
+ */
+static void keep_no_trim_symbols(struct module *mod)
+{
+ unsigned long size = mod->no_trim_symbol_len;
+
+ for (char *s = mod->no_trim_symbol; s; s = next_string(s , &size)) {
+ struct symbol *sym;
+
+ /*
+ * If find_symbol() returns NULL, this symbol is not provided
+ * by any module, and symbol_get() will fail.
+ */
+ sym = find_symbol(s);
+ if (sym)
+ sym->used = true;
+ }
+}
+
static void check_modname_len(struct module *mod)
{
const char *mod_name;
@@ -2254,6 +2287,8 @@ int main(int argc, char **argv)
read_symbols_from_files(files_source);
list_for_each_entry(mod, &modules, list) {
+ keep_no_trim_symbols(mod);
+
if (mod->dump_file || mod->is_vmlinux)
continue;
diff --git a/scripts/mod/modpost.h b/scripts/mod/modpost.h
index ffd0a52a606e..59366f456b76 100644
--- a/scripts/mod/modpost.h
+++ b/scripts/mod/modpost.h
@@ -111,6 +111,8 @@ struct module_alias {
*
* @dump_file: path to the .symvers file if loaded from a file
* @aliases: list head for module_aliases
+ * @no_trim_symbol: .no_trim_symbol section data
+ * @no_trim_symbol_len: length of the .no_trim_symbol section
*/
struct module {
struct list_head list;
@@ -128,6 +130,8 @@ struct module {
// Actual imported namespaces
struct list_head imported_namespaces;
struct list_head aliases;
+ char *no_trim_symbol;
+ unsigned int no_trim_symbol_len;
char name[];
};
@@ -141,6 +145,8 @@ struct elf_info {
char *strtab;
char *modinfo;
unsigned int modinfo_len;
+ char *no_trim_symbol;
+ unsigned int no_trim_symbol_len;
/* support for 32bit section numbers */
diff --git a/scripts/module.lds.S b/scripts/module.lds.S
index c2f80f9141d4..450f1088d5fd 100644
--- a/scripts/module.lds.S
+++ b/scripts/module.lds.S
@@ -16,6 +16,7 @@ SECTIONS {
*(.discard)
*(.discard.*)
*(.export_symbol)
+ *(.no_trim_symbol)
}
__ksymtab 0 : ALIGN(8) { *(SORT(___ksymtab+*)) }
diff --git a/scripts/package/install-extmod-build b/scripts/package/install-extmod-build
index bb6e23c1174e..b724626ea0ca 100755
--- a/scripts/package/install-extmod-build
+++ b/scripts/package/install-extmod-build
@@ -63,7 +63,7 @@ if [ "${CC}" != "${HOSTCC}" ]; then
# Clear VPATH and srcroot because the source files reside in the output
# directory.
# shellcheck disable=SC2016 # $(MAKE), $(CC), and $(build) will be expanded by Make
- "${MAKE}" run-command KBUILD_RUN_COMMAND='+$(MAKE) HOSTCC=$(CC) VPATH= srcroot=. $(build)='"${destdir}"/scripts
+ "${MAKE}" run-command KBUILD_RUN_COMMAND='+$(MAKE) HOSTCC="$(CC)" VPATH= srcroot=. $(build)='"${destdir}"/scripts
rm -f "${destdir}/scripts/Kbuild"
fi
diff --git a/security/tomoyo/common.c b/security/tomoyo/common.c
index d9fa69632147..0f78898bce09 100644
--- a/security/tomoyo/common.c
+++ b/security/tomoyo/common.c
@@ -1981,6 +1981,114 @@ static int tomoyo_truncate(char *str)
}
/**
+ * tomoyo_numscan - sscanf() which stores the length of a decimal integer value.
+ *
+ * @str: String to scan.
+ * @head: Leading string that must start with.
+ * @width: Pointer to "int" for storing length of a decimal integer value after @head.
+ * @tail: Optional character that must match after a decimal integer value.
+ *
+ * Returns whether @str starts with @head and a decimal value follows @head.
+ */
+static bool tomoyo_numscan(const char *str, const char *head, int *width, const char tail)
+{
+ const char *cp;
+ const int n = strlen(head);
+
+ if (!strncmp(str, head, n)) {
+ cp = str + n;
+ while (*cp && *cp >= '0' && *cp <= '9')
+ cp++;
+ if (*cp == tail || !tail) {
+ *width = cp - (str + n);
+ return *width != 0;
+ }
+ }
+ *width = 0;
+ return 0;
+}
+
+/**
+ * tomoyo_patternize_path - Make patterns for file path. Used by learning mode.
+ *
+ * @buffer: Destination buffer.
+ * @len: Size of @buffer.
+ * @entry: Original line.
+ *
+ * Returns nothing.
+ */
+static void tomoyo_patternize_path(char *buffer, const int len, char *entry)
+{
+ int width;
+ char *cp = entry;
+
+ /* Nothing to do if this line is not for "file" related entry. */
+ if (strncmp(entry, "file ", 5))
+ goto flush;
+ /*
+ * Nothing to do if there is no colon in this line, for this rewriting
+ * applies to only filesystems where numeric values in the path are volatile.
+ */
+ cp = strchr(entry + 5, ':');
+ if (!cp) {
+ cp = entry;
+ goto flush;
+ }
+ /* Flush e.g. "file ioctl" part. */
+ while (*cp != ' ')
+ cp--;
+ *cp++ = '\0';
+ tomoyo_addprintf(buffer, len, "%s ", entry);
+ /* e.g. file ioctl pipe:[$INO] $CMD */
+ if (tomoyo_numscan(cp, "pipe:[", &width, ']')) {
+ cp += width + 7;
+ tomoyo_addprintf(buffer, len, "pipe:[\\$]");
+ goto flush;
+ }
+ /* e.g. file ioctl socket:[$INO] $CMD */
+ if (tomoyo_numscan(cp, "socket:[", &width, ']')) {
+ cp += width + 9;
+ tomoyo_addprintf(buffer, len, "socket:[\\$]");
+ goto flush;
+ }
+ if (!strncmp(cp, "proc:/self", 10)) {
+ /* e.g. file read proc:/self/task/$TID/fdinfo/$FD */
+ cp += 10;
+ tomoyo_addprintf(buffer, len, "proc:/self");
+ } else if (tomoyo_numscan(cp, "proc:/", &width, 0)) {
+ /* e.g. file read proc:/$PID/task/$TID/fdinfo/$FD */
+ /*
+ * Don't patternize $PID part if $PID == 1, for several
+ * programs access only files in /proc/1/ directory.
+ */
+ cp += width + 6;
+ if (width == 1 && *(cp - 1) == '1')
+ tomoyo_addprintf(buffer, len, "proc:/1");
+ else
+ tomoyo_addprintf(buffer, len, "proc:/\\$");
+ } else {
+ goto flush;
+ }
+ /* Patternize $TID part if "/task/" follows. */
+ if (tomoyo_numscan(cp, "/task/", &width, 0)) {
+ cp += width + 6;
+ tomoyo_addprintf(buffer, len, "/task/\\$");
+ }
+ /* Patternize $FD part if "/fd/" or "/fdinfo/" follows. */
+ if (tomoyo_numscan(cp, "/fd/", &width, 0)) {
+ cp += width + 4;
+ tomoyo_addprintf(buffer, len, "/fd/\\$");
+ } else if (tomoyo_numscan(cp, "/fdinfo/", &width, 0)) {
+ cp += width + 8;
+ tomoyo_addprintf(buffer, len, "/fdinfo/\\$");
+ }
+flush:
+ /* Flush remaining part if any. */
+ if (*cp)
+ tomoyo_addprintf(buffer, len, "%s", cp);
+}
+
+/**
* tomoyo_add_entry - Add an ACL to current thread's domain. Used by learning mode.
*
* @domain: Pointer to "struct tomoyo_domain_info".
@@ -2003,7 +2111,8 @@ static void tomoyo_add_entry(struct tomoyo_domain_info *domain, char *header)
if (!cp)
return;
*cp++ = '\0';
- len = strlen(cp) + 1;
+ /* Reserve some space for potentially using patterns. */
+ len = strlen(cp) + 16;
/* strstr() will return NULL if ordering is wrong. */
if (*cp == 'f') {
argv0 = strstr(header, " argv[]={ \"");
@@ -2020,40 +2129,10 @@ static void tomoyo_add_entry(struct tomoyo_domain_info *domain, char *header)
if (symlink)
len += tomoyo_truncate(symlink + 1) + 1;
}
- buffer = kmalloc(len, GFP_NOFS);
+ buffer = kmalloc(len, GFP_NOFS | __GFP_ZERO);
if (!buffer)
return;
- snprintf(buffer, len - 1, "%s", cp);
- if (*cp == 'f' && strchr(buffer, ':')) {
- /* Automatically replace 2 or more digits with \$ pattern. */
- char *cp2;
-
- /* e.g. file read proc:/$PID/stat */
- cp = strstr(buffer, " proc:/");
- if (cp && simple_strtoul(cp + 7, &cp2, 10) >= 10 && *cp2 == '/') {
- *(cp + 7) = '\\';
- *(cp + 8) = '$';
- memmove(cp + 9, cp2, strlen(cp2) + 1);
- goto ok;
- }
- /* e.g. file ioctl pipe:[$INO] $CMD */
- cp = strstr(buffer, " pipe:[");
- if (cp && simple_strtoul(cp + 7, &cp2, 10) >= 10 && *cp2 == ']') {
- *(cp + 7) = '\\';
- *(cp + 8) = '$';
- memmove(cp + 9, cp2, strlen(cp2) + 1);
- goto ok;
- }
- /* e.g. file ioctl socket:[$INO] $CMD */
- cp = strstr(buffer, " socket:[");
- if (cp && simple_strtoul(cp + 9, &cp2, 10) >= 10 && *cp2 == ']') {
- *(cp + 9) = '\\';
- *(cp + 10) = '$';
- memmove(cp + 11, cp2, strlen(cp2) + 1);
- goto ok;
- }
- }
-ok:
+ tomoyo_patternize_path(buffer, len, cp);
if (realpath)
tomoyo_addprintf(buffer, len, " exec.%s", realpath);
if (argv0)
diff --git a/security/tomoyo/domain.c b/security/tomoyo/domain.c
index 3a7b0874cf44..5f9ccab26e9a 100644
--- a/security/tomoyo/domain.c
+++ b/security/tomoyo/domain.c
@@ -920,7 +920,7 @@ bool tomoyo_dump_page(struct linux_binprm *bprm, unsigned long pos,
#ifdef CONFIG_MMU
/*
* This is called at execve() time in order to dig around
- * in the argv/environment of the new proceess
+ * in the argv/environment of the new process
* (represented by bprm).
*/
mmap_read_lock(bprm->mm);
diff --git a/security/tomoyo/securityfs_if.c b/security/tomoyo/securityfs_if.c
index a2705798476f..7e69747b2f77 100644
--- a/security/tomoyo/securityfs_if.c
+++ b/security/tomoyo/securityfs_if.c
@@ -229,11 +229,11 @@ static void __init tomoyo_create_entry(const char *name, const umode_t mode,
}
/**
- * tomoyo_initerface_init - Initialize /sys/kernel/security/tomoyo/ interface.
+ * tomoyo_interface_init - Initialize /sys/kernel/security/tomoyo/ interface.
*
* Returns 0.
*/
-static int __init tomoyo_initerface_init(void)
+static int __init tomoyo_interface_init(void)
{
struct tomoyo_domain_info *domain;
struct dentry *tomoyo_dir;
@@ -270,4 +270,4 @@ static int __init tomoyo_initerface_init(void)
return 0;
}
-fs_initcall(tomoyo_initerface_init);
+fs_initcall(tomoyo_interface_init);
diff --git a/security/tomoyo/tomoyo.c b/security/tomoyo/tomoyo.c
index 04a92c3d65d4..d6ebcd9db80a 100644
--- a/security/tomoyo/tomoyo.c
+++ b/security/tomoyo/tomoyo.c
@@ -549,10 +549,7 @@ static const struct lsm_id tomoyo_lsmid = {
.id = LSM_ID_TOMOYO,
};
-/*
- * tomoyo_security_ops is a "struct security_operations" which is used for
- * registering TOMOYO.
- */
+/* tomoyo_hooks is used for registering TOMOYO. */
static struct security_hook_list tomoyo_hooks[] __ro_after_init = {
LSM_HOOK_INIT(cred_prepare, tomoyo_cred_prepare),
LSM_HOOK_INIT(bprm_committed_creds, tomoyo_bprm_committed_creds),
diff --git a/tools/testing/selftests/filesystems/statmount/statmount_test.c b/tools/testing/selftests/filesystems/statmount/statmount_test.c
index 8eb6aa606a0d..46d289611ce8 100644
--- a/tools/testing/selftests/filesystems/statmount/statmount_test.c
+++ b/tools/testing/selftests/filesystems/statmount/statmount_test.c
@@ -383,6 +383,10 @@ static void test_statmount_mnt_point(void)
return;
}
+ if (!(sm->mask & STATMOUNT_MNT_POINT)) {
+ ksft_test_result_fail("missing STATMOUNT_MNT_POINT in mask\n");
+ return;
+ }
if (strcmp(sm->str + sm->mnt_point, "/") != 0) {
ksft_test_result_fail("unexpected mount point: '%s' != '/'\n",
sm->str + sm->mnt_point);
@@ -408,6 +412,10 @@ static void test_statmount_mnt_root(void)
strerror(errno));
return;
}
+ if (!(sm->mask & STATMOUNT_MNT_ROOT)) {
+ ksft_test_result_fail("missing STATMOUNT_MNT_ROOT in mask\n");
+ return;
+ }
mnt_root = sm->str + sm->mnt_root;
last_root = strrchr(mnt_root, '/');
if (last_root)
@@ -437,6 +445,10 @@ static void test_statmount_fs_type(void)
strerror(errno));
return;
}
+ if (!(sm->mask & STATMOUNT_FS_TYPE)) {
+ ksft_test_result_fail("missing STATMOUNT_FS_TYPE in mask\n");
+ return;
+ }
fs_type = sm->str + sm->fs_type;
for (s = known_fs; s != NULL; s++) {
if (strcmp(fs_type, *s) == 0)
@@ -464,6 +476,11 @@ static void test_statmount_mnt_opts(void)
return;
}
+ if (!(sm->mask & STATMOUNT_MNT_BASIC)) {
+ ksft_test_result_fail("missing STATMOUNT_MNT_BASIC in mask\n");
+ return;
+ }
+
while (getline(&line, &len, f_mountinfo) != -1) {
int i;
char *p, *p2;
@@ -514,7 +531,10 @@ static void test_statmount_mnt_opts(void)
if (p2)
*p2 = '\0';
- statmount_opts = sm->str + sm->mnt_opts;
+ if (sm->mask & STATMOUNT_MNT_OPTS)
+ statmount_opts = sm->str + sm->mnt_opts;
+ else
+ statmount_opts = "";
if (strcmp(statmount_opts, p) != 0)
ksft_test_result_fail(
"unexpected mount options: '%s' != '%s'\n",
diff --git a/tools/testing/selftests/kvm/s390/cmma_test.c b/tools/testing/selftests/kvm/s390/cmma_test.c
index e32dd59703a0..85cc8c18d6e7 100644
--- a/tools/testing/selftests/kvm/s390/cmma_test.c
+++ b/tools/testing/selftests/kvm/s390/cmma_test.c
@@ -444,7 +444,7 @@ static void assert_no_pages_cmma_dirty(struct kvm_vm *vm)
);
}
-static void test_get_inital_dirty(void)
+static void test_get_initial_dirty(void)
{
struct kvm_vm *vm = create_vm_two_memslots();
struct kvm_vcpu *vcpu;
@@ -651,7 +651,7 @@ struct testdef {
} testlist[] = {
{ "migration mode and dirty tracking", test_migration_mode },
{ "GET_CMMA_BITS: basic calls", test_get_cmma_basic },
- { "GET_CMMA_BITS: all pages are dirty initally", test_get_inital_dirty },
+ { "GET_CMMA_BITS: all pages are dirty initially", test_get_initial_dirty },
{ "GET_CMMA_BITS: holes are skipped", test_get_skip_holes },
};
diff --git a/tools/testing/selftests/kvm/s390/ucontrol_test.c b/tools/testing/selftests/kvm/s390/ucontrol_test.c
index 135ee22856cf..d265b34c54be 100644
--- a/tools/testing/selftests/kvm/s390/ucontrol_test.c
+++ b/tools/testing/selftests/kvm/s390/ucontrol_test.c
@@ -88,10 +88,6 @@ asm("test_skey_asm:\n"
" ahi %r0,1\n"
" st %r1,0(%r5,%r6)\n"
- " iske %r1,%r6\n"
- " ahi %r0,1\n"
- " diag 0,0,0x44\n"
-
" sske %r1,%r6\n"
" xgr %r1,%r1\n"
" iske %r1,%r6\n"
@@ -459,10 +455,14 @@ TEST_F(uc_kvm, uc_no_user_region)
};
ASSERT_EQ(-1, ioctl(self->vm_fd, KVM_SET_USER_MEMORY_REGION, &region));
- ASSERT_EQ(EINVAL, errno);
+ ASSERT_TRUE(errno == EEXIST || errno == EINVAL)
+ TH_LOG("errno %s (%i) not expected for ioctl KVM_SET_USER_MEMORY_REGION",
+ strerror(errno), errno);
ASSERT_EQ(-1, ioctl(self->vm_fd, KVM_SET_USER_MEMORY_REGION2, &region2));
- ASSERT_EQ(EINVAL, errno);
+ ASSERT_TRUE(errno == EEXIST || errno == EINVAL)
+ TH_LOG("errno %s (%i) not expected for ioctl KVM_SET_USER_MEMORY_REGION2",
+ strerror(errno), errno);
}
TEST_F(uc_kvm, uc_map_unmap)
@@ -596,7 +596,9 @@ TEST_F(uc_kvm, uc_skey)
ASSERT_EQ(true, uc_handle_exit(self));
ASSERT_EQ(1, sync_regs->gprs[0]);
- /* ISKE */
+ /* SSKE + ISKE */
+ sync_regs->gprs[1] = skeyvalue;
+ run->kvm_dirty_regs |= KVM_SYNC_GPRS;
ASSERT_EQ(0, uc_run_once(self));
/*
@@ -608,21 +610,11 @@ TEST_F(uc_kvm, uc_skey)
TEST_ASSERT_EQ(0, sie_block->ictl & (ICTL_ISKE | ICTL_SSKE | ICTL_RRBE));
TEST_ASSERT_EQ(KVM_EXIT_S390_SIEIC, self->run->exit_reason);
TEST_ASSERT_EQ(ICPT_INST, sie_block->icptcode);
- TEST_REQUIRE(sie_block->ipa != 0xb229);
+ TEST_REQUIRE(sie_block->ipa != 0xb22b);
- /* ISKE contd. */
+ /* SSKE + ISKE contd. */
ASSERT_EQ(false, uc_handle_exit(self));
ASSERT_EQ(2, sync_regs->gprs[0]);
- /* assert initial skey (ACC = 0, R & C = 1) */
- ASSERT_EQ(0x06, sync_regs->gprs[1]);
- uc_assert_diag44(self);
-
- /* SSKE + ISKE */
- sync_regs->gprs[1] = skeyvalue;
- run->kvm_dirty_regs |= KVM_SYNC_GPRS;
- ASSERT_EQ(0, uc_run_once(self));
- ASSERT_EQ(false, uc_handle_exit(self));
- ASSERT_EQ(3, sync_regs->gprs[0]);
ASSERT_EQ(skeyvalue, sync_regs->gprs[1]);
uc_assert_diag44(self);
@@ -631,7 +623,7 @@ TEST_F(uc_kvm, uc_skey)
run->kvm_dirty_regs |= KVM_SYNC_GPRS;
ASSERT_EQ(0, uc_run_once(self));
ASSERT_EQ(false, uc_handle_exit(self));
- ASSERT_EQ(4, sync_regs->gprs[0]);
+ ASSERT_EQ(3, sync_regs->gprs[0]);
/* assert R reset but rest of skey unchanged */
ASSERT_EQ(skeyvalue & 0xfa, sync_regs->gprs[1]);
ASSERT_EQ(0, sync_regs->gprs[1] & 0x04);
diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c
index 8c3a73461475..14ba51b52095 100644
--- a/tools/testing/selftests/seccomp/seccomp_bpf.c
+++ b/tools/testing/selftests/seccomp/seccomp_bpf.c
@@ -47,6 +47,7 @@
#include <linux/kcmp.h>
#include <sys/resource.h>
#include <sys/capability.h>
+#include <linux/perf_event.h>
#include <unistd.h>
#include <sys/syscall.h>
@@ -68,6 +69,10 @@
# define PR_SET_PTRACER 0x59616d61
#endif
+#ifndef noinline
+#define noinline __attribute__((noinline))
+#endif
+
#ifndef PR_SET_NO_NEW_PRIVS
#define PR_SET_NO_NEW_PRIVS 38
#define PR_GET_NO_NEW_PRIVS 39
@@ -4888,6 +4893,200 @@ TEST(tsync_vs_dead_thread_leader)
EXPECT_EQ(0, status);
}
+noinline int probed(void)
+{
+ return 1;
+}
+
+static int parse_uint_from_file(const char *file, const char *fmt)
+{
+ int err = -1, ret;
+ FILE *f;
+
+ f = fopen(file, "re");
+ if (f) {
+ err = fscanf(f, fmt, &ret);
+ fclose(f);
+ }
+ return err == 1 ? ret : err;
+}
+
+static int determine_uprobe_perf_type(void)
+{
+ const char *file = "/sys/bus/event_source/devices/uprobe/type";
+
+ return parse_uint_from_file(file, "%d\n");
+}
+
+static int determine_uprobe_retprobe_bit(void)
+{
+ const char *file = "/sys/bus/event_source/devices/uprobe/format/retprobe";
+
+ return parse_uint_from_file(file, "config:%d\n");
+}
+
+static ssize_t get_uprobe_offset(const void *addr)
+{
+ size_t start, base, end;
+ bool found = false;
+ char buf[256];
+ FILE *f;
+
+ f = fopen("/proc/self/maps", "r");
+ if (!f)
+ return -1;
+
+ while (fscanf(f, "%zx-%zx %s %zx %*[^\n]\n", &start, &end, buf, &base) == 4) {
+ if (buf[2] == 'x' && (uintptr_t)addr >= start && (uintptr_t)addr < end) {
+ found = true;
+ break;
+ }
+ }
+ fclose(f);
+ return found ? (uintptr_t)addr - start + base : -1;
+}
+
+FIXTURE(URETPROBE) {
+ int fd;
+};
+
+FIXTURE_VARIANT(URETPROBE) {
+ /*
+ * All of the URETPROBE behaviors can be tested with either
+ * uretprobe attached or not
+ */
+ bool attach;
+};
+
+FIXTURE_VARIANT_ADD(URETPROBE, attached) {
+ .attach = true,
+};
+
+FIXTURE_VARIANT_ADD(URETPROBE, not_attached) {
+ .attach = false,
+};
+
+FIXTURE_SETUP(URETPROBE)
+{
+ const size_t attr_sz = sizeof(struct perf_event_attr);
+ struct perf_event_attr attr;
+ ssize_t offset;
+ int type, bit;
+
+#ifndef __NR_uretprobe
+ SKIP(return, "__NR_uretprobe syscall not defined");
+#endif
+
+ if (!variant->attach)
+ return;
+
+ memset(&attr, 0, attr_sz);
+
+ type = determine_uprobe_perf_type();
+ ASSERT_GE(type, 0);
+ bit = determine_uprobe_retprobe_bit();
+ ASSERT_GE(bit, 0);
+ offset = get_uprobe_offset(probed);
+ ASSERT_GE(offset, 0);
+
+ attr.config |= 1 << bit;
+ attr.size = attr_sz;
+ attr.type = type;
+ attr.config1 = ptr_to_u64("/proc/self/exe");
+ attr.config2 = offset;
+
+ self->fd = syscall(__NR_perf_event_open, &attr,
+ getpid() /* pid */, -1 /* cpu */, -1 /* group_fd */,
+ PERF_FLAG_FD_CLOEXEC);
+}
+
+FIXTURE_TEARDOWN(URETPROBE)
+{
+ /* we could call close(self->fd), but we'd need extra filter for
+ * that and since we are calling _exit right away..
+ */
+}
+
+static int run_probed_with_filter(struct sock_fprog *prog)
+{
+ if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) ||
+ seccomp(SECCOMP_SET_MODE_FILTER, 0, prog)) {
+ return -1;
+ }
+
+ probed();
+ return 0;
+}
+
+TEST_F(URETPROBE, uretprobe_default_allow)
+{
+ struct sock_filter filter[] = {
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ };
+ struct sock_fprog prog = {
+ .len = (unsigned short)ARRAY_SIZE(filter),
+ .filter = filter,
+ };
+
+ ASSERT_EQ(0, run_probed_with_filter(&prog));
+}
+
+TEST_F(URETPROBE, uretprobe_default_block)
+{
+ struct sock_filter filter[] = {
+ BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+ offsetof(struct seccomp_data, nr)),
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_exit_group, 1, 0),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ };
+ struct sock_fprog prog = {
+ .len = (unsigned short)ARRAY_SIZE(filter),
+ .filter = filter,
+ };
+
+ ASSERT_EQ(0, run_probed_with_filter(&prog));
+}
+
+TEST_F(URETPROBE, uretprobe_block_uretprobe_syscall)
+{
+ struct sock_filter filter[] = {
+ BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+ offsetof(struct seccomp_data, nr)),
+#ifdef __NR_uretprobe
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_uretprobe, 0, 1),
+#endif
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ };
+ struct sock_fprog prog = {
+ .len = (unsigned short)ARRAY_SIZE(filter),
+ .filter = filter,
+ };
+
+ ASSERT_EQ(0, run_probed_with_filter(&prog));
+}
+
+TEST_F(URETPROBE, uretprobe_default_block_with_uretprobe_syscall)
+{
+ struct sock_filter filter[] = {
+ BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+ offsetof(struct seccomp_data, nr)),
+#ifdef __NR_uretprobe
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_uretprobe, 2, 0),
+#endif
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_exit_group, 1, 0),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ };
+ struct sock_fprog prog = {
+ .len = (unsigned short)ARRAY_SIZE(filter),
+ .filter = filter,
+ };
+
+ ASSERT_EQ(0, run_probed_with_filter(&prog));
+}
+
/*
* TODO:
* - expand NNP testing
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index faf10671eed2..ba0327e2d0d3 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1071,15 +1071,6 @@ out_err:
}
/*
- * Called after the VM is otherwise initialized, but just before adding it to
- * the vm_list.
- */
-int __weak kvm_arch_post_init_vm(struct kvm *kvm)
-{
- return 0;
-}
-
-/*
* Called just after removing the VM from the vm_list, but before doing any
* other destruction.
*/
@@ -1199,10 +1190,6 @@ static struct kvm *kvm_create_vm(unsigned long type, const char *fdname)
if (r)
goto out_err_no_debugfs;
- r = kvm_arch_post_init_vm(kvm);
- if (r)
- goto out_err;
-
mutex_lock(&kvm_lock);
list_add(&kvm->vm_list, &vm_list);
mutex_unlock(&kvm_lock);
@@ -1212,8 +1199,6 @@ static struct kvm *kvm_create_vm(unsigned long type, const char *fdname)
return kvm;
-out_err:
- kvm_destroy_vm_debugfs(kvm);
out_err_no_debugfs:
kvm_coalesced_mmio_free(kvm);
out_no_coalesced_mmio:
@@ -1971,7 +1956,15 @@ static int kvm_set_memory_region(struct kvm *kvm,
return -EINVAL;
if (mem->guest_phys_addr + mem->memory_size < mem->guest_phys_addr)
return -EINVAL;
- if ((mem->memory_size >> PAGE_SHIFT) > KVM_MEM_MAX_NR_PAGES)
+
+ /*
+ * The size of userspace-defined memory regions is restricted in order
+ * to play nice with dirty bitmap operations, which are indexed with an
+ * "unsigned int". KVM's internal memory regions don't support dirty
+ * logging, and so are exempt.
+ */
+ if (id < KVM_USER_MEM_SLOTS &&
+ (mem->memory_size >> PAGE_SHIFT) > KVM_MEM_MAX_NR_PAGES)
return -EINVAL;
slots = __kvm_memslots(kvm, as_id);