575 files changed, 10425 insertions, 3840 deletions
@@ -142,13 +142,17 @@ Boris Brezillon <bbrezillon@kernel.org> <boris.brezillon@bootlin.com> Boris Brezillon <bbrezillon@kernel.org> <boris.brezillon@free-electrons.com> Brendan Higgins <brendan.higgins@linux.dev> <brendanhiggins@google.com> Brian Avery <b.avery@hp.com> +Brian Cain <bcain@kernel.org> <brian.cain@oss.qualcomm.com> +Brian Cain <bcain@kernel.org> <bcain@quicinc.com> Brian King <brking@us.ibm.com> Brian Silverman <bsilver16384@gmail.com> <brian.silverman@bluerivertech.com> Bryan Tan <bryan-bt.tan@broadcom.com> <bryantan@vmware.com> Cai Huoqing <cai.huoqing@linux.dev> <caihuoqing@baidu.com> Can Guo <quic_cang@quicinc.com> <cang@codeaurora.org> Carl Huang <quic_cjhuang@quicinc.com> <cjhuang@codeaurora.org> -Carlos Bilbao <carlos.bilbao.osdev@gmail.com> <carlos.bilbao@amd.com> +Carlos Bilbao <carlos.bilbao@kernel.org> <carlos.bilbao@amd.com> +Carlos Bilbao <carlos.bilbao@kernel.org> <carlos.bilbao.osdev@gmail.com> +Carlos Bilbao <carlos.bilbao@kernel.org> <bilbao@vt.edu> Changbin Du <changbin.du@intel.com> <changbin.du@gmail.com> Changbin Du <changbin.du@intel.com> <changbin.du@intel.com> Chao Yu <chao@kernel.org> <chao2.yu@samsung.com> @@ -165,6 +169,7 @@ Christian Brauner <brauner@kernel.org> <christian.brauner@canonical.com> Christian Brauner <brauner@kernel.org> <christian.brauner@ubuntu.com> Christian Marangi <ansuelsmth@gmail.com> Christophe Ricard <christophe.ricard@gmail.com> +Christopher Obbard <christopher.obbard@linaro.org> <chris.obbard@collabora.com> Christoph Hellwig <hch@lst.de> Chuck Lever <chuck.lever@oracle.com> <cel@kernel.org> Chuck Lever <chuck.lever@oracle.com> <cel@netapp.com> @@ -261,6 +266,7 @@ Guo Ren <guoren@kernel.org> <ren_guo@c-sky.com> Guru Das Srinagesh <quic_gurus@quicinc.com> <gurus@codeaurora.org> Gustavo Padovan <gustavo@las.ic.unicamp.br> Gustavo Padovan <padovan@profusion.mobi> +Hamza Mahfooz <hamzamahfooz@linux.microsoft.com> <hamza.mahfooz@amd.com> Hanjun Guo <guohanjun@huawei.com> <hanjun.guo@linaro.org> Hans Verkuil <hverkuil@xs4all.nl> <hansverk@cisco.com> Hans Verkuil <hverkuil@xs4all.nl> <hverkuil-cisco@xs4all.nl> @@ -761,6 +767,7 @@ Wolfram Sang <wsa@kernel.org> <wsa@the-dreams.de> Yakir Yang <kuankuan.y@gmail.com> <ykk@rock-chips.com> Yanteng Si <si.yanteng@linux.dev> <siyanteng@loongson.cn> Ying Huang <huang.ying.caritas@gmail.com> <ying.huang@intel.com> +Yosry Ahmed <yosry.ahmed@linux.dev> <yosryahmed@google.com> Yusuke Goda <goda.yusuke@renesas.com> Zack Rusin <zack.rusin@broadcom.com> <zackr@vmware.com> Zhu Yanjun <zyjzyj2000@gmail.com> <yanjunz@nvidia.com> diff --git a/Documentation/ABI/stable/sysfs-class-bluetooth b/Documentation/ABI/stable/sysfs-class-bluetooth new file mode 100644 index 000000000000..36be02471174 --- /dev/null +++ b/Documentation/ABI/stable/sysfs-class-bluetooth @@ -0,0 +1,9 @@ +What: /sys/class/bluetooth/hci<index>/reset +Date: 14-Jan-2025 +KernelVersion: 6.13 +Contact: linux-bluetooth@vger.kernel.org +Description: This write-only attribute allows users to trigger the vendor reset + method on the Bluetooth device when arbitrary data is written. + The reset may or may not be done through the device transport + (e.g., UART/USB), and can also be done through an out-of-band + approach such as GPIO. 
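As a minimal user-space sketch of the new write-only attribute (assuming a controller registered as hci0; the program itself is illustrative and not part of the patch), any data written to the file triggers the vendor reset method::

    #include <fcntl.h>
    #include <stdio.h>
    #include <unistd.h>

    int main(void)
    {
            /* "hci0" is an example index; substitute the controller in use. */
            int fd = open("/sys/class/bluetooth/hci0/reset", O_WRONLY);

            if (fd < 0) {
                    perror("open");
                    return 1;
            }
            /* The attribute is write-only and the written value is ignored. */
            if (write(fd, "1", 1) < 0)
                    perror("write");
            close(fd);
            return 0;
    }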
diff --git a/Documentation/arch/riscv/hwprobe.rst b/Documentation/arch/riscv/hwprobe.rst index 955fbcd19ce9..f273ea15a8e8 100644 --- a/Documentation/arch/riscv/hwprobe.rst +++ b/Documentation/arch/riscv/hwprobe.rst @@ -293,3 +293,13 @@ The following keys are defined: * :c:macro:`RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED`: Misaligned vector accesses are not supported at all and will generate a misaligned address fault. + +* :c:macro:`RISCV_HWPROBE_KEY_VENDOR_EXT_THEAD_0`: A bitmask containing the + thead vendor extensions that are compatible with the + :c:macro:`RISCV_HWPROBE_BASE_BEHAVIOR_IMA`: base system behavior. + + * T-HEAD + + * :c:macro:`RISCV_HWPROBE_VENDOR_EXT_XTHEADVECTOR`: The xtheadvector vendor + extension is supported in the T-Head ISA extensions spec starting from + commit a18c801634 ("Add T-Head VECTOR vendor extension. "). diff --git a/Documentation/block/ublk.rst b/Documentation/block/ublk.rst index 51665a3e6a50..1e0e7358e14a 100644 --- a/Documentation/block/ublk.rst +++ b/Documentation/block/ublk.rst @@ -333,6 +333,4 @@ References .. [#userspace_readme] https://github.com/ming1/ubdsrv/blob/master/README -.. [#stefan] https://lore.kernel.org/linux-block/YoOr6jBfgVm8GvWg@stefanha-x1.localdomain/ - .. [#xiaoguang] https://lore.kernel.org/linux-block/YoOr6jBfgVm8GvWg@stefanha-x1.localdomain/ diff --git a/Documentation/devicetree/bindings/arm/fsl.yaml b/Documentation/devicetree/bindings/arm/fsl.yaml index 371317554863..0db2cbd7891f 100644 --- a/Documentation/devicetree/bindings/arm/fsl.yaml +++ b/Documentation/devicetree/bindings/arm/fsl.yaml @@ -1091,6 +1091,7 @@ properties: - dmo,imx8mp-data-modul-edm-sbc # i.MX8MP eDM SBC - emcraft,imx8mp-navqp # i.MX8MP Emcraft Systems NavQ+ Kit - fsl,imx8mp-evk # i.MX8MP EVK Board + - fsl,imx8mp-evk-revb4 # i.MX8MP EVK Rev B4 Board - gateworks,imx8mp-gw71xx-2x # i.MX8MP Gateworks Board - gateworks,imx8mp-gw72xx-2x # i.MX8MP Gateworks Board - gateworks,imx8mp-gw73xx-2x # i.MX8MP Gateworks Board @@ -1271,6 +1272,7 @@ properties: items: - enum: - fsl,imx8qm-mek # i.MX8QM MEK Board + - fsl,imx8qm-mek-revd # i.MX8QM MEK Rev D Board - toradex,apalis-imx8 # Apalis iMX8 Modules - toradex,apalis-imx8-v1.1 # Apalis iMX8 V1.1 Modules - const: fsl,imx8qm @@ -1299,6 +1301,7 @@ properties: - enum: - einfochips,imx8qxp-ai_ml # i.MX8QXP AI_ML Board - fsl,imx8qxp-mek # i.MX8QXP MEK Board + - fsl,imx8qxp-mek-wcpu # i.MX8QXP MEK WCPU Board - const: fsl,imx8qxp - description: i.MX8DXL based Boards diff --git a/Documentation/devicetree/bindings/net/qcom,ethqos.yaml b/Documentation/devicetree/bindings/net/qcom,ethqos.yaml index f117471fb06f..e7ee0d9efed8 100644 --- a/Documentation/devicetree/bindings/net/qcom,ethqos.yaml +++ b/Documentation/devicetree/bindings/net/qcom,ethqos.yaml @@ -22,12 +22,12 @@ properties: oneOf: - items: - enum: - - qcom,qcs8300-ethqos - - const: qcom,sa8775p-ethqos + - qcom,qcs615-ethqos + - const: qcom,qcs404-ethqos - items: - enum: - - qcom,qcs615-ethqos - - const: qcom,sm8150-ethqos + - qcom,qcs8300-ethqos + - const: qcom,sa8775p-ethqos - enum: - qcom,qcs404-ethqos - qcom,sa8775p-ethqos diff --git a/Documentation/devicetree/bindings/riscv/cpus.yaml b/Documentation/devicetree/bindings/riscv/cpus.yaml index acb5b9ba6f04..2c72f148a74b 100644 --- a/Documentation/devicetree/bindings/riscv/cpus.yaml +++ b/Documentation/devicetree/bindings/riscv/cpus.yaml @@ -26,6 +26,18 @@ description: | allOf: - $ref: /schemas/cpu.yaml# - $ref: extensions.yaml + - if: + not: + properties: + compatible: + contains: + enum: + - thead,c906 + - thead,c910 
+ - thead,c920 + then: + properties: + thead,vlenb: false properties: compatible: @@ -96,6 +108,13 @@ properties: description: The blocksize in bytes for the Zicboz cache operations. + thead,vlenb: + $ref: /schemas/types.yaml#/definitions/uint32 + description: + VLEN/8, the vector register length in bytes. This property is required on + thead systems where the vector register length is not identical on all harts, or + the vlenb CSR is not available. + # RISC-V has multiple properties for cache op block sizes as the sizes # differ between individual CBO extensions cache-op-block-size: false diff --git a/Documentation/devicetree/bindings/riscv/extensions.yaml b/Documentation/devicetree/bindings/riscv/extensions.yaml index 9c7dd7e75e0c..a63b994e0763 100644 --- a/Documentation/devicetree/bindings/riscv/extensions.yaml +++ b/Documentation/devicetree/bindings/riscv/extensions.yaml @@ -621,6 +621,10 @@ properties: latency, as ratified in commit 56ed795 ("Update riscv-crypto-spec-vector.adoc") of riscv-crypto. + # vendor extensions, each extension sorted alphanumerically under the + # vendor they belong to. Vendors are sorted alphanumerically as well. + + # Andes - const: xandespmu description: The Andes Technology performance monitor extension for counter overflow @@ -628,6 +632,12 @@ properties: Registers in the AX45MP datasheet. https://www.andestech.com/wp-content/uploads/AX45MP-1C-Rev.-5.0.0-Datasheet.pdf + # T-HEAD + - const: xtheadvector + description: + The T-HEAD specific 0.7.1 vector implementation as written in + https://github.com/T-head-Semi/thead-extension-spec/blob/95358cb2cca9489361c61d335e03d3134b14133f/xtheadvector.adoc. + allOf: # Zcb depends on Zca - if: diff --git a/Documentation/devicetree/bindings/rtc/rtc-mxc.yaml b/Documentation/devicetree/bindings/rtc/rtc-mxc.yaml index a14b52178c4b..2599b847f406 100644 --- a/Documentation/devicetree/bindings/rtc/rtc-mxc.yaml +++ b/Documentation/devicetree/bindings/rtc/rtc-mxc.yaml @@ -14,9 +14,13 @@ maintainers: properties: compatible: - enum: - - fsl,imx1-rtc - - fsl,imx21-rtc + oneOf: + - const: fsl,imx1-rtc + - const: fsl,imx21-rtc + - items: + - enum: + - fsl,imx31-rtc + - const: fsl,imx21-rtc reg: maxItems: 1 diff --git a/Documentation/devicetree/bindings/sound/ti,pcm1681.yaml b/Documentation/devicetree/bindings/sound/ti,pcm1681.yaml index 5aa00617291c..1f0e6787a746 100644 --- a/Documentation/devicetree/bindings/sound/ti,pcm1681.yaml +++ b/Documentation/devicetree/bindings/sound/ti,pcm1681.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/sound/ti,pcm1681.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Texas Instruments PCM1681 8-channel PWM Processor +title: Texas Instruments PCM1681 8-channel Digital-to-Analog Converter maintainers: - Shenghao Ding <shenghao-ding@ti.com> diff --git a/Documentation/filesystems/locking.rst b/Documentation/filesystems/locking.rst index f5e3676db954..d20a32b77b60 100644 --- a/Documentation/filesystems/locking.rst +++ b/Documentation/filesystems/locking.rst @@ -17,7 +17,8 @@ dentry_operations prototypes:: - int (*d_revalidate)(struct dentry *, unsigned int); + int (*d_revalidate)(struct inode *, const struct qstr *, + struct dentry *, unsigned int); int (*d_weak_revalidate)(struct dentry *, unsigned int); int (*d_hash)(const struct dentry *, struct qstr *); int (*d_compare)(const struct dentry *, @@ -30,6 +31,8 @@ prototypes:: struct vfsmount *(*d_automount)(struct path *path); int (*d_manage)(const struct path *, bool); struct dentry *(*d_real)(struct dentry *, enum 
d_real_type type); + bool (*d_unalias_trylock)(const struct dentry *); + void (*d_unalias_unlock)(const struct dentry *); locking rules: @@ -49,6 +52,8 @@ d_dname: no no no no d_automount: no no yes no d_manage: no no yes (ref-walk) maybe d_real no no yes no +d_unalias_trylock yes no no no +d_unalias_unlock yes no no no ================== =========== ======== ============== ======== inode_operations diff --git a/Documentation/filesystems/porting.rst b/Documentation/filesystems/porting.rst index c1c121055204..1639e78e3146 100644 --- a/Documentation/filesystems/porting.rst +++ b/Documentation/filesystems/porting.rst @@ -1141,3 +1141,19 @@ pointer are gone. set_blocksize() takes opened struct file instead of struct block_device now and it *must* be opened exclusive. + +--- + +** mandatory** + +->d_revalidate() gets two extra arguments - inode of parent directory and +name our dentry is expected to have. Both are stable (dir is pinned in +non-RCU case and will stay around during the call in RCU case, and name +is guaranteed to stay unchanging). Your instance doesn't have to use +either, but it often helps to avoid a lot of painful boilerplate. +Note that while name->name is stable and NUL-terminated, it may (and +often will) have name->name[name->len] equal to '/' rather than '\0' - +in normal case it points into the pathname being looked up. +NOTE: if you need something like full path from the root of filesystem, +you are still on your own - this assists with simple cases, but it's not +magic. diff --git a/Documentation/filesystems/vfs.rst b/Documentation/filesystems/vfs.rst index 0b18af3f954e..31eea688609a 100644 --- a/Documentation/filesystems/vfs.rst +++ b/Documentation/filesystems/vfs.rst @@ -1251,7 +1251,8 @@ defined: .. code-block:: c struct dentry_operations { - int (*d_revalidate)(struct dentry *, unsigned int); + int (*d_revalidate)(struct inode *, const struct qstr *, + struct dentry *, unsigned int); int (*d_weak_revalidate)(struct dentry *, unsigned int); int (*d_hash)(const struct dentry *, struct qstr *); int (*d_compare)(const struct dentry *, @@ -1264,6 +1265,8 @@ defined: struct vfsmount *(*d_automount)(struct path *); int (*d_manage)(const struct path *, bool); struct dentry *(*d_real)(struct dentry *, enum d_real_type type); + bool (*d_unalias_trylock)(const struct dentry *); + void (*d_unalias_unlock)(const struct dentry *); }; ``d_revalidate`` @@ -1427,6 +1430,25 @@ defined: For non-regular files, the 'dentry' argument is returned. +``d_unalias_trylock`` + if present, will be called by d_splice_alias() before moving a + preexisting attached alias. Returning false prevents __d_move(), + making d_splice_alias() fail with -ESTALE. + + Rationale: setting FS_RENAME_DOES_D_MOVE will prevent d_move() + and d_exchange() calls from the outside of filesystem methods; + however, it does not guarantee that attached dentries won't + be renamed or moved by d_splice_alias() finding a preexisting + alias for a directory inode. Normally we would not care; + however, something that wants to stabilize the entire path to + root over a blocking operation might need that. See 9p for one + (and hopefully only) example. + +``d_unalias_unlock`` + should be paired with ``d_unalias_trylock``; that one is called after + __d_move() call in __d_unalias(). + + Each dentry has a pointer to its parent dentry, as well as a hash list of child dentries. Child dentries are basically like files in a directory. 
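To make the new calling convention concrete, here is a minimal sketch of a ->d_revalidate() instance using the two added arguments; "examplefs" and its lookup helper are hypothetical, only the prototype comes from the change above::

    static int examplefs_d_revalidate(struct inode *dir, const struct qstr *name,
                                      struct dentry *dentry, unsigned int flags)
    {
            if (flags & LOOKUP_RCU)
                    return -ECHILD;

            /*
             * 'dir' and 'name' are stable for the duration of the call, so
             * there is no need to re-derive them from dentry->d_parent or
             * dentry->d_name.  Note that name->name[name->len] may be '/'
             * rather than '\0' when it points into the pathname being
             * looked up.
             */
            return examplefs_name_still_valid(dir, name->name, name->len,
                                              d_inode(dentry)) ? 1 : 0;
    }

    static const struct dentry_operations examplefs_dentry_ops = {
            .d_revalidate = examplefs_d_revalidate,
    };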
diff --git a/Documentation/kbuild/gendwarfksyms.rst b/Documentation/kbuild/gendwarfksyms.rst new file mode 100644 index 000000000000..e4beaae7e456 --- /dev/null +++ b/Documentation/kbuild/gendwarfksyms.rst @@ -0,0 +1,308 @@ +======================= +DWARF module versioning +======================= + +1. Introduction +=============== + +When CONFIG_MODVERSIONS is enabled, symbol versions for modules +are typically calculated from preprocessed source code using the +**genksyms** tool. However, this is incompatible with languages such +as Rust, where the source code has insufficient information about +the resulting ABI. With CONFIG_GENDWARFKSYMS (and CONFIG_DEBUG_INFO) +selected, **gendwarfksyms** is used instead to calculate symbol versions +from the DWARF debugging information, which contains the necessary +details about the final module ABI. + +1.1. Usage +========== + +gendwarfksyms accepts a list of object files on the command line, and a +list of symbol names (one per line) in standard input:: + + Usage: gendwarfksyms [options] elf-object-file ... < symbol-list + + Options: + -d, --debug Print debugging information + --dump-dies Dump DWARF DIE contents + --dump-die-map Print debugging information about die_map changes + --dump-types Dump type strings + --dump-versions Dump expanded type strings used for symbol versions + -s, --stable Support kABI stability features + -T, --symtypes file Write a symtypes file + -h, --help Print this message + + +2. Type information availability +================================ + +While symbols are typically exported in the same translation unit (TU) +where they're defined, it's also perfectly fine for a TU to export +external symbols. For example, this is done when calculating symbol +versions for exports in stand-alone assembly code. + +To ensure the compiler emits the necessary DWARF type information in the +TU where symbols are actually exported, gendwarfksyms adds a pointer +to exported symbols in the `EXPORT_SYMBOL()` macro using the following +macro:: + + #define __GENDWARFKSYMS_EXPORT(sym) \ + static typeof(sym) *__gendwarfksyms_ptr_##sym __used \ + __section(".discard.gendwarfksyms") = &sym; + + +When a symbol pointer is found in DWARF, gendwarfksyms can use its +type for calculating symbol versions even if the symbol is defined +elsewhere. The name of the symbol pointer is expected to start with +`__gendwarfksyms_ptr_`, followed by the name of the exported symbol. + +3. Symtypes output format +========================= + +Similarly to genksyms, gendwarfksyms supports writing a symtypes +file for each processed object that contain types for exported +symbols and each referenced type that was used in calculating symbol +versions. These files can be useful when trying to determine what +exactly caused symbol versions to change between builds. To generate +symtypes files during a kernel build, set `KBUILD_SYMTYPES=1`. + +Matching the existing format, the first column of each line contains +either a type reference or a symbol name. Type references have a +one-letter prefix followed by "#" and the name of the type. Four +reference types are supported:: + + e#<type> = enum + s#<type> = struct + t#<type> = typedef + u#<type> = union + +Type names with spaces in them are wrapped in single quotes, e.g.:: + + s#'core::result::Result<u8, core::num::error::ParseIntError>' + +The rest of the line contains a type string. 
Unlike with genksyms that +produces C-style type strings, gendwarfksyms uses the same simple parsed +DWARF format produced by **--dump-dies**, but with type references +instead of fully expanded strings. + +4. Maintaining a stable kABI +============================ + +Distribution maintainers often need the ability to make ABI compatible +changes to kernel data structures due to LTS updates or backports. Using +the traditional `#ifndef __GENKSYMS__` to hide these changes from symbol +versioning won't work when processing object files. To support this +use case, gendwarfksyms provides kABI stability features designed to +hide changes that won't affect the ABI when calculating versions. These +features are all gated behind the **--stable** command line flag and are +not used in the mainline kernel. To use stable features during a kernel +build, set `KBUILD_GENDWARFKSYMS_STABLE=1`. + +Examples for using these features are provided in the +**scripts/gendwarfksyms/examples** directory, including helper macros +for source code annotation. Note that as these features are only used to +transform the inputs for symbol versioning, the user is responsible for +ensuring that their changes actually won't break the ABI. + +4.1. kABI rules +=============== + +kABI rules allow distributions to fine-tune certain parts +of gendwarfksyms output and thus control how symbol +versions are calculated. These rules are defined in the +`.discard.gendwarfksyms.kabi_rules` section of the object file and +consist of simple null-terminated strings with the following structure:: + + version\0type\0target\0value\0 + +This string sequence is repeated as many times as needed to express all +the rules. The fields are as follows: + +- `version`: Ensures backward compatibility for future changes to the + structure. Currently expected to be "1". +- `type`: Indicates the type of rule being applied. +- `target`: Specifies the target of the rule, typically the fully + qualified name of the DWARF Debugging Information Entry (DIE). +- `value`: Provides rule-specific data. + +The following helper macro, for example, can be used to specify rules +in the source code:: + + #define __KABI_RULE(hint, target, value) \ + static const char __PASTE(__gendwarfksyms_rule_, \ + __COUNTER__)[] __used __aligned(1) \ + __section(".discard.gendwarfksyms.kabi_rules") = \ + "1\0" #hint "\0" #target "\0" #value + + +Currently, only the rules discussed in this section are supported, but +the format is extensible enough to allow further rules to be added as +need arises. + +4.1.1. Managing definition visibility +===================================== + +A declaration can change into a full definition when additional includes +are pulled into the translation unit. This changes the versions of any +symbol that references the type even if the ABI remains unchanged. As +it may not be possible to drop includes without breaking the build, the +`declonly` rule can be used to specify a type as declaration-only, even +if the debugging information contains the full definition. + +The rule fields are expected to be as follows: + +- `type`: "declonly" +- `target`: The fully qualified name of the target data structure + (as shown in **--dump-dies** output). +- `value`: This field is ignored. + +Using the `__KABI_RULE` macro, this rule can be defined as:: + + #define KABI_DECLONLY(fqn) __KABI_RULE(declonly, fqn, ) + +Example usage:: + + struct s { + /* definition */ + }; + + KABI_DECLONLY(s); + +4.1.2. 
Adding enumerators +========================= + +For enums, all enumerators and their values are included in calculating +symbol versions, which becomes a problem if we later need to add more +enumerators without changing symbol versions. The `enumerator_ignore` +rule allows us to hide named enumerators from the input. + +The rule fields are expected to be as follows: + +- `type`: "enumerator_ignore" +- `target`: The fully qualified name of the target enum + (as shown in **--dump-dies** output) and the name of the + enumerator field separated by a space. +- `value`: This field is ignored. + +Using the `__KABI_RULE` macro, this rule can be defined as:: + + #define KABI_ENUMERATOR_IGNORE(fqn, field) \ + __KABI_RULE(enumerator_ignore, fqn field, ) + +Example usage:: + + enum e { + A, B, C, D, + }; + + KABI_ENUMERATOR_IGNORE(e, B); + KABI_ENUMERATOR_IGNORE(e, C); + +If the enum additionally includes an end marker and new values must +be added in the middle, we may need to use the old value for the last +enumerator when calculating versions. The `enumerator_value` rule allows +us to override the value of an enumerator for version calculation: + +- `type`: "enumerator_value" +- `target`: The fully qualified name of the target enum + (as shown in **--dump-dies** output) and the name of the + enumerator field separated by a space. +- `value`: Integer value used for the field. + +Using the `__KABI_RULE` macro, this rule can be defined as:: + + #define KABI_ENUMERATOR_VALUE(fqn, field, value) \ + __KABI_RULE(enumerator_value, fqn field, value) + +Example usage:: + + enum e { + A, B, C, LAST, + }; + + KABI_ENUMERATOR_IGNORE(e, C); + KABI_ENUMERATOR_VALUE(e, LAST, 2); + +4.3. Adding structure members +============================= + +Perhaps the most common ABI compatible change is adding a member to a +kernel data structure. When changes to a structure are anticipated, +distribution maintainers can pre-emptively reserve space in the +structure and take it into use later without breaking the ABI. If +changes are needed to data structures without reserved space, existing +alignment holes can potentially be used instead. While kABI rules could +be added for these type of changes, using unions is typically a more +natural method. This section describes gendwarfksyms support for using +reserved space in data structures and hiding members that don't change +the ABI when calculating symbol versions. + +4.3.1. Reserving space and replacing members +============================================ + +Space is typically reserved for later use by appending integer types, or +arrays, to the end of the data structure, but any type can be used. Each +reserved member needs a unique name, but as the actual purpose is usually +not known at the time the space is reserved, for convenience, names that +start with `__kabi_` are left out when calculating symbol versions:: + + struct s { + long a; + long __kabi_reserved_0; /* reserved for future use */ + }; + +The reserved space can be taken into use by wrapping the member in a +union, which includes the original type and the replacement member:: + + struct s { + long a; + union { + long __kabi_reserved_0; /* original type */ + struct b b; /* replaced field */ + }; + }; + +If the `__kabi_` naming scheme was used when reserving space, the name +of the first member of the union must start with `__kabi_reserved`. This +ensures the original type is used when calculating versions, but the name +is again left out. The rest of the union is ignored. 
+ +If we're replacing a member that doesn't follow this naming convention, +we also need to preserve the original name to avoid changing versions, +which we can do by changing the first union member's name to start with +`__kabi_renamed` followed by the original name. + +The examples include `KABI_(RESERVE|USE|REPLACE)*` macros that help +simplify the process and also ensure the replacement member is correctly +aligned and its size won't exceed the reserved space. + +4.3.2. Hiding members +===================== + +Predicting which structures will require changes during the support +timeframe isn't always possible, in which case one might have to resort +to placing new members into existing alignment holes:: + + struct s { + int a; + /* a 4-byte alignment hole */ + unsigned long b; + }; + + +While this won't change the size of the data structure, one needs to +be able to hide the added members from symbol versioning. Similarly +to reserved fields, this can be accomplished by wrapping the added +member to a union where one of the fields has a name starting with +`__kabi_ignored`:: + + struct s { + int a; + union { + char __kabi_ignored_0; + int n; + }; + unsigned long b; + }; + +With **--stable**, both versions produce the same symbol version. diff --git a/Documentation/kbuild/index.rst b/Documentation/kbuild/index.rst index cee2f99f734b..e82af05cd652 100644 --- a/Documentation/kbuild/index.rst +++ b/Documentation/kbuild/index.rst @@ -21,6 +21,7 @@ Kernel Build System reproducible-builds gcc-plugins llvm + gendwarfksyms .. only:: subproject and html diff --git a/Documentation/kbuild/modules.rst b/Documentation/kbuild/modules.rst index 101de236cd0c..a42f00d8cb90 100644 --- a/Documentation/kbuild/modules.rst +++ b/Documentation/kbuild/modules.rst @@ -423,6 +423,26 @@ Symbols From the Kernel (vmlinux + modules) 1) It lists all exported symbols from vmlinux and all modules. 2) It lists the CRC if CONFIG_MODVERSIONS is enabled. +Version Information Formats +--------------------------- + + Exported symbols have information stored in __ksymtab or __ksymtab_gpl + sections. Symbol names and namespaces are stored in __ksymtab_strings, + using a format similar to the string table used for ELF. If + CONFIG_MODVERSIONS is enabled, the CRCs corresponding to exported + symbols will be added to the __kcrctab or __kcrctab_gpl. + + If CONFIG_BASIC_MODVERSIONS is enabled (default with + CONFIG_MODVERSIONS), imported symbols will have their symbol name and + CRC stored in the __versions section of the importing module. This + mode only supports symbols of length up to 64 bytes. + + If CONFIG_EXTENDED_MODVERSIONS is enabled (required to enable both + CONFIG_MODVERSIONS and CONFIG_RUST at the same time), imported symbols + will have their symbol name recorded in the __version_ext_names + section as a series of concatenated, null-terminated strings. CRCs for + these symbols will be recorded in the __version_ext_crcs section. + Symbols and External Modules ---------------------------- diff --git a/Documentation/networking/can.rst b/Documentation/networking/can.rst index 62519d38c58b..b018ce346392 100644 --- a/Documentation/networking/can.rst +++ b/Documentation/networking/can.rst @@ -699,10 +699,10 @@ RAW socket option CAN_RAW_JOIN_FILTERS The CAN_RAW socket can set multiple CAN identifier specific filters that lead to multiple filters in the af_can.c filter processing. 
These filters -are indenpendent from each other which leads to logical OR'ed filters when +are independent from each other which leads to logical OR'ed filters when applied (see :ref:`socketcan-rawfilter`). -This socket option joines the given CAN filters in the way that only CAN +This socket option joins the given CAN filters in the way that only CAN frames are passed to user space that matched *all* given CAN filters. The semantic for the applied filters is therefore changed to a logical AND. diff --git a/Documentation/networking/mptcp-sysctl.rst b/Documentation/networking/mptcp-sysctl.rst index dc45c0211353..03e1d3610333 100644 --- a/Documentation/networking/mptcp-sysctl.rst +++ b/Documentation/networking/mptcp-sysctl.rst @@ -41,7 +41,7 @@ blackhole_timeout - INTEGER (seconds) MPTCP is re-enabled and will reset to the initial value when the blackhole issue goes away. - 0 to disable the blackhole detection. + 0 to disable the blackhole detection. This is a per-namespace sysctl. Default: 3600 diff --git a/Documentation/networking/napi.rst b/Documentation/networking/napi.rst index 6083210ab2a4..f970a2be271a 100644 --- a/Documentation/networking/napi.rst +++ b/Documentation/networking/napi.rst @@ -362,7 +362,7 @@ It is expected that ``irq-suspend-timeout`` will be set to a value much larger than ``gro_flush_timeout`` as ``irq-suspend-timeout`` should suspend IRQs for the duration of one userland processing cycle. -While it is not stricly necessary to use ``napi_defer_hard_irqs`` and +While it is not strictly necessary to use ``napi_defer_hard_irqs`` and ``gro_flush_timeout`` to use IRQ suspension, their use is strongly recommended. diff --git a/Documentation/power/video.rst b/Documentation/power/video.rst index 337a2ba9f32f..8ab2458d1304 100644 --- a/Documentation/power/video.rst +++ b/Documentation/power/video.rst @@ -190,7 +190,7 @@ Toshiba Portege 3020CT s3_mode (3) Toshiba Satellite 4030CDT s3_mode (3) (S1 also works OK) Toshiba Satellite 4080XCDT s3_mode (3) (S1 also works OK) Toshiba Satellite 4090XCDT ??? [#f1]_ -Toshiba Satellite P10-554 s3_bios,s3_mode (4)[#f3]_ +Toshiba Satellite P10-554 s3_bios,s3_mode (4) [#f3]_ Toshiba M30 (2) xor X with nvidia driver using internal AGP Uniwill 244IIO ??? [#f1]_ =============================== =============================================== diff --git a/Documentation/process/changes.rst b/Documentation/process/changes.rst index 82b5e378eebf..a0beca805362 100644 --- a/Documentation/process/changes.rst +++ b/Documentation/process/changes.rst @@ -59,7 +59,6 @@ iptables 1.4.2 iptables -V openssl & libcrypto 1.0.0 openssl version bc 1.06.95 bc --version Sphinx\ [#f1]_ 2.4.4 sphinx-build --version -cpio any cpio --version GNU tar 1.28 tar --version gtags (optional) 6.6.5 gtags --version mkimage (optional) 2017.01 mkimage --version @@ -536,11 +535,6 @@ mcelog - <https://www.mcelog.org/> -cpio ----- - -- <https://www.gnu.org/software/cpio/> - Networking ********** diff --git a/Documentation/translations/sp_SP/index.rst b/Documentation/translations/sp_SP/index.rst index aae7018b0d1a..2b50283e1608 100644 --- a/Documentation/translations/sp_SP/index.rst +++ b/Documentation/translations/sp_SP/index.rst @@ -7,7 +7,7 @@ Traducción al español \kerneldocCJKoff -:maintainer: Carlos Bilbao <carlos.bilbao.osdev@gmail.com> +:maintainer: Carlos Bilbao <carlos.bilbao@kernel.org> .. 
_sp_disclaimer: diff --git a/MAINTAINERS b/MAINTAINERS index bc8ce7af3303..896a307fa065 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1090,7 +1090,7 @@ F: drivers/video/fbdev/geode/ AMD HSMP DRIVER M: Naveen Krishna Chatradhi <naveenkrishna.chatradhi@amd.com> -R: Carlos Bilbao <carlos.bilbao.osdev@gmail.com> +R: Carlos Bilbao <carlos.bilbao@kernel.org> L: platform-driver-x86@vger.kernel.org S: Maintained F: Documentation/arch/x86/amd_hsmp.rst @@ -4102,6 +4102,7 @@ S: Supported W: http://www.bluez.org/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/bluetooth/bluetooth.git T: git git://git.kernel.org/pub/scm/linux/kernel/git/bluetooth/bluetooth-next.git +F: Documentation/ABI/stable/sysfs-class-bluetooth F: include/net/bluetooth/ F: net/bluetooth/ @@ -5856,7 +5857,7 @@ F: drivers/usb/atm/cxacru.c CONFIDENTIAL COMPUTING THREAT MODEL FOR X86 VIRTUALIZATION (SNP/TDX) M: Elena Reshetova <elena.reshetova@intel.com> -M: Carlos Bilbao <carlos.bilbao.osdev@gmail.com> +M: Carlos Bilbao <carlos.bilbao@kernel.org> S: Maintained F: Documentation/security/snp-tdx-threat-model.rst @@ -9640,6 +9641,13 @@ W: https://linuxtv.org T: git git://linuxtv.org/media.git F: drivers/media/radio/radio-gemtek* +GENDWARFKSYMS +M: Sami Tolvanen <samitolvanen@google.com> +L: linux-modules@vger.kernel.org +L: linux-kbuild@vger.kernel.org +S: Maintained +F: scripts/gendwarfksyms/ + GENERIC ARCHITECTURE TOPOLOGY M: Sudeep Holla <sudeep.holla@arm.com> L: linux-kernel@vger.kernel.org @@ -11323,7 +11331,7 @@ S: Orphan F: drivers/video/fbdev/imsttfb.c INDEX OF FURTHER KERNEL DOCUMENTATION -M: Carlos Bilbao <carlos.bilbao.osdev@gmail.com> +M: Carlos Bilbao <carlos.bilbao@kernel.org> S: Maintained F: Documentation/process/kernel-docs.rst @@ -16277,6 +16285,7 @@ F: drivers/scsi/sun3_scsi_vme.c NCSI LIBRARY M: Samuel Mendoza-Jonas <sam@mendozajonas.com> +R: Paul Fertser <fercerpav@gmail.com> S: Maintained F: net/ncsi/ @@ -16611,6 +16620,7 @@ F: tools/testing/selftests/net/mptcp/ NETWORKING [TCP] M: Eric Dumazet <edumazet@google.com> +M: Neal Cardwell <ncardwell@google.com> L: netdev@vger.kernel.org S: Maintained F: Documentation/networking/net_cachelines/tcp_sock.rst @@ -19443,7 +19453,7 @@ F: drivers/misc/fastrpc.c F: include/uapi/misc/fastrpc.h QUALCOMM HEXAGON ARCHITECTURE -M: Brian Cain <bcain@quicinc.com> +M: Brian Cain <brian.cain@oss.qualcomm.com> L: linux-hexagon@vger.kernel.org S: Supported T: git git://git.kernel.org/pub/scm/linux/kernel/git/bcain/linux.git @@ -22205,7 +22215,7 @@ Q: http://patchwork.linuxtv.org/project/linux-media/list/ F: drivers/media/dvb-frontends/sp2* SPANISH DOCUMENTATION -M: Carlos Bilbao <carlos.bilbao.osdev@gmail.com> +M: Carlos Bilbao <carlos.bilbao@kernel.org> R: Avadhut Naik <avadhut.naik@amd.com> S: Maintained F: Documentation/translations/sp_SP/ @@ -25729,11 +25739,13 @@ F: arch/x86/entry/vdso/ XARRAY M: Matthew Wilcox <willy@infradead.org> L: linux-fsdevel@vger.kernel.org +L: linux-mm@kvack.org S: Supported F: Documentation/core-api/xarray.rst F: include/linux/idr.h F: include/linux/xarray.h F: lib/idr.c +F: lib/test_xarray.c F: lib/xarray.c F: tools/testing/radix-tree @@ -26213,7 +26225,7 @@ K: zstd ZSWAP COMPRESSED SWAP CACHING M: Johannes Weiner <hannes@cmpxchg.org> -M: Yosry Ahmed <yosryahmed@google.com> +M: Yosry Ahmed <yosry.ahmed@linux.dev> M: Nhat Pham <nphamcs@gmail.com> R: Chengming Zhou <chengming.zhou@linux.dev> L: linux-mm@kvack.org diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index 62da5827f471..f27e6b90428e 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ 
-18,6 +18,7 @@ config ARC select ARCH_SUPPORTS_ATOMIC_RMW if ARC_HAS_LLSC select ARCH_32BIT_OFF_T select BUILDTIME_TABLE_SORT + select GENERIC_BUILTIN_DTB select CLONE_BACKWARDS select COMMON_CLK select DMA_DIRECT_REMAP @@ -550,11 +551,11 @@ config ARC_DBG_JUMP_LABEL part of static keys (jump labels) related code. endif -config ARC_BUILTIN_DTB_NAME +config BUILTIN_DTB_NAME string "Built in DTB" + default "nsim_700" help - Set the name of the DTB to embed in the vmlinux binary - Leaving it blank selects the "nsim_700" dtb. + Set the name of the DTB to embed in the vmlinux binary. endmenu # "ARC Architecture Configuration" diff --git a/arch/arc/Makefile b/arch/arc/Makefile index fb98478ed1ab..0c5e6e6314f2 100644 --- a/arch/arc/Makefile +++ b/arch/arc/Makefile @@ -82,9 +82,6 @@ KBUILD_CFLAGS += $(cflags-y) KBUILD_AFLAGS += $(KBUILD_CFLAGS) KBUILD_LDFLAGS += $(ldflags-y) -# w/o this dtb won't embed into kernel binary -core-y += arch/arc/boot/dts/ - core-y += arch/arc/plat-sim/ core-$(CONFIG_ARC_PLAT_TB10X) += arch/arc/plat-tb10x/ core-$(CONFIG_ARC_PLAT_AXS10X) += arch/arc/plat-axs10x/ diff --git a/arch/arc/boot/dts/Makefile b/arch/arc/boot/dts/Makefile index 48704dfdf75c..ee5664f0640d 100644 --- a/arch/arc/boot/dts/Makefile +++ b/arch/arc/boot/dts/Makefile @@ -1,13 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 -# Built-in dtb -builtindtb-y := nsim_700 -ifneq ($(CONFIG_ARC_BUILTIN_DTB_NAME),) - builtindtb-y := $(CONFIG_ARC_BUILTIN_DTB_NAME) -endif - -obj-y += $(builtindtb-y).dtb.o -dtb-y := $(builtindtb-y).dtb +dtb-y := $(addsuffix .dtb, $(CONFIG_BUILTIN_DTB_NAME)) # for CONFIG_OF_ALL_DTBS test dtb- := $(patsubst $(src)/%.dts,%.dtb, $(wildcard $(src)/*.dts)) diff --git a/arch/arc/configs/axs101_defconfig b/arch/arc/configs/axs101_defconfig index 319bbe270322..a7cd526dd7ca 100644 --- a/arch/arc/configs/axs101_defconfig +++ b/arch/arc/configs/axs101_defconfig @@ -23,7 +23,7 @@ CONFIG_PARTITION_ADVANCED=y CONFIG_ARC_PLAT_AXS10X=y CONFIG_AXS101=y CONFIG_ARC_CACHE_LINE_SHIFT=5 -CONFIG_ARC_BUILTIN_DTB_NAME="axs101" +CONFIG_BUILTIN_DTB_NAME="axs101" CONFIG_PREEMPT=y # CONFIG_COMPACTION is not set CONFIG_NET=y diff --git a/arch/arc/configs/axs103_defconfig b/arch/arc/configs/axs103_defconfig index 8c1f1a111a17..afa6a348f444 100644 --- a/arch/arc/configs/axs103_defconfig +++ b/arch/arc/configs/axs103_defconfig @@ -22,7 +22,7 @@ CONFIG_PARTITION_ADVANCED=y CONFIG_ARC_PLAT_AXS10X=y CONFIG_AXS103=y CONFIG_ISA_ARCV2=y -CONFIG_ARC_BUILTIN_DTB_NAME="axs103" +CONFIG_BUILTIN_DTB_NAME="axs103" CONFIG_PREEMPT=y # CONFIG_COMPACTION is not set CONFIG_NET=y diff --git a/arch/arc/configs/axs103_smp_defconfig b/arch/arc/configs/axs103_smp_defconfig index 75cab9f25b5b..2bfa6371953c 100644 --- a/arch/arc/configs/axs103_smp_defconfig +++ b/arch/arc/configs/axs103_smp_defconfig @@ -22,7 +22,7 @@ CONFIG_ARC_PLAT_AXS10X=y CONFIG_AXS103=y CONFIG_ISA_ARCV2=y CONFIG_SMP=y -CONFIG_ARC_BUILTIN_DTB_NAME="axs103_idu" +CONFIG_BUILTIN_DTB_NAME="axs103_idu" CONFIG_PREEMPT=y # CONFIG_COMPACTION is not set CONFIG_NET=y diff --git a/arch/arc/configs/haps_hs_defconfig b/arch/arc/configs/haps_hs_defconfig index 8c3ed5d6e6c3..3a1577112078 100644 --- a/arch/arc/configs/haps_hs_defconfig +++ b/arch/arc/configs/haps_hs_defconfig @@ -14,7 +14,7 @@ CONFIG_BLK_DEV_INITRD=y CONFIG_EXPERT=y CONFIG_PERF_EVENTS=y # CONFIG_COMPAT_BRK is not set -CONFIG_ARC_BUILTIN_DTB_NAME="haps_hs" +CONFIG_BUILTIN_DTB_NAME="haps_hs" CONFIG_MODULES=y # CONFIG_BLK_DEV_BSG is not set # CONFIG_COMPACTION is not set diff --git a/arch/arc/configs/haps_hs_smp_defconfig 
b/arch/arc/configs/haps_hs_smp_defconfig index 6fc98c1b9b36..a3cf940b1f5b 100644 --- a/arch/arc/configs/haps_hs_smp_defconfig +++ b/arch/arc/configs/haps_hs_smp_defconfig @@ -16,7 +16,7 @@ CONFIG_PERF_EVENTS=y # CONFIG_VM_EVENT_COUNTERS is not set # CONFIG_COMPAT_BRK is not set CONFIG_SMP=y -CONFIG_ARC_BUILTIN_DTB_NAME="haps_hs_idu" +CONFIG_BUILTIN_DTB_NAME="haps_hs_idu" CONFIG_KPROBES=y CONFIG_MODULES=y # CONFIG_BLK_DEV_BSG is not set diff --git a/arch/arc/configs/hsdk_defconfig b/arch/arc/configs/hsdk_defconfig index 9e79154b5535..1558e8e87767 100644 --- a/arch/arc/configs/hsdk_defconfig +++ b/arch/arc/configs/hsdk_defconfig @@ -20,7 +20,7 @@ CONFIG_ISA_ARCV2=y CONFIG_SMP=y CONFIG_LINUX_LINK_BASE=0x90000000 CONFIG_LINUX_RAM_BASE=0x80000000 -CONFIG_ARC_BUILTIN_DTB_NAME="hsdk" +CONFIG_BUILTIN_DTB_NAME="hsdk" CONFIG_PREEMPT=y # CONFIG_COMPACTION is not set CONFIG_NET=y diff --git a/arch/arc/configs/nsim_700_defconfig b/arch/arc/configs/nsim_700_defconfig index 51092c39e360..f8b3235d9a65 100644 --- a/arch/arc/configs/nsim_700_defconfig +++ b/arch/arc/configs/nsim_700_defconfig @@ -17,7 +17,7 @@ CONFIG_PERF_EVENTS=y # CONFIG_SLUB_DEBUG is not set # CONFIG_COMPAT_BRK is not set CONFIG_ISA_ARCOMPACT=y -CONFIG_ARC_BUILTIN_DTB_NAME="nsim_700" +CONFIG_BUILTIN_DTB_NAME="nsim_700" CONFIG_KPROBES=y CONFIG_MODULES=y # CONFIG_BLK_DEV_BSG is not set diff --git a/arch/arc/configs/nsimosci_defconfig b/arch/arc/configs/nsimosci_defconfig index 70c17bca4939..ee45dc0877fb 100644 --- a/arch/arc/configs/nsimosci_defconfig +++ b/arch/arc/configs/nsimosci_defconfig @@ -19,7 +19,7 @@ CONFIG_ISA_ARCOMPACT=y CONFIG_KPROBES=y CONFIG_MODULES=y # CONFIG_BLK_DEV_BSG is not set -CONFIG_ARC_BUILTIN_DTB_NAME="nsimosci" +CONFIG_BUILTIN_DTB_NAME="nsimosci" # CONFIG_COMPACTION is not set CONFIG_NET=y CONFIG_PACKET=y diff --git a/arch/arc/configs/nsimosci_hs_defconfig b/arch/arc/configs/nsimosci_hs_defconfig index 59a3b6642fe7..e0a309970c20 100644 --- a/arch/arc/configs/nsimosci_hs_defconfig +++ b/arch/arc/configs/nsimosci_hs_defconfig @@ -19,7 +19,7 @@ CONFIG_KPROBES=y CONFIG_MODULES=y # CONFIG_BLK_DEV_BSG is not set CONFIG_ISA_ARCV2=y -CONFIG_ARC_BUILTIN_DTB_NAME="nsimosci_hs" +CONFIG_BUILTIN_DTB_NAME="nsimosci_hs" # CONFIG_COMPACTION is not set CONFIG_NET=y CONFIG_PACKET=y diff --git a/arch/arc/configs/nsimosci_hs_smp_defconfig b/arch/arc/configs/nsimosci_hs_smp_defconfig index 1419fc946a08..88325b8b49cf 100644 --- a/arch/arc/configs/nsimosci_hs_smp_defconfig +++ b/arch/arc/configs/nsimosci_hs_smp_defconfig @@ -16,7 +16,7 @@ CONFIG_MODULES=y CONFIG_ISA_ARCV2=y CONFIG_SMP=y # CONFIG_ARC_TIMERS_64BIT is not set -CONFIG_ARC_BUILTIN_DTB_NAME="nsimosci_hs_idu" +CONFIG_BUILTIN_DTB_NAME="nsimosci_hs_idu" CONFIG_PREEMPT=y # CONFIG_COMPACTION is not set CONFIG_NET=y diff --git a/arch/arc/configs/tb10x_defconfig b/arch/arc/configs/tb10x_defconfig index 5aba3d850fa2..865fbc19ef03 100644 --- a/arch/arc/configs/tb10x_defconfig +++ b/arch/arc/configs/tb10x_defconfig @@ -26,7 +26,7 @@ CONFIG_MODULE_UNLOAD=y CONFIG_ARC_PLAT_TB10X=y CONFIG_ARC_CACHE_LINE_SHIFT=5 CONFIG_HZ=250 -CONFIG_ARC_BUILTIN_DTB_NAME="abilis_tb100_dvk" +CONFIG_BUILTIN_DTB_NAME="abilis_tb100_dvk" CONFIG_PREEMPT_VOLUNTARY=y # CONFIG_COMPACTION is not set CONFIG_NET=y diff --git a/arch/arc/configs/vdk_hs38_defconfig b/arch/arc/configs/vdk_hs38_defconfig index 50c343913825..03d9ac20baa9 100644 --- a/arch/arc/configs/vdk_hs38_defconfig +++ b/arch/arc/configs/vdk_hs38_defconfig @@ -13,7 +13,7 @@ CONFIG_PARTITION_ADVANCED=y CONFIG_ARC_PLAT_AXS10X=y CONFIG_AXS103=y 
CONFIG_ISA_ARCV2=y -CONFIG_ARC_BUILTIN_DTB_NAME="vdk_hs38" +CONFIG_BUILTIN_DTB_NAME="vdk_hs38" CONFIG_PREEMPT=y CONFIG_NET=y CONFIG_PACKET=y diff --git a/arch/arc/configs/vdk_hs38_smp_defconfig b/arch/arc/configs/vdk_hs38_smp_defconfig index 6d9e1d9f71d2..c09488992f13 100644 --- a/arch/arc/configs/vdk_hs38_smp_defconfig +++ b/arch/arc/configs/vdk_hs38_smp_defconfig @@ -15,7 +15,7 @@ CONFIG_AXS103=y CONFIG_ISA_ARCV2=y CONFIG_SMP=y # CONFIG_ARC_TIMERS_64BIT is not set -CONFIG_ARC_BUILTIN_DTB_NAME="vdk_hs38_smp" +CONFIG_BUILTIN_DTB_NAME="vdk_hs38_smp" CONFIG_PREEMPT=y CONFIG_NET=y CONFIG_PACKET=y diff --git a/arch/hexagon/include/asm/cmpxchg.h b/arch/hexagon/include/asm/cmpxchg.h index bf6cf5579cf4..9c58fb81f7fd 100644 --- a/arch/hexagon/include/asm/cmpxchg.h +++ b/arch/hexagon/include/asm/cmpxchg.h @@ -56,7 +56,7 @@ __arch_xchg(unsigned long x, volatile void *ptr, int size) __typeof__(ptr) __ptr = (ptr); \ __typeof__(*(ptr)) __old = (old); \ __typeof__(*(ptr)) __new = (new); \ - __typeof__(*(ptr)) __oldval = 0; \ + __typeof__(*(ptr)) __oldval = (__typeof__(*(ptr))) 0; \ \ asm volatile( \ "1: %0 = memw_locked(%1);\n" \ diff --git a/arch/hexagon/include/asm/setup.h b/arch/hexagon/include/asm/setup.h new file mode 100644 index 000000000000..9f2749cd4052 --- /dev/null +++ b/arch/hexagon/include/asm/setup.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2010-2011, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + */ + +#ifndef _ASM_HEXAGON_SETUP_H +#define _ASM_HEXAGON_SETUP_H + +#include <linux/init.h> +#include <uapi/asm/setup.h> + +extern char external_cmdline_buffer; + +void __init setup_arch_memory(void); + +#endif diff --git a/arch/hexagon/include/uapi/asm/setup.h b/arch/hexagon/include/uapi/asm/setup.h index 8ce9428b1583..598f74f671f6 100644 --- a/arch/hexagon/include/uapi/asm/setup.h +++ b/arch/hexagon/include/uapi/asm/setup.h @@ -17,19 +17,9 @@ * 02110-1301, USA. */ -#ifndef _ASM_SETUP_H -#define _ASM_SETUP_H - -#ifdef __KERNEL__ -#include <linux/init.h> -#else -#define __init -#endif +#ifndef _UAPI_ASM_HEXAGON_SETUP_H +#define _UAPI_ASM_HEXAGON_SETUP_H #include <asm-generic/setup.h> -extern char external_cmdline_buffer; - -void __init setup_arch_memory(void); - #endif diff --git a/arch/hexagon/kernel/time.c b/arch/hexagon/kernel/time.c index f0f207e2a694..6f851e1cd4ee 100644 --- a/arch/hexagon/kernel/time.c +++ b/arch/hexagon/kernel/time.c @@ -170,8 +170,7 @@ static void __init time_init_deferred(void) ce_dev->cpumask = cpu_all_mask; - if (!resource) - resource = rtos_timer_device.resource; + resource = rtos_timer_device.resource; /* ioremap here means this has to run later, after paging init */ rtos_timer = ioremap(resource->start, resource_size(resource)); diff --git a/arch/hexagon/kernel/traps.c b/arch/hexagon/kernel/traps.c index 75e062722d28..e732aa01c2ff 100644 --- a/arch/hexagon/kernel/traps.c +++ b/arch/hexagon/kernel/traps.c @@ -135,7 +135,7 @@ static void do_show_stack(struct task_struct *task, unsigned long *fp, } /* Attempt to continue past exception. 
*/ - if (0 == newfp) { + if (!newfp) { struct pt_regs *regs = (struct pt_regs *) (((void *)fp) + 8); @@ -195,8 +195,10 @@ int die(const char *str, struct pt_regs *regs, long err) printk(KERN_EMERG "Oops: %s[#%d]:\n", str, ++die.counter); if (notify_die(DIE_OOPS, str, regs, err, pt_cause(regs), SIGSEGV) == - NOTIFY_STOP) + NOTIFY_STOP) { + spin_unlock_irq(&die.lock); return 1; + } print_modules(); show_regs(regs); diff --git a/arch/m68k/configs/amiga_defconfig b/arch/m68k/configs/amiga_defconfig index 8acfa66e1095..dbf2ea561c85 100644 --- a/arch/m68k/configs/amiga_defconfig +++ b/arch/m68k/configs/amiga_defconfig @@ -626,6 +626,7 @@ CONFIG_TEST_PRINTF=m CONFIG_TEST_SCANF=m CONFIG_TEST_BITMAP=m CONFIG_TEST_UUID=m +CONFIG_TEST_XARRAY=m CONFIG_TEST_MAPLE_TREE=m CONFIG_TEST_RHASHTABLE=m CONFIG_TEST_IDA=m diff --git a/arch/m68k/configs/apollo_defconfig b/arch/m68k/configs/apollo_defconfig index 35e9a0872304..b0fd199cc0a4 100644 --- a/arch/m68k/configs/apollo_defconfig +++ b/arch/m68k/configs/apollo_defconfig @@ -583,6 +583,7 @@ CONFIG_TEST_PRINTF=m CONFIG_TEST_SCANF=m CONFIG_TEST_BITMAP=m CONFIG_TEST_UUID=m +CONFIG_TEST_XARRAY=m CONFIG_TEST_MAPLE_TREE=m CONFIG_TEST_RHASHTABLE=m CONFIG_TEST_IDA=m diff --git a/arch/m68k/configs/atari_defconfig b/arch/m68k/configs/atari_defconfig index 32891ddd3cc5..bb5b2d3b6c10 100644 --- a/arch/m68k/configs/atari_defconfig +++ b/arch/m68k/configs/atari_defconfig @@ -603,6 +603,7 @@ CONFIG_TEST_PRINTF=m CONFIG_TEST_SCANF=m CONFIG_TEST_BITMAP=m CONFIG_TEST_UUID=m +CONFIG_TEST_XARRAY=m CONFIG_TEST_MAPLE_TREE=m CONFIG_TEST_RHASHTABLE=m CONFIG_TEST_IDA=m diff --git a/arch/m68k/configs/bvme6000_defconfig b/arch/m68k/configs/bvme6000_defconfig index ca276f0db3dd..8315a13bab73 100644 --- a/arch/m68k/configs/bvme6000_defconfig +++ b/arch/m68k/configs/bvme6000_defconfig @@ -575,6 +575,7 @@ CONFIG_TEST_PRINTF=m CONFIG_TEST_SCANF=m CONFIG_TEST_BITMAP=m CONFIG_TEST_UUID=m +CONFIG_TEST_XARRAY=m CONFIG_TEST_MAPLE_TREE=m CONFIG_TEST_RHASHTABLE=m CONFIG_TEST_IDA=m diff --git a/arch/m68k/configs/hp300_defconfig b/arch/m68k/configs/hp300_defconfig index e83f14fe1a4f..350370657e5f 100644 --- a/arch/m68k/configs/hp300_defconfig +++ b/arch/m68k/configs/hp300_defconfig @@ -585,6 +585,7 @@ CONFIG_TEST_PRINTF=m CONFIG_TEST_SCANF=m CONFIG_TEST_BITMAP=m CONFIG_TEST_UUID=m +CONFIG_TEST_XARRAY=m CONFIG_TEST_MAPLE_TREE=m CONFIG_TEST_RHASHTABLE=m CONFIG_TEST_IDA=m diff --git a/arch/m68k/configs/mac_defconfig b/arch/m68k/configs/mac_defconfig index 6b58be24da79..f942b4755702 100644 --- a/arch/m68k/configs/mac_defconfig +++ b/arch/m68k/configs/mac_defconfig @@ -602,6 +602,7 @@ CONFIG_TEST_PRINTF=m CONFIG_TEST_SCANF=m CONFIG_TEST_BITMAP=m CONFIG_TEST_UUID=m +CONFIG_TEST_XARRAY=m CONFIG_TEST_MAPLE_TREE=m CONFIG_TEST_RHASHTABLE=m CONFIG_TEST_IDA=m diff --git a/arch/m68k/configs/multi_defconfig b/arch/m68k/configs/multi_defconfig index 0e8d24f82565..b1eaad02efab 100644 --- a/arch/m68k/configs/multi_defconfig +++ b/arch/m68k/configs/multi_defconfig @@ -689,6 +689,7 @@ CONFIG_TEST_PRINTF=m CONFIG_TEST_SCANF=m CONFIG_TEST_BITMAP=m CONFIG_TEST_UUID=m +CONFIG_TEST_XARRAY=m CONFIG_TEST_MAPLE_TREE=m CONFIG_TEST_RHASHTABLE=m CONFIG_TEST_IDA=m diff --git a/arch/m68k/configs/mvme147_defconfig b/arch/m68k/configs/mvme147_defconfig index 24a7608c13ac..6309a4442bb3 100644 --- a/arch/m68k/configs/mvme147_defconfig +++ b/arch/m68k/configs/mvme147_defconfig @@ -575,6 +575,7 @@ CONFIG_TEST_PRINTF=m CONFIG_TEST_SCANF=m CONFIG_TEST_BITMAP=m CONFIG_TEST_UUID=m +CONFIG_TEST_XARRAY=m CONFIG_TEST_MAPLE_TREE=m 
CONFIG_TEST_RHASHTABLE=m CONFIG_TEST_IDA=m diff --git a/arch/m68k/configs/mvme16x_defconfig b/arch/m68k/configs/mvme16x_defconfig index c415f75821f3..3feb0731f814 100644 --- a/arch/m68k/configs/mvme16x_defconfig +++ b/arch/m68k/configs/mvme16x_defconfig @@ -576,6 +576,7 @@ CONFIG_TEST_PRINTF=m CONFIG_TEST_SCANF=m CONFIG_TEST_BITMAP=m CONFIG_TEST_UUID=m +CONFIG_TEST_XARRAY=m CONFIG_TEST_MAPLE_TREE=m CONFIG_TEST_RHASHTABLE=m CONFIG_TEST_IDA=m diff --git a/arch/m68k/configs/q40_defconfig b/arch/m68k/configs/q40_defconfig index 2c715a8ff551..ea04b1b0da7d 100644 --- a/arch/m68k/configs/q40_defconfig +++ b/arch/m68k/configs/q40_defconfig @@ -592,6 +592,7 @@ CONFIG_TEST_PRINTF=m CONFIG_TEST_SCANF=m CONFIG_TEST_BITMAP=m CONFIG_TEST_UUID=m +CONFIG_TEST_XARRAY=m CONFIG_TEST_MAPLE_TREE=m CONFIG_TEST_RHASHTABLE=m CONFIG_TEST_IDA=m diff --git a/arch/m68k/configs/sun3_defconfig b/arch/m68k/configs/sun3_defconfig index 15ff37fcccbf..f52d9af92153 100644 --- a/arch/m68k/configs/sun3_defconfig +++ b/arch/m68k/configs/sun3_defconfig @@ -572,6 +572,7 @@ CONFIG_TEST_PRINTF=m CONFIG_TEST_SCANF=m CONFIG_TEST_BITMAP=m CONFIG_TEST_UUID=m +CONFIG_TEST_XARRAY=m CONFIG_TEST_MAPLE_TREE=m CONFIG_TEST_RHASHTABLE=m CONFIG_TEST_IDA=m diff --git a/arch/m68k/configs/sun3x_defconfig b/arch/m68k/configs/sun3x_defconfig index 40a44bf9f48d..f348447824da 100644 --- a/arch/m68k/configs/sun3x_defconfig +++ b/arch/m68k/configs/sun3x_defconfig @@ -573,6 +573,7 @@ CONFIG_TEST_PRINTF=m CONFIG_TEST_SCANF=m CONFIG_TEST_BITMAP=m CONFIG_TEST_UUID=m +CONFIG_TEST_XARRAY=m CONFIG_TEST_MAPLE_TREE=m CONFIG_TEST_RHASHTABLE=m CONFIG_TEST_IDA=m diff --git a/arch/powerpc/configs/ppc64_defconfig b/arch/powerpc/configs/ppc64_defconfig index e9c46b59ebbc..465eb96c755e 100644 --- a/arch/powerpc/configs/ppc64_defconfig +++ b/arch/powerpc/configs/ppc64_defconfig @@ -448,6 +448,7 @@ CONFIG_TEST_PRINTF=m CONFIG_TEST_SCANF=m CONFIG_TEST_BITMAP=m CONFIG_TEST_UUID=m +CONFIG_TEST_XARRAY=m CONFIG_TEST_MAPLE_TREE=m CONFIG_TEST_RHASHTABLE=m CONFIG_TEST_IDA=m diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c index 45dac7b46aa3..34a5aec4908f 100644 --- a/arch/powerpc/kernel/module_64.c +++ b/arch/powerpc/kernel/module_64.c @@ -369,6 +369,24 @@ static void dedotify_versions(struct modversion_info *vers, } } +/* Same as normal versions, remove a leading dot if present. */ +static void dedotify_ext_version_names(char *str_seq, unsigned long size) +{ + unsigned long out = 0; + unsigned long in; + char last = '\0'; + + for (in = 0; in < size; in++) { + /* Skip one leading dot */ + if (last == '\0' && str_seq[in] == '.') + in++; + last = str_seq[in]; + str_seq[out++] = last; + } + /* Zero the trailing portion of the names table for robustness */ + memset(&str_seq[out], 0, size - out); +} + /* * Undefined symbols which refer to .funcname, hack to funcname. Make .TOC. * seem to be defined (value set later). 
@@ -438,10 +456,12 @@ int module_frob_arch_sections(Elf64_Ehdr *hdr, me->arch.toc_section = i; if (sechdrs[i].sh_addralign < 8) sechdrs[i].sh_addralign = 8; - } - else if (strcmp(secstrings+sechdrs[i].sh_name,"__versions")==0) + } else if (strcmp(secstrings + sechdrs[i].sh_name, "__versions") == 0) dedotify_versions((void *)hdr + sechdrs[i].sh_offset, sechdrs[i].sh_size); + else if (strcmp(secstrings + sechdrs[i].sh_name, "__version_ext_names") == 0) + dedotify_ext_version_names((void *)hdr + sechdrs[i].sh_offset, + sechdrs[i].sh_size); if (sechdrs[i].sh_type == SHT_SYMTAB) dedotify((void *)hdr + sechdrs[i].sh_offset, diff --git a/arch/riscv/Kconfig.errata b/arch/riscv/Kconfig.errata index 2acc7d876e1f..e318119d570d 100644 --- a/arch/riscv/Kconfig.errata +++ b/arch/riscv/Kconfig.errata @@ -119,4 +119,15 @@ config ERRATA_THEAD_PMU If you don't know what to do here, say "Y". +config ERRATA_THEAD_GHOSTWRITE + bool "Apply T-Head Ghostwrite errata" + depends on ERRATA_THEAD && RISCV_ISA_XTHEADVECTOR + default y + help + The T-Head C9xx cores have a vulnerability in the xtheadvector + instruction set. When this errata is enabled, the CPUs will be probed + to determine if they are vulnerable and disable xtheadvector. + + If you don't know what to do here, say "Y". + endmenu # "CPU errata selection" diff --git a/arch/riscv/Kconfig.vendor b/arch/riscv/Kconfig.vendor index 6f1cdd32ed29..b096548fe0ff 100644 --- a/arch/riscv/Kconfig.vendor +++ b/arch/riscv/Kconfig.vendor @@ -16,4 +16,30 @@ config RISCV_ISA_VENDOR_EXT_ANDES If you don't know what to do here, say Y. endmenu +menu "T-Head" +config RISCV_ISA_VENDOR_EXT_THEAD + bool "T-Head vendor extension support" + select RISCV_ISA_VENDOR_EXT + default y + help + Say N here to disable detection of and support for all T-Head vendor + extensions. Without this option enabled, T-Head vendor extensions will + not be detected at boot and their presence not reported to userspace. + + If you don't know what to do here, say Y. + +config RISCV_ISA_XTHEADVECTOR + bool "xtheadvector extension support" + depends on RISCV_ISA_VENDOR_EXT_THEAD + depends on RISCV_ISA_V + depends on FPU + default y + help + Say N here if you want to disable all xtheadvector related procedures + in the kernel. This will disable vector for any T-Head board that + contains xtheadvector rather than the standard vector. + + If you don't know what to do here, say Y. 
+endmenu + endmenu diff --git a/arch/riscv/Makefile.postlink b/arch/riscv/Makefile.postlink index 829b9abc91f6..6b0580949b6a 100644 --- a/arch/riscv/Makefile.postlink +++ b/arch/riscv/Makefile.postlink @@ -10,6 +10,7 @@ __archpost: -include include/config/auto.conf include $(srctree)/scripts/Kbuild.include +include $(srctree)/scripts/Makefile.lib quiet_cmd_relocs_check = CHKREL $@ cmd_relocs_check = \ @@ -19,11 +20,6 @@ ifdef CONFIG_RELOCATABLE quiet_cmd_cp_vmlinux_relocs = CPREL vmlinux.relocs cmd_cp_vmlinux_relocs = cp vmlinux vmlinux.relocs -quiet_cmd_relocs_strip = STRIPREL $@ -cmd_relocs_strip = $(OBJCOPY) --remove-section='.rel.*' \ - --remove-section='.rel__*' \ - --remove-section='.rela.*' \ - --remove-section='.rela__*' $@ endif # `@true` prevents complaint when there is nothing to be done @@ -33,7 +29,7 @@ vmlinux: FORCE ifdef CONFIG_RELOCATABLE $(call if_changed,relocs_check) $(call if_changed,cp_vmlinux_relocs) - $(call if_changed,relocs_strip) + $(call if_changed,strip_relocs) endif clean: diff --git a/arch/riscv/boot/dts/allwinner/sun20i-d1s.dtsi b/arch/riscv/boot/dts/allwinner/sun20i-d1s.dtsi index 64c3c2e6cbe0..6367112e614a 100644 --- a/arch/riscv/boot/dts/allwinner/sun20i-d1s.dtsi +++ b/arch/riscv/boot/dts/allwinner/sun20i-d1s.dtsi @@ -27,7 +27,8 @@ riscv,isa = "rv64imafdc"; riscv,isa-base = "rv64i"; riscv,isa-extensions = "i", "m", "a", "f", "d", "c", "zicntr", "zicsr", - "zifencei", "zihpm"; + "zifencei", "zihpm", "xtheadvector"; + thead,vlenb = <128>; #cooling-cells = <2>; cpu0_intc: interrupt-controller { diff --git a/arch/riscv/configs/defconfig b/arch/riscv/configs/defconfig index a924ef116d5e..0f7dcbe3c45b 100644 --- a/arch/riscv/configs/defconfig +++ b/arch/riscv/configs/defconfig @@ -10,7 +10,6 @@ CONFIG_MEMCG=y CONFIG_BLK_CGROUP=y CONFIG_CGROUP_SCHED=y CONFIG_CFS_BANDWIDTH=y -CONFIG_RT_GROUP_SCHED=y CONFIG_CGROUP_PIDS=y CONFIG_CGROUP_FREEZER=y CONFIG_CGROUP_HUGETLB=y diff --git a/arch/riscv/errata/thead/errata.c b/arch/riscv/errata/thead/errata.c index e24770a77932..0b942183f708 100644 --- a/arch/riscv/errata/thead/errata.c +++ b/arch/riscv/errata/thead/errata.c @@ -10,6 +10,7 @@ #include <linux/string.h> #include <linux/uaccess.h> #include <asm/alternative.h> +#include <asm/bugs.h> #include <asm/cacheflush.h> #include <asm/cpufeature.h> #include <asm/dma-noncoherent.h> @@ -142,6 +143,31 @@ static bool errata_probe_pmu(unsigned int stage, return true; } +static bool errata_probe_ghostwrite(unsigned int stage, + unsigned long arch_id, unsigned long impid) +{ + if (!IS_ENABLED(CONFIG_ERRATA_THEAD_GHOSTWRITE)) + return false; + + /* + * target-c9xx cores report arch_id and impid as 0 + * + * While ghostwrite may not affect all c9xx cores that implement + * xtheadvector, there is no futher granularity than c9xx. Assume + * vulnerable for this entire class of processors when xtheadvector is + * enabled. 
+ */ + if (arch_id != 0 || impid != 0) + return false; + + if (stage != RISCV_ALTERNATIVES_EARLY_BOOT) + return false; + + ghostwrite_set_vulnerable(); + + return true; +} + static u32 thead_errata_probe(unsigned int stage, unsigned long archid, unsigned long impid) { @@ -155,6 +181,8 @@ static u32 thead_errata_probe(unsigned int stage, if (errata_probe_pmu(stage, archid, impid)) cpu_req_errata |= BIT(ERRATA_THEAD_PMU); + errata_probe_ghostwrite(stage, archid, impid); + return cpu_req_errata; } diff --git a/arch/riscv/include/asm/bugs.h b/arch/riscv/include/asm/bugs.h new file mode 100644 index 000000000000..17ca0a947730 --- /dev/null +++ b/arch/riscv/include/asm/bugs.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Interface for managing mitigations for riscv vulnerabilities. + * + * Copyright (C) 2024 Rivos Inc. + */ + +#ifndef __ASM_BUGS_H +#define __ASM_BUGS_H + +/* Watch out, ordering is important here. */ +enum mitigation_state { + UNAFFECTED, + MITIGATED, + VULNERABLE, +}; + +void ghostwrite_set_vulnerable(void); +bool ghostwrite_enable_mitigation(void); +enum mitigation_state ghostwrite_get_state(void); + +#endif /* __ASM_BUGS_H */ diff --git a/arch/riscv/include/asm/cpufeature.h b/arch/riscv/include/asm/cpufeature.h index 4bd054c54c21..569140d6e639 100644 --- a/arch/riscv/include/asm/cpufeature.h +++ b/arch/riscv/include/asm/cpufeature.h @@ -34,6 +34,8 @@ DECLARE_PER_CPU(struct riscv_cpuinfo, riscv_cpuinfo); /* Per-cpu ISA extensions. */ extern struct riscv_isainfo hart_isa[NR_CPUS]; +extern u32 thead_vlenb_of; + void __init riscv_user_isa_enable(void); #define _RISCV_ISA_EXT_DATA(_name, _id, _subset_exts, _subset_exts_size, _validate) { \ diff --git a/arch/riscv/include/asm/csr.h b/arch/riscv/include/asm/csr.h index 37bdea65bbd8..6fed42e37705 100644 --- a/arch/riscv/include/asm/csr.h +++ b/arch/riscv/include/asm/csr.h @@ -30,6 +30,12 @@ #define SR_VS_CLEAN _AC(0x00000400, UL) #define SR_VS_DIRTY _AC(0x00000600, UL) +#define SR_VS_THEAD _AC(0x01800000, UL) /* xtheadvector Status */ +#define SR_VS_OFF_THEAD _AC(0x00000000, UL) +#define SR_VS_INITIAL_THEAD _AC(0x00800000, UL) +#define SR_VS_CLEAN_THEAD _AC(0x01000000, UL) +#define SR_VS_DIRTY_THEAD _AC(0x01800000, UL) + #define SR_XS _AC(0x00018000, UL) /* Extension Status */ #define SR_XS_OFF _AC(0x00000000, UL) #define SR_XS_INITIAL _AC(0x00008000, UL) @@ -315,6 +321,15 @@ #define CSR_STIMECMP 0x14D #define CSR_STIMECMPH 0x15D +/* xtheadvector symbolic CSR names */ +#define CSR_VXSAT 0x9 +#define CSR_VXRM 0xa + +/* xtheadvector CSR masks */ +#define CSR_VXRM_MASK 3 +#define CSR_VXRM_SHIFT 1 +#define CSR_VXSAT_MASK 1 + /* Supervisor-Level Window to Indirectly Accessed Registers (AIA) */ #define CSR_SISELECT 0x150 #define CSR_SIREG 0x151 diff --git a/arch/riscv/include/asm/errata_list.h b/arch/riscv/include/asm/errata_list.h index 7c8a71a526a3..6e426ed7919a 100644 --- a/arch/riscv/include/asm/errata_list.h +++ b/arch/riscv/include/asm/errata_list.h @@ -25,7 +25,8 @@ #ifdef CONFIG_ERRATA_THEAD #define ERRATA_THEAD_MAE 0 #define ERRATA_THEAD_PMU 1 -#define ERRATA_THEAD_NUMBER 2 +#define ERRATA_THEAD_GHOSTWRITE 2 +#define ERRATA_THEAD_NUMBER 3 #endif #ifdef __ASSEMBLY__ diff --git a/arch/riscv/include/asm/futex.h b/arch/riscv/include/asm/futex.h index fc8130f995c1..72be100afa23 100644 --- a/arch/riscv/include/asm/futex.h +++ b/arch/riscv/include/asm/futex.h @@ -85,7 +85,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, __enable_user_access(); __asm__ __volatile__ ( - "1: lr.w.aqrl %[v],%[u] \n" + 
"1: lr.w %[v],%[u] \n" " bne %[v],%z[ov],3f \n" "2: sc.w.aqrl %[t],%z[nv],%[u] \n" " bnez %[t],1b \n" diff --git a/arch/riscv/include/asm/hwprobe.h b/arch/riscv/include/asm/hwprobe.h index 1ce1df6d0ff3..dd624523981c 100644 --- a/arch/riscv/include/asm/hwprobe.h +++ b/arch/riscv/include/asm/hwprobe.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* - * Copyright 2023 Rivos, Inc + * Copyright 2023-2024 Rivos, Inc */ #ifndef _ASM_HWPROBE_H @@ -8,7 +8,7 @@ #include <uapi/asm/hwprobe.h> -#define RISCV_HWPROBE_MAX_KEY 10 +#define RISCV_HWPROBE_MAX_KEY 11 static inline bool riscv_hwprobe_key_is_valid(__s64 key) { @@ -21,6 +21,7 @@ static inline bool hwprobe_key_is_bitmask(__s64 key) case RISCV_HWPROBE_KEY_BASE_BEHAVIOR: case RISCV_HWPROBE_KEY_IMA_EXT_0: case RISCV_HWPROBE_KEY_CPUPERF_0: + case RISCV_HWPROBE_KEY_VENDOR_EXT_THEAD_0: return true; } diff --git a/arch/riscv/include/asm/switch_to.h b/arch/riscv/include/asm/switch_to.h index 94e33216b2d9..0e71eb82f920 100644 --- a/arch/riscv/include/asm/switch_to.h +++ b/arch/riscv/include/asm/switch_to.h @@ -117,7 +117,7 @@ do { \ __set_prev_cpu(__prev->thread); \ if (has_fpu()) \ __switch_to_fpu(__prev, __next); \ - if (has_vector()) \ + if (has_vector() || has_xtheadvector()) \ __switch_to_vector(__prev, __next); \ if (switch_to_should_flush_icache(__next)) \ local_flush_icache_all(); \ diff --git a/arch/riscv/include/asm/vector.h b/arch/riscv/include/asm/vector.h index c7c023afbacd..e8a83f55be2b 100644 --- a/arch/riscv/include/asm/vector.h +++ b/arch/riscv/include/asm/vector.h @@ -18,6 +18,27 @@ #include <asm/cpufeature.h> #include <asm/csr.h> #include <asm/asm.h> +#include <asm/vendorid_list.h> +#include <asm/vendor_extensions.h> +#include <asm/vendor_extensions/thead.h> + +#define __riscv_v_vstate_or(_val, TYPE) ({ \ + typeof(_val) _res = _val; \ + if (has_xtheadvector()) \ + _res = (_res & ~SR_VS_THEAD) | SR_VS_##TYPE##_THEAD; \ + else \ + _res = (_res & ~SR_VS) | SR_VS_##TYPE; \ + _res; \ +}) + +#define __riscv_v_vstate_check(_val, TYPE) ({ \ + bool _res; \ + if (has_xtheadvector()) \ + _res = ((_val) & SR_VS_THEAD) == SR_VS_##TYPE##_THEAD; \ + else \ + _res = ((_val) & SR_VS) == SR_VS_##TYPE; \ + _res; \ +}) extern unsigned long riscv_v_vsize; int riscv_v_setup_vsize(void); @@ -41,39 +62,62 @@ static __always_inline bool has_vector(void) return riscv_has_extension_unlikely(RISCV_ISA_EXT_ZVE32X); } +static __always_inline bool has_xtheadvector_no_alternatives(void) +{ + if (IS_ENABLED(CONFIG_RISCV_ISA_XTHEADVECTOR)) + return riscv_isa_vendor_extension_available(THEAD_VENDOR_ID, XTHEADVECTOR); + else + return false; +} + +static __always_inline bool has_xtheadvector(void) +{ + if (IS_ENABLED(CONFIG_RISCV_ISA_XTHEADVECTOR)) + return riscv_has_vendor_extension_unlikely(THEAD_VENDOR_ID, + RISCV_ISA_VENDOR_EXT_XTHEADVECTOR); + else + return false; +} + static inline void __riscv_v_vstate_clean(struct pt_regs *regs) { - regs->status = (regs->status & ~SR_VS) | SR_VS_CLEAN; + regs->status = __riscv_v_vstate_or(regs->status, CLEAN); } static inline void __riscv_v_vstate_dirty(struct pt_regs *regs) { - regs->status = (regs->status & ~SR_VS) | SR_VS_DIRTY; + regs->status = __riscv_v_vstate_or(regs->status, DIRTY); } static inline void riscv_v_vstate_off(struct pt_regs *regs) { - regs->status = (regs->status & ~SR_VS) | SR_VS_OFF; + regs->status = __riscv_v_vstate_or(regs->status, OFF); } static inline void riscv_v_vstate_on(struct pt_regs *regs) { - regs->status = (regs->status & ~SR_VS) | SR_VS_INITIAL; + regs->status = 
__riscv_v_vstate_or(regs->status, INITIAL); } static inline bool riscv_v_vstate_query(struct pt_regs *regs) { - return (regs->status & SR_VS) != 0; + return !__riscv_v_vstate_check(regs->status, OFF); } static __always_inline void riscv_v_enable(void) { - csr_set(CSR_SSTATUS, SR_VS); + if (has_xtheadvector()) + csr_set(CSR_SSTATUS, SR_VS_THEAD); + else + csr_set(CSR_SSTATUS, SR_VS); } static __always_inline void riscv_v_disable(void) { - csr_clear(CSR_SSTATUS, SR_VS); + if (has_xtheadvector()) + csr_clear(CSR_SSTATUS, SR_VS_THEAD); + else + csr_clear(CSR_SSTATUS, SR_VS); } static __always_inline void __vstate_csr_save(struct __riscv_v_ext_state *dest) @@ -82,10 +126,36 @@ static __always_inline void __vstate_csr_save(struct __riscv_v_ext_state *dest) "csrr %0, " __stringify(CSR_VSTART) "\n\t" "csrr %1, " __stringify(CSR_VTYPE) "\n\t" "csrr %2, " __stringify(CSR_VL) "\n\t" - "csrr %3, " __stringify(CSR_VCSR) "\n\t" - "csrr %4, " __stringify(CSR_VLENB) "\n\t" : "=r" (dest->vstart), "=r" (dest->vtype), "=r" (dest->vl), - "=r" (dest->vcsr), "=r" (dest->vlenb) : :); + "=r" (dest->vcsr) : :); + + if (has_xtheadvector()) { + unsigned long status; + + /* + * CSR_VCSR is defined as + * [2:1] - vxrm[1:0] + * [0] - vxsat + * The earlier vector spec implemented by T-Head uses separate + * registers for the same bit-elements, so just combine those + * into the existing output field. + * + * Additionally T-Head cores need FS to be enabled when accessing + * the VXRM and VXSAT CSRs, otherwise ending in illegal instructions. + * Though the cores do not implement the VXRM and VXSAT fields in the + * FCSR CSR that vector-0.7.1 specifies. + */ + status = csr_read_set(CSR_STATUS, SR_FS_DIRTY); + dest->vcsr = csr_read(CSR_VXSAT) | csr_read(CSR_VXRM) << CSR_VXRM_SHIFT; + + dest->vlenb = riscv_v_vsize / 32; + + if ((status & SR_FS) != SR_FS_DIRTY) + csr_write(CSR_STATUS, status); + } else { + dest->vcsr = csr_read(CSR_VCSR); + dest->vlenb = csr_read(CSR_VLENB); + } } static __always_inline void __vstate_csr_restore(struct __riscv_v_ext_state *src) @@ -96,9 +166,25 @@ static __always_inline void __vstate_csr_restore(struct __riscv_v_ext_state *src "vsetvl x0, %2, %1\n\t" ".option pop\n\t" "csrw " __stringify(CSR_VSTART) ", %0\n\t" - "csrw " __stringify(CSR_VCSR) ", %3\n\t" - : : "r" (src->vstart), "r" (src->vtype), "r" (src->vl), - "r" (src->vcsr) :); + : : "r" (src->vstart), "r" (src->vtype), "r" (src->vl)); + + if (has_xtheadvector()) { + unsigned long status = csr_read(CSR_SSTATUS); + + /* + * Similar to __vstate_csr_save above, restore values for the + * separate VXRM and VXSAT CSRs from the vcsr variable. 
+ */ + status = csr_read_set(CSR_STATUS, SR_FS_DIRTY); + + csr_write(CSR_VXRM, (src->vcsr >> CSR_VXRM_SHIFT) & CSR_VXRM_MASK); + csr_write(CSR_VXSAT, src->vcsr & CSR_VXSAT_MASK); + + if ((status & SR_FS) != SR_FS_DIRTY) + csr_write(CSR_STATUS, status); + } else { + csr_write(CSR_VCSR, src->vcsr); + } } static inline void __riscv_v_vstate_save(struct __riscv_v_ext_state *save_to, @@ -108,19 +194,33 @@ static inline void __riscv_v_vstate_save(struct __riscv_v_ext_state *save_to, riscv_v_enable(); __vstate_csr_save(save_to); - asm volatile ( - ".option push\n\t" - ".option arch, +zve32x\n\t" - "vsetvli %0, x0, e8, m8, ta, ma\n\t" - "vse8.v v0, (%1)\n\t" - "add %1, %1, %0\n\t" - "vse8.v v8, (%1)\n\t" - "add %1, %1, %0\n\t" - "vse8.v v16, (%1)\n\t" - "add %1, %1, %0\n\t" - "vse8.v v24, (%1)\n\t" - ".option pop\n\t" - : "=&r" (vl) : "r" (datap) : "memory"); + if (has_xtheadvector()) { + asm volatile ( + "mv t0, %0\n\t" + THEAD_VSETVLI_T4X0E8M8D1 + THEAD_VSB_V_V0T0 + "add t0, t0, t4\n\t" + THEAD_VSB_V_V0T0 + "add t0, t0, t4\n\t" + THEAD_VSB_V_V0T0 + "add t0, t0, t4\n\t" + THEAD_VSB_V_V0T0 + : : "r" (datap) : "memory", "t0", "t4"); + } else { + asm volatile ( + ".option push\n\t" + ".option arch, +zve32x\n\t" + "vsetvli %0, x0, e8, m8, ta, ma\n\t" + "vse8.v v0, (%1)\n\t" + "add %1, %1, %0\n\t" + "vse8.v v8, (%1)\n\t" + "add %1, %1, %0\n\t" + "vse8.v v16, (%1)\n\t" + "add %1, %1, %0\n\t" + "vse8.v v24, (%1)\n\t" + ".option pop\n\t" + : "=&r" (vl) : "r" (datap) : "memory"); + } riscv_v_disable(); } @@ -130,19 +230,33 @@ static inline void __riscv_v_vstate_restore(struct __riscv_v_ext_state *restore_ unsigned long vl; riscv_v_enable(); - asm volatile ( - ".option push\n\t" - ".option arch, +zve32x\n\t" - "vsetvli %0, x0, e8, m8, ta, ma\n\t" - "vle8.v v0, (%1)\n\t" - "add %1, %1, %0\n\t" - "vle8.v v8, (%1)\n\t" - "add %1, %1, %0\n\t" - "vle8.v v16, (%1)\n\t" - "add %1, %1, %0\n\t" - "vle8.v v24, (%1)\n\t" - ".option pop\n\t" - : "=&r" (vl) : "r" (datap) : "memory"); + if (has_xtheadvector()) { + asm volatile ( + "mv t0, %0\n\t" + THEAD_VSETVLI_T4X0E8M8D1 + THEAD_VLB_V_V0T0 + "add t0, t0, t4\n\t" + THEAD_VLB_V_V0T0 + "add t0, t0, t4\n\t" + THEAD_VLB_V_V0T0 + "add t0, t0, t4\n\t" + THEAD_VLB_V_V0T0 + : : "r" (datap) : "memory", "t0", "t4"); + } else { + asm volatile ( + ".option push\n\t" + ".option arch, +zve32x\n\t" + "vsetvli %0, x0, e8, m8, ta, ma\n\t" + "vle8.v v0, (%1)\n\t" + "add %1, %1, %0\n\t" + "vle8.v v8, (%1)\n\t" + "add %1, %1, %0\n\t" + "vle8.v v16, (%1)\n\t" + "add %1, %1, %0\n\t" + "vle8.v v24, (%1)\n\t" + ".option pop\n\t" + : "=&r" (vl) : "r" (datap) : "memory"); + } __vstate_csr_restore(restore_from); riscv_v_disable(); } @@ -152,33 +266,41 @@ static inline void __riscv_v_vstate_discard(void) unsigned long vl, vtype_inval = 1UL << (BITS_PER_LONG - 1); riscv_v_enable(); + if (has_xtheadvector()) + asm volatile (THEAD_VSETVLI_T4X0E8M8D1 : : : "t4"); + else + asm volatile ( + ".option push\n\t" + ".option arch, +zve32x\n\t" + "vsetvli %0, x0, e8, m8, ta, ma\n\t" + ".option pop\n\t": "=&r" (vl)); + asm volatile ( ".option push\n\t" ".option arch, +zve32x\n\t" - "vsetvli %0, x0, e8, m8, ta, ma\n\t" "vmv.v.i v0, -1\n\t" "vmv.v.i v8, -1\n\t" "vmv.v.i v16, -1\n\t" "vmv.v.i v24, -1\n\t" "vsetvl %0, x0, %1\n\t" ".option pop\n\t" - : "=&r" (vl) : "r" (vtype_inval) : "memory"); + : "=&r" (vl) : "r" (vtype_inval)); + riscv_v_disable(); } static inline void riscv_v_vstate_discard(struct pt_regs *regs) { - if ((regs->status & SR_VS) == SR_VS_OFF) - return; - - __riscv_v_vstate_discard(); - 
__riscv_v_vstate_dirty(regs); + if (riscv_v_vstate_query(regs)) { + __riscv_v_vstate_discard(); + __riscv_v_vstate_dirty(regs); + } } static inline void riscv_v_vstate_save(struct __riscv_v_ext_state *vstate, struct pt_regs *regs) { - if ((regs->status & SR_VS) == SR_VS_DIRTY) { + if (__riscv_v_vstate_check(regs->status, DIRTY)) { __riscv_v_vstate_save(vstate, vstate->datap); __riscv_v_vstate_clean(regs); } @@ -187,7 +309,7 @@ static inline void riscv_v_vstate_save(struct __riscv_v_ext_state *vstate, static inline void riscv_v_vstate_restore(struct __riscv_v_ext_state *vstate, struct pt_regs *regs) { - if ((regs->status & SR_VS) != SR_VS_OFF) { + if (riscv_v_vstate_query(regs)) { __riscv_v_vstate_restore(vstate, vstate->datap); __riscv_v_vstate_clean(regs); } @@ -196,7 +318,7 @@ static inline void riscv_v_vstate_restore(struct __riscv_v_ext_state *vstate, static inline void riscv_v_vstate_set_restore(struct task_struct *task, struct pt_regs *regs) { - if ((regs->status & SR_VS) != SR_VS_OFF) { + if (riscv_v_vstate_query(regs)) { set_tsk_thread_flag(task, TIF_RISCV_V_DEFER_RESTORE); riscv_v_vstate_on(regs); } @@ -270,6 +392,8 @@ struct pt_regs; static inline int riscv_v_setup_vsize(void) { return -EOPNOTSUPP; } static __always_inline bool has_vector(void) { return false; } static __always_inline bool insn_is_vector(u32 insn_buf) { return false; } +static __always_inline bool has_xtheadvector_no_alternatives(void) { return false; } +static __always_inline bool has_xtheadvector(void) { return false; } static inline bool riscv_v_first_use_handler(struct pt_regs *regs) { return false; } static inline bool riscv_v_vstate_query(struct pt_regs *regs) { return false; } static inline bool riscv_v_vstate_ctrl_user_allowed(void) { return false; } diff --git a/arch/riscv/include/asm/vendor_extensions/thead.h b/arch/riscv/include/asm/vendor_extensions/thead.h new file mode 100644 index 000000000000..e85c75b3b340 --- /dev/null +++ b/arch/riscv/include/asm/vendor_extensions/thead.h @@ -0,0 +1,47 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_RISCV_VENDOR_EXTENSIONS_THEAD_H +#define _ASM_RISCV_VENDOR_EXTENSIONS_THEAD_H + +#include <asm/vendor_extensions.h> + +#include <linux/types.h> + +/* + * Extension keys must be strictly less than RISCV_ISA_VENDOR_EXT_MAX. + */ +#define RISCV_ISA_VENDOR_EXT_XTHEADVECTOR 0 + +extern struct riscv_isa_vendor_ext_data_list riscv_isa_vendor_ext_list_thead; + +#ifdef CONFIG_RISCV_ISA_VENDOR_EXT_THEAD +void disable_xtheadvector(void); +#else +static inline void disable_xtheadvector(void) { } +#endif + +/* Extension specific helpers */ + +/* + * Vector 0.7.1 as used for example on T-Head Xuantie cores, uses an older + * encoding for vsetvli (ta, ma vs. d1), so provide an instruction for + * vsetvli t4, x0, e8, m8, d1 + */ +#define THEAD_VSETVLI_T4X0E8M8D1 ".long 0x00307ed7\n\t" + +/* + * While in theory, the vector-0.7.1 vsb.v and vlb.v result in the same + * encoding as the standard vse8.v and vle8.v, compilers seem to optimize + * the call resulting in a different encoding and then using a value for + * the "mop" field that is not part of vector-0.7.1 + * So encode specific variants for vstate_save and _restore. 
+ */ +#define THEAD_VSB_V_V0T0 ".long 0x02028027\n\t" +#define THEAD_VSB_V_V8T0 ".long 0x02028427\n\t" +#define THEAD_VSB_V_V16T0 ".long 0x02028827\n\t" +#define THEAD_VSB_V_V24T0 ".long 0x02028c27\n\t" +#define THEAD_VLB_V_V0T0 ".long 0x012028007\n\t" +#define THEAD_VLB_V_V8T0 ".long 0x012028407\n\t" +#define THEAD_VLB_V_V16T0 ".long 0x012028807\n\t" +#define THEAD_VLB_V_V24T0 ".long 0x012028c07\n\t" + +#endif diff --git a/arch/riscv/include/asm/vendor_extensions/thead_hwprobe.h b/arch/riscv/include/asm/vendor_extensions/thead_hwprobe.h new file mode 100644 index 000000000000..65a9c5612466 --- /dev/null +++ b/arch/riscv/include/asm/vendor_extensions/thead_hwprobe.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_RISCV_VENDOR_EXTENSIONS_THEAD_HWPROBE_H +#define _ASM_RISCV_VENDOR_EXTENSIONS_THEAD_HWPROBE_H + +#include <linux/cpumask.h> + +#include <uapi/asm/hwprobe.h> + +#ifdef CONFIG_RISCV_ISA_VENDOR_EXT_THEAD +void hwprobe_isa_vendor_ext_thead_0(struct riscv_hwprobe *pair, const struct cpumask *cpus); +#else +static inline void hwprobe_isa_vendor_ext_thead_0(struct riscv_hwprobe *pair, + const struct cpumask *cpus) +{ + pair->value = 0; +} +#endif + +#endif diff --git a/arch/riscv/include/asm/vendor_extensions/vendor_hwprobe.h b/arch/riscv/include/asm/vendor_extensions/vendor_hwprobe.h new file mode 100644 index 000000000000..6b9293e984a9 --- /dev/null +++ b/arch/riscv/include/asm/vendor_extensions/vendor_hwprobe.h @@ -0,0 +1,37 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright 2024 Rivos, Inc + */ + +#ifndef _ASM_RISCV_SYS_HWPROBE_H +#define _ASM_RISCV_SYS_HWPROBE_H + +#include <asm/cpufeature.h> + +#define VENDOR_EXT_KEY(ext) \ + do { \ + if (__riscv_isa_extension_available(isainfo->isa, RISCV_ISA_VENDOR_EXT_##ext)) \ + pair->value |= RISCV_HWPROBE_VENDOR_EXT_##ext; \ + else \ + missing |= RISCV_HWPROBE_VENDOR_EXT_##ext; \ + } while (false) + +/* + * Loop through and record extensions that 1) anyone has, and 2) anyone + * doesn't have. + * + * _extension_checks is an arbitrary C block to set the values of pair->value + * and missing. It should be filled with VENDOR_EXT_KEY expressions. + */ +#define VENDOR_EXTENSION_SUPPORTED(pair, cpus, per_hart_vendor_bitmap, _extension_checks) \ + do { \ + int cpu; \ + u64 missing = 0; \ + for_each_cpu(cpu, (cpus)) { \ + struct riscv_isavendorinfo *isainfo = &(per_hart_vendor_bitmap)[cpu]; \ + _extension_checks \ + } \ + (pair)->value &= ~missing; \ + } while (false) \ + +#endif /* _ASM_RISCV_SYS_HWPROBE_H */ diff --git a/arch/riscv/include/uapi/asm/hwprobe.h b/arch/riscv/include/uapi/asm/hwprobe.h index 3af142b99f77..c3c1cc951cb9 100644 --- a/arch/riscv/include/uapi/asm/hwprobe.h +++ b/arch/riscv/include/uapi/asm/hwprobe.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* - * Copyright 2023 Rivos, Inc + * Copyright 2023-2024 Rivos, Inc */ #ifndef _UAPI_ASM_HWPROBE_H @@ -94,6 +94,7 @@ struct riscv_hwprobe { #define RISCV_HWPROBE_MISALIGNED_VECTOR_SLOW 2 #define RISCV_HWPROBE_MISALIGNED_VECTOR_FAST 3 #define RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED 4 +#define RISCV_HWPROBE_KEY_VENDOR_EXT_THEAD_0 11 /* Increase RISCV_HWPROBE_MAX_KEY when adding items. 
*/ /* Flags */ diff --git a/arch/riscv/include/uapi/asm/vendor/thead.h b/arch/riscv/include/uapi/asm/vendor/thead.h new file mode 100644 index 000000000000..43790ebe5faf --- /dev/null +++ b/arch/riscv/include/uapi/asm/vendor/thead.h @@ -0,0 +1,3 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + +#define RISCV_HWPROBE_VENDOR_EXT_XTHEADVECTOR (1 << 0) diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile index 063d1faf5a53..8d186bfced45 100644 --- a/arch/riscv/kernel/Makefile +++ b/arch/riscv/kernel/Makefile @@ -123,3 +123,5 @@ obj-$(CONFIG_COMPAT) += compat_vdso/ obj-$(CONFIG_64BIT) += pi/ obj-$(CONFIG_ACPI) += acpi.o obj-$(CONFIG_ACPI_NUMA) += acpi_numa.o + +obj-$(CONFIG_GENERIC_CPU_VULNERABILITIES) += bugs.o diff --git a/arch/riscv/kernel/bugs.c b/arch/riscv/kernel/bugs.c new file mode 100644 index 000000000000..3655fe7d678c --- /dev/null +++ b/arch/riscv/kernel/bugs.c @@ -0,0 +1,60 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2024 Rivos Inc. + */ + +#include <linux/cpu.h> +#include <linux/device.h> +#include <linux/sprintf.h> + +#include <asm/bugs.h> +#include <asm/vendor_extensions/thead.h> + +static enum mitigation_state ghostwrite_state; + +void ghostwrite_set_vulnerable(void) +{ + ghostwrite_state = VULNERABLE; +} + +/* + * Vendor extension alternatives will use the value set at the time of boot + * alternative patching, thus this must be called before boot alternatives are + * patched (and after extension probing) to be effective. + * + * Returns true if mitgated, false otherwise. + */ +bool ghostwrite_enable_mitigation(void) +{ + if (IS_ENABLED(CONFIG_RISCV_ISA_XTHEADVECTOR) && + ghostwrite_state == VULNERABLE && !cpu_mitigations_off()) { + disable_xtheadvector(); + ghostwrite_state = MITIGATED; + return true; + } + + return false; +} + +enum mitigation_state ghostwrite_get_state(void) +{ + return ghostwrite_state; +} + +ssize_t cpu_show_ghostwrite(struct device *dev, struct device_attribute *attr, char *buf) +{ + if (IS_ENABLED(CONFIG_RISCV_ISA_XTHEADVECTOR)) { + switch (ghostwrite_state) { + case UNAFFECTED: + return sprintf(buf, "Not affected\n"); + case MITIGATED: + return sprintf(buf, "Mitigation: xtheadvector disabled\n"); + case VULNERABLE: + fallthrough; + default: + return sprintf(buf, "Vulnerable\n"); + } + } else { + return sprintf(buf, "Not affected\n"); + } +} diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c index c0916ed318c2..c6ba750536c3 100644 --- a/arch/riscv/kernel/cpufeature.c +++ b/arch/riscv/kernel/cpufeature.c @@ -17,6 +17,7 @@ #include <linux/of.h> #include <asm/acpi.h> #include <asm/alternative.h> +#include <asm/bugs.h> #include <asm/cacheflush.h> #include <asm/cpufeature.h> #include <asm/hwcap.h> @@ -26,6 +27,7 @@ #include <asm/sbi.h> #include <asm/vector.h> #include <asm/vendor_extensions.h> +#include <asm/vendor_extensions/thead.h> #define NUM_ALPHA_EXTS ('z' - 'a' + 1) @@ -39,6 +41,8 @@ static DECLARE_BITMAP(riscv_isa, RISCV_ISA_EXT_MAX) __read_mostly; /* Per-cpu ISA extensions. 
*/ struct riscv_isainfo hart_isa[NR_CPUS]; +u32 thead_vlenb_of; + /** * riscv_isa_extension_base() - Get base extension word * @@ -791,9 +795,50 @@ static void __init riscv_fill_vendor_ext_list(int cpu) } } +static int has_thead_homogeneous_vlenb(void) +{ + int cpu; + u32 prev_vlenb = 0; + u32 vlenb; + + /* Ignore thead,vlenb property if xtheavector is not enabled in the kernel */ + if (!IS_ENABLED(CONFIG_RISCV_ISA_XTHEADVECTOR)) + return 0; + + for_each_possible_cpu(cpu) { + struct device_node *cpu_node; + + cpu_node = of_cpu_device_node_get(cpu); + if (!cpu_node) { + pr_warn("Unable to find cpu node\n"); + return -ENOENT; + } + + if (of_property_read_u32(cpu_node, "thead,vlenb", &vlenb)) { + of_node_put(cpu_node); + + if (prev_vlenb) + return -ENOENT; + continue; + } + + if (prev_vlenb && vlenb != prev_vlenb) { + of_node_put(cpu_node); + return -ENOENT; + } + + prev_vlenb = vlenb; + of_node_put(cpu_node); + } + + thead_vlenb_of = vlenb; + return 0; +} + static int __init riscv_fill_hwcap_from_ext_list(unsigned long *isa2hwcap) { unsigned int cpu; + bool mitigated; for_each_possible_cpu(cpu) { unsigned long this_hwcap = 0; @@ -844,6 +889,17 @@ static int __init riscv_fill_hwcap_from_ext_list(unsigned long *isa2hwcap) riscv_fill_vendor_ext_list(cpu); } + /* + * Execute ghostwrite mitigation immediately after detecting extensions + * to disable xtheadvector if necessary. + */ + mitigated = ghostwrite_enable_mitigation(); + + if (!mitigated && has_xtheadvector_no_alternatives() && has_thead_homogeneous_vlenb() < 0) { + pr_warn("Unsupported heterogeneous vlenb detected, vector extension disabled.\n"); + disable_xtheadvector(); + } + if (bitmap_empty(riscv_isa, RISCV_ISA_EXT_MAX)) return -ENOENT; @@ -896,7 +952,8 @@ void __init riscv_fill_hwcap(void) elf_hwcap &= ~COMPAT_HWCAP_ISA_F; } - if (__riscv_isa_extension_available(NULL, RISCV_ISA_EXT_ZVE32X)) { + if (__riscv_isa_extension_available(NULL, RISCV_ISA_EXT_ZVE32X) || + has_xtheadvector_no_alternatives()) { /* * This cannot fail when called on the boot hart */ diff --git a/arch/riscv/kernel/kernel_mode_vector.c b/arch/riscv/kernel/kernel_mode_vector.c index 6afe80c7f03a..99972a48e86b 100644 --- a/arch/riscv/kernel/kernel_mode_vector.c +++ b/arch/riscv/kernel/kernel_mode_vector.c @@ -143,7 +143,7 @@ static int riscv_v_start_kernel_context(bool *is_nested) /* Transfer the ownership of V from user to kernel, then save */ riscv_v_start(RISCV_PREEMPT_V | RISCV_PREEMPT_V_DIRTY); - if ((task_pt_regs(current)->status & SR_VS) == SR_VS_DIRTY) { + if (__riscv_v_vstate_check(task_pt_regs(current)->status, DIRTY)) { uvstate = ¤t->thread.vstate; __riscv_v_vstate_save(uvstate, uvstate->datap); } @@ -160,7 +160,7 @@ asmlinkage void riscv_v_context_nesting_start(struct pt_regs *regs) return; depth = riscv_v_ctx_get_depth(); - if (depth == 0 && (regs->status & SR_VS) == SR_VS_DIRTY) + if (depth == 0 && __riscv_v_vstate_check(regs->status, DIRTY)) riscv_preempt_v_set_dirty(); riscv_v_ctx_depth_inc(); @@ -208,7 +208,7 @@ void kernel_vector_begin(void) { bool nested = false; - if (WARN_ON(!has_vector())) + if (WARN_ON(!(has_vector() || has_xtheadvector()))) return; BUG_ON(!may_use_simd()); @@ -236,7 +236,7 @@ EXPORT_SYMBOL_GPL(kernel_vector_begin); */ void kernel_vector_end(void) { - if (WARN_ON(!has_vector())) + if (WARN_ON(!(has_vector() || has_xtheadvector()))) return; riscv_v_disable(); diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c index 7891294abf49..7c244de77180 100644 --- a/arch/riscv/kernel/process.c +++ 
b/arch/riscv/kernel/process.c @@ -190,7 +190,7 @@ void flush_thread(void) void arch_release_task_struct(struct task_struct *tsk) { /* Free the vector context of datap. */ - if (has_vector()) + if (has_vector() || has_xtheadvector()) riscv_v_thread_free(tsk); } @@ -240,7 +240,7 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) p->thread.s[0] = 0; } p->thread.riscv_v_flags = 0; - if (has_vector()) + if (has_vector() || has_xtheadvector()) riscv_v_thread_alloc(p); p->thread.ra = (unsigned long)ret_from_fork; p->thread.sp = (unsigned long)childregs; /* kernel sp */ diff --git a/arch/riscv/kernel/signal.c b/arch/riscv/kernel/signal.c index dcd282419456..94e905eea1de 100644 --- a/arch/riscv/kernel/signal.c +++ b/arch/riscv/kernel/signal.c @@ -189,7 +189,7 @@ static long restore_sigcontext(struct pt_regs *regs, return 0; case RISCV_V_MAGIC: - if (!has_vector() || !riscv_v_vstate_query(regs) || + if (!(has_vector() || has_xtheadvector()) || !riscv_v_vstate_query(regs) || size != riscv_v_sc_size) return -EINVAL; @@ -211,7 +211,7 @@ static size_t get_rt_frame_size(bool cal_all) frame_size = sizeof(*frame); - if (has_vector()) { + if (has_vector() || has_xtheadvector()) { if (cal_all || riscv_v_vstate_query(task_pt_regs(current))) total_context_size += riscv_v_sc_size; } @@ -284,7 +284,7 @@ static long setup_sigcontext(struct rt_sigframe __user *frame, if (has_fpu()) err |= save_fp_state(regs, &sc->sc_fpregs); /* Save the vector state. */ - if (has_vector() && riscv_v_vstate_query(regs)) + if ((has_vector() || has_xtheadvector()) && riscv_v_vstate_query(regs)) err |= save_v_state(regs, (void __user **)&sc_ext_ptr); /* Write zero to fp-reserved space and check it on restore_sigcontext */ err |= __put_user(0, &sc->sc_extdesc.reserved); diff --git a/arch/riscv/kernel/sys_hwprobe.c b/arch/riscv/kernel/sys_hwprobe.c index cb93adfffc48..bcd3b816306c 100644 --- a/arch/riscv/kernel/sys_hwprobe.c +++ b/arch/riscv/kernel/sys_hwprobe.c @@ -15,6 +15,7 @@ #include <asm/uaccess.h> #include <asm/unistd.h> #include <asm/vector.h> +#include <asm/vendor_extensions/thead_hwprobe.h> #include <vdso/vsyscall.h> @@ -286,6 +287,10 @@ static void hwprobe_one_pair(struct riscv_hwprobe *pair, pair->value = riscv_timebase; break; + case RISCV_HWPROBE_KEY_VENDOR_EXT_THEAD_0: + hwprobe_isa_vendor_ext_thead_0(pair, cpus); + break; + /* * For forward compatibility, unknown keys don't fail the whole * call, but get their element key set to -1 and value set to 0 diff --git a/arch/riscv/kernel/vector.c b/arch/riscv/kernel/vector.c index d022b028ac3f..184f780c932d 100644 --- a/arch/riscv/kernel/vector.c +++ b/arch/riscv/kernel/vector.c @@ -33,7 +33,17 @@ int riscv_v_setup_vsize(void) { unsigned long this_vsize; - /* There are 32 vector registers with vlenb length. */ + /* + * There are 32 vector registers with vlenb length. + * + * If the thead,vlenb property was provided by the firmware, use that + * instead of probing the CSRs. 
+ */ + if (thead_vlenb_of) { + riscv_v_vsize = thead_vlenb_of * 32; + return 0; + } + riscv_v_enable(); this_vsize = csr_read(CSR_VLENB) * 32; riscv_v_disable(); @@ -53,7 +63,7 @@ int riscv_v_setup_vsize(void) void __init riscv_v_setup_ctx_cache(void) { - if (!has_vector()) + if (!(has_vector() || has_xtheadvector())) return; riscv_v_user_cachep = kmem_cache_create_usercopy("riscv_vector_ctx", @@ -173,7 +183,7 @@ bool riscv_v_first_use_handler(struct pt_regs *regs) u32 __user *epc = (u32 __user *)regs->epc; u32 insn = (u32)regs->badaddr; - if (!has_vector()) + if (!(has_vector() || has_xtheadvector())) return false; /* Do not handle if V is not supported, or disabled */ @@ -216,7 +226,7 @@ void riscv_v_vstate_ctrl_init(struct task_struct *tsk) bool inherit; int cur, next; - if (!has_vector()) + if (!(has_vector() || has_xtheadvector())) return; next = riscv_v_ctrl_get_next(tsk); @@ -238,7 +248,7 @@ void riscv_v_vstate_ctrl_init(struct task_struct *tsk) long riscv_v_vstate_ctrl_get_current(void) { - if (!has_vector()) + if (!(has_vector() || has_xtheadvector())) return -EINVAL; return current->thread.vstate_ctrl & PR_RISCV_V_VSTATE_CTRL_MASK; @@ -249,7 +259,7 @@ long riscv_v_vstate_ctrl_set_current(unsigned long arg) bool inherit; int cur, next; - if (!has_vector()) + if (!(has_vector() || has_xtheadvector())) return -EINVAL; if (arg & ~PR_RISCV_V_VSTATE_CTRL_MASK) @@ -299,7 +309,7 @@ static const struct ctl_table riscv_v_default_vstate_table[] = { static int __init riscv_v_sysctl_init(void) { - if (has_vector()) + if (has_vector() || has_xtheadvector()) if (!register_sysctl("abi", riscv_v_default_vstate_table)) return -EINVAL; return 0; @@ -309,7 +319,7 @@ static int __init riscv_v_sysctl_init(void) static int __init riscv_v_sysctl_init(void) { return 0; } #endif /* ! 
CONFIG_SYSCTL */ -static int riscv_v_init(void) +static int __init riscv_v_init(void) { return riscv_v_sysctl_init(); } diff --git a/arch/riscv/kernel/vendor_extensions.c b/arch/riscv/kernel/vendor_extensions.c index a8126d118341..a31ff84740eb 100644 --- a/arch/riscv/kernel/vendor_extensions.c +++ b/arch/riscv/kernel/vendor_extensions.c @@ -6,6 +6,7 @@ #include <asm/vendorid_list.h> #include <asm/vendor_extensions.h> #include <asm/vendor_extensions/andes.h> +#include <asm/vendor_extensions/thead.h> #include <linux/array_size.h> #include <linux/types.h> @@ -14,6 +15,9 @@ struct riscv_isa_vendor_ext_data_list *riscv_isa_vendor_ext_list[] = { #ifdef CONFIG_RISCV_ISA_VENDOR_EXT_ANDES &riscv_isa_vendor_ext_list_andes, #endif +#ifdef CONFIG_RISCV_ISA_VENDOR_EXT_THEAD + &riscv_isa_vendor_ext_list_thead, +#endif }; const size_t riscv_isa_vendor_ext_list_size = ARRAY_SIZE(riscv_isa_vendor_ext_list); @@ -41,6 +45,12 @@ bool __riscv_isa_vendor_extension_available(int cpu, unsigned long vendor, unsig cpu_bmap = riscv_isa_vendor_ext_list_andes.per_hart_isa_bitmap; break; #endif + #ifdef CONFIG_RISCV_ISA_VENDOR_EXT_THEAD + case THEAD_VENDOR_ID: + bmap = &riscv_isa_vendor_ext_list_thead.all_harts_isa_bitmap; + cpu_bmap = riscv_isa_vendor_ext_list_thead.per_hart_isa_bitmap; + break; + #endif default: return false; } diff --git a/arch/riscv/kernel/vendor_extensions/Makefile b/arch/riscv/kernel/vendor_extensions/Makefile index 6a61aed944f1..866414c81a9f 100644 --- a/arch/riscv/kernel/vendor_extensions/Makefile +++ b/arch/riscv/kernel/vendor_extensions/Makefile @@ -1,3 +1,5 @@ # SPDX-License-Identifier: GPL-2.0-only obj-$(CONFIG_RISCV_ISA_VENDOR_EXT_ANDES) += andes.o +obj-$(CONFIG_RISCV_ISA_VENDOR_EXT_THEAD) += thead.o +obj-$(CONFIG_RISCV_ISA_VENDOR_EXT_THEAD) += thead_hwprobe.o diff --git a/arch/riscv/kernel/vendor_extensions/thead.c b/arch/riscv/kernel/vendor_extensions/thead.c new file mode 100644 index 000000000000..519dbf70710a --- /dev/null +++ b/arch/riscv/kernel/vendor_extensions/thead.c @@ -0,0 +1,29 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include <asm/cpufeature.h> +#include <asm/vendor_extensions.h> +#include <asm/vendor_extensions/thead.h> + +#include <linux/array_size.h> +#include <linux/cpumask.h> +#include <linux/types.h> + +/* All T-Head vendor extensions supported in Linux */ +static const struct riscv_isa_ext_data riscv_isa_vendor_ext_thead[] = { + __RISCV_ISA_EXT_DATA(xtheadvector, RISCV_ISA_VENDOR_EXT_XTHEADVECTOR), +}; + +struct riscv_isa_vendor_ext_data_list riscv_isa_vendor_ext_list_thead = { + .ext_data_count = ARRAY_SIZE(riscv_isa_vendor_ext_thead), + .ext_data = riscv_isa_vendor_ext_thead, +}; + +void disable_xtheadvector(void) +{ + int cpu; + + for_each_possible_cpu(cpu) + clear_bit(RISCV_ISA_VENDOR_EXT_XTHEADVECTOR, riscv_isa_vendor_ext_list_thead.per_hart_isa_bitmap[cpu].isa); + + clear_bit(RISCV_ISA_VENDOR_EXT_XTHEADVECTOR, riscv_isa_vendor_ext_list_thead.all_harts_isa_bitmap.isa); +} diff --git a/arch/riscv/kernel/vendor_extensions/thead_hwprobe.c b/arch/riscv/kernel/vendor_extensions/thead_hwprobe.c new file mode 100644 index 000000000000..2eba34011786 --- /dev/null +++ b/arch/riscv/kernel/vendor_extensions/thead_hwprobe.c @@ -0,0 +1,19 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include <asm/vendor_extensions/thead.h> +#include <asm/vendor_extensions/thead_hwprobe.h> +#include <asm/vendor_extensions/vendor_hwprobe.h> + +#include <linux/cpumask.h> +#include <linux/types.h> + +#include <uapi/asm/hwprobe.h> +#include <uapi/asm/vendor/thead.h> + +void 
hwprobe_isa_vendor_ext_thead_0(struct riscv_hwprobe *pair, const struct cpumask *cpus) +{ + VENDOR_EXTENSION_SUPPORTED(pair, cpus, + riscv_isa_vendor_ext_list_thead.per_hart_isa_bitmap, { + VENDOR_EXT_KEY(XTHEADVECTOR); + }); +} diff --git a/arch/riscv/mm/fault.c b/arch/riscv/mm/fault.c index a9f2b4af8f3f..0194324a0c50 100644 --- a/arch/riscv/mm/fault.c +++ b/arch/riscv/mm/fault.c @@ -22,6 +22,57 @@ #include "../kernel/head.h" +static void show_pte(unsigned long addr) +{ + pgd_t *pgdp, pgd; + p4d_t *p4dp, p4d; + pud_t *pudp, pud; + pmd_t *pmdp, pmd; + pte_t *ptep, pte; + struct mm_struct *mm = current->mm; + + if (!mm) + mm = &init_mm; + + pr_alert("Current %s pgtable: %luK pagesize, %d-bit VAs, pgdp=0x%016llx\n", + current->comm, PAGE_SIZE / SZ_1K, VA_BITS, + mm == &init_mm ? (u64)__pa_symbol(mm->pgd) : virt_to_phys(mm->pgd)); + + pgdp = pgd_offset(mm, addr); + pgd = pgdp_get(pgdp); + pr_alert("[%016lx] pgd=%016lx", addr, pgd_val(pgd)); + if (pgd_none(pgd) || pgd_bad(pgd) || pgd_leaf(pgd)) + goto out; + + p4dp = p4d_offset(pgdp, addr); + p4d = p4dp_get(p4dp); + pr_cont(", p4d=%016lx", p4d_val(p4d)); + if (p4d_none(p4d) || p4d_bad(p4d) || p4d_leaf(p4d)) + goto out; + + pudp = pud_offset(p4dp, addr); + pud = pudp_get(pudp); + pr_cont(", pud=%016lx", pud_val(pud)); + if (pud_none(pud) || pud_bad(pud) || pud_leaf(pud)) + goto out; + + pmdp = pmd_offset(pudp, addr); + pmd = pmdp_get(pmdp); + pr_cont(", pmd=%016lx", pmd_val(pmd)); + if (pmd_none(pmd) || pmd_bad(pmd) || pmd_leaf(pmd)) + goto out; + + ptep = pte_offset_map(pmdp, addr); + if (!ptep) + goto out; + + pte = ptep_get(ptep); + pr_cont(", pte=%016lx", pte_val(pte)); + pte_unmap(ptep); +out: + pr_cont("\n"); +} + static void die_kernel_fault(const char *msg, unsigned long addr, struct pt_regs *regs) { @@ -31,6 +82,7 @@ static void die_kernel_fault(const char *msg, unsigned long addr, addr); bust_spinlocks(0); + show_pte(addr); die(regs, "Oops"); make_task_dead(SIGKILL); } diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index 722178ae3488..15b2eda4c364 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -268,8 +268,12 @@ static void __init setup_bootmem(void) */ if (IS_ENABLED(CONFIG_64BIT) && IS_ENABLED(CONFIG_MMU)) { max_mapped_addr = __pa(PAGE_OFFSET) + KERN_VIRT_SIZE; - memblock_cap_memory_range(phys_ram_base, - max_mapped_addr - phys_ram_base); + if (memblock_end_of_DRAM() > max_mapped_addr) { + memblock_cap_memory_range(phys_ram_base, + max_mapped_addr - phys_ram_base); + pr_warn("Physical memory overflows the linear mapping size: region above %pa removed", + &max_mapped_addr); + } } /* diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 6e9545d8b0c7..9c9ec08d78c7 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -52,13 +52,19 @@ config KASAN_SHADOW_OFFSET depends on KASAN default 0x1C000000000000 -config GCC_ASM_FLAG_OUTPUT_BROKEN +config CC_ASM_FLAG_OUTPUT_BROKEN def_bool CC_IS_GCC && GCC_VERSION < 140200 help GCC versions before 14.2.0 may die with an internal compiler error in some configurations if flag output operands are used within inline assemblies. +config CC_HAS_ASM_AOR_FORMAT_FLAGS + def_bool !(CC_IS_CLANG && CLANG_VERSION < 190100) + help + Clang versions before 19.1.0 do not support A, + O, and R inline assembly format flags. 
+ config S390 def_bool y # @@ -72,6 +78,7 @@ config S390 select ARCH_ENABLE_MEMORY_HOTPLUG if SPARSEMEM select ARCH_ENABLE_MEMORY_HOTREMOVE select ARCH_ENABLE_SPLIT_PMD_PTLOCK if PGTABLE_LEVELS > 2 + select ARCH_HAS_CPU_FINALIZE_INIT select ARCH_HAS_CRC32 select ARCH_HAS_CURRENT_STACK_POINTER select ARCH_HAS_DEBUG_VIRTUAL diff --git a/arch/s390/Makefile b/arch/s390/Makefile index 3f25498dac65..5fae311203c2 100644 --- a/arch/s390/Makefile +++ b/arch/s390/Makefile @@ -22,7 +22,7 @@ KBUILD_AFLAGS_DECOMPRESSOR := $(CLANG_FLAGS) -m64 -D__ASSEMBLY__ ifndef CONFIG_AS_IS_LLVM KBUILD_AFLAGS_DECOMPRESSOR += $(if $(CONFIG_DEBUG_INFO),$(aflags_dwarf)) endif -KBUILD_CFLAGS_DECOMPRESSOR := $(CLANG_FLAGS) -m64 -O2 -mpacked-stack +KBUILD_CFLAGS_DECOMPRESSOR := $(CLANG_FLAGS) -m64 -O2 -mpacked-stack -std=gnu11 KBUILD_CFLAGS_DECOMPRESSOR += -DDISABLE_BRANCH_PROFILING -D__NO_FORTIFY KBUILD_CFLAGS_DECOMPRESSOR += -D__DECOMPRESSOR KBUILD_CFLAGS_DECOMPRESSOR += -fno-delete-null-pointer-checks -msoft-float -mbackchain diff --git a/arch/s390/Makefile.postlink b/arch/s390/Makefile.postlink index df82f5410769..1ae5478cd6ac 100644 --- a/arch/s390/Makefile.postlink +++ b/arch/s390/Makefile.postlink @@ -11,6 +11,7 @@ __archpost: -include include/config/auto.conf include $(srctree)/scripts/Kbuild.include +include $(srctree)/scripts/Makefile.lib CMD_RELOCS=arch/s390/tools/relocs OUT_RELOCS = arch/s390/boot @@ -19,11 +20,6 @@ quiet_cmd_relocs = RELOCS $(OUT_RELOCS)/relocs.S mkdir -p $(OUT_RELOCS); \ $(CMD_RELOCS) $@ > $(OUT_RELOCS)/relocs.S -quiet_cmd_strip_relocs = RSTRIP $@ - cmd_strip_relocs = \ - $(OBJCOPY) --remove-section='.rel.*' --remove-section='.rel__*' \ - --remove-section='.rela.*' --remove-section='.rela__*' $@ - vmlinux: FORCE $(call cmd,relocs) $(call cmd,strip_relocs) diff --git a/arch/s390/boot/als.c b/arch/s390/boot/als.c index 11e0c3d5dbc8..79afb5fa7f1f 100644 --- a/arch/s390/boot/als.c +++ b/arch/s390/boot/als.c @@ -46,7 +46,7 @@ void print_missing_facilities(void) * z/VM adds a four character prefix. 
*/ if (strlen(als_str) > 70) { - boot_printk("%s\n", als_str); + boot_emerg("%s\n", als_str); *als_str = '\0'; } u16_to_decimal(val_str, i * BITS_PER_LONG + j); @@ -54,7 +54,7 @@ void print_missing_facilities(void) first = 0; } } - boot_printk("%s\n", als_str); + boot_emerg("%s\n", als_str); } static void facility_mismatch(void) @@ -62,10 +62,10 @@ static void facility_mismatch(void) struct cpuid id; get_cpu_id(&id); - boot_printk("The Linux kernel requires more recent processor hardware\n"); - boot_printk("Detected machine-type number: %4x\n", id.machine); + boot_emerg("The Linux kernel requires more recent processor hardware\n"); + boot_emerg("Detected machine-type number: %4x\n", id.machine); print_missing_facilities(); - boot_printk("See Principles of Operations for facility bits\n"); + boot_emerg("See Principles of Operations for facility bits\n"); disabled_wait(); } diff --git a/arch/s390/boot/boot.h b/arch/s390/boot/boot.h index 56244fe78182..69f261566a64 100644 --- a/arch/s390/boot/boot.h +++ b/arch/s390/boot/boot.h @@ -8,6 +8,7 @@ #ifndef __ASSEMBLY__ +#include <linux/printk.h> #include <asm/physmem_info.h> struct machine_info { @@ -47,13 +48,16 @@ void physmem_set_usable_limit(unsigned long limit); void physmem_reserve(enum reserved_range_type type, unsigned long addr, unsigned long size); void physmem_free(enum reserved_range_type type); /* for continuous/multiple allocations per type */ -unsigned long physmem_alloc_top_down(enum reserved_range_type type, unsigned long size, - unsigned long align); +unsigned long physmem_alloc_or_die(enum reserved_range_type type, unsigned long size, + unsigned long align); +unsigned long physmem_alloc(enum reserved_range_type type, unsigned long size, + unsigned long align, bool die_on_oom); /* for single allocations, 1 per type */ unsigned long physmem_alloc_range(enum reserved_range_type type, unsigned long size, unsigned long align, unsigned long min, unsigned long max, bool die_on_oom); unsigned long get_physmem_alloc_pos(void); +void dump_physmem_reserved(void); bool ipl_report_certs_intersects(unsigned long addr, unsigned long size, unsigned long *intersection_start); bool is_ipl_block_dump(void); @@ -69,12 +73,28 @@ void print_pgm_check_info(void); unsigned long randomize_within_range(unsigned long size, unsigned long align, unsigned long min, unsigned long max); void setup_vmem(unsigned long kernel_start, unsigned long kernel_end, unsigned long asce_limit); -void __printf(1, 2) boot_printk(const char *fmt, ...); +int __printf(1, 2) boot_printk(const char *fmt, ...); void print_stacktrace(unsigned long sp); void error(char *m); int get_random(unsigned long limit, unsigned long *value); +void boot_rb_dump(void); + +#ifndef boot_fmt +#define boot_fmt(fmt) fmt +#endif + +#define boot_emerg(fmt, ...) boot_printk(KERN_EMERG boot_fmt(fmt), ##__VA_ARGS__) +#define boot_alert(fmt, ...) boot_printk(KERN_ALERT boot_fmt(fmt), ##__VA_ARGS__) +#define boot_crit(fmt, ...) boot_printk(KERN_CRIT boot_fmt(fmt), ##__VA_ARGS__) +#define boot_err(fmt, ...) boot_printk(KERN_ERR boot_fmt(fmt), ##__VA_ARGS__) +#define boot_warn(fmt, ...) boot_printk(KERN_WARNING boot_fmt(fmt), ##__VA_ARGS__) +#define boot_notice(fmt, ...) boot_printk(KERN_NOTICE boot_fmt(fmt), ##__VA_ARGS__) +#define boot_info(fmt, ...) boot_printk(KERN_INFO boot_fmt(fmt), ##__VA_ARGS__) +#define boot_debug(fmt, ...) 
boot_printk(KERN_DEBUG boot_fmt(fmt), ##__VA_ARGS__) extern struct machine_info machine; +extern int boot_console_loglevel; +extern bool boot_ignore_loglevel; /* Symbols defined by linker scripts */ extern const char kernel_version[]; diff --git a/arch/s390/boot/decompressor.c b/arch/s390/boot/decompressor.c index f478e8e9cbda..03500b9d9fb9 100644 --- a/arch/s390/boot/decompressor.c +++ b/arch/s390/boot/decompressor.c @@ -9,6 +9,7 @@ #include <linux/kernel.h> #include <linux/string.h> +#include <asm/boot_data.h> #include <asm/page.h> #include "decompressor.h" #include "boot.h" @@ -63,6 +64,15 @@ static unsigned long free_mem_end_ptr = (unsigned long) _end + BOOT_HEAP_SIZE; #include "../../../../lib/decompress_unzstd.c" #endif +static void decompress_error(char *m) +{ + if (bootdebug) + boot_rb_dump(); + boot_emerg("Decompression error: %s\n", m); + boot_emerg(" -- System halted\n"); + disabled_wait(); +} + unsigned long mem_safe_offset(void) { return ALIGN(free_mem_end_ptr, PAGE_SIZE); @@ -71,5 +81,5 @@ unsigned long mem_safe_offset(void) void deploy_kernel(void *output) { __decompress(_compressed_start, _compressed_end - _compressed_start, - NULL, NULL, output, vmlinux.image_size, NULL, error); + NULL, NULL, output, vmlinux.image_size, NULL, decompress_error); } diff --git a/arch/s390/boot/ipl_parm.c b/arch/s390/boot/ipl_parm.c index 557462e62cd7..d3731f2983b7 100644 --- a/arch/s390/boot/ipl_parm.c +++ b/arch/s390/boot/ipl_parm.c @@ -215,7 +215,7 @@ static void check_cleared_facilities(void) for (i = 0; i < ARRAY_SIZE(als); i++) { if ((stfle_fac_list[i] & als[i]) != als[i]) { - boot_printk("Warning: The Linux kernel requires facilities cleared via command line option\n"); + boot_emerg("The Linux kernel requires facilities cleared via command line option\n"); print_missing_facilities(); break; } @@ -313,5 +313,23 @@ void parse_boot_command_line(void) #endif if (!strcmp(param, "relocate_lowcore") && test_facility(193)) relocate_lowcore = 1; + if (!strcmp(param, "earlyprintk")) + boot_earlyprintk = true; + if (!strcmp(param, "debug")) + boot_console_loglevel = CONSOLE_LOGLEVEL_DEBUG; + if (!strcmp(param, "bootdebug")) { + bootdebug = true; + if (val) + strncpy(bootdebug_filter, val, sizeof(bootdebug_filter) - 1); + } + if (!strcmp(param, "quiet")) + boot_console_loglevel = CONSOLE_LOGLEVEL_QUIET; + if (!strcmp(param, "ignore_loglevel")) + boot_ignore_loglevel = true; + if (!strcmp(param, "loglevel")) { + boot_console_loglevel = simple_strtoull(val, NULL, 10); + if (boot_console_loglevel < CONSOLE_LOGLEVEL_MIN) + boot_console_loglevel = CONSOLE_LOGLEVEL_MIN; + } } } diff --git a/arch/s390/boot/ipl_report.c b/arch/s390/boot/ipl_report.c index d00898852a88..f73cd757a5f7 100644 --- a/arch/s390/boot/ipl_report.c +++ b/arch/s390/boot/ipl_report.c @@ -30,7 +30,6 @@ static unsigned long get_cert_comp_list_size(void) { struct ipl_rb_certificate_entry *cert; struct ipl_rb_component_entry *comp; - size_t size; /* * Find the length for the IPL report boot data @@ -155,7 +154,7 @@ void save_ipl_cert_comp_list(void) return; size = get_cert_comp_list_size(); - early_ipl_comp_list_addr = physmem_alloc_top_down(RR_CERT_COMP_LIST, size, sizeof(int)); + early_ipl_comp_list_addr = physmem_alloc_or_die(RR_CERT_COMP_LIST, size, sizeof(int)); ipl_cert_list_addr = early_ipl_comp_list_addr + early_ipl_comp_list_size; copy_components_bootdata(); diff --git a/arch/s390/boot/kaslr.c b/arch/s390/boot/kaslr.c index f864d2bff775..941f4c9e27cc 100644 --- a/arch/s390/boot/kaslr.c +++ b/arch/s390/boot/kaslr.c @@ -32,7 +32,7 
@@ struct prng_parm { static int check_prng(void) { if (!cpacf_query_func(CPACF_KMC, CPACF_KMC_PRNG)) { - boot_printk("KASLR disabled: CPU has no PRNG\n"); + boot_warn("KASLR disabled: CPU has no PRNG\n"); return 0; } if (cpacf_query_func(CPACF_PRNO, CPACF_PRNO_TRNG)) @@ -168,7 +168,7 @@ static unsigned long iterate_valid_positions(unsigned long size, unsigned long a * cannot have chains. * * On the other hand, "dynamic" or "repetitive" allocations are done via - * physmem_alloc_top_down(). These allocations are tightly packed together + * physmem_alloc_or_die(). These allocations are tightly packed together * top down from the end of online memory. physmem_alloc_pos represents * current position where those allocations start. * diff --git a/arch/s390/boot/pgm_check_info.c b/arch/s390/boot/pgm_check_info.c index 5abe59fb3bc0..633f11600aab 100644 --- a/arch/s390/boot/pgm_check_info.c +++ b/arch/s390/boot/pgm_check_info.c @@ -17,13 +17,14 @@ void print_stacktrace(unsigned long sp) (unsigned long)_stack_end }; bool first = true; - boot_printk("Call Trace:\n"); + boot_emerg("Call Trace:\n"); while (!(sp & 0x7) && on_stack(&boot_stack, sp, sizeof(struct stack_frame))) { struct stack_frame *sf = (struct stack_frame *)sp; - boot_printk(first ? "(sp:%016lx [<%016lx>] %pS)\n" : - " sp:%016lx [<%016lx>] %pS\n", - sp, sf->gprs[8], (void *)sf->gprs[8]); + if (first) + boot_emerg("(sp:%016lx [<%016lx>] %pS)\n", sp, sf->gprs[8], (void *)sf->gprs[8]); + else + boot_emerg(" sp:%016lx [<%016lx>] %pS\n", sp, sf->gprs[8], (void *)sf->gprs[8]); if (sf->back_chain <= sp) break; sp = sf->back_chain; @@ -36,30 +37,30 @@ void print_pgm_check_info(void) unsigned long *gpregs = (unsigned long *)get_lowcore()->gpregs_save_area; struct psw_bits *psw = &psw_bits(get_lowcore()->psw_save_area); - boot_printk("Linux version %s\n", kernel_version); + if (bootdebug) + boot_rb_dump(); + boot_emerg("Linux version %s\n", kernel_version); if (!is_prot_virt_guest() && early_command_line[0]) - boot_printk("Kernel command line: %s\n", early_command_line); - boot_printk("Kernel fault: interruption code %04x ilc:%x\n", - get_lowcore()->pgm_code, get_lowcore()->pgm_ilc >> 1); + boot_emerg("Kernel command line: %s\n", early_command_line); + boot_emerg("Kernel fault: interruption code %04x ilc:%d\n", + get_lowcore()->pgm_code, get_lowcore()->pgm_ilc >> 1); if (kaslr_enabled()) { - boot_printk("Kernel random base: %lx\n", __kaslr_offset); - boot_printk("Kernel random base phys: %lx\n", __kaslr_offset_phys); + boot_emerg("Kernel random base: %lx\n", __kaslr_offset); + boot_emerg("Kernel random base phys: %lx\n", __kaslr_offset_phys); } - boot_printk("PSW : %016lx %016lx (%pS)\n", - get_lowcore()->psw_save_area.mask, - get_lowcore()->psw_save_area.addr, - (void *)get_lowcore()->psw_save_area.addr); - boot_printk( - " R:%x T:%x IO:%x EX:%x Key:%x M:%x W:%x P:%x AS:%x CC:%x PM:%x RI:%x EA:%x\n", - psw->per, psw->dat, psw->io, psw->ext, psw->key, psw->mcheck, - psw->wait, psw->pstate, psw->as, psw->cc, psw->pm, psw->ri, - psw->eaba); - boot_printk("GPRS: %016lx %016lx %016lx %016lx\n", gpregs[0], gpregs[1], gpregs[2], gpregs[3]); - boot_printk(" %016lx %016lx %016lx %016lx\n", gpregs[4], gpregs[5], gpregs[6], gpregs[7]); - boot_printk(" %016lx %016lx %016lx %016lx\n", gpregs[8], gpregs[9], gpregs[10], gpregs[11]); - boot_printk(" %016lx %016lx %016lx %016lx\n", gpregs[12], gpregs[13], gpregs[14], gpregs[15]); + boot_emerg("PSW : %016lx %016lx (%pS)\n", + get_lowcore()->psw_save_area.mask, + get_lowcore()->psw_save_area.addr, + (void 
*)get_lowcore()->psw_save_area.addr); + boot_emerg(" R:%x T:%x IO:%x EX:%x Key:%x M:%x W:%x P:%x AS:%x CC:%x PM:%x RI:%x EA:%x\n", + psw->per, psw->dat, psw->io, psw->ext, psw->key, psw->mcheck, + psw->wait, psw->pstate, psw->as, psw->cc, psw->pm, psw->ri, psw->eaba); + boot_emerg("GPRS: %016lx %016lx %016lx %016lx\n", gpregs[0], gpregs[1], gpregs[2], gpregs[3]); + boot_emerg(" %016lx %016lx %016lx %016lx\n", gpregs[4], gpregs[5], gpregs[6], gpregs[7]); + boot_emerg(" %016lx %016lx %016lx %016lx\n", gpregs[8], gpregs[9], gpregs[10], gpregs[11]); + boot_emerg(" %016lx %016lx %016lx %016lx\n", gpregs[12], gpregs[13], gpregs[14], gpregs[15]); print_stacktrace(get_lowcore()->gpregs_save_area[15]); - boot_printk("Last Breaking-Event-Address:\n"); - boot_printk(" [<%016lx>] %pS\n", (unsigned long)get_lowcore()->pgm_last_break, - (void *)get_lowcore()->pgm_last_break); + boot_emerg("Last Breaking-Event-Address:\n"); + boot_emerg(" [<%016lx>] %pS\n", (unsigned long)get_lowcore()->pgm_last_break, + (void *)get_lowcore()->pgm_last_break); } diff --git a/arch/s390/boot/physmem_info.c b/arch/s390/boot/physmem_info.c index 7617aa2d2f7e..aa096ef68e8c 100644 --- a/arch/s390/boot/physmem_info.c +++ b/arch/s390/boot/physmem_info.c @@ -1,4 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 +#define boot_fmt(fmt) "physmem: " fmt #include <linux/processor.h> #include <linux/errno.h> #include <linux/init.h> @@ -28,7 +29,7 @@ static struct physmem_range *__get_physmem_range_ptr(u32 n) return &physmem_info.online[n]; if (unlikely(!physmem_info.online_extended)) { physmem_info.online_extended = (struct physmem_range *)physmem_alloc_range( - RR_MEM_DETECT_EXTENDED, ENTRIES_EXTENDED_MAX, sizeof(long), 0, + RR_MEM_DETECT_EXT, ENTRIES_EXTENDED_MAX, sizeof(long), 0, physmem_alloc_pos, true); } return &physmem_info.online_extended[n - MEM_INLINED_ENTRIES]; @@ -207,11 +208,16 @@ unsigned long detect_max_physmem_end(void) max_physmem_end = search_mem_end(); physmem_info.info_source = MEM_DETECT_BIN_SEARCH; } + boot_debug("Max physical memory: 0x%016lx (info source: %s)\n", max_physmem_end, + get_physmem_info_source()); return max_physmem_end; } void detect_physmem_online_ranges(unsigned long max_physmem_end) { + unsigned long start, end; + int i; + if (!sclp_early_read_storage_info()) { physmem_info.info_source = MEM_DETECT_SCLP_STOR_INFO; } else if (physmem_info.info_source == MEM_DETECT_DIAG500_STOR_LIMIT) { @@ -226,12 +232,16 @@ void detect_physmem_online_ranges(unsigned long max_physmem_end) } else if (max_physmem_end) { add_physmem_online_range(0, max_physmem_end); } + boot_debug("Online memory ranges (info source: %s):\n", get_physmem_info_source()); + for_each_physmem_online_range(i, &start, &end) + boot_debug(" online [%d]: 0x%016lx-0x%016lx\n", i, start, end); } void physmem_set_usable_limit(unsigned long limit) { physmem_info.usable = limit; physmem_alloc_pos = limit; + boot_debug("Usable memory limit: 0x%016lx\n", limit); } static void die_oom(unsigned long size, unsigned long align, unsigned long min, unsigned long max) @@ -241,38 +251,47 @@ static void die_oom(unsigned long size, unsigned long align, unsigned long min, enum reserved_range_type t; int i; - boot_printk("Linux version %s\n", kernel_version); + boot_emerg("Linux version %s\n", kernel_version); if (!is_prot_virt_guest() && early_command_line[0]) - boot_printk("Kernel command line: %s\n", early_command_line); - boot_printk("Out of memory allocating %lx bytes %lx aligned in range %lx:%lx\n", - size, align, min, max); - boot_printk("Reserved memory 
ranges:\n"); + boot_emerg("Kernel command line: %s\n", early_command_line); + boot_emerg("Out of memory allocating %lu bytes 0x%lx aligned in range %lx:%lx\n", + size, align, min, max); + boot_emerg("Reserved memory ranges:\n"); for_each_physmem_reserved_range(t, range, &start, &end) { - boot_printk("%016lx %016lx %s\n", start, end, get_rr_type_name(t)); + boot_emerg("%016lx %016lx %s\n", start, end, get_rr_type_name(t)); total_reserved_mem += end - start; } - boot_printk("Usable online memory ranges (info source: %s [%x]):\n", - get_physmem_info_source(), physmem_info.info_source); + boot_emerg("Usable online memory ranges (info source: %s [%d]):\n", + get_physmem_info_source(), physmem_info.info_source); for_each_physmem_usable_range(i, &start, &end) { - boot_printk("%016lx %016lx\n", start, end); + boot_emerg("%016lx %016lx\n", start, end); total_mem += end - start; } - boot_printk("Usable online memory total: %lx Reserved: %lx Free: %lx\n", - total_mem, total_reserved_mem, - total_mem > total_reserved_mem ? total_mem - total_reserved_mem : 0); + boot_emerg("Usable online memory total: %lu Reserved: %lu Free: %lu\n", + total_mem, total_reserved_mem, + total_mem > total_reserved_mem ? total_mem - total_reserved_mem : 0); print_stacktrace(current_frame_address()); - boot_printk("\n\n -- System halted\n"); + boot_emerg(" -- System halted\n"); disabled_wait(); } -void physmem_reserve(enum reserved_range_type type, unsigned long addr, unsigned long size) +static void _physmem_reserve(enum reserved_range_type type, unsigned long addr, unsigned long size) { physmem_info.reserved[type].start = addr; physmem_info.reserved[type].end = addr + size; } +void physmem_reserve(enum reserved_range_type type, unsigned long addr, unsigned long size) +{ + _physmem_reserve(type, addr, size); + boot_debug("%-14s 0x%016lx-0x%016lx %s\n", "Reserve:", addr, addr + size, + get_rr_type_name(type)); +} + void physmem_free(enum reserved_range_type type) { + boot_debug("%-14s 0x%016lx-0x%016lx %s\n", "Free:", physmem_info.reserved[type].start, + physmem_info.reserved[type].end, get_rr_type_name(type)); physmem_info.reserved[type].start = 0; physmem_info.reserved[type].end = 0; } @@ -339,41 +358,73 @@ unsigned long physmem_alloc_range(enum reserved_range_type type, unsigned long s max = min(max, physmem_alloc_pos); addr = __physmem_alloc_range(size, align, min, max, 0, NULL, die_on_oom); if (addr) - physmem_reserve(type, addr, size); + _physmem_reserve(type, addr, size); + boot_debug("%-14s 0x%016lx-0x%016lx %s\n", "Alloc range:", addr, addr + size, + get_rr_type_name(type)); return addr; } -unsigned long physmem_alloc_top_down(enum reserved_range_type type, unsigned long size, - unsigned long align) +unsigned long physmem_alloc(enum reserved_range_type type, unsigned long size, + unsigned long align, bool die_on_oom) { struct reserved_range *range = &physmem_info.reserved[type]; - struct reserved_range *new_range; + struct reserved_range *new_range = NULL; unsigned int ranges_left; unsigned long addr; addr = __physmem_alloc_range(size, align, 0, physmem_alloc_pos, physmem_alloc_ranges, - &ranges_left, true); + &ranges_left, die_on_oom); + if (!addr) + return 0; /* if not a consecutive allocation of the same type or first allocation */ if (range->start != addr + size) { if (range->end) { - physmem_alloc_pos = __physmem_alloc_range( - sizeof(struct reserved_range), 0, 0, physmem_alloc_pos, - physmem_alloc_ranges, &ranges_left, true); - new_range = (struct reserved_range *)physmem_alloc_pos; + addr = 
__physmem_alloc_range(sizeof(struct reserved_range), 0, 0, + physmem_alloc_pos, physmem_alloc_ranges, + &ranges_left, true); + new_range = (struct reserved_range *)addr; + addr = __physmem_alloc_range(size, align, 0, addr, ranges_left, + &ranges_left, die_on_oom); + if (!addr) + return 0; *new_range = *range; range->chain = new_range; - addr = __physmem_alloc_range(size, align, 0, physmem_alloc_pos, - ranges_left, &ranges_left, true); } range->end = addr + size; } + if (type != RR_VMEM) { + boot_debug("%-14s 0x%016lx-0x%016lx %-20s align 0x%lx split %d\n", "Alloc topdown:", + addr, addr + size, get_rr_type_name(type), align, !!new_range); + } range->start = addr; physmem_alloc_pos = addr; physmem_alloc_ranges = ranges_left; return addr; } +unsigned long physmem_alloc_or_die(enum reserved_range_type type, unsigned long size, + unsigned long align) +{ + return physmem_alloc(type, size, align, true); +} + unsigned long get_physmem_alloc_pos(void) { return physmem_alloc_pos; } + +void dump_physmem_reserved(void) +{ + struct reserved_range *range; + enum reserved_range_type t; + unsigned long start, end; + + boot_debug("Reserved memory ranges:\n"); + for_each_physmem_reserved_range(t, range, &start, &end) { + if (end) { + boot_debug("%-14s 0x%016lx-0x%016lx @%012lx chain %012lx\n", + get_rr_type_name(t), start, end, (unsigned long)range, + (unsigned long)range->chain); + } + } +} diff --git a/arch/s390/boot/printk.c b/arch/s390/boot/printk.c index 35f18f2b936e..b4c66fa667d5 100644 --- a/arch/s390/boot/printk.c +++ b/arch/s390/boot/printk.c @@ -5,21 +5,111 @@ #include <linux/ctype.h> #include <asm/stacktrace.h> #include <asm/boot_data.h> +#include <asm/sections.h> #include <asm/lowcore.h> #include <asm/setup.h> #include <asm/sclp.h> #include <asm/uv.h> #include "boot.h" +int boot_console_loglevel = CONFIG_CONSOLE_LOGLEVEL_DEFAULT; +bool boot_ignore_loglevel; +char __bootdata(boot_rb)[PAGE_SIZE * 2]; +bool __bootdata(boot_earlyprintk); +size_t __bootdata(boot_rb_off); +char __bootdata(bootdebug_filter)[128]; +bool __bootdata(bootdebug); + +static void boot_rb_add(const char *str, size_t len) +{ + /* leave double '\0' in the end */ + size_t avail = sizeof(boot_rb) - boot_rb_off - 1; + + /* store strings separated by '\0' */ + if (len + 1 > avail) + boot_rb_off = 0; + strcpy(boot_rb + boot_rb_off, str); + boot_rb_off += len + 1; +} + +static void print_rb_entry(const char *str) +{ + sclp_early_printk(printk_skip_level(str)); +} + +static bool debug_messages_printed(void) +{ + return boot_earlyprintk && (boot_ignore_loglevel || boot_console_loglevel > LOGLEVEL_DEBUG); +} + +void boot_rb_dump(void) +{ + if (debug_messages_printed()) + return; + sclp_early_printk("Boot messages ring buffer:\n"); + boot_rb_foreach(print_rb_entry); +} + const char hex_asc[] = "0123456789abcdef"; static char *as_hex(char *dst, unsigned long val, int pad) { - char *p, *end = p = dst + max(pad, (int)__fls(val | 1) / 4 + 1); + char *p = dst + max(pad, (int)__fls(val | 1) / 4 + 1); - for (*p-- = 0; p >= dst; val >>= 4) + for (*p-- = '\0'; p >= dst; val >>= 4) *p-- = hex_asc[val & 0x0f]; - return end; + return dst; +} + +#define MAX_NUMLEN 21 +static char *as_dec(char *buf, unsigned long val, bool is_signed) +{ + bool negative = false; + char *p = buf + MAX_NUMLEN; + + if (is_signed && (long)val < 0) { + val = (val == LONG_MIN ? 
LONG_MIN : -(long)val); + negative = true; + } + + *--p = '\0'; + do { + *--p = '0' + (val % 10); + val /= 10; + } while (val); + + if (negative) + *--p = '-'; + return p; +} + +static ssize_t strpad(char *dst, size_t dst_size, const char *src, + int _pad, bool zero_pad, bool decimal) +{ + ssize_t len = strlen(src), pad = _pad; + char *p = dst; + + if (max(len, abs(pad)) >= dst_size) + return -E2BIG; + + if (pad > len) { + if (decimal && zero_pad && *src == '-') { + *p++ = '-'; + src++; + len--; + pad--; + } + memset(p, zero_pad ? '0' : ' ', pad - len); + p += pad - len; + } + memcpy(p, src, len); + p += len; + if (pad < 0 && -pad > len) { + memset(p, ' ', -pad - len); + p += -pad - len; + } + *p = '\0'; + return p - dst; } static char *symstart(char *p) @@ -58,35 +148,94 @@ static noinline char *findsym(unsigned long ip, unsigned short *off, unsigned sh return NULL; } -static noinline char *strsym(void *ip) +#define MAX_SYMLEN 64 +static noinline char *strsym(char *buf, void *ip) { - static char buf[64]; unsigned short off; unsigned short len; char *p; p = findsym((unsigned long)ip, &off, &len); if (p) { - strncpy(buf, p, sizeof(buf)); + strncpy(buf, p, MAX_SYMLEN); /* reserve 15 bytes for offset/len in symbol+0x1234/0x1234 */ - p = buf + strnlen(buf, sizeof(buf) - 15); + p = buf + strnlen(buf, MAX_SYMLEN - 15); strcpy(p, "+0x"); - p = as_hex(p + 3, off, 0); - strcpy(p, "/0x"); - as_hex(p + 3, len, 0); + as_hex(p + 3, off, 0); + strcat(p, "/0x"); + as_hex(p + strlen(p), len, 0); } else { as_hex(buf, (unsigned long)ip, 16); } return buf; } -void boot_printk(const char *fmt, ...) +static inline int printk_loglevel(const char *buf) +{ + if (buf[0] == KERN_SOH_ASCII && buf[1]) { + switch (buf[1]) { + case '0' ... '7': + return buf[1] - '0'; + } + } + return MESSAGE_LOGLEVEL_DEFAULT; +} + +static void boot_console_earlyprintk(const char *buf) +{ + int level = printk_loglevel(buf); + + /* always print emergency messages */ + if (level > LOGLEVEL_EMERG && !boot_earlyprintk) + return; + buf = printk_skip_level(buf); + /* print debug messages only when bootdebug is enabled */ + if (level == LOGLEVEL_DEBUG && (!bootdebug || !bootdebug_filter_match(skip_timestamp(buf)))) + return; + if (boot_ignore_loglevel || level < boot_console_loglevel) + sclp_early_printk(buf); +} + +static char *add_timestamp(char *buf) +{ +#ifdef CONFIG_PRINTK_TIME + union tod_clock *boot_clock = (union tod_clock *)&get_lowcore()->boot_clock; + unsigned long ns = tod_to_ns(get_tod_clock() - boot_clock->tod); + char ts[MAX_NUMLEN]; + + *buf++ = '['; + buf += strpad(buf, MAX_NUMLEN, as_dec(ts, ns / NSEC_PER_SEC, 0), 5, 0, 0); + *buf++ = '.'; + buf += strpad(buf, MAX_NUMLEN, as_dec(ts, (ns % NSEC_PER_SEC) / NSEC_PER_USEC, 0), 6, 1, 0); + *buf++ = ']'; + *buf++ = ' '; +#endif + return buf; +} + +#define va_arg_len_type(args, lenmod, typemod) \ + ((lenmod == 'l') ? va_arg(args, typemod long) : \ + (lenmod == 'h') ? (typemod short)va_arg(args, typemod int) : \ + (lenmod == 'H') ? (typemod char)va_arg(args, typemod int) : \ + (lenmod == 'z') ? va_arg(args, typemod long) : \ + va_arg(args, typemod int)) + +int boot_printk(const char *fmt, ...) 
{ char buf[1024] = { 0 }; char *end = buf + sizeof(buf) - 1; /* make sure buf is 0 terminated */ - unsigned long pad; - char *p = buf; + bool zero_pad, decimal; + char *strval, *p = buf; + char valbuf[MAX(MAX_SYMLEN, MAX_NUMLEN)]; va_list args; + char lenmod; + ssize_t len; + int pad; + + *p++ = KERN_SOH_ASCII; + *p++ = printk_get_level(fmt) ?: '0' + MESSAGE_LOGLEVEL_DEFAULT; + p = add_timestamp(p); + fmt = printk_skip_level(fmt); va_start(args, fmt); for (; p < end && *fmt; fmt++) { @@ -94,31 +243,56 @@ void boot_printk(const char *fmt, ...) *p++ = *fmt; continue; } - pad = isdigit(*++fmt) ? simple_strtol(fmt, (char **)&fmt, 10) : 0; + if (*++fmt == '%') { + *p++ = '%'; + continue; + } + zero_pad = (*fmt == '0'); + pad = simple_strtol(fmt, (char **)&fmt, 10); + lenmod = (*fmt == 'h' || *fmt == 'l' || *fmt == 'z') ? *fmt++ : 0; + if (lenmod == 'h' && *fmt == 'h') { + lenmod = 'H'; + fmt++; + } + decimal = false; switch (*fmt) { case 's': - p = buf + strlcat(buf, va_arg(args, char *), sizeof(buf)); + if (lenmod) + goto out; + strval = va_arg(args, char *); + zero_pad = false; break; case 'p': - if (*++fmt != 'S') + if (*++fmt != 'S' || lenmod) goto out; - p = buf + strlcat(buf, strsym(va_arg(args, void *)), sizeof(buf)); + strval = strsym(valbuf, va_arg(args, void *)); + zero_pad = false; break; - case 'l': - if (*++fmt != 'x' || end - p <= max(sizeof(long) * 2, pad)) - goto out; - p = as_hex(p, va_arg(args, unsigned long), pad); + case 'd': + case 'i': + strval = as_dec(valbuf, va_arg_len_type(args, lenmod, signed), 1); + decimal = true; + break; + case 'u': + strval = as_dec(valbuf, va_arg_len_type(args, lenmod, unsigned), 0); break; case 'x': - if (end - p <= max(sizeof(int) * 2, pad)) - goto out; - p = as_hex(p, va_arg(args, unsigned int), pad); + strval = as_hex(valbuf, va_arg_len_type(args, lenmod, unsigned), 0); break; default: goto out; } + len = strpad(p, end - p, strval, pad, zero_pad, decimal); + if (len == -E2BIG) + break; + p += len; } out: va_end(args); - sclp_early_printk(buf); + len = strlen(buf); + if (len) { + boot_rb_add(buf, len); + boot_console_earlyprintk(buf); + } + return len; } diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c index e6b06692ddc8..885bd1dd2c82 100644 --- a/arch/s390/boot/startup.c +++ b/arch/s390/boot/startup.c @@ -1,4 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 +#define boot_fmt(fmt) "startup: " fmt #include <linux/string.h> #include <linux/elf.h> #include <asm/page-states.h> @@ -42,7 +43,8 @@ struct machine_info machine; void error(char *x) { - boot_printk("\n\n%s\n\n -- System halted", x); + boot_emerg("%s\n", x); + boot_emerg(" -- System halted\n"); disabled_wait(); } @@ -143,7 +145,7 @@ static void rescue_initrd(unsigned long min, unsigned long max) return; old_addr = addr; physmem_free(RR_INITRD); - addr = physmem_alloc_top_down(RR_INITRD, size, 0); + addr = physmem_alloc_or_die(RR_INITRD, size, 0); memmove((void *)addr, (void *)old_addr, size); } @@ -222,12 +224,16 @@ static void setup_ident_map_size(unsigned long max_physmem_end) if (oldmem_data.start) { __kaslr_enabled = 0; ident_map_size = min(ident_map_size, oldmem_data.size); + boot_debug("kdump memory limit: 0x%016lx\n", oldmem_data.size); } else if (ipl_block_valid && is_ipl_block_dump()) { __kaslr_enabled = 0; - if (!sclp_early_get_hsa_size(&hsa_size) && hsa_size) + if (!sclp_early_get_hsa_size(&hsa_size) && hsa_size) { ident_map_size = min(ident_map_size, hsa_size); + boot_debug("Stand-alone dump limit: 0x%016lx\n", hsa_size); + } } #endif + boot_debug("Identity map 
size: 0x%016lx\n", ident_map_size); } #define FIXMAP_SIZE round_up(MEMCPY_REAL_SIZE + ABS_LOWCORE_MAP_SIZE, sizeof(struct lowcore)) @@ -267,6 +273,7 @@ static unsigned long setup_kernel_memory_layout(unsigned long kernel_size) BUILD_BUG_ON(!IS_ALIGNED(__NO_KASLR_START_KERNEL, THREAD_SIZE)); BUILD_BUG_ON(__NO_KASLR_END_KERNEL > _REGION1_SIZE); vsize = get_vmem_size(ident_map_size, vmemmap_size, vmalloc_size, _REGION3_SIZE); + boot_debug("vmem size estimated: 0x%016lx\n", vsize); if (IS_ENABLED(CONFIG_KASAN) || __NO_KASLR_END_KERNEL > _REGION2_SIZE || (vsize > _REGION2_SIZE && kaslr_enabled())) { asce_limit = _REGION1_SIZE; @@ -290,8 +297,10 @@ static unsigned long setup_kernel_memory_layout(unsigned long kernel_size) * otherwise asce_limit and rte_size would have been adjusted. */ vmax = adjust_to_uv_max(asce_limit); + boot_debug("%d level paging 0x%016lx vmax\n", vmax == _REGION1_SIZE ? 4 : 3, vmax); #ifdef CONFIG_KASAN BUILD_BUG_ON(__NO_KASLR_END_KERNEL > KASAN_SHADOW_START); + boot_debug("KASAN shadow area: 0x%016lx-0x%016lx\n", KASAN_SHADOW_START, KASAN_SHADOW_END); /* force vmalloc and modules below kasan shadow */ vmax = min(vmax, KASAN_SHADOW_START); #endif @@ -305,19 +314,27 @@ static unsigned long setup_kernel_memory_layout(unsigned long kernel_size) pos = 0; kernel_end = vmax - pos * THREAD_SIZE; kernel_start = round_down(kernel_end - kernel_size, THREAD_SIZE); + boot_debug("Randomization range: 0x%016lx-0x%016lx\n", vmax - kaslr_len, vmax); + boot_debug("kernel image: 0x%016lx-0x%016lx (kaslr)\n", kernel_start, + kernel_size + kernel_size); } else if (vmax < __NO_KASLR_END_KERNEL || vsize > __NO_KASLR_END_KERNEL) { kernel_start = round_down(vmax - kernel_size, THREAD_SIZE); - boot_printk("The kernel base address is forced to %lx\n", kernel_start); + boot_debug("kernel image: 0x%016lx-0x%016lx (constrained)\n", kernel_start, + kernel_start + kernel_size); } else { kernel_start = __NO_KASLR_START_KERNEL; + boot_debug("kernel image: 0x%016lx-0x%016lx (nokaslr)\n", kernel_start, + kernel_start + kernel_size); } __kaslr_offset = kernel_start; + boot_debug("__kaslr_offset: 0x%016lx\n", __kaslr_offset); MODULES_END = round_down(kernel_start, _SEGMENT_SIZE); MODULES_VADDR = MODULES_END - MODULES_LEN; VMALLOC_END = MODULES_VADDR; if (IS_ENABLED(CONFIG_KMSAN)) VMALLOC_END -= MODULES_LEN * 2; + boot_debug("modules area: 0x%016lx-0x%016lx\n", MODULES_VADDR, MODULES_END); /* allow vmalloc area to occupy up to about 1/2 of the rest virtual space left */ vsize = (VMALLOC_END - FIXMAP_SIZE) / 2; @@ -329,10 +346,15 @@ static unsigned long setup_kernel_memory_layout(unsigned long kernel_size) VMALLOC_END -= vmalloc_size * 2; } VMALLOC_START = VMALLOC_END - vmalloc_size; + boot_debug("vmalloc area: 0x%016lx-0x%016lx\n", VMALLOC_START, VMALLOC_END); __memcpy_real_area = round_down(VMALLOC_START - MEMCPY_REAL_SIZE, PAGE_SIZE); + boot_debug("memcpy real area: 0x%016lx-0x%016lx\n", __memcpy_real_area, + __memcpy_real_area + MEMCPY_REAL_SIZE); __abs_lowcore = round_down(__memcpy_real_area - ABS_LOWCORE_MAP_SIZE, sizeof(struct lowcore)); + boot_debug("abs lowcore: 0x%016lx-0x%016lx\n", __abs_lowcore, + __abs_lowcore + ABS_LOWCORE_MAP_SIZE); /* split remaining virtual space between 1:1 mapping & vmemmap array */ pages = __abs_lowcore / (PAGE_SIZE + sizeof(struct page)); @@ -352,8 +374,11 @@ static unsigned long setup_kernel_memory_layout(unsigned long kernel_size) BUILD_BUG_ON(MAX_DCSS_ADDR > (1UL << MAX_PHYSMEM_BITS)); max_mappable = max(ident_map_size, MAX_DCSS_ADDR); max_mappable = min(max_mappable, 
vmemmap_start); - if (IS_ENABLED(CONFIG_RANDOMIZE_IDENTITY_BASE)) - __identity_base = round_down(vmemmap_start - max_mappable, rte_size); +#ifdef CONFIG_RANDOMIZE_IDENTITY_BASE + __identity_base = round_down(vmemmap_start - max_mappable, rte_size); +#endif + boot_debug("identity map: 0x%016lx-0x%016lx\n", __identity_base, + __identity_base + ident_map_size); return asce_limit; } @@ -412,6 +437,10 @@ void startup_kernel(void) psw_t psw; setup_lpp(); + store_ipl_parmblock(); + uv_query_info(); + setup_boot_command_line(); + parse_boot_command_line(); /* * Non-randomized kernel physical start address must be _SEGMENT_SIZE @@ -431,12 +460,8 @@ void startup_kernel(void) oldmem_data.start = parmarea.oldmem_base; oldmem_data.size = parmarea.oldmem_size; - store_ipl_parmblock(); read_ipl_report(); - uv_query_info(); sclp_early_read_info(); - setup_boot_command_line(); - parse_boot_command_line(); detect_facilities(); cmma_init(); sanitize_prot_virt_host(); @@ -526,6 +551,7 @@ void startup_kernel(void) __kaslr_offset, __kaslr_offset_phys); kaslr_adjust_got(__kaslr_offset); setup_vmem(__kaslr_offset, __kaslr_offset + kernel_size, asce_limit); + dump_physmem_reserved(); copy_bootdata(); __apply_alternatives((struct alt_instr *)_vmlinux_info.alt_instructions, (struct alt_instr *)_vmlinux_info.alt_instructions_end, @@ -542,5 +568,6 @@ void startup_kernel(void) */ psw.addr = __kaslr_offset + vmlinux.entry; psw.mask = PSW_KERNEL_BITS; + boot_debug("Starting kernel at: 0x%016lx\n", psw.addr); __load_psw(psw); } diff --git a/arch/s390/boot/vmem.c b/arch/s390/boot/vmem.c index 881a1ece422f..cfca94a8eac4 100644 --- a/arch/s390/boot/vmem.c +++ b/arch/s390/boot/vmem.c @@ -1,4 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 +#define boot_fmt(fmt) "vmem: " fmt #include <linux/sched/task.h> #include <linux/pgtable.h> #include <linux/kasan.h> @@ -13,6 +14,7 @@ #include "decompressor.h" #include "boot.h" +#define INVALID_PHYS_ADDR (~(phys_addr_t)0) struct ctlreg __bootdata_preserved(s390_invalid_asce); #ifdef CONFIG_PROC_FS @@ -31,12 +33,42 @@ enum populate_mode { POPULATE_IDENTITY, POPULATE_KERNEL, #ifdef CONFIG_KASAN + /* KASAN modes should be last and grouped together, see is_kasan_populate_mode() */ POPULATE_KASAN_MAP_SHADOW, POPULATE_KASAN_ZERO_SHADOW, POPULATE_KASAN_SHALLOW #endif }; +#define POPULATE_MODE_NAME(t) case POPULATE_ ## t: return #t +static inline const char *get_populate_mode_name(enum populate_mode t) +{ + switch (t) { + POPULATE_MODE_NAME(NONE); + POPULATE_MODE_NAME(DIRECT); + POPULATE_MODE_NAME(LOWCORE); + POPULATE_MODE_NAME(ABS_LOWCORE); + POPULATE_MODE_NAME(IDENTITY); + POPULATE_MODE_NAME(KERNEL); +#ifdef CONFIG_KASAN + POPULATE_MODE_NAME(KASAN_MAP_SHADOW); + POPULATE_MODE_NAME(KASAN_ZERO_SHADOW); + POPULATE_MODE_NAME(KASAN_SHALLOW); +#endif + default: + return "UNKNOWN"; + } +} + +static bool is_kasan_populate_mode(enum populate_mode mode) +{ +#ifdef CONFIG_KASAN + return mode >= POPULATE_KASAN_MAP_SHADOW; +#else + return false; +#endif +} + static void pgtable_populate(unsigned long addr, unsigned long end, enum populate_mode mode); #ifdef CONFIG_KASAN @@ -52,9 +84,12 @@ static pte_t pte_z; static inline void kasan_populate(unsigned long start, unsigned long end, enum populate_mode mode) { - start = PAGE_ALIGN_DOWN(__sha(start)); - end = PAGE_ALIGN(__sha(end)); - pgtable_populate(start, end, mode); + unsigned long sha_start = PAGE_ALIGN_DOWN(__sha(start)); + unsigned long sha_end = PAGE_ALIGN(__sha(end)); + + boot_debug("%-17s 0x%016lx-0x%016lx >> 0x%016lx-0x%016lx\n", 
get_populate_mode_name(mode), + start, end, sha_start, sha_end); + pgtable_populate(sha_start, sha_end, mode); } static void kasan_populate_shadow(unsigned long kernel_start, unsigned long kernel_end) @@ -200,7 +235,7 @@ static void *boot_crst_alloc(unsigned long val) unsigned long size = PAGE_SIZE << CRST_ALLOC_ORDER; unsigned long *table; - table = (unsigned long *)physmem_alloc_top_down(RR_VMEM, size, size); + table = (unsigned long *)physmem_alloc_or_die(RR_VMEM, size, size); crst_table_init(table, val); __arch_set_page_dat(table, 1UL << CRST_ALLOC_ORDER); return table; @@ -216,7 +251,7 @@ static pte_t *boot_pte_alloc(void) * during POPULATE_KASAN_MAP_SHADOW when EDAT is off */ if (!pte_leftover) { - pte_leftover = (void *)physmem_alloc_top_down(RR_VMEM, PAGE_SIZE, PAGE_SIZE); + pte_leftover = (void *)physmem_alloc_or_die(RR_VMEM, PAGE_SIZE, PAGE_SIZE); pte = pte_leftover + _PAGE_TABLE_SIZE; __arch_set_page_dat(pte, 1); } else { @@ -228,11 +263,12 @@ static pte_t *boot_pte_alloc(void) return pte; } -static unsigned long _pa(unsigned long addr, unsigned long size, enum populate_mode mode) +static unsigned long resolve_pa_may_alloc(unsigned long addr, unsigned long size, + enum populate_mode mode) { switch (mode) { case POPULATE_NONE: - return -1; + return INVALID_PHYS_ADDR; case POPULATE_DIRECT: return addr; case POPULATE_LOWCORE: @@ -245,38 +281,64 @@ static unsigned long _pa(unsigned long addr, unsigned long size, enum populate_m return __identity_pa(addr); #ifdef CONFIG_KASAN case POPULATE_KASAN_MAP_SHADOW: - addr = physmem_alloc_top_down(RR_VMEM, size, size); - memset((void *)addr, 0, size); - return addr; + /* Allow to fail large page allocations, this will fall back to 1mb/4k pages */ + addr = physmem_alloc(RR_VMEM, size, size, size == PAGE_SIZE); + if (addr) { + memset((void *)addr, 0, size); + return addr; + } + return INVALID_PHYS_ADDR; #endif default: - return -1; + return INVALID_PHYS_ADDR; } } -static bool large_allowed(enum populate_mode mode) +static bool large_page_mapping_allowed(enum populate_mode mode) { - return (mode == POPULATE_DIRECT) || (mode == POPULATE_IDENTITY) || (mode == POPULATE_KERNEL); + switch (mode) { + case POPULATE_DIRECT: + case POPULATE_IDENTITY: + case POPULATE_KERNEL: +#ifdef CONFIG_KASAN + case POPULATE_KASAN_MAP_SHADOW: +#endif + return true; + default: + return false; + } } -static bool can_large_pud(pud_t *pu_dir, unsigned long addr, unsigned long end, - enum populate_mode mode) +static unsigned long try_get_large_pud_pa(pud_t *pu_dir, unsigned long addr, unsigned long end, + enum populate_mode mode) { - unsigned long size = end - addr; + unsigned long pa, size = end - addr; + + if (!machine.has_edat2 || !large_page_mapping_allowed(mode) || + !IS_ALIGNED(addr, PUD_SIZE) || (size < PUD_SIZE)) + return INVALID_PHYS_ADDR; + + pa = resolve_pa_may_alloc(addr, size, mode); + if (!IS_ALIGNED(pa, PUD_SIZE)) + return INVALID_PHYS_ADDR; - return machine.has_edat2 && large_allowed(mode) && - IS_ALIGNED(addr, PUD_SIZE) && (size >= PUD_SIZE) && - IS_ALIGNED(_pa(addr, size, mode), PUD_SIZE); + return pa; } -static bool can_large_pmd(pmd_t *pm_dir, unsigned long addr, unsigned long end, - enum populate_mode mode) +static unsigned long try_get_large_pmd_pa(pmd_t *pm_dir, unsigned long addr, unsigned long end, + enum populate_mode mode) { - unsigned long size = end - addr; + unsigned long pa, size = end - addr; - return machine.has_edat1 && large_allowed(mode) && - IS_ALIGNED(addr, PMD_SIZE) && (size >= PMD_SIZE) && - IS_ALIGNED(_pa(addr, size, mode), 
PMD_SIZE); + if (!machine.has_edat1 || !large_page_mapping_allowed(mode) || + !IS_ALIGNED(addr, PMD_SIZE) || (size < PMD_SIZE)) + return INVALID_PHYS_ADDR; + + pa = resolve_pa_may_alloc(addr, size, mode); + if (!IS_ALIGNED(pa, PMD_SIZE)) + return INVALID_PHYS_ADDR; + + return pa; } static void pgtable_pte_populate(pmd_t *pmd, unsigned long addr, unsigned long end, @@ -290,7 +352,7 @@ static void pgtable_pte_populate(pmd_t *pmd, unsigned long addr, unsigned long e if (pte_none(*pte)) { if (kasan_pte_populate_zero_shadow(pte, mode)) continue; - entry = __pte(_pa(addr, PAGE_SIZE, mode)); + entry = __pte(resolve_pa_may_alloc(addr, PAGE_SIZE, mode)); entry = set_pte_bit(entry, PAGE_KERNEL); set_pte(pte, entry); pages++; @@ -303,7 +365,7 @@ static void pgtable_pte_populate(pmd_t *pmd, unsigned long addr, unsigned long e static void pgtable_pmd_populate(pud_t *pud, unsigned long addr, unsigned long end, enum populate_mode mode) { - unsigned long next, pages = 0; + unsigned long pa, next, pages = 0; pmd_t *pmd, entry; pte_t *pte; @@ -313,8 +375,9 @@ static void pgtable_pmd_populate(pud_t *pud, unsigned long addr, unsigned long e if (pmd_none(*pmd)) { if (kasan_pmd_populate_zero_shadow(pmd, addr, next, mode)) continue; - if (can_large_pmd(pmd, addr, next, mode)) { - entry = __pmd(_pa(addr, _SEGMENT_SIZE, mode)); + pa = try_get_large_pmd_pa(pmd, addr, next, mode); + if (pa != INVALID_PHYS_ADDR) { + entry = __pmd(pa); entry = set_pmd_bit(entry, SEGMENT_KERNEL); set_pmd(pmd, entry); pages++; @@ -334,7 +397,7 @@ static void pgtable_pmd_populate(pud_t *pud, unsigned long addr, unsigned long e static void pgtable_pud_populate(p4d_t *p4d, unsigned long addr, unsigned long end, enum populate_mode mode) { - unsigned long next, pages = 0; + unsigned long pa, next, pages = 0; pud_t *pud, entry; pmd_t *pmd; @@ -344,8 +407,9 @@ static void pgtable_pud_populate(p4d_t *p4d, unsigned long addr, unsigned long e if (pud_none(*pud)) { if (kasan_pud_populate_zero_shadow(pud, addr, next, mode)) continue; - if (can_large_pud(pud, addr, next, mode)) { - entry = __pud(_pa(addr, _REGION3_SIZE, mode)); + pa = try_get_large_pud_pa(pud, addr, next, mode); + if (pa != INVALID_PHYS_ADDR) { + entry = __pud(pa); entry = set_pud_bit(entry, REGION3_KERNEL); set_pud(pud, entry); pages++; @@ -388,6 +452,13 @@ static void pgtable_populate(unsigned long addr, unsigned long end, enum populat pgd_t *pgd; p4d_t *p4d; + if (!is_kasan_populate_mode(mode)) { + boot_debug("%-17s 0x%016lx-0x%016lx -> 0x%016lx-0x%016lx\n", + get_populate_mode_name(mode), addr, end, + resolve_pa_may_alloc(addr, 0, mode), + resolve_pa_may_alloc(end - 1, 0, mode) + 1); + } + pgd = pgd_offset(&init_mm, addr); for (; addr < end; addr = next, pgd++) { next = pgd_addr_end(addr, end); diff --git a/arch/s390/include/asm/asm-extable.h b/arch/s390/include/asm/asm-extable.h index 4a6b0a8b6412..2e829c16fd8a 100644 --- a/arch/s390/include/asm/asm-extable.h +++ b/arch/s390/include/asm/asm-extable.h @@ -9,11 +9,11 @@ #define EX_TYPE_NONE 0 #define EX_TYPE_FIXUP 1 #define EX_TYPE_BPF 2 -#define EX_TYPE_UA_STORE 3 -#define EX_TYPE_UA_LOAD_MEM 4 +#define EX_TYPE_UA_FAULT 3 #define EX_TYPE_UA_LOAD_REG 5 #define EX_TYPE_UA_LOAD_REGPAIR 6 #define EX_TYPE_ZEROPAD 7 +#define EX_TYPE_FPC 8 #define EX_DATA_REG_ERR_SHIFT 0 #define EX_DATA_REG_ERR GENMASK(3, 0) @@ -69,11 +69,8 @@ #define EX_TABLE_AMODE31(_fault, _target) \ __EX_TABLE(.amode31.ex_table, _fault, _target, EX_TYPE_FIXUP, __stringify(%%r0), __stringify(%%r0), 0) -#define EX_TABLE_UA_STORE(_fault, _target, _regerr) \ - 
__EX_TABLE(__ex_table, _fault, _target, EX_TYPE_UA_STORE, _regerr, _regerr, 0) - -#define EX_TABLE_UA_LOAD_MEM(_fault, _target, _regerr, _regmem, _len) \ - __EX_TABLE(__ex_table, _fault, _target, EX_TYPE_UA_LOAD_MEM, _regerr, _regmem, _len) +#define EX_TABLE_UA_FAULT(_fault, _target, _regerr) \ + __EX_TABLE(__ex_table, _fault, _target, EX_TYPE_UA_FAULT, _regerr, _regerr, 0) #define EX_TABLE_UA_LOAD_REG(_fault, _target, _regerr, _regzero) \ __EX_TABLE(__ex_table, _fault, _target, EX_TYPE_UA_LOAD_REG, _regerr, _regzero, 0) @@ -84,4 +81,7 @@ #define EX_TABLE_ZEROPAD(_fault, _target, _regdata, _regaddr) \ __EX_TABLE(__ex_table, _fault, _target, EX_TYPE_ZEROPAD, _regdata, _regaddr, 0) +#define EX_TABLE_FPC(_fault, _target) \ + __EX_TABLE(__ex_table, _fault, _target, EX_TYPE_FPC, __stringify(%%r0), __stringify(%%r0), 0) + #endif /* __ASM_EXTABLE_H */ diff --git a/arch/s390/include/asm/asm.h b/arch/s390/include/asm/asm.h index ec011b94af2a..e9062b01e2a2 100644 --- a/arch/s390/include/asm/asm.h +++ b/arch/s390/include/asm/asm.h @@ -28,7 +28,7 @@ * [var] also contains the program mask. CC_TRANSFORM() moves the condition * code to the two least significant bits and sets all other bits to zero. */ -#if defined(__GCC_ASM_FLAG_OUTPUTS__) && !(IS_ENABLED(CONFIG_GCC_ASM_FLAG_OUTPUT_BROKEN)) +#if defined(__GCC_ASM_FLAG_OUTPUTS__) && !(IS_ENABLED(CONFIG_CC_ASM_FLAG_OUTPUT_BROKEN)) #define __HAVE_ASM_FLAG_OUTPUTS__ diff --git a/arch/s390/include/asm/bitops.h b/arch/s390/include/asm/bitops.h index 15aa64e3020e..d5125296ade2 100644 --- a/arch/s390/include/asm/bitops.h +++ b/arch/s390/include/asm/bitops.h @@ -60,7 +60,7 @@ static __always_inline bool arch_test_bit(unsigned long nr, const volatile unsig asm volatile( " tm %[addr],%[mask]\n" : "=@cc" (cc) - : [addr] "R" (*addr), [mask] "I" (mask) + : [addr] "Q" (*addr), [mask] "I" (mask) ); return cc == 3; } diff --git a/arch/s390/include/asm/boot_data.h b/arch/s390/include/asm/boot_data.h index f7eed27b3220..f55f8227058e 100644 --- a/arch/s390/include/asm/boot_data.h +++ b/arch/s390/include/asm/boot_data.h @@ -1,6 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ASM_S390_BOOT_DATA_H +#include <linux/string.h> #include <asm/setup.h> #include <asm/ipl.h> @@ -15,4 +16,54 @@ extern unsigned long ipl_cert_list_size; extern unsigned long early_ipl_comp_list_addr; extern unsigned long early_ipl_comp_list_size; +extern char boot_rb[PAGE_SIZE * 2]; +extern bool boot_earlyprintk; +extern size_t boot_rb_off; +extern char bootdebug_filter[128]; +extern bool bootdebug; + +#define boot_rb_foreach(cb) \ + do { \ + size_t off = boot_rb_off + strlen(boot_rb + boot_rb_off) + 1; \ + size_t len; \ + for (; off < sizeof(boot_rb) && (len = strlen(boot_rb + off)); off += len + 1) \ + cb(boot_rb + off); \ + for (off = 0; off < boot_rb_off && (len = strlen(boot_rb + off)); off += len + 1) \ + cb(boot_rb + off); \ + } while (0) + +/* + * bootdebug_filter is a comma separated list of strings, + * where each string can be a prefix of the message. 
+ */ +static inline bool bootdebug_filter_match(const char *buf) +{ + char *p = bootdebug_filter, *s; + char *end; + + if (!*p) + return true; + + end = p + strlen(p); + while (p < end) { + p = skip_spaces(p); + s = memscan(p, ',', end - p); + if (!strncmp(p, buf, s - p)) + return true; + p = s + 1; + } + return false; +} + +static inline const char *skip_timestamp(const char *buf) +{ +#ifdef CONFIG_PRINTK_TIME + const char *p = memchr(buf, ']', strlen(buf)); + + if (p && p[1] == ' ') + return p + 2; +#endif + return buf; +} + #endif /* _ASM_S390_BOOT_DATA_H */ diff --git a/arch/s390/include/asm/fpu-insn.h b/arch/s390/include/asm/fpu-insn.h index de510c9f6efa..f668bffd6dd3 100644 --- a/arch/s390/include/asm/fpu-insn.h +++ b/arch/s390/include/asm/fpu-insn.h @@ -100,19 +100,12 @@ static __always_inline void fpu_lfpc(unsigned int *fpc) */ static inline void fpu_lfpc_safe(unsigned int *fpc) { - u32 tmp; - instrument_read(fpc, sizeof(*fpc)); asm_inline volatile( - "0: lfpc %[fpc]\n" - "1: nopr %%r7\n" - ".pushsection .fixup, \"ax\"\n" - "2: lghi %[tmp],0\n" - " sfpc %[tmp]\n" - " jg 1b\n" - ".popsection\n" - EX_TABLE(1b, 2b) - : [tmp] "=d" (tmp) + " lfpc %[fpc]\n" + "0: nopr %%r7\n" + EX_TABLE_FPC(0b, 0b) + : : [fpc] "Q" (*fpc) : "memory"); } @@ -183,7 +176,19 @@ static __always_inline void fpu_vgfmg(u8 v1, u8 v2, u8 v3) : "memory"); } -#ifdef CONFIG_CC_IS_CLANG +#ifdef CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS + +static __always_inline void fpu_vl(u8 v1, const void *vxr) +{ + instrument_read(vxr, sizeof(__vector128)); + asm volatile("VL %[v1],%O[vxr],,%R[vxr]\n" + : + : [vxr] "Q" (*(__vector128 *)vxr), + [v1] "I" (v1) + : "memory"); +} + +#else /* CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS */ static __always_inline void fpu_vl(u8 v1, const void *vxr) { @@ -197,19 +202,7 @@ static __always_inline void fpu_vl(u8 v1, const void *vxr) : "memory", "1"); } -#else /* CONFIG_CC_IS_CLANG */ - -static __always_inline void fpu_vl(u8 v1, const void *vxr) -{ - instrument_read(vxr, sizeof(__vector128)); - asm volatile("VL %[v1],%O[vxr],,%R[vxr]\n" - : - : [vxr] "Q" (*(__vector128 *)vxr), - [v1] "I" (v1) - : "memory"); -} - -#endif /* CONFIG_CC_IS_CLANG */ +#endif /* CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS */ static __always_inline void fpu_vleib(u8 v, s16 val, u8 index) { @@ -238,7 +231,7 @@ static __always_inline u64 fpu_vlgvf(u8 v, u16 index) return val; } -#ifdef CONFIG_CC_IS_CLANG +#ifdef CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS static __always_inline void fpu_vll(u8 v1, u32 index, const void *vxr) { @@ -246,17 +239,15 @@ static __always_inline void fpu_vll(u8 v1, u32 index, const void *vxr) size = min(index + 1, sizeof(__vector128)); instrument_read(vxr, size); - asm volatile( - " la 1,%[vxr]\n" - " VLL %[v1],%[index],0,1\n" - : - : [vxr] "R" (*(u8 *)vxr), - [index] "d" (index), - [v1] "I" (v1) - : "memory", "1"); + asm volatile("VLL %[v1],%[index],%O[vxr],%R[vxr]\n" + : + : [vxr] "Q" (*(u8 *)vxr), + [index] "d" (index), + [v1] "I" (v1) + : "memory"); } -#else /* CONFIG_CC_IS_CLANG */ +#else /* CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS */ static __always_inline void fpu_vll(u8 v1, u32 index, const void *vxr) { @@ -264,17 +255,19 @@ static __always_inline void fpu_vll(u8 v1, u32 index, const void *vxr) size = min(index + 1, sizeof(__vector128)); instrument_read(vxr, size); - asm volatile("VLL %[v1],%[index],%O[vxr],%R[vxr]\n" - : - : [vxr] "Q" (*(u8 *)vxr), - [index] "d" (index), - [v1] "I" (v1) - : "memory"); + asm volatile( + " la 1,%[vxr]\n" + " VLL %[v1],%[index],0,1\n" + : + : [vxr] "R" (*(u8 *)vxr), + [index] "d" (index), + [v1] 
"I" (v1) + : "memory", "1"); } -#endif /* CONFIG_CC_IS_CLANG */ +#endif /* CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS */ -#ifdef CONFIG_CC_IS_CLANG +#ifdef CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS #define fpu_vlm(_v1, _v3, _vxrs) \ ({ \ @@ -284,17 +277,15 @@ static __always_inline void fpu_vll(u8 v1, u32 index, const void *vxr) } *_v = (void *)(_vxrs); \ \ instrument_read(_v, size); \ - asm volatile( \ - " la 1,%[vxrs]\n" \ - " VLM %[v1],%[v3],0,1\n" \ - : \ - : [vxrs] "R" (*_v), \ - [v1] "I" (_v1), [v3] "I" (_v3) \ - : "memory", "1"); \ + asm volatile("VLM %[v1],%[v3],%O[vxrs],%R[vxrs]\n" \ + : \ + : [vxrs] "Q" (*_v), \ + [v1] "I" (_v1), [v3] "I" (_v3) \ + : "memory"); \ (_v3) - (_v1) + 1; \ }) -#else /* CONFIG_CC_IS_CLANG */ +#else /* CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS */ #define fpu_vlm(_v1, _v3, _vxrs) \ ({ \ @@ -304,15 +295,17 @@ static __always_inline void fpu_vll(u8 v1, u32 index, const void *vxr) } *_v = (void *)(_vxrs); \ \ instrument_read(_v, size); \ - asm volatile("VLM %[v1],%[v3],%O[vxrs],%R[vxrs]\n" \ - : \ - : [vxrs] "Q" (*_v), \ - [v1] "I" (_v1), [v3] "I" (_v3) \ - : "memory"); \ + asm volatile( \ + " la 1,%[vxrs]\n" \ + " VLM %[v1],%[v3],0,1\n" \ + : \ + : [vxrs] "R" (*_v), \ + [v1] "I" (_v1), [v3] "I" (_v3) \ + : "memory", "1"); \ (_v3) - (_v1) + 1; \ }) -#endif /* CONFIG_CC_IS_CLANG */ +#endif /* CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS */ static __always_inline void fpu_vlr(u8 v1, u8 v2) { @@ -362,7 +355,18 @@ static __always_inline void fpu_vsrlb(u8 v1, u8 v2, u8 v3) : "memory"); } -#ifdef CONFIG_CC_IS_CLANG +#ifdef CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS + +static __always_inline void fpu_vst(u8 v1, const void *vxr) +{ + instrument_write(vxr, sizeof(__vector128)); + asm volatile("VST %[v1],%O[vxr],,%R[vxr]\n" + : [vxr] "=Q" (*(__vector128 *)vxr) + : [v1] "I" (v1) + : "memory"); +} + +#else /* CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS */ static __always_inline void fpu_vst(u8 v1, const void *vxr) { @@ -375,20 +379,23 @@ static __always_inline void fpu_vst(u8 v1, const void *vxr) : "memory", "1"); } -#else /* CONFIG_CC_IS_CLANG */ +#endif /* CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS */ -static __always_inline void fpu_vst(u8 v1, const void *vxr) +#ifdef CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS + +static __always_inline void fpu_vstl(u8 v1, u32 index, const void *vxr) { - instrument_write(vxr, sizeof(__vector128)); - asm volatile("VST %[v1],%O[vxr],,%R[vxr]\n" - : [vxr] "=Q" (*(__vector128 *)vxr) - : [v1] "I" (v1) + unsigned int size; + + size = min(index + 1, sizeof(__vector128)); + instrument_write(vxr, size); + asm volatile("VSTL %[v1],%[index],%O[vxr],%R[vxr]\n" + : [vxr] "=Q" (*(u8 *)vxr) + : [index] "d" (index), [v1] "I" (v1) : "memory"); } -#endif /* CONFIG_CC_IS_CLANG */ - -#ifdef CONFIG_CC_IS_CLANG +#else /* CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS */ static __always_inline void fpu_vstl(u8 v1, u32 index, const void *vxr) { @@ -404,23 +411,9 @@ static __always_inline void fpu_vstl(u8 v1, u32 index, const void *vxr) : "memory", "1"); } -#else /* CONFIG_CC_IS_CLANG */ +#endif /* CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS */ -static __always_inline void fpu_vstl(u8 v1, u32 index, const void *vxr) -{ - unsigned int size; - - size = min(index + 1, sizeof(__vector128)); - instrument_write(vxr, size); - asm volatile("VSTL %[v1],%[index],%O[vxr],%R[vxr]\n" - : [vxr] "=Q" (*(u8 *)vxr) - : [index] "d" (index), [v1] "I" (v1) - : "memory"); -} - -#endif /* CONFIG_CC_IS_CLANG */ - -#ifdef CONFIG_CC_IS_CLANG +#ifdef CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS #define fpu_vstm(_v1, _v3, _vxrs) \ ({ \ @@ -430,16 +423,14 @@ static 
__always_inline void fpu_vstl(u8 v1, u32 index, const void *vxr) } *_v = (void *)(_vxrs); \ \ instrument_write(_v, size); \ - asm volatile( \ - " la 1,%[vxrs]\n" \ - " VSTM %[v1],%[v3],0,1\n" \ - : [vxrs] "=R" (*_v) \ - : [v1] "I" (_v1), [v3] "I" (_v3) \ - : "memory", "1"); \ + asm volatile("VSTM %[v1],%[v3],%O[vxrs],%R[vxrs]\n" \ + : [vxrs] "=Q" (*_v) \ + : [v1] "I" (_v1), [v3] "I" (_v3) \ + : "memory"); \ (_v3) - (_v1) + 1; \ }) -#else /* CONFIG_CC_IS_CLANG */ +#else /* CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS */ #define fpu_vstm(_v1, _v3, _vxrs) \ ({ \ @@ -449,14 +440,16 @@ static __always_inline void fpu_vstl(u8 v1, u32 index, const void *vxr) } *_v = (void *)(_vxrs); \ \ instrument_write(_v, size); \ - asm volatile("VSTM %[v1],%[v3],%O[vxrs],%R[vxrs]\n" \ - : [vxrs] "=Q" (*_v) \ - : [v1] "I" (_v1), [v3] "I" (_v3) \ - : "memory"); \ + asm volatile( \ + " la 1,%[vxrs]\n" \ + " VSTM %[v1],%[v3],0,1\n" \ + : [vxrs] "=R" (*_v) \ + : [v1] "I" (_v1), [v3] "I" (_v3) \ + : "memory", "1"); \ (_v3) - (_v1) + 1; \ }) -#endif /* CONFIG_CC_IS_CLANG */ +#endif /* CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS */ static __always_inline void fpu_vupllf(u8 v1, u8 v2) { diff --git a/arch/s390/include/asm/ftrace.h b/arch/s390/include/asm/ftrace.h index a3b73a4f626e..185331e91f83 100644 --- a/arch/s390/include/asm/ftrace.h +++ b/arch/s390/include/asm/ftrace.h @@ -51,6 +51,7 @@ static inline unsigned long ftrace_call_adjust(unsigned long addr) { return addr; } +#define ftrace_get_symaddr(fentry_ip) ((unsigned long)(fentry_ip)) #include <linux/ftrace_regs.h> diff --git a/arch/s390/include/asm/futex.h b/arch/s390/include/asm/futex.h index 752a2310f0d6..f5781794356b 100644 --- a/arch/s390/include/asm/futex.h +++ b/arch/s390/include/asm/futex.h @@ -2,80 +2,95 @@ #ifndef _ASM_S390_FUTEX_H #define _ASM_S390_FUTEX_H +#include <linux/instrumented.h> #include <linux/uaccess.h> #include <linux/futex.h> #include <asm/asm-extable.h> #include <asm/mmu_context.h> #include <asm/errno.h> -#define __futex_atomic_op(insn, ret, oldval, newval, uaddr, oparg) \ - asm volatile( \ - " sacf 256\n" \ - "0: l %1,0(%6)\n" \ - "1:"insn \ - "2: cs %1,%2,0(%6)\n" \ - "3: jl 1b\n" \ - " lhi %0,0\n" \ - "4: sacf 768\n" \ - EX_TABLE(0b,4b) EX_TABLE(1b,4b) \ - EX_TABLE(2b,4b) EX_TABLE(3b,4b) \ - : "=d" (ret), "=&d" (oldval), "=&d" (newval), \ - "=m" (*uaddr) \ - : "0" (-EFAULT), "d" (oparg), "a" (uaddr), \ - "m" (*uaddr) : "cc"); +#define FUTEX_OP_FUNC(name, insn) \ +static uaccess_kmsan_or_inline int \ +__futex_atomic_##name(int oparg, int *old, u32 __user *uaddr) \ +{ \ + int rc, new; \ + \ + instrument_copy_from_user_before(old, uaddr, sizeof(*old)); \ + asm_inline volatile( \ + " sacf 256\n" \ + "0: l %[old],%[uaddr]\n" \ + "1:"insn \ + "2: cs %[old],%[new],%[uaddr]\n" \ + "3: jl 1b\n" \ + " lhi %[rc],0\n" \ + "4: sacf 768\n" \ + EX_TABLE_UA_FAULT(0b, 4b, %[rc]) \ + EX_TABLE_UA_FAULT(1b, 4b, %[rc]) \ + EX_TABLE_UA_FAULT(2b, 4b, %[rc]) \ + EX_TABLE_UA_FAULT(3b, 4b, %[rc]) \ + : [rc] "=d" (rc), [old] "=&d" (*old), \ + [new] "=&d" (new), [uaddr] "+Q" (*uaddr) \ + : [oparg] "d" (oparg) \ + : "cc"); \ + if (!rc) \ + instrument_copy_from_user_after(old, uaddr, sizeof(*old), 0); \ + return rc; \ +} + +FUTEX_OP_FUNC(set, "lr %[new],%[oparg]\n") +FUTEX_OP_FUNC(add, "lr %[new],%[old]\n ar %[new],%[oparg]\n") +FUTEX_OP_FUNC(or, "lr %[new],%[old]\n or %[new],%[oparg]\n") +FUTEX_OP_FUNC(and, "lr %[new],%[old]\n nr %[new],%[oparg]\n") +FUTEX_OP_FUNC(xor, "lr %[new],%[old]\n xr %[new],%[oparg]\n") -static inline int arch_futex_atomic_op_inuser(int op, int oparg, int 
*oval, - u32 __user *uaddr) +static inline +int arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *uaddr) { - int oldval = 0, newval, ret; + int old, rc; switch (op) { case FUTEX_OP_SET: - __futex_atomic_op("lr %2,%5\n", - ret, oldval, newval, uaddr, oparg); + rc = __futex_atomic_set(oparg, &old, uaddr); break; case FUTEX_OP_ADD: - __futex_atomic_op("lr %2,%1\nar %2,%5\n", - ret, oldval, newval, uaddr, oparg); + rc = __futex_atomic_add(oparg, &old, uaddr); break; case FUTEX_OP_OR: - __futex_atomic_op("lr %2,%1\nor %2,%5\n", - ret, oldval, newval, uaddr, oparg); + rc = __futex_atomic_or(oparg, &old, uaddr); break; case FUTEX_OP_ANDN: - __futex_atomic_op("lr %2,%1\nnr %2,%5\n", - ret, oldval, newval, uaddr, ~oparg); + rc = __futex_atomic_and(~oparg, &old, uaddr); break; case FUTEX_OP_XOR: - __futex_atomic_op("lr %2,%1\nxr %2,%5\n", - ret, oldval, newval, uaddr, oparg); + rc = __futex_atomic_xor(oparg, &old, uaddr); break; default: - ret = -ENOSYS; + rc = -ENOSYS; } - - if (!ret) - *oval = oldval; - - return ret; + if (!rc) + *oval = old; + return rc; } -static inline int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, - u32 oldval, u32 newval) +static uaccess_kmsan_or_inline +int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, u32 oldval, u32 newval) { - int ret; + int rc; - asm volatile( - " sacf 256\n" - "0: cs %1,%4,0(%5)\n" - "1: la %0,0\n" - "2: sacf 768\n" - EX_TABLE(0b,2b) EX_TABLE(1b,2b) - : "=d" (ret), "+d" (oldval), "=m" (*uaddr) - : "0" (-EFAULT), "d" (newval), "a" (uaddr), "m" (*uaddr) + instrument_copy_from_user_before(uval, uaddr, sizeof(*uval)); + asm_inline volatile( + " sacf 256\n" + "0: cs %[old],%[new],%[uaddr]\n" + "1: lhi %[rc],0\n" + "2: sacf 768\n" + EX_TABLE_UA_FAULT(0b, 2b, %[rc]) + EX_TABLE_UA_FAULT(1b, 2b, %[rc]) + : [rc] "=d" (rc), [old] "+d" (oldval), [uaddr] "+Q" (*uaddr) + : [new] "d" (newval) : "cc", "memory"); *uval = oldval; - return ret; + instrument_copy_from_user_after(uval, uaddr, sizeof(*uval), 0); + return rc; } #endif /* _ASM_S390_FUTEX_H */ diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h index 4f43cdd9835b..1ff145f7b52b 100644 --- a/arch/s390/include/asm/page.h +++ b/arch/s390/include/asm/page.h @@ -184,7 +184,11 @@ extern struct vm_layout vm_layout; #define __kaslr_offset vm_layout.kaslr_offset #define __kaslr_offset_phys vm_layout.kaslr_offset_phys +#ifdef CONFIG_RANDOMIZE_IDENTITY_BASE #define __identity_base vm_layout.identity_base +#else +#define __identity_base 0UL +#endif #define ident_map_size vm_layout.identity_size static inline unsigned long kaslr_offset(void) diff --git a/arch/s390/include/asm/physmem_info.h b/arch/s390/include/asm/physmem_info.h index 51b68a43e195..7ef3bbec98b0 100644 --- a/arch/s390/include/asm/physmem_info.h +++ b/arch/s390/include/asm/physmem_info.h @@ -26,7 +26,7 @@ enum reserved_range_type { RR_AMODE31, RR_IPLREPORT, RR_CERT_COMP_LIST, - RR_MEM_DETECT_EXTENDED, + RR_MEM_DETECT_EXT, RR_VMEM, RR_MAX }; @@ -128,7 +128,7 @@ static inline const char *get_rr_type_name(enum reserved_range_type t) RR_TYPE_NAME(AMODE31); RR_TYPE_NAME(IPLREPORT); RR_TYPE_NAME(CERT_COMP_LIST); - RR_TYPE_NAME(MEM_DETECT_EXTENDED); + RR_TYPE_NAME(MEM_DETECT_EXT); RR_TYPE_NAME(VMEM); default: return "UNKNOWN"; diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h index 4da3b2956285..18f37dff03c9 100644 --- a/arch/s390/include/asm/sclp.h +++ b/arch/s390/include/asm/sclp.h @@ -172,6 +172,7 @@ void sclp_early_printk(const char *s); void __sclp_early_printk(const 
char *s, unsigned int len); void sclp_emergency_printk(const char *s); +int sclp_init(void); int sclp_early_get_memsize(unsigned long *mem); int sclp_early_get_hsa_size(unsigned long *hsa_size); int _sclp_get_core_info(struct sclp_core_info *info); diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h index a81f897a81ce..f5920163ee97 100644 --- a/arch/s390/include/asm/uaccess.h +++ b/arch/s390/include/asm/uaccess.h @@ -22,16 +22,117 @@ void debug_user_asce(int exit); -unsigned long __must_check -raw_copy_from_user(void *to, const void __user *from, unsigned long n); +union oac { + unsigned int val; + struct { + struct { + unsigned short key : 4; + unsigned short : 4; + unsigned short as : 2; + unsigned short : 4; + unsigned short k : 1; + unsigned short a : 1; + } oac1; + struct { + unsigned short key : 4; + unsigned short : 4; + unsigned short as : 2; + unsigned short : 4; + unsigned short k : 1; + unsigned short a : 1; + } oac2; + }; +}; -unsigned long __must_check -raw_copy_to_user(void __user *to, const void *from, unsigned long n); +static __always_inline __must_check unsigned long +raw_copy_from_user_key(void *to, const void __user *from, unsigned long size, unsigned long key) +{ + unsigned long rem; + union oac spec = { + .oac2.key = key, + .oac2.as = PSW_BITS_AS_SECONDARY, + .oac2.k = 1, + .oac2.a = 1, + }; -#ifndef CONFIG_KASAN -#define INLINE_COPY_FROM_USER -#define INLINE_COPY_TO_USER -#endif + asm_inline volatile( + " lr %%r0,%[spec]\n" + "0: mvcos 0(%[to]),0(%[from]),%[size]\n" + "1: jz 5f\n" + " algr %[size],%[val]\n" + " slgr %[from],%[val]\n" + " slgr %[to],%[val]\n" + " j 0b\n" + "2: la %[rem],4095(%[from])\n" /* rem = from + 4095 */ + " nr %[rem],%[val]\n" /* rem = (from + 4095) & -4096 */ + " slgr %[rem],%[from]\n" + " clgr %[size],%[rem]\n" /* copy crosses next page boundary? */ + " jnh 6f\n" + "3: mvcos 0(%[to]),0(%[from]),%[rem]\n" + "4: slgr %[size],%[rem]\n" + " j 6f\n" + "5: lghi %[size],0\n" + "6:\n" + EX_TABLE(0b, 2b) + EX_TABLE(1b, 2b) + EX_TABLE(3b, 6b) + EX_TABLE(4b, 6b) + : [size] "+&a" (size), [from] "+&a" (from), [to] "+&a" (to), [rem] "=&a" (rem) + : [val] "a" (-4096UL), [spec] "d" (spec.val) + : "cc", "memory", "0"); + return size; +} + +static __always_inline __must_check unsigned long +raw_copy_from_user(void *to, const void __user *from, unsigned long n) +{ + return raw_copy_from_user_key(to, from, n, 0); +} + +static __always_inline __must_check unsigned long +raw_copy_to_user_key(void __user *to, const void *from, unsigned long size, unsigned long key) +{ + unsigned long rem; + union oac spec = { + .oac1.key = key, + .oac1.as = PSW_BITS_AS_SECONDARY, + .oac1.k = 1, + .oac1.a = 1, + }; + + asm_inline volatile( + " lr %%r0,%[spec]\n" + "0: mvcos 0(%[to]),0(%[from]),%[size]\n" + "1: jz 5f\n" + " algr %[size],%[val]\n" + " slgr %[to],%[val]\n" + " slgr %[from],%[val]\n" + " j 0b\n" + "2: la %[rem],4095(%[to])\n" /* rem = to + 4095 */ + " nr %[rem],%[val]\n" /* rem = (to + 4095) & -4096 */ + " slgr %[rem],%[to]\n" + " clgr %[size],%[rem]\n" /* copy crosses next page boundary? 
*/ + " jnh 6f\n" + "3: mvcos 0(%[to]),0(%[from]),%[rem]\n" + "4: slgr %[size],%[rem]\n" + " j 6f\n" + "5: lghi %[size],0\n" + "6:\n" + EX_TABLE(0b, 2b) + EX_TABLE(1b, 2b) + EX_TABLE(3b, 6b) + EX_TABLE(4b, 6b) + : [size] "+&a" (size), [to] "+&a" (to), [from] "+&a" (from), [rem] "=&a" (rem) + : [val] "a" (-4096UL), [spec] "d" (spec.val) + : "cc", "memory", "0"); + return size; +} + +static __always_inline __must_check unsigned long +raw_copy_to_user(void __user *to, const void *from, unsigned long n) +{ + return raw_copy_to_user_key(to, from, n, 0); +} unsigned long __must_check _copy_from_user_key(void *to, const void __user *from, unsigned long n, unsigned long key); @@ -55,63 +156,71 @@ copy_to_user_key(void __user *to, const void *from, unsigned long n, unsigned lo return n; } -union oac { - unsigned int val; - struct { - struct { - unsigned short key : 4; - unsigned short : 4; - unsigned short as : 2; - unsigned short : 4; - unsigned short k : 1; - unsigned short a : 1; - } oac1; - struct { - unsigned short key : 4; - unsigned short : 4; - unsigned short as : 2; - unsigned short : 4; - unsigned short k : 1; - unsigned short a : 1; - } oac2; - }; -}; - int __noreturn __put_user_bad(void); #ifdef CONFIG_KMSAN -#define get_put_user_noinstr_attributes \ - noinline __maybe_unused __no_sanitize_memory +#define uaccess_kmsan_or_inline noinline __maybe_unused __no_sanitize_memory #else -#define get_put_user_noinstr_attributes __always_inline +#define uaccess_kmsan_or_inline __always_inline #endif -#define DEFINE_PUT_USER(type) \ -static get_put_user_noinstr_attributes int \ +#ifdef CONFIG_CC_HAS_ASM_GOTO_OUTPUT + +#define DEFINE_PUT_USER_NOINSTR(type) \ +static uaccess_kmsan_or_inline int \ +__put_user_##type##_noinstr(unsigned type __user *to, \ + unsigned type *from, \ + unsigned long size) \ +{ \ + asm goto( \ + " llilh %%r0,%[spec]\n" \ + "0: mvcos %[to],%[from],%[size]\n" \ + "1: nopr %%r7\n" \ + EX_TABLE(0b, %l[Efault]) \ + EX_TABLE(1b, %l[Efault]) \ + : [to] "+Q" (*to) \ + : [size] "d" (size), [from] "Q" (*from), \ + [spec] "I" (0x81) \ + : "cc", "0" \ + : Efault \ + ); \ + return 0; \ +Efault: \ + return -EFAULT; \ +} + +#else /* CONFIG_CC_HAS_ASM_GOTO_OUTPUT */ + +#define DEFINE_PUT_USER_NOINSTR(type) \ +static uaccess_kmsan_or_inline int \ __put_user_##type##_noinstr(unsigned type __user *to, \ unsigned type *from, \ unsigned long size) \ { \ - union oac __oac_spec = { \ - .oac1.as = PSW_BITS_AS_SECONDARY, \ - .oac1.a = 1, \ - }; \ int rc; \ \ asm volatile( \ - " lr 0,%[spec]\n" \ - "0: mvcos %[_to],%[_from],%[_size]\n" \ - "1: xr %[rc],%[rc]\n" \ + " llilh %%r0,%[spec]\n" \ + "0: mvcos %[to],%[from],%[size]\n" \ + "1: lhi %[rc],0\n" \ "2:\n" \ - EX_TABLE_UA_STORE(0b, 2b, %[rc]) \ - EX_TABLE_UA_STORE(1b, 2b, %[rc]) \ - : [rc] "=&d" (rc), [_to] "+Q" (*(to)) \ - : [_size] "d" (size), [_from] "Q" (*(from)), \ - [spec] "d" (__oac_spec.val) \ + EX_TABLE_UA_FAULT(0b, 2b, %[rc]) \ + EX_TABLE_UA_FAULT(1b, 2b, %[rc]) \ + : [rc] "=d" (rc), [to] "+Q" (*to) \ + : [size] "d" (size), [from] "Q" (*from), \ + [spec] "I" (0x81) \ : "cc", "0"); \ return rc; \ -} \ - \ +} + +#endif /* CONFIG_CC_HAS_ASM_GOTO_OUTPUT */ + +DEFINE_PUT_USER_NOINSTR(char); +DEFINE_PUT_USER_NOINSTR(short); +DEFINE_PUT_USER_NOINSTR(int); +DEFINE_PUT_USER_NOINSTR(long); + +#define DEFINE_PUT_USER(type) \ static __always_inline int \ __put_user_##type(unsigned type __user *to, unsigned type *from, \ unsigned long size) \ @@ -128,69 +237,111 @@ DEFINE_PUT_USER(short); DEFINE_PUT_USER(int); DEFINE_PUT_USER(long); -static 
__always_inline int __put_user_fn(void *x, void __user *ptr, unsigned long size) -{ - int rc; +#define __put_user(x, ptr) \ +({ \ + __typeof__(*(ptr)) __x = (x); \ + int __prc; \ + \ + __chk_user_ptr(ptr); \ + switch (sizeof(*(ptr))) { \ + case 1: \ + __prc = __put_user_char((unsigned char __user *)(ptr), \ + (unsigned char *)&__x, \ + sizeof(*(ptr))); \ + break; \ + case 2: \ + __prc = __put_user_short((unsigned short __user *)(ptr),\ + (unsigned short *)&__x, \ + sizeof(*(ptr))); \ + break; \ + case 4: \ + __prc = __put_user_int((unsigned int __user *)(ptr), \ + (unsigned int *)&__x, \ + sizeof(*(ptr))); \ + break; \ + case 8: \ + __prc = __put_user_long((unsigned long __user *)(ptr), \ + (unsigned long *)&__x, \ + sizeof(*(ptr))); \ + break; \ + default: \ + __prc = __put_user_bad(); \ + break; \ + } \ + __builtin_expect(__prc, 0); \ +}) - switch (size) { - case 1: - rc = __put_user_char((unsigned char __user *)ptr, - (unsigned char *)x, - size); - break; - case 2: - rc = __put_user_short((unsigned short __user *)ptr, - (unsigned short *)x, - size); - break; - case 4: - rc = __put_user_int((unsigned int __user *)ptr, - (unsigned int *)x, - size); - break; - case 8: - rc = __put_user_long((unsigned long __user *)ptr, - (unsigned long *)x, - size); - break; - default: - __put_user_bad(); - break; - } - return rc; -} +#define put_user(x, ptr) \ +({ \ + might_fault(); \ + __put_user(x, ptr); \ +}) int __noreturn __get_user_bad(void); -#define DEFINE_GET_USER(type) \ -static get_put_user_noinstr_attributes int \ +#ifdef CONFIG_CC_HAS_ASM_GOTO_OUTPUT + +#define DEFINE_GET_USER_NOINSTR(type) \ +static uaccess_kmsan_or_inline int \ __get_user_##type##_noinstr(unsigned type *to, \ - unsigned type __user *from, \ + const unsigned type __user *from, \ + unsigned long size) \ +{ \ + asm goto( \ + " lhi %%r0,%[spec]\n" \ + "0: mvcos %[to],%[from],%[size]\n" \ + "1: nopr %%r7\n" \ + EX_TABLE(0b, %l[Efault]) \ + EX_TABLE(1b, %l[Efault]) \ + : [to] "=Q" (*to) \ + : [size] "d" (size), [from] "Q" (*from), \ + [spec] "I" (0x81) \ + : "cc", "0" \ + : Efault \ + ); \ + return 0; \ +Efault: \ + *to = 0; \ + return -EFAULT; \ +} + +#else /* CONFIG_CC_HAS_ASM_GOTO_OUTPUT */ + +#define DEFINE_GET_USER_NOINSTR(type) \ +static uaccess_kmsan_or_inline int \ +__get_user_##type##_noinstr(unsigned type *to, \ + const unsigned type __user *from, \ unsigned long size) \ { \ - union oac __oac_spec = { \ - .oac2.as = PSW_BITS_AS_SECONDARY, \ - .oac2.a = 1, \ - }; \ int rc; \ \ asm volatile( \ - " lr 0,%[spec]\n" \ - "0: mvcos 0(%[_to]),%[_from],%[_size]\n" \ - "1: xr %[rc],%[rc]\n" \ + " lhi %%r0,%[spec]\n" \ + "0: mvcos %[to],%[from],%[size]\n" \ + "1: lhi %[rc],0\n" \ "2:\n" \ - EX_TABLE_UA_LOAD_MEM(0b, 2b, %[rc], %[_to], %[_ksize]) \ - EX_TABLE_UA_LOAD_MEM(1b, 2b, %[rc], %[_to], %[_ksize]) \ - : [rc] "=&d" (rc), "=Q" (*(to)) \ - : [_size] "d" (size), [_from] "Q" (*(from)), \ - [spec] "d" (__oac_spec.val), [_to] "a" (to), \ - [_ksize] "K" (size) \ + EX_TABLE_UA_FAULT(0b, 2b, %[rc]) \ + EX_TABLE_UA_FAULT(1b, 2b, %[rc]) \ + : [rc] "=d" (rc), [to] "=Q" (*to) \ + : [size] "d" (size), [from] "Q" (*from), \ + [spec] "I" (0x81) \ : "cc", "0"); \ + if (likely(!rc)) \ + return 0; \ + *to = 0; \ return rc; \ -} \ - \ +} + +#endif /* CONFIG_CC_HAS_ASM_GOTO_OUTPUT */ + +DEFINE_GET_USER_NOINSTR(char); +DEFINE_GET_USER_NOINSTR(short); +DEFINE_GET_USER_NOINSTR(int); +DEFINE_GET_USER_NOINSTR(long); + +#define DEFINE_GET_USER(type) \ static __always_inline int \ -__get_user_##type(unsigned type *to, unsigned type __user *from, \ 
+__get_user_##type(unsigned type *to, const unsigned type __user *from, \ unsigned long size) \ { \ int rc; \ @@ -205,107 +356,50 @@ DEFINE_GET_USER(short); DEFINE_GET_USER(int); DEFINE_GET_USER(long); -static __always_inline int __get_user_fn(void *x, const void __user *ptr, unsigned long size) -{ - int rc; - - switch (size) { - case 1: - rc = __get_user_char((unsigned char *)x, - (unsigned char __user *)ptr, - size); - break; - case 2: - rc = __get_user_short((unsigned short *)x, - (unsigned short __user *)ptr, - size); - break; - case 4: - rc = __get_user_int((unsigned int *)x, - (unsigned int __user *)ptr, - size); - break; - case 8: - rc = __get_user_long((unsigned long *)x, - (unsigned long __user *)ptr, - size); - break; - default: - __get_user_bad(); - break; - } - return rc; -} - -/* - * These are the main single-value transfer routines. They automatically - * use the right size if we just have the right pointer type. - */ -#define __put_user(x, ptr) \ -({ \ - __typeof__(*(ptr)) __x = (x); \ - int __pu_err = -EFAULT; \ - \ - __chk_user_ptr(ptr); \ - switch (sizeof(*(ptr))) { \ - case 1: \ - case 2: \ - case 4: \ - case 8: \ - __pu_err = __put_user_fn(&__x, ptr, sizeof(*(ptr))); \ - break; \ - default: \ - __put_user_bad(); \ - break; \ - } \ - __builtin_expect(__pu_err, 0); \ -}) - -#define put_user(x, ptr) \ -({ \ - might_fault(); \ - __put_user(x, ptr); \ -}) - #define __get_user(x, ptr) \ ({ \ - int __gu_err = -EFAULT; \ + const __user void *____guptr = (ptr); \ + int __grc; \ \ __chk_user_ptr(ptr); \ switch (sizeof(*(ptr))) { \ case 1: { \ + const unsigned char __user *__guptr = ____guptr; \ unsigned char __x; \ \ - __gu_err = __get_user_fn(&__x, ptr, sizeof(*(ptr))); \ + __grc = __get_user_char(&__x, __guptr, sizeof(*(ptr))); \ (x) = *(__force __typeof__(*(ptr)) *)&__x; \ break; \ }; \ case 2: { \ + const unsigned short __user *__guptr = ____guptr; \ unsigned short __x; \ \ - __gu_err = __get_user_fn(&__x, ptr, sizeof(*(ptr))); \ + __grc = __get_user_short(&__x, __guptr, sizeof(*(ptr)));\ (x) = *(__force __typeof__(*(ptr)) *)&__x; \ break; \ }; \ case 4: { \ + const unsigned int __user *__guptr = ____guptr; \ unsigned int __x; \ \ - __gu_err = __get_user_fn(&__x, ptr, sizeof(*(ptr))); \ + __grc = __get_user_int(&__x, __guptr, sizeof(*(ptr))); \ (x) = *(__force __typeof__(*(ptr)) *)&__x; \ break; \ }; \ case 8: { \ + const unsigned long __user *__guptr = ____guptr; \ unsigned long __x; \ \ - __gu_err = __get_user_fn(&__x, ptr, sizeof(*(ptr))); \ + __grc = __get_user_long(&__x, __guptr, sizeof(*(ptr))); \ (x) = *(__force __typeof__(*(ptr)) *)&__x; \ break; \ }; \ default: \ - __get_user_bad(); \ + __grc = __get_user_bad(); \ break; \ } \ - __builtin_expect(__gu_err, 0); \ + __builtin_expect(__grc, 0); \ }) #define get_user(x, ptr) \ @@ -341,109 +435,71 @@ static inline void *s390_kernel_write(void *dst, const void *src, size_t size) return __s390_kernel_write(dst, src, size); } -int __noreturn __put_kernel_bad(void); +void __noreturn __mvc_kernel_nofault_bad(void); -#define __put_kernel_asm(val, to, insn) \ -({ \ - int __rc; \ - \ - asm volatile( \ - "0: " insn " %[_val],%[_to]\n" \ - "1: xr %[rc],%[rc]\n" \ - "2:\n" \ - EX_TABLE_UA_STORE(0b, 2b, %[rc]) \ - EX_TABLE_UA_STORE(1b, 2b, %[rc]) \ - : [rc] "=d" (__rc), [_to] "+Q" (*(to)) \ - : [_val] "d" (val) \ - : "cc"); \ - __rc; \ -}) +#if defined(CONFIG_CC_HAS_ASM_GOTO_OUTPUT) && defined(CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS) -#define __put_kernel_nofault(dst, src, type, err_label) \ +#define __mvc_kernel_nofault(dst, src, type, 
err_label) \ do { \ - unsigned long __x = (unsigned long)(*((type *)(src))); \ - int __pk_err; \ - \ switch (sizeof(type)) { \ case 1: \ - __pk_err = __put_kernel_asm(__x, (type *)(dst), "stc"); \ - break; \ case 2: \ - __pk_err = __put_kernel_asm(__x, (type *)(dst), "sth"); \ - break; \ case 4: \ - __pk_err = __put_kernel_asm(__x, (type *)(dst), "st"); \ - break; \ case 8: \ - __pk_err = __put_kernel_asm(__x, (type *)(dst), "stg"); \ + asm goto( \ + "0: mvc %O[_dst](%[_len],%R[_dst]),%[_src]\n" \ + "1: nopr %%r7\n" \ + EX_TABLE(0b, %l[err_label]) \ + EX_TABLE(1b, %l[err_label]) \ + : [_dst] "=Q" (*(type *)dst) \ + : [_src] "Q" (*(type *)(src)), \ + [_len] "I" (sizeof(type)) \ + : \ + : err_label); \ break; \ default: \ - __pk_err = __put_kernel_bad(); \ + __mvc_kernel_nofault_bad(); \ break; \ } \ - if (unlikely(__pk_err)) \ - goto err_label; \ } while (0) -int __noreturn __get_kernel_bad(void); - -#define __get_kernel_asm(val, from, insn) \ -({ \ - int __rc; \ - \ - asm volatile( \ - "0: " insn " %[_val],%[_from]\n" \ - "1: xr %[rc],%[rc]\n" \ - "2:\n" \ - EX_TABLE_UA_LOAD_REG(0b, 2b, %[rc], %[_val]) \ - EX_TABLE_UA_LOAD_REG(1b, 2b, %[rc], %[_val]) \ - : [rc] "=d" (__rc), [_val] "=d" (val) \ - : [_from] "Q" (*(from)) \ - : "cc"); \ - __rc; \ -}) +#else /* CONFIG_CC_HAS_ASM_GOTO_OUTPUT) && CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS */ -#define __get_kernel_nofault(dst, src, type, err_label) \ +#define __mvc_kernel_nofault(dst, src, type, err_label) \ do { \ - int __gk_err; \ + type *(__dst) = (type *)(dst); \ + int __rc; \ \ switch (sizeof(type)) { \ - case 1: { \ - unsigned char __x; \ - \ - __gk_err = __get_kernel_asm(__x, (type *)(src), "ic"); \ - *((type *)(dst)) = (type)__x; \ - break; \ - }; \ - case 2: { \ - unsigned short __x; \ - \ - __gk_err = __get_kernel_asm(__x, (type *)(src), "lh"); \ - *((type *)(dst)) = (type)__x; \ - break; \ - }; \ - case 4: { \ - unsigned int __x; \ - \ - __gk_err = __get_kernel_asm(__x, (type *)(src), "l"); \ - *((type *)(dst)) = (type)__x; \ - break; \ - }; \ - case 8: { \ - unsigned long __x; \ - \ - __gk_err = __get_kernel_asm(__x, (type *)(src), "lg"); \ - *((type *)(dst)) = (type)__x; \ + case 1: \ + case 2: \ + case 4: \ + case 8: \ + asm_inline volatile( \ + "0: mvc 0(%[_len],%[_dst]),%[_src]\n" \ + "1: lhi %[_rc],0\n" \ + "2:\n" \ + EX_TABLE_UA_FAULT(0b, 2b, %[_rc]) \ + EX_TABLE_UA_FAULT(1b, 2b, %[_rc]) \ + : [_rc] "=d" (__rc), \ + "=m" (*__dst) \ + : [_src] "Q" (*(type *)(src)), \ + [_dst] "a" (__dst), \ + [_len] "I" (sizeof(type))); \ + if (__rc) \ + goto err_label; \ break; \ - }; \ default: \ - __gk_err = __get_kernel_bad(); \ + __mvc_kernel_nofault_bad(); \ break; \ } \ - if (unlikely(__gk_err)) \ - goto err_label; \ } while (0) +#endif /* CONFIG_CC_HAS_ASM_GOTO_OUTPUT && CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS */ + +#define __get_kernel_nofault __mvc_kernel_nofault +#define __put_kernel_nofault __mvc_kernel_nofault + void __cmpxchg_user_key_called_with_bad_pointer(void); #define CMPXCHG_USER_KEY_MAX_LOOPS 128 diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index 62f8f5a750a3..2fa25164df7d 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -50,6 +50,7 @@ decompressor_handled_param(facilities); decompressor_handled_param(nokaslr); decompressor_handled_param(cmma); decompressor_handled_param(relocate_lowcore); +decompressor_handled_param(bootdebug); #if IS_ENABLED(CONFIG_KVM) decompressor_handled_param(prot_virt); #endif @@ -58,7 +59,7 @@ static void __init kasan_early_init(void) { #ifdef CONFIG_KASAN 
init_task.kasan_depth = 0;
- sclp_early_printk("KernelAddressSanitizer initialized\n");
+ pr_info("KernelAddressSanitizer initialized\n");
#endif
}
diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c
index c0b2c97efefb..63ba6306632e 100644
--- a/arch/s390/kernel/ftrace.c
+++ b/arch/s390/kernel/ftrace.c
@@ -266,18 +266,13 @@ void ftrace_graph_func(unsigned long ip, unsigned long parent_ip,
struct ftrace_ops *op, struct ftrace_regs *fregs)
{
unsigned long *parent = &arch_ftrace_regs(fregs)->regs.gprs[14];
- int bit;
if (unlikely(ftrace_graph_is_dead()))
return;
if (unlikely(atomic_read(&current->tracing_graph_pause)))
return;
- bit = ftrace_test_recursion_trylock(ip, *parent);
- if (bit < 0)
- return;
if (!function_graph_enter_regs(*parent, ip, 0, parent, fregs))
*parent = (unsigned long)&return_to_handler;
- ftrace_test_recursion_unlock(bit);
}
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index 1298f0860733..d78bcfe707b5 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -157,6 +157,12 @@ u64 __bootdata_preserved(stfle_fac_list[16]);
EXPORT_SYMBOL(stfle_fac_list);
struct oldmem_data __bootdata_preserved(oldmem_data);
+char __bootdata(boot_rb)[PAGE_SIZE * 2];
+bool __bootdata(boot_earlyprintk);
+size_t __bootdata(boot_rb_off);
+char __bootdata(bootdebug_filter)[128];
+bool __bootdata(bootdebug);
+
unsigned long __bootdata_preserved(VMALLOC_START);
EXPORT_SYMBOL(VMALLOC_START);
@@ -686,7 +692,7 @@ static void __init reserve_physmem_info(void)
{
unsigned long addr, size;
- if (get_physmem_reserved(RR_MEM_DETECT_EXTENDED, &addr, &size))
+ if (get_physmem_reserved(RR_MEM_DETECT_EXT, &addr, &size))
memblock_reserve(addr, size);
}
@@ -694,7 +700,7 @@ static void __init free_physmem_info(void)
{
unsigned long addr, size;
- if (get_physmem_reserved(RR_MEM_DETECT_EXTENDED, &addr, &size))
+ if (get_physmem_reserved(RR_MEM_DETECT_EXT, &addr, &size))
memblock_phys_free(addr, size);
}
@@ -724,7 +730,7 @@ static void __init reserve_lowcore(void)
void *lowcore_end = lowcore_start + sizeof(struct lowcore);
void *start, *end;
- if ((void *)__identity_base < lowcore_end) {
+ if (absolute_pointer(__identity_base) < lowcore_end) {
start = max(lowcore_start, (void *)__identity_base);
end = min(lowcore_end, (void *)(__identity_base + ident_map_size));
memblock_reserve(__pa(start), __pa(end));
@@ -866,6 +872,23 @@ static void __init log_component_list(void)
}
/*
+ * Print avoiding interpretation of % in buf and taking bootdebug option
+ * into consideration.
+ */
+static void __init print_rb_entry(const char *buf)
+{
+ char fmt[] = KERN_SOH "0boot: %s";
+ int level = printk_get_level(buf);
+
+ buf = skip_timestamp(printk_skip_level(buf));
+ if (level == KERN_DEBUG[1] && (!bootdebug || !bootdebug_filter_match(buf)))
+ return;
+
+ fmt[1] = level;
+ printk(fmt, buf);
+}
+
+/*
* Setup function called from init/main.c just after the banner
* was printed.
*/ @@ -884,6 +907,9 @@ void __init setup_arch(char **cmdline_p) pr_info("Linux is running natively in 64-bit mode\n"); else pr_info("Linux is running as a guest in 64-bit mode\n"); + /* Print decompressor messages if not already printed */ + if (!boot_earlyprintk) + boot_rb_foreach(print_rb_entry); if (have_relocated_lowcore()) pr_info("Lowcore relocated to 0x%px\n", get_lowcore()); @@ -987,3 +1013,8 @@ void __init setup_arch(char **cmdline_p) /* Add system specific data to the random pool */ setup_randomness(); } + +void __init arch_cpu_finalize_init(void) +{ + sclp_init(); +} diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S index 377b9aaf8c92..ff1ddba96352 100644 --- a/arch/s390/kernel/vmlinux.lds.S +++ b/arch/s390/kernel/vmlinux.lds.S @@ -52,7 +52,6 @@ SECTIONS SOFTIRQENTRY_TEXT FTRACE_HOTPATCH_TRAMPOLINES_TEXT *(.text.*_indirect_*) - *(.fixup) *(.gnu.warning) . = ALIGN(PAGE_SIZE); _etext = .; /* End of text section */ diff --git a/arch/s390/lib/uaccess.c b/arch/s390/lib/uaccess.c index c7c269d5c491..f977b7c37efc 100644 --- a/arch/s390/lib/uaccess.c +++ b/arch/s390/lib/uaccess.c @@ -31,51 +31,6 @@ void debug_user_asce(int exit) } #endif /*CONFIG_DEBUG_ENTRY */ -static unsigned long raw_copy_from_user_key(void *to, const void __user *from, - unsigned long size, unsigned long key) -{ - unsigned long rem; - union oac spec = { - .oac2.key = key, - .oac2.as = PSW_BITS_AS_SECONDARY, - .oac2.k = 1, - .oac2.a = 1, - }; - - asm volatile( - " lr 0,%[spec]\n" - "0: mvcos 0(%[to]),0(%[from]),%[size]\n" - "1: jz 5f\n" - " algr %[size],%[val]\n" - " slgr %[from],%[val]\n" - " slgr %[to],%[val]\n" - " j 0b\n" - "2: la %[rem],4095(%[from])\n" /* rem = from + 4095 */ - " nr %[rem],%[val]\n" /* rem = (from + 4095) & -4096 */ - " slgr %[rem],%[from]\n" - " clgr %[size],%[rem]\n" /* copy crosses next page boundary? */ - " jnh 6f\n" - "3: mvcos 0(%[to]),0(%[from]),%[rem]\n" - "4: slgr %[size],%[rem]\n" - " j 6f\n" - "5: slgr %[size],%[size]\n" - "6:\n" - EX_TABLE(0b, 2b) - EX_TABLE(1b, 2b) - EX_TABLE(3b, 6b) - EX_TABLE(4b, 6b) - : [size] "+&a" (size), [from] "+&a" (from), [to] "+&a" (to), [rem] "=&a" (rem) - : [val] "a" (-4096UL), [spec] "d" (spec.val) - : "cc", "memory", "0"); - return size; -} - -unsigned long raw_copy_from_user(void *to, const void __user *from, unsigned long n) -{ - return raw_copy_from_user_key(to, from, n, 0); -} -EXPORT_SYMBOL(raw_copy_from_user); - unsigned long _copy_from_user_key(void *to, const void __user *from, unsigned long n, unsigned long key) { @@ -93,51 +48,6 @@ unsigned long _copy_from_user_key(void *to, const void __user *from, } EXPORT_SYMBOL(_copy_from_user_key); -static unsigned long raw_copy_to_user_key(void __user *to, const void *from, - unsigned long size, unsigned long key) -{ - unsigned long rem; - union oac spec = { - .oac1.key = key, - .oac1.as = PSW_BITS_AS_SECONDARY, - .oac1.k = 1, - .oac1.a = 1, - }; - - asm volatile( - " lr 0,%[spec]\n" - "0: mvcos 0(%[to]),0(%[from]),%[size]\n" - "1: jz 5f\n" - " algr %[size],%[val]\n" - " slgr %[to],%[val]\n" - " slgr %[from],%[val]\n" - " j 0b\n" - "2: la %[rem],4095(%[to])\n" /* rem = to + 4095 */ - " nr %[rem],%[val]\n" /* rem = (to + 4095) & -4096 */ - " slgr %[rem],%[to]\n" - " clgr %[size],%[rem]\n" /* copy crosses next page boundary? 
*/ - " jnh 6f\n" - "3: mvcos 0(%[to]),0(%[from]),%[rem]\n" - "4: slgr %[size],%[rem]\n" - " j 6f\n" - "5: slgr %[size],%[size]\n" - "6:\n" - EX_TABLE(0b, 2b) - EX_TABLE(1b, 2b) - EX_TABLE(3b, 6b) - EX_TABLE(4b, 6b) - : [size] "+&a" (size), [to] "+&a" (to), [from] "+&a" (from), [rem] "=&a" (rem) - : [val] "a" (-4096UL), [spec] "d" (spec.val) - : "cc", "memory", "0"); - return size; -} - -unsigned long raw_copy_to_user(void __user *to, const void *from, unsigned long n) -{ - return raw_copy_to_user_key(to, from, n, 0); -} -EXPORT_SYMBOL(raw_copy_to_user); - unsigned long _copy_to_user_key(void __user *to, const void *from, unsigned long n, unsigned long key) { diff --git a/arch/s390/mm/extable.c b/arch/s390/mm/extable.c index 0a0738a473af..a046be1715cf 100644 --- a/arch/s390/mm/extable.c +++ b/arch/s390/mm/extable.c @@ -7,6 +7,7 @@ #include <linux/panic.h> #include <asm/asm-extable.h> #include <asm/extable.h> +#include <asm/fpu.h> const struct exception_table_entry *s390_search_extables(unsigned long addr) { @@ -26,7 +27,7 @@ static bool ex_handler_fixup(const struct exception_table_entry *ex, struct pt_r return true; } -static bool ex_handler_ua_store(const struct exception_table_entry *ex, struct pt_regs *regs) +static bool ex_handler_ua_fault(const struct exception_table_entry *ex, struct pt_regs *regs) { unsigned int reg_err = FIELD_GET(EX_DATA_REG_ERR, ex->data); @@ -35,18 +36,6 @@ static bool ex_handler_ua_store(const struct exception_table_entry *ex, struct p return true; } -static bool ex_handler_ua_load_mem(const struct exception_table_entry *ex, struct pt_regs *regs) -{ - unsigned int reg_addr = FIELD_GET(EX_DATA_REG_ADDR, ex->data); - unsigned int reg_err = FIELD_GET(EX_DATA_REG_ERR, ex->data); - size_t len = FIELD_GET(EX_DATA_LEN, ex->data); - - regs->gprs[reg_err] = -EFAULT; - memset((void *)regs->gprs[reg_addr], 0, len); - regs->psw.addr = extable_fixup(ex); - return true; -} - static bool ex_handler_ua_load_reg(const struct exception_table_entry *ex, bool pair, struct pt_regs *regs) { @@ -77,6 +66,13 @@ static bool ex_handler_zeropad(const struct exception_table_entry *ex, struct pt return true; } +static bool ex_handler_fpc(const struct exception_table_entry *ex, struct pt_regs *regs) +{ + fpu_sfpc(0); + regs->psw.addr = extable_fixup(ex); + return true; +} + bool fixup_exception(struct pt_regs *regs) { const struct exception_table_entry *ex; @@ -89,16 +85,16 @@ bool fixup_exception(struct pt_regs *regs) return ex_handler_fixup(ex, regs); case EX_TYPE_BPF: return ex_handler_bpf(ex, regs); - case EX_TYPE_UA_STORE: - return ex_handler_ua_store(ex, regs); - case EX_TYPE_UA_LOAD_MEM: - return ex_handler_ua_load_mem(ex, regs); + case EX_TYPE_UA_FAULT: + return ex_handler_ua_fault(ex, regs); case EX_TYPE_UA_LOAD_REG: return ex_handler_ua_load_reg(ex, false, regs); case EX_TYPE_UA_LOAD_REGPAIR: return ex_handler_ua_load_reg(ex, true, regs); case EX_TYPE_ZEROPAD: return ex_handler_zeropad(ex, regs); + case EX_TYPE_FPC: + return ex_handler_fpc(ex, regs); } panic("invalid exception table entry"); } diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index 7c684c54e721..8ead999e340b 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -662,7 +662,7 @@ void __init vmem_map_init(void) if (!static_key_enabled(&cpu_has_bear)) set_memory_x(0, 1); if (debug_pagealloc_enabled()) - __set_memory_4k(__va(0), __va(0) + ident_map_size); + __set_memory_4k(__va(0), absolute_pointer(__va(0)) + ident_map_size); pr_info("Write protected kernel read-only data: %luk\n", (unsigned 
long)(__end_rodata - _stext) >> 10); } diff --git a/arch/s390/pci/pci_bus.c b/arch/s390/pci/pci_bus.c index d5ace00d10f0..857afbc4828f 100644 --- a/arch/s390/pci/pci_bus.c +++ b/arch/s390/pci/pci_bus.c @@ -171,7 +171,6 @@ void zpci_bus_scan_busses(void) static bool zpci_bus_is_multifunction_root(struct zpci_dev *zdev) { return !s390_pci_no_rid && zdev->rid_available && - zpci_is_device_configured(zdev) && !zdev->vfn; } diff --git a/arch/s390/purgatory/Makefile b/arch/s390/purgatory/Makefile index 24eccaa29337..bdcf2a3b6c41 100644 --- a/arch/s390/purgatory/Makefile +++ b/arch/s390/purgatory/Makefile @@ -13,7 +13,7 @@ CFLAGS_sha256.o := -D__DISABLE_EXPORTS -D__NO_FORTIFY $(obj)/mem.o: $(srctree)/arch/s390/lib/mem.S FORCE $(call if_changed_rule,as_o_S) -KBUILD_CFLAGS := -fno-strict-aliasing -Wall -Wstrict-prototypes +KBUILD_CFLAGS := -std=gnu11 -fno-strict-aliasing -Wall -Wstrict-prototypes KBUILD_CFLAGS += -Wno-pointer-sign -Wno-sign-compare KBUILD_CFLAGS += -fno-zero-initialized-in-bss -fno-builtin -ffreestanding KBUILD_CFLAGS += -Os -m64 -msoft-float -fno-common diff --git a/arch/s390/tools/gen_opcode_table.c b/arch/s390/tools/gen_opcode_table.c index a1bc02b29c81..7d76c417f83f 100644 --- a/arch/s390/tools/gen_opcode_table.c +++ b/arch/s390/tools/gen_opcode_table.c @@ -201,6 +201,17 @@ static int cmp_long_insn(const void *a, const void *b) return strcmp(((struct insn *)a)->name, ((struct insn *)b)->name); } +static void print_insn_name(const char *name) +{ + size_t i, len; + + len = strlen(name); + printf("{"); + for (i = 0; i < len; i++) + printf(" \'%c\',", name[i]); + printf(" }"); +} + static void print_long_insn(struct gen_opcode *desc) { struct insn *insn; @@ -223,7 +234,9 @@ static void print_long_insn(struct gen_opcode *desc) insn = &desc->insn[i]; if (insn->name_len < 6) continue; - printf("\t[LONG_INSN_%s] = \"%s\", \\\n", insn->upper, insn->name); + printf("\t[LONG_INSN_%s] = ", insn->upper); + print_insn_name(insn->name); + printf(", \\\n"); } printf("}\n\n"); } @@ -236,11 +249,13 @@ static void print_opcode(struct insn *insn, int nr) if (insn->type->byte != 0) opcode += 2; printf("\t[%4d] = { .opfrag = 0x%s, .format = INSTR_%s, ", nr, opcode, insn->format); - if (insn->name_len < 6) - printf(".name = \"%s\" ", insn->name); - else - printf(".offset = LONG_INSN_%s ", insn->upper); - printf("}, \\\n"); + if (insn->name_len < 6) { + printf(".name = "); + print_insn_name(insn->name); + } else { + printf(".offset = LONG_INSN_%s", insn->upper); + } + printf(" }, \\\n"); } static void add_to_group(struct gen_opcode *desc, struct insn *insn, int offset) diff --git a/arch/um/drivers/rtc_kern.c b/arch/um/drivers/rtc_kern.c index 134a58f93c85..9158c936c128 100644 --- a/arch/um/drivers/rtc_kern.c +++ b/arch/um/drivers/rtc_kern.c @@ -51,6 +51,7 @@ static int uml_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm) static int uml_rtc_alarm_irq_enable(struct device *dev, unsigned int enable) { + struct timespec64 ts; unsigned long long secs; if (!enable && !uml_rtc_alarm_enabled) @@ -58,7 +59,8 @@ static int uml_rtc_alarm_irq_enable(struct device *dev, unsigned int enable) uml_rtc_alarm_enabled = enable; - secs = uml_rtc_alarm_time - ktime_get_real_seconds(); + read_persistent_clock64(&ts); + secs = uml_rtc_alarm_time - ts.tv_sec; if (time_travel_mode == TT_MODE_OFF) { if (!enable) { @@ -73,7 +75,8 @@ static int uml_rtc_alarm_irq_enable(struct device *dev, unsigned int enable) if (enable) time_travel_add_event_rel(&uml_rtc_alarm_event, - secs * NSEC_PER_SEC); + secs * NSEC_PER_SEC - +
ts.tv_nsec); } return 0; diff --git a/arch/um/include/asm/fixmap.h b/arch/um/include/asm/fixmap.h deleted file mode 100644 index 2efac5827188..000000000000 --- a/arch/um/include/asm/fixmap.h +++ /dev/null @@ -1,56 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __UM_FIXMAP_H -#define __UM_FIXMAP_H - -#include <asm/processor.h> -#include <asm/archparam.h> -#include <asm/page.h> -#include <linux/threads.h> - -/* - * Here we define all the compile-time 'special' virtual - * addresses. The point is to have a constant address at - * compile time, but to set the physical address only - * in the boot process. We allocate these special addresses - * from the end of virtual memory (0xfffff000) backwards. - * Also this lets us do fail-safe vmalloc(), we - * can guarantee that these special addresses and - * vmalloc()-ed addresses never overlap. - * - * these 'compile-time allocated' memory buffers are - * fixed-size 4k pages. (or larger if used with an increment - * highger than 1) use fixmap_set(idx,phys) to associate - * physical memory with fixmap indices. - * - * TLB entries of such buffers will not be flushed across - * task switches. - */ - -/* - * on UP currently we will have no trace of the fixmap mechanizm, - * no page table allocations, etc. This might change in the - * future, say framebuffers for the console driver(s) could be - * fix-mapped? - */ -enum fixed_addresses { - __end_of_fixed_addresses -}; - -extern void __set_fixmap (enum fixed_addresses idx, - unsigned long phys, pgprot_t flags); - -/* - * used by vmalloc.c. - * - * Leave one empty page between vmalloc'ed areas and - * the start of the fixmap, and leave one page empty - * at the top of mem.. - */ - -#define FIXADDR_TOP (TASK_SIZE - 2 * PAGE_SIZE) -#define FIXADDR_SIZE (__end_of_fixed_addresses << PAGE_SHIFT) -#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE) - -#include <asm-generic/fixmap.h> - -#endif diff --git a/arch/um/include/asm/pgtable.h b/arch/um/include/asm/pgtable.h index 0bd60afcc37d..5601ca98e8a6 100644 --- a/arch/um/include/asm/pgtable.h +++ b/arch/um/include/asm/pgtable.h @@ -8,7 +8,8 @@ #ifndef __UM_PGTABLE_H #define __UM_PGTABLE_H -#include <asm/fixmap.h> +#include <asm/page.h> +#include <linux/mm_types.h> #define _PAGE_PRESENT 0x001 #define _PAGE_NEEDSYNC 0x002 @@ -48,11 +49,9 @@ extern unsigned long end_iomem; #define VMALLOC_OFFSET (__va_space) #define VMALLOC_START ((end_iomem + VMALLOC_OFFSET) & ~(VMALLOC_OFFSET-1)) -#define PKMAP_BASE ((FIXADDR_START - LAST_PKMAP * PAGE_SIZE) & PMD_MASK) -#define VMALLOC_END (FIXADDR_START-2*PAGE_SIZE) +#define VMALLOC_END (TASK_SIZE-2*PAGE_SIZE) #define MODULES_VADDR VMALLOC_START #define MODULES_END VMALLOC_END -#define MODULES_LEN (MODULES_VADDR - MODULES_END) #define _PAGE_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED | _PAGE_DIRTY) #define _KERNPG_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY) diff --git a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c index d98812907493..befed230aac2 100644 --- a/arch/um/kernel/mem.c +++ b/arch/um/kernel/mem.c @@ -9,7 +9,6 @@ #include <linux/mm.h> #include <linux/swap.h> #include <linux/slab.h> -#include <asm/fixmap.h> #include <asm/page.h> #include <asm/pgalloc.h> #include <as-layout.h> @@ -74,6 +73,7 @@ void __init mem_init(void) kmalloc_ok = 1; } +#if IS_ENABLED(CONFIG_ARCH_REUSE_HOST_VSYSCALL_AREA) /* * Create a page table and place a pointer to it in a middle page * directory entry. 
@@ -152,7 +152,6 @@ static void __init fixrange_init(unsigned long start, unsigned long end, static void __init fixaddr_user_init( void) { -#ifdef CONFIG_ARCH_REUSE_HOST_VSYSCALL_AREA long size = FIXADDR_USER_END - FIXADDR_USER_START; pte_t *pte; phys_t p; @@ -174,13 +173,12 @@ static void __init fixaddr_user_init( void) pte = virt_to_kpte(vaddr); pte_set_val(*pte, p, PAGE_READONLY); } -#endif } +#endif void __init paging_init(void) { unsigned long max_zone_pfn[MAX_NR_ZONES] = { 0 }; - unsigned long vaddr; empty_zero_page = (unsigned long *) memblock_alloc_low(PAGE_SIZE, PAGE_SIZE); @@ -191,14 +189,9 @@ void __init paging_init(void) max_zone_pfn[ZONE_NORMAL] = end_iomem >> PAGE_SHIFT; free_area_init(max_zone_pfn); - /* - * Fixed mappings, only the page table structure has to be - * created - mappings will be set by set_fixmap(): - */ - vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK; - fixrange_init(vaddr, FIXADDR_TOP, swapper_pg_dir); - +#if IS_ENABLED(CONFIG_ARCH_REUSE_HOST_VSYSCALL_AREA) fixaddr_user_init(); +#endif } /* diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c index 30bdc0a87dc8..e5a2d4d897e0 100644 --- a/arch/um/kernel/process.c +++ b/arch/um/kernel/process.c @@ -213,14 +213,6 @@ int __uml_cant_sleep(void) { /* Is in_interrupt() really needed? */ } -int user_context(unsigned long sp) -{ - unsigned long stack; - - stack = sp & (PAGE_MASK << CONFIG_KERNEL_STACK_ORDER); - return stack != (unsigned long) current_thread_info(); -} - extern exitcall_t __uml_exitcall_begin, __uml_exitcall_end; void do_uml_exitcalls(void) diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c index 8037a967225d..79ea97d4797e 100644 --- a/arch/um/kernel/um_arch.c +++ b/arch/um/kernel/um_arch.c @@ -264,7 +264,7 @@ EXPORT_SYMBOL(end_iomem); #define MIN_VMALLOC (32 * 1024 * 1024) -static void parse_host_cpu_flags(char *line) +static void __init parse_host_cpu_flags(char *line) { int i; for (i = 0; i < 32*NCAPINTS; i++) { @@ -272,7 +272,8 @@ static void parse_host_cpu_flags(char *line) set_cpu_cap(&boot_cpu_data, i); } } -static void parse_cache_line(char *line) + +static void __init parse_cache_line(char *line) { long res; char *to_parse = strstr(line, ":"); @@ -288,7 +289,7 @@ static void parse_cache_line(char *line) } } -static unsigned long get_top_address(char **envp) +static unsigned long __init get_top_address(char **envp) { unsigned long top_addr = (unsigned long) &top_addr; int i; @@ -376,9 +377,8 @@ int __init linux_main(int argc, char **argv, char **envp) iomem_size = (iomem_size + PAGE_SIZE - 1) & PAGE_MASK; max_physmem = TASK_SIZE - uml_physmem - iomem_size - MIN_VMALLOC; - - if (physmem_size + iomem_size > max_physmem) { - physmem_size = max_physmem - iomem_size; + if (physmem_size > max_physmem) { + physmem_size = max_physmem; os_info("Physical memory size shrunk to %llu bytes\n", physmem_size); } diff --git a/arch/um/os-Linux/main.c b/arch/um/os-Linux/main.c index 0afcdeb8995b..3c63ce19e3bf 100644 --- a/arch/um/os-Linux/main.c +++ b/arch/um/os-Linux/main.c @@ -19,13 +19,11 @@ #include <um_malloc.h> #include "internal.h" -#define PGD_BOUND (4 * 1024 * 1024) #define STACKSIZE (8 * 1024 * 1024) -#define THREAD_NAME_LEN (256) long elf_aux_hwcap; -static void set_stklim(void) +static void __init set_stklim(void) { struct rlimit lim; @@ -48,7 +46,7 @@ static void last_ditch_exit(int sig) exit(1); } -static void install_fatal_handler(int sig) +static void __init install_fatal_handler(int sig) { struct sigaction action; @@ -73,7 +71,7 @@ static void 
install_fatal_handler(int sig) #define UML_LIB_PATH ":" OS_LIB_PATH "/uml" -static void setup_env_path(void) +static void __init setup_env_path(void) { char *new_path = NULL; char *old_path = NULL; diff --git a/arch/x86/Makefile.postlink b/arch/x86/Makefile.postlink index fef2e977cc7d..8b8a68162c94 100644 --- a/arch/x86/Makefile.postlink +++ b/arch/x86/Makefile.postlink @@ -11,6 +11,7 @@ __archpost: -include include/config/auto.conf include $(srctree)/scripts/Kbuild.include +include $(srctree)/scripts/Makefile.lib CMD_RELOCS = arch/x86/tools/relocs OUT_RELOCS = arch/x86/boot/compressed @@ -20,11 +21,6 @@ quiet_cmd_relocs = RELOCS $(OUT_RELOCS)/$@.relocs $(CMD_RELOCS) $@ > $(OUT_RELOCS)/$@.relocs; \ $(CMD_RELOCS) --abs-relocs $@ -quiet_cmd_strip_relocs = RSTRIP $@ - cmd_strip_relocs = \ - $(OBJCOPY) --remove-section='.rel.*' --remove-section='.rel__*' \ - --remove-section='.rela.*' --remove-section='.rela__*' $@ - # `@true` prevents complaint when there is nothing to be done vmlinux: FORCE diff --git a/arch/x86/include/asm/hpet.h b/arch/x86/include/asm/hpet.h index ab9f3dd87c80..ab0c78855ecb 100644 --- a/arch/x86/include/asm/hpet.h +++ b/arch/x86/include/asm/hpet.h @@ -84,7 +84,6 @@ extern int hpet_set_rtc_irq_bit(unsigned long bit_mask); extern int hpet_set_alarm_time(unsigned char hrs, unsigned char min, unsigned char sec); extern int hpet_set_periodic_freq(unsigned long freq); -extern int hpet_rtc_dropped_irq(void); extern int hpet_rtc_timer_init(void); extern irqreturn_t hpet_rtc_interrupt(int irq, void *dev_id); extern int hpet_register_irq_handler(rtc_irq_handler handler); diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h index ce4677b8b735..3b496cdcb74b 100644 --- a/arch/x86/include/asm/mmu.h +++ b/arch/x86/include/asm/mmu.h @@ -37,6 +37,8 @@ typedef struct { */ atomic64_t tlb_gen; + unsigned long next_trim_cpumask; + #ifdef CONFIG_MODIFY_LDT_SYSCALL struct rw_semaphore ldt_usr_sem; struct ldt_struct *ldt; diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h index 2886cb668d7f..795fdd53bd0a 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h @@ -151,6 +151,7 @@ static inline int init_new_context(struct task_struct *tsk, mm->context.ctx_id = atomic64_inc_return(&last_mm_ctx_id); atomic64_set(&mm->context.tlb_gen, 0); + mm->context.next_trim_cpumask = jiffies + HZ; #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS if (cpu_feature_enabled(X86_FEATURE_OSPKE)) { diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index 69e79fff41b8..02fc2aa06e9e 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -222,6 +222,7 @@ struct flush_tlb_info { unsigned int initiating_cpu; u8 stride_shift; u8 freed_tables; + u8 trim_cpumask; }; void flush_tlb_local(void); diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 7c15d6e83c37..dae6a73be40e 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -227,6 +227,28 @@ acpi_parse_x2apic(union acpi_subtable_headers *header, const unsigned long end) } static int __init +acpi_check_lapic(union acpi_subtable_headers *header, const unsigned long end) +{ + struct acpi_madt_local_apic *processor = NULL; + + processor = (struct acpi_madt_local_apic *)header; + + if (BAD_MADT_ENTRY(processor, end)) + return -EINVAL; + + /* Ignore invalid ID */ + if (processor->id == 0xff) + return 0; + + /* Ignore processors that can not be onlined */ + if 
(!acpi_is_processor_usable(processor->lapic_flags)) + return 0; + + has_lapic_cpus = true; + return 0; +} + +static int __init acpi_parse_lapic(union acpi_subtable_headers * header, const unsigned long end) { struct acpi_madt_local_apic *processor = NULL; @@ -257,7 +279,6 @@ acpi_parse_lapic(union acpi_subtable_headers * header, const unsigned long end) processor->processor_id, /* ACPI ID */ processor->lapic_flags & ACPI_MADT_ENABLED); - has_lapic_cpus = true; return 0; } @@ -1026,6 +1047,8 @@ static int __init early_acpi_parse_madt_lapic_addr_ovr(void) static int __init acpi_parse_madt_lapic_entries(void) { int count, x2count = 0; + struct acpi_subtable_proc madt_proc[2]; + int ret; if (!boot_cpu_has(X86_FEATURE_APIC)) return -ENODEV; @@ -1034,10 +1057,27 @@ static int __init acpi_parse_madt_lapic_entries(void) acpi_parse_sapic, MAX_LOCAL_APIC); if (!count) { - count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC, - acpi_parse_lapic, MAX_LOCAL_APIC); - x2count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_X2APIC, - acpi_parse_x2apic, MAX_LOCAL_APIC); + /* Check if there are valid LAPIC entries */ + acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC, acpi_check_lapic, MAX_LOCAL_APIC); + + /* + * Enumerate the APIC IDs in the order that they appear in the + * MADT, no matter LAPIC entry or x2APIC entry is used. + */ + memset(madt_proc, 0, sizeof(madt_proc)); + madt_proc[0].id = ACPI_MADT_TYPE_LOCAL_APIC; + madt_proc[0].handler = acpi_parse_lapic; + madt_proc[1].id = ACPI_MADT_TYPE_LOCAL_X2APIC; + madt_proc[1].handler = acpi_parse_x2apic; + ret = acpi_table_parse_entries_array(ACPI_SIG_MADT, + sizeof(struct acpi_table_madt), + madt_proc, ARRAY_SIZE(madt_proc), MAX_LOCAL_APIC); + if (ret < 0) { + pr_err("Error parsing LAPIC/X2APIC entries\n"); + return ret; + } + count = madt_proc[0].count; + x2count = madt_proc[1].count; } if (!count && !x2count) { pr_err("No LAPIC entries present\n"); diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 243843e44e89..c71b575bf229 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -1854,11 +1854,18 @@ static inline temp_mm_state_t use_temporary_mm(struct mm_struct *mm) return temp_state; } +__ro_after_init struct mm_struct *poking_mm; +__ro_after_init unsigned long poking_addr; + static inline void unuse_temporary_mm(temp_mm_state_t prev_state) { lockdep_assert_irqs_disabled(); + switch_mm_irqs_off(NULL, prev_state.mm, current); + /* Clear the cpumask, to indicate no TLB flushing is needed anywhere */ + cpumask_clear_cpu(raw_smp_processor_id(), mm_cpumask(poking_mm)); + /* * Restore the breakpoints if they were disabled before the temporary mm * was loaded. 
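The unuse_temporary_mm() hunk above is easiest to read next to a caller. The sketch below is an editorial illustration only, not part of the patch: the helper name poke_via_temporary_mm() and its poke_alias argument (a writable alias of the target text, as the real __text_poke() sets up at poking_addr) are assumptions, while use_temporary_mm(), unuse_temporary_mm(), text_poke_memcpy() and poking_mm are the symbols visible in the surrounding diff.

static void poke_via_temporary_mm(void *poke_alias, const void *opcode, size_t len)
{
	temp_mm_state_t prev;

	/* Switch this CPU to the text-poking mm; IRQs are already disabled here. */
	prev = use_temporary_mm(poking_mm);

	/* All writes go through the temporary alias, never the live kernel text. */
	text_poke_memcpy(poke_alias, opcode, len);

	/*
	 * Switch back. With lazy mm_cpumask maintenance, switch_mm_irqs_off()
	 * no longer prunes the previous mm's cpumask, so unuse_temporary_mm()
	 * now clears this CPU from poking_mm's mm_cpumask explicitly, keeping
	 * later TLB-flush IPIs away from CPUs that only used it transiently.
	 */
	unuse_temporary_mm(prev);
}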
@@ -1867,9 +1874,6 @@ static inline void unuse_temporary_mm(temp_mm_state_t prev_state) hw_breakpoint_restore(); } -__ro_after_init struct mm_struct *poking_mm; -__ro_after_init unsigned long poking_addr; - static void text_poke_memcpy(void *dst, const void *src, size_t len) { memcpy(dst, src, len); diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index 9182303a50b0..7f4b2966e15c 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -1382,12 +1382,6 @@ int hpet_set_periodic_freq(unsigned long freq) } EXPORT_SYMBOL_GPL(hpet_set_periodic_freq); -int hpet_rtc_dropped_irq(void) -{ - return is_hpet_enabled(); -} -EXPORT_SYMBOL_GPL(hpet_rtc_dropped_irq); - static void hpet_rtc_timer_reinit(void) { unsigned int delta; diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index ac52255fab01..296d294142c8 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -7,7 +7,6 @@ #include <linux/sched.h> /* test_thread_flag(), ... */ #include <linux/sched/task_stack.h> /* task_stack_*(), ... */ #include <linux/kdebug.h> /* oops_begin/end, ... */ -#include <linux/extable.h> /* search_exception_tables */ #include <linux/memblock.h> /* max_low_pfn */ #include <linux/kfence.h> /* kfence_handle_page_fault */ #include <linux/kprobes.h> /* NOKPROBE_SYMBOL, ... */ diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index a2becb85bea7..6cf881a942bb 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -607,18 +607,15 @@ void switch_mm_irqs_off(struct mm_struct *unused, struct mm_struct *next, cond_mitigation(tsk); /* - * Stop remote flushes for the previous mm. - * Skip kernel threads; we never send init_mm TLB flushing IPIs, - * but the bitmap manipulation can cause cache line contention. + * Leave this CPU in prev's mm_cpumask. Atomic writes to + * mm_cpumask can be expensive under contention. The CPU + * will be removed lazily at TLB flush time. */ - if (prev != &init_mm) { - VM_WARN_ON_ONCE(!cpumask_test_cpu(cpu, - mm_cpumask(prev))); - cpumask_clear_cpu(cpu, mm_cpumask(prev)); - } + VM_WARN_ON_ONCE(prev != &init_mm && !cpumask_test_cpu(cpu, + mm_cpumask(prev))); /* Start receiving IPIs and then read tlb_gen (and LAM below) */ - if (next != &init_mm) + if (next != &init_mm && !cpumask_test_cpu(cpu, mm_cpumask(next))) cpumask_set_cpu(cpu, mm_cpumask(next)); next_tlb_gen = atomic64_read(&next->context.tlb_gen); @@ -760,10 +757,13 @@ static void flush_tlb_func(void *info) if (!local) { inc_irq_stat(irq_tlb_count); count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED); + } - /* Can only happen on remote CPUs */ - if (f->mm && f->mm != loaded_mm) - return; + /* The CPU was left in the mm_cpumask of the target mm. Clear it. */ + if (f->mm && f->mm != loaded_mm) { + cpumask_clear_cpu(raw_smp_processor_id(), mm_cpumask(f->mm)); + trace_tlb_flush(TLB_REMOTE_WRONG_CPU, 0); + return; } if (unlikely(loaded_mm == &init_mm)) @@ -893,9 +893,36 @@ done: nr_invalidate); } -static bool tlb_is_not_lazy(int cpu, void *data) +static bool should_flush_tlb(int cpu, void *data) +{ + struct flush_tlb_info *info = data; + + /* Lazy TLB will get flushed at the next context switch. */ + if (per_cpu(cpu_tlbstate_shared.is_lazy, cpu)) + return false; + + /* No mm means kernel memory flush. */ + if (!info->mm) + return true; + + /* The target mm is loaded, and the CPU is not lazy. */ + if (per_cpu(cpu_tlbstate.loaded_mm, cpu) == info->mm) + return true; + + /* In cpumask, but not the loaded mm? Periodically remove by flushing. 
*/ + if (info->trim_cpumask) + return true; + + return false; +} + +static bool should_trim_cpumask(struct mm_struct *mm) { - return !per_cpu(cpu_tlbstate_shared.is_lazy, cpu); + if (time_after(jiffies, READ_ONCE(mm->context.next_trim_cpumask))) { + WRITE_ONCE(mm->context.next_trim_cpumask, jiffies + HZ); + return true; + } + return false; } DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state_shared, cpu_tlbstate_shared); @@ -929,7 +956,7 @@ STATIC_NOPV void native_flush_tlb_multi(const struct cpumask *cpumask, if (info->freed_tables) on_each_cpu_mask(cpumask, flush_tlb_func, (void *)info, true); else - on_each_cpu_cond_mask(tlb_is_not_lazy, flush_tlb_func, + on_each_cpu_cond_mask(should_flush_tlb, flush_tlb_func, (void *)info, 1, cpumask); } @@ -980,6 +1007,7 @@ static struct flush_tlb_info *get_flush_tlb_info(struct mm_struct *mm, info->freed_tables = freed_tables; info->new_tlb_gen = new_tlb_gen; info->initiating_cpu = smp_processor_id(); + info->trim_cpumask = 0; return info; } @@ -1022,6 +1050,7 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, * flush_tlb_func_local() directly in this case. */ if (cpumask_any_but(mm_cpumask(mm), cpu) < nr_cpu_ids) { + info->trim_cpumask = should_trim_cpumask(mm); flush_tlb_multi(mm_cpumask(mm), info); } else if (mm == this_cpu_read(cpu_tlbstate.loaded_mm)) { lockdep_assert_irqs_enabled(); diff --git a/arch/x86/um/asm/archparam.h b/arch/x86/um/asm/archparam.h deleted file mode 100644 index c17cf68dda0f..000000000000 --- a/arch/x86/um/asm/archparam.h +++ /dev/null @@ -1,20 +0,0 @@ -/* - * Copyright (C) 2000 - 2003 Jeff Dike (jdike@addtoit.com) - * Copyright 2003 PathScale, Inc. - * Licensed under the GPL - */ - -#ifndef __UM_ARCHPARAM_H -#define __UM_ARCHPARAM_H - -#ifdef CONFIG_X86_32 - -#ifdef CONFIG_X86_PAE -#define LAST_PKMAP 512 -#else -#define LAST_PKMAP 1024 -#endif - -#endif - -#endif diff --git a/arch/x86/um/shared/sysdep/ptrace.h b/arch/x86/um/shared/sysdep/ptrace.h index 2dd4ca6713f8..8f7476ff6e95 100644 --- a/arch/x86/um/shared/sysdep/ptrace.h +++ b/arch/x86/um/shared/sysdep/ptrace.h @@ -74,8 +74,6 @@ struct uml_pt_regs { #define UPT_FAULTINFO(r) (&(r)->faultinfo) #define UPT_IS_USER(r) ((r)->is_user) -extern int user_context(unsigned long sp); - extern int arch_init_registers(int pid); #endif /* __SYSDEP_X86_PTRACE_H */ diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index f1cf7f2909f3..9ed93d91d754 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -1546,6 +1546,7 @@ int blkcg_activate_policy(struct gendisk *disk, const struct blkcg_policy *pol) struct request_queue *q = disk->queue; struct blkg_policy_data *pd_prealloc = NULL; struct blkcg_gq *blkg, *pinned_blkg = NULL; + unsigned int memflags; int ret; if (blkcg_policy_enabled(q, pol)) @@ -1560,7 +1561,7 @@ int blkcg_activate_policy(struct gendisk *disk, const struct blkcg_policy *pol) return -EINVAL; if (queue_is_mq(q)) - blk_mq_freeze_queue(q); + memflags = blk_mq_freeze_queue(q); retry: spin_lock_irq(&q->queue_lock); @@ -1624,7 +1625,7 @@ retry: spin_unlock_irq(&q->queue_lock); out: if (queue_is_mq(q)) - blk_mq_unfreeze_queue(q); + blk_mq_unfreeze_queue(q, memflags); if (pinned_blkg) blkg_put(pinned_blkg); if (pd_prealloc) @@ -1668,12 +1669,13 @@ void blkcg_deactivate_policy(struct gendisk *disk, { struct request_queue *q = disk->queue; struct blkcg_gq *blkg; + unsigned int memflags; if (!blkcg_policy_enabled(q, pol)) return; if (queue_is_mq(q)) - blk_mq_freeze_queue(q); + memflags = blk_mq_freeze_queue(q); mutex_lock(&q->blkcg_mutex); 
spin_lock_irq(&q->queue_lock); @@ -1697,7 +1699,7 @@ void blkcg_deactivate_policy(struct gendisk *disk, mutex_unlock(&q->blkcg_mutex); if (queue_is_mq(q)) - blk_mq_unfreeze_queue(q); + blk_mq_unfreeze_queue(q, memflags); } EXPORT_SYMBOL_GPL(blkcg_deactivate_policy); diff --git a/block/blk-core.c b/block/blk-core.c index 32fb28a6372c..d6c4fa3943b5 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -430,7 +430,6 @@ struct request_queue *blk_alloc_queue(struct queue_limits *lim, int node_id) refcount_set(&q->refs, 1); mutex_init(&q->debugfs_mutex); mutex_init(&q->sysfs_lock); - mutex_init(&q->sysfs_dir_lock); mutex_init(&q->limits_lock); mutex_init(&q->rq_qos_mutex); spin_lock_init(&q->queue_lock); diff --git a/block/blk-ia-ranges.c b/block/blk-ia-ranges.c index c9eb4241e048..d479f5481b66 100644 --- a/block/blk-ia-ranges.c +++ b/block/blk-ia-ranges.c @@ -111,7 +111,6 @@ int disk_register_independent_access_ranges(struct gendisk *disk) struct request_queue *q = disk->queue; int i, ret; - lockdep_assert_held(&q->sysfs_dir_lock); lockdep_assert_held(&q->sysfs_lock); if (!iars) @@ -155,7 +154,6 @@ void disk_unregister_independent_access_ranges(struct gendisk *disk) struct blk_independent_access_ranges *iars = disk->ia_ranges; int i; - lockdep_assert_held(&q->sysfs_dir_lock); lockdep_assert_held(&q->sysfs_lock); if (!iars) @@ -289,7 +287,6 @@ void disk_set_independent_access_ranges(struct gendisk *disk, { struct request_queue *q = disk->queue; - mutex_lock(&q->sysfs_dir_lock); mutex_lock(&q->sysfs_lock); if (iars && !disk_check_ia_ranges(disk, iars)) { kfree(iars); @@ -313,6 +310,5 @@ void disk_set_independent_access_ranges(struct gendisk *disk, disk_register_independent_access_ranges(disk); unlock: mutex_unlock(&q->sysfs_lock); - mutex_unlock(&q->sysfs_dir_lock); } EXPORT_SYMBOL_GPL(disk_set_independent_access_ranges); diff --git a/block/blk-iocost.c b/block/blk-iocost.c index a5894ec9696e..65a1d4427ccf 100644 --- a/block/blk-iocost.c +++ b/block/blk-iocost.c @@ -3224,6 +3224,7 @@ static ssize_t ioc_qos_write(struct kernfs_open_file *of, char *input, u32 qos[NR_QOS_PARAMS]; bool enable, user; char *body, *p; + unsigned int memflags; int ret; blkg_conf_init(&ctx, input); @@ -3247,7 +3248,7 @@ static ssize_t ioc_qos_write(struct kernfs_open_file *of, char *input, ioc = q_to_ioc(disk->queue); } - blk_mq_freeze_queue(disk->queue); + memflags = blk_mq_freeze_queue(disk->queue); blk_mq_quiesce_queue(disk->queue); spin_lock_irq(&ioc->lock); @@ -3347,7 +3348,7 @@ static ssize_t ioc_qos_write(struct kernfs_open_file *of, char *input, wbt_enable_default(disk); blk_mq_unquiesce_queue(disk->queue); - blk_mq_unfreeze_queue(disk->queue); + blk_mq_unfreeze_queue(disk->queue, memflags); blkg_conf_exit(&ctx); return nbytes; @@ -3355,7 +3356,7 @@ einval: spin_unlock_irq(&ioc->lock); blk_mq_unquiesce_queue(disk->queue); - blk_mq_unfreeze_queue(disk->queue); + blk_mq_unfreeze_queue(disk->queue, memflags); ret = -EINVAL; err: @@ -3414,6 +3415,7 @@ static ssize_t ioc_cost_model_write(struct kernfs_open_file *of, char *input, { struct blkg_conf_ctx ctx; struct request_queue *q; + unsigned int memflags; struct ioc *ioc; u64 u[NR_I_LCOEFS]; bool user; @@ -3441,7 +3443,7 @@ static ssize_t ioc_cost_model_write(struct kernfs_open_file *of, char *input, ioc = q_to_ioc(q); } - blk_mq_freeze_queue(q); + memflags = blk_mq_freeze_queue(q); blk_mq_quiesce_queue(q); spin_lock_irq(&ioc->lock); @@ -3493,7 +3495,7 @@ static ssize_t ioc_cost_model_write(struct kernfs_open_file *of, char *input, spin_unlock_irq(&ioc->lock); 
blk_mq_unquiesce_queue(q); - blk_mq_unfreeze_queue(q); + blk_mq_unfreeze_queue(q, memflags); blkg_conf_exit(&ctx); return nbytes; @@ -3502,7 +3504,7 @@ einval: spin_unlock_irq(&ioc->lock); blk_mq_unquiesce_queue(q); - blk_mq_unfreeze_queue(q); + blk_mq_unfreeze_queue(q, memflags); ret = -EINVAL; err: diff --git a/block/blk-iolatency.c b/block/blk-iolatency.c index ebb522788d97..42c1e0b9a68f 100644 --- a/block/blk-iolatency.c +++ b/block/blk-iolatency.c @@ -749,9 +749,11 @@ static void blkiolatency_enable_work_fn(struct work_struct *work) */ enabled = atomic_read(&blkiolat->enable_cnt); if (enabled != blkiolat->enabled) { - blk_mq_freeze_queue(blkiolat->rqos.disk->queue); + unsigned int memflags; + + memflags = blk_mq_freeze_queue(blkiolat->rqos.disk->queue); blkiolat->enabled = enabled; - blk_mq_unfreeze_queue(blkiolat->rqos.disk->queue); + blk_mq_unfreeze_queue(blkiolat->rqos.disk->queue, memflags); } } diff --git a/block/blk-mq-cpumap.c b/block/blk-mq-cpumap.c index ad8d6a363f24..444798c5374f 100644 --- a/block/blk-mq-cpumap.c +++ b/block/blk-mq-cpumap.c @@ -87,7 +87,6 @@ void blk_mq_map_hw_queues(struct blk_mq_queue_map *qmap, return; fallback: - WARN_ON_ONCE(qmap->nr_queues > 1); - blk_mq_clear_mq_map(qmap); + blk_mq_map_queues(qmap); } EXPORT_SYMBOL_GPL(blk_mq_map_hw_queues); diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c index 156e9bb07abf..3feeeccf8a99 100644 --- a/block/blk-mq-sysfs.c +++ b/block/blk-mq-sysfs.c @@ -223,30 +223,27 @@ int blk_mq_sysfs_register(struct gendisk *disk) unsigned long i, j; int ret; - lockdep_assert_held(&q->sysfs_dir_lock); - ret = kobject_add(q->mq_kobj, &disk_to_dev(disk)->kobj, "mq"); if (ret < 0) - goto out; + return ret; kobject_uevent(q->mq_kobj, KOBJ_ADD); + mutex_lock(&q->tag_set->tag_list_lock); queue_for_each_hw_ctx(q, hctx, i) { ret = blk_mq_register_hctx(hctx); if (ret) - goto unreg; + goto out_unreg; } + mutex_unlock(&q->tag_set->tag_list_lock); + return 0; - q->mq_sysfs_init_done = true; - -out: - return ret; - -unreg: +out_unreg: queue_for_each_hw_ctx(q, hctx, j) { if (j < i) blk_mq_unregister_hctx(hctx); } + mutex_unlock(&q->tag_set->tag_list_lock); kobject_uevent(q->mq_kobj, KOBJ_REMOVE); kobject_del(q->mq_kobj); @@ -259,15 +256,13 @@ void blk_mq_sysfs_unregister(struct gendisk *disk) struct blk_mq_hw_ctx *hctx; unsigned long i; - lockdep_assert_held(&q->sysfs_dir_lock); - + mutex_lock(&q->tag_set->tag_list_lock); queue_for_each_hw_ctx(q, hctx, i) blk_mq_unregister_hctx(hctx); + mutex_unlock(&q->tag_set->tag_list_lock); kobject_uevent(q->mq_kobj, KOBJ_REMOVE); kobject_del(q->mq_kobj); - - q->mq_sysfs_init_done = false; } void blk_mq_sysfs_unregister_hctxs(struct request_queue *q) @@ -275,15 +270,11 @@ void blk_mq_sysfs_unregister_hctxs(struct request_queue *q) struct blk_mq_hw_ctx *hctx; unsigned long i; - mutex_lock(&q->sysfs_dir_lock); - if (!q->mq_sysfs_init_done) - goto unlock; + if (!blk_queue_registered(q)) + return; queue_for_each_hw_ctx(q, hctx, i) blk_mq_unregister_hctx(hctx); - -unlock: - mutex_unlock(&q->sysfs_dir_lock); } int blk_mq_sysfs_register_hctxs(struct request_queue *q) @@ -292,9 +283,8 @@ int blk_mq_sysfs_register_hctxs(struct request_queue *q) unsigned long i; int ret = 0; - mutex_lock(&q->sysfs_dir_lock); - if (!q->mq_sysfs_init_done) - goto unlock; + if (!blk_queue_registered(q)) + goto out; queue_for_each_hw_ctx(q, hctx, i) { ret = blk_mq_register_hctx(hctx); @@ -302,8 +292,6 @@ int blk_mq_sysfs_register_hctxs(struct request_queue *q) break; } -unlock: - mutex_unlock(&q->sysfs_dir_lock); - +out: return 
ret; } diff --git a/block/blk-mq.c b/block/blk-mq.c index da39a1cac702..40490ac88045 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -210,12 +210,12 @@ int blk_mq_freeze_queue_wait_timeout(struct request_queue *q, } EXPORT_SYMBOL_GPL(blk_mq_freeze_queue_wait_timeout); -void blk_mq_freeze_queue(struct request_queue *q) +void blk_mq_freeze_queue_nomemsave(struct request_queue *q) { blk_freeze_queue_start(q); blk_mq_freeze_queue_wait(q); } -EXPORT_SYMBOL_GPL(blk_mq_freeze_queue); +EXPORT_SYMBOL_GPL(blk_mq_freeze_queue_nomemsave); bool __blk_mq_unfreeze_queue(struct request_queue *q, bool force_atomic) { @@ -236,12 +236,12 @@ bool __blk_mq_unfreeze_queue(struct request_queue *q, bool force_atomic) return unfreeze; } -void blk_mq_unfreeze_queue(struct request_queue *q) +void blk_mq_unfreeze_queue_nomemrestore(struct request_queue *q) { if (__blk_mq_unfreeze_queue(q, false)) blk_unfreeze_release_lock(q); } -EXPORT_SYMBOL_GPL(blk_mq_unfreeze_queue); +EXPORT_SYMBOL_GPL(blk_mq_unfreeze_queue_nomemrestore); /* * non_owner variant of blk_freeze_queue_start @@ -4223,13 +4223,14 @@ static void blk_mq_update_tag_set_shared(struct blk_mq_tag_set *set, bool shared) { struct request_queue *q; + unsigned int memflags; lockdep_assert_held(&set->tag_list_lock); list_for_each_entry(q, &set->tag_list, tag_set_list) { - blk_mq_freeze_queue(q); + memflags = blk_mq_freeze_queue(q); queue_set_hctx_shared(q, shared); - blk_mq_unfreeze_queue(q); + blk_mq_unfreeze_queue(q, memflags); } } @@ -4992,6 +4993,7 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, struct request_queue *q; LIST_HEAD(head); int prev_nr_hw_queues = set->nr_hw_queues; + unsigned int memflags; int i; lockdep_assert_held(&set->tag_list_lock); @@ -5003,8 +5005,10 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, if (set->nr_maps == 1 && nr_hw_queues == set->nr_hw_queues) return; + memflags = memalloc_noio_save(); list_for_each_entry(q, &set->tag_list, tag_set_list) - blk_mq_freeze_queue(q); + blk_mq_freeze_queue_nomemsave(q); + /* * Switch IO scheduler to 'none', cleaning up the data associated * with the previous scheduler. We will switch back once we are done @@ -5052,7 +5056,8 @@ switch_back: blk_mq_elv_switch_back(&head, q); list_for_each_entry(q, &set->tag_list, tag_set_list) - blk_mq_unfreeze_queue(q); + blk_mq_unfreeze_queue_nomemrestore(q); + memalloc_noio_restore(memflags); /* Free the excess tags when nr_hw_queues shrink. */ for (i = set->nr_hw_queues; i < prev_nr_hw_queues; i++) diff --git a/block/blk-pm.c b/block/blk-pm.c index 42e842074715..8d3e052f91da 100644 --- a/block/blk-pm.c +++ b/block/blk-pm.c @@ -89,7 +89,7 @@ int blk_pre_runtime_suspend(struct request_queue *q) if (percpu_ref_is_zero(&q->q_usage_counter)) ret = 0; /* Switch q_usage_counter back to per-cpu mode. */ - blk_mq_unfreeze_queue(q); + blk_mq_unfreeze_queue_nomemrestore(q); if (ret < 0) { spin_lock_irq(&q->queue_lock); diff --git a/block/blk-rq-qos.c b/block/blk-rq-qos.c index eb9618cd68ad..d4d4f4dc0e23 100644 --- a/block/blk-rq-qos.c +++ b/block/blk-rq-qos.c @@ -299,6 +299,7 @@ int rq_qos_add(struct rq_qos *rqos, struct gendisk *disk, enum rq_qos_id id, const struct rq_qos_ops *ops) { struct request_queue *q = disk->queue; + unsigned int memflags; lockdep_assert_held(&q->rq_qos_mutex); @@ -310,14 +311,14 @@ int rq_qos_add(struct rq_qos *rqos, struct gendisk *disk, enum rq_qos_id id, * No IO can be in-flight when adding rqos, so freeze queue, which * is fine since we only support rq_qos for blk-mq queue. 
*/ - blk_mq_freeze_queue(q); + memflags = blk_mq_freeze_queue(q); if (rq_qos_id(q, rqos->id)) goto ebusy; rqos->next = q->rq_qos; q->rq_qos = rqos; - blk_mq_unfreeze_queue(q); + blk_mq_unfreeze_queue(q, memflags); if (rqos->ops->debugfs_attrs) { mutex_lock(&q->debugfs_mutex); @@ -327,7 +328,7 @@ int rq_qos_add(struct rq_qos *rqos, struct gendisk *disk, enum rq_qos_id id, return 0; ebusy: - blk_mq_unfreeze_queue(q); + blk_mq_unfreeze_queue(q, memflags); return -EBUSY; } @@ -335,17 +336,18 @@ void rq_qos_del(struct rq_qos *rqos) { struct request_queue *q = rqos->disk->queue; struct rq_qos **cur; + unsigned int memflags; lockdep_assert_held(&q->rq_qos_mutex); - blk_mq_freeze_queue(q); + memflags = blk_mq_freeze_queue(q); for (cur = &q->rq_qos; *cur; cur = &(*cur)->next) { if (*cur == rqos) { *cur = rqos->next; break; } } - blk_mq_unfreeze_queue(q); + blk_mq_unfreeze_queue(q, memflags); mutex_lock(&q->debugfs_mutex); blk_mq_debugfs_unregister_rqos(rqos); diff --git a/block/blk-settings.c b/block/blk-settings.c index db12396ff5c7..c44dadc35e1e 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -461,11 +461,12 @@ EXPORT_SYMBOL_GPL(queue_limits_commit_update); int queue_limits_commit_update_frozen(struct request_queue *q, struct queue_limits *lim) { + unsigned int memflags; int ret; - blk_mq_freeze_queue(q); + memflags = blk_mq_freeze_queue(q); ret = queue_limits_commit_update(q, lim); - blk_mq_unfreeze_queue(q); + blk_mq_unfreeze_queue(q, memflags); return ret; } diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index e09b455874bf..6f548a4376aa 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -681,7 +681,7 @@ queue_attr_store(struct kobject *kobj, struct attribute *attr, struct queue_sysfs_entry *entry = to_queue(attr); struct gendisk *disk = container_of(kobj, struct gendisk, queue_kobj); struct request_queue *q = disk->queue; - unsigned int noio_flag; + unsigned int memflags; ssize_t res; if (!entry->store_limit && !entry->store) @@ -711,11 +711,9 @@ queue_attr_store(struct kobject *kobj, struct attribute *attr, } mutex_lock(&q->sysfs_lock); - blk_mq_freeze_queue(q); - noio_flag = memalloc_noio_save(); + memflags = blk_mq_freeze_queue(q); res = entry->store(disk, page, length); - memalloc_noio_restore(noio_flag); - blk_mq_unfreeze_queue(q); + blk_mq_unfreeze_queue(q, memflags); mutex_unlock(&q->sysfs_lock); return res; } @@ -764,7 +762,6 @@ int blk_register_queue(struct gendisk *disk) struct request_queue *q = disk->queue; int ret; - mutex_lock(&q->sysfs_dir_lock); kobject_init(&disk->queue_kobj, &blk_queue_ktype); ret = kobject_add(&disk->queue_kobj, &disk_to_dev(disk)->kobj, "queue"); if (ret < 0) @@ -805,7 +802,6 @@ int blk_register_queue(struct gendisk *disk) if (q->elevator) kobject_uevent(&q->elevator->kobj, KOBJ_ADD); mutex_unlock(&q->sysfs_lock); - mutex_unlock(&q->sysfs_dir_lock); /* * SCSI probing may synchronously create and destroy a lot of @@ -830,7 +826,6 @@ out_debugfs_remove: mutex_unlock(&q->sysfs_lock); out_put_queue_kobj: kobject_put(&disk->queue_kobj); - mutex_unlock(&q->sysfs_dir_lock); return ret; } @@ -861,7 +856,6 @@ void blk_unregister_queue(struct gendisk *disk) blk_queue_flag_clear(QUEUE_FLAG_REGISTERED, q); mutex_unlock(&q->sysfs_lock); - mutex_lock(&q->sysfs_dir_lock); /* * Remove the sysfs attributes before unregistering the queue data * structures that can be modified through sysfs. @@ -878,7 +872,6 @@ void blk_unregister_queue(struct gendisk *disk) /* Now that we've deleted all child objects, we can delete the queue. 
*/ kobject_uevent(&disk->queue_kobj, KOBJ_REMOVE); kobject_del(&disk->queue_kobj); - mutex_unlock(&q->sysfs_dir_lock); blk_debugfs_remove(disk); } diff --git a/block/blk-throttle.c b/block/blk-throttle.c index 82dbaefcfa3b..8d149aff9fd0 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -1202,6 +1202,7 @@ static int blk_throtl_init(struct gendisk *disk) { struct request_queue *q = disk->queue; struct throtl_data *td; + unsigned int memflags; int ret; td = kzalloc_node(sizeof(*td), GFP_KERNEL, q->node); @@ -1215,7 +1216,7 @@ static int blk_throtl_init(struct gendisk *disk) * Freeze queue before activating policy, to synchronize with IO path, * which is protected by 'q_usage_counter'. */ - blk_mq_freeze_queue(disk->queue); + memflags = blk_mq_freeze_queue(disk->queue); blk_mq_quiesce_queue(disk->queue); q->td = td; @@ -1239,7 +1240,7 @@ static int blk_throtl_init(struct gendisk *disk) out: blk_mq_unquiesce_queue(disk->queue); - blk_mq_unfreeze_queue(disk->queue); + blk_mq_unfreeze_queue(disk->queue, memflags); return ret; } diff --git a/block/blk-zoned.c b/block/blk-zoned.c index 9d08a54c201e..761ea662ddc3 100644 --- a/block/blk-zoned.c +++ b/block/blk-zoned.c @@ -1717,9 +1717,10 @@ int blk_revalidate_disk_zones(struct gendisk *disk) else pr_warn("%s: failed to revalidate zones\n", disk->disk_name); if (ret) { - blk_mq_freeze_queue(q); + unsigned int memflags = blk_mq_freeze_queue(q); + disk_free_zone_resources(disk); - blk_mq_unfreeze_queue(q); + blk_mq_unfreeze_queue(q, memflags); } return ret; diff --git a/block/elevator.c b/block/elevator.c index b81216c48b6b..cd2ce4921601 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -570,6 +570,7 @@ static struct elevator_type *elevator_get_default(struct request_queue *q) void elevator_init_mq(struct request_queue *q) { struct elevator_type *e; + unsigned int memflags; int err; WARN_ON_ONCE(blk_queue_registered(q)); @@ -590,13 +591,13 @@ void elevator_init_mq(struct request_queue *q) * * Disk isn't added yet, so verifying queue lock only manually. 
*/ - blk_mq_freeze_queue(q); + memflags = blk_mq_freeze_queue(q); blk_mq_cancel_work_sync(q); err = blk_mq_init_sched(q, e); - blk_mq_unfreeze_queue(q); + blk_mq_unfreeze_queue(q, memflags); if (err) { pr_warn("\"%s\" elevator initialization failed, " @@ -614,11 +615,12 @@ void elevator_init_mq(struct request_queue *q) */ int elevator_switch(struct request_queue *q, struct elevator_type *new_e) { + unsigned int memflags; int ret; lockdep_assert_held(&q->sysfs_lock); - blk_mq_freeze_queue(q); + memflags = blk_mq_freeze_queue(q); blk_mq_quiesce_queue(q); if (q->elevator) { @@ -639,7 +641,7 @@ int elevator_switch(struct request_queue *q, struct elevator_type *new_e) out_unfreeze: blk_mq_unquiesce_queue(q); - blk_mq_unfreeze_queue(q); + blk_mq_unfreeze_queue(q, memflags); if (ret) { pr_warn("elv: switch to \"%s\" failed, falling back to \"none\"\n", @@ -651,9 +653,11 @@ out_unfreeze: void elevator_disable(struct request_queue *q) { + unsigned int memflags; + lockdep_assert_held(&q->sysfs_lock); - blk_mq_freeze_queue(q); + memflags = blk_mq_freeze_queue(q); blk_mq_quiesce_queue(q); elv_unregister_queue(q); @@ -664,7 +668,7 @@ void elevator_disable(struct request_queue *q) blk_add_trace_msg(q, "elv switch: none"); blk_mq_unquiesce_queue(q); - blk_mq_unfreeze_queue(q); + blk_mq_unfreeze_queue(q, memflags); } /* diff --git a/block/fops.c b/block/fops.c index 6d5c4fc5a216..be9f1dbea9ce 100644 --- a/block/fops.c +++ b/block/fops.c @@ -783,11 +783,12 @@ static ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to) file_accessed(iocb->ki_filp); ret = blkdev_direct_IO(iocb, to); - if (ret >= 0) { + if (ret > 0) { iocb->ki_pos += ret; count -= ret; } - iov_iter_revert(to, count - iov_iter_count(to)); + if (ret != -EIOCBQUEUED) + iov_iter_revert(to, count - iov_iter_count(to)); if (ret < 0 || !count) goto reexpand; } diff --git a/drivers/acpi/numa/srat.c b/drivers/acpi/numa/srat.c index 59fffe34c9d0..00ac0d7bb8c9 100644 --- a/drivers/acpi/numa/srat.c +++ b/drivers/acpi/numa/srat.c @@ -95,9 +95,13 @@ int __init fix_pxm_node_maps(int max_nid) int i, j, index = -1, count = 0; nodemask_t nodes_to_enable; - if (numa_off || srat_disabled()) + if (numa_off) return -1; + /* no or incomplete node/PXM mapping set, nothing to do */ + if (srat_disabled()) + return 0; + /* find fake nodes PXM mapping */ for (i = 0; i < MAX_NUMNODES; i++) { if (node_to_pxm_map[i] != PXM_INVAL) { @@ -117,6 +121,11 @@ int __init fix_pxm_node_maps(int max_nid) } } } + if (index == -1) { + pr_debug("No node/PXM mapping has been set\n"); + /* nothing more to be done */ + return 0; + } if (WARN(index != max_nid, "%d max nid when expected %d\n", index, max_nid)) return -1; diff --git a/drivers/acpi/x86/utils.c b/drivers/acpi/x86/utils.c index cb45ef5240da..068c1612660b 100644 --- a/drivers/acpi/x86/utils.c +++ b/drivers/acpi/x86/utils.c @@ -408,6 +408,19 @@ static const struct dmi_system_id acpi_quirk_skip_dmi_ids[] = { ACPI_QUIRK_SKIP_ACPI_AC_AND_BATTERY), }, { + /* Vexia Edu Atla 10 tablet 5V version */ + .matches = { + /* Having all 3 of these not set is somewhat unique */ + DMI_MATCH(DMI_SYS_VENDOR, "To be filled by O.E.M."), + DMI_MATCH(DMI_PRODUCT_NAME, "To be filled by O.E.M."), + DMI_MATCH(DMI_BOARD_NAME, "To be filled by O.E.M."), + /* Above strings are too generic, also match on BIOS date */ + DMI_MATCH(DMI_BIOS_DATE, "05/14/2015"), + }, + .driver_data = (void *)(ACPI_QUIRK_SKIP_I2C_CLIENTS | + ACPI_QUIRK_SKIP_ACPI_AC_AND_BATTERY), + }, + { /* Vexia Edu Atla 10 tablet 9V version */ .matches = { 
DMI_MATCH(DMI_BOARD_VENDOR, "AMI Corporation"), diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index c085dd81ebe7..63ec2f218431 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -4143,6 +4143,10 @@ static const struct ata_dev_quirks_entry __ata_dev_quirks[] = { { "Samsung SSD 860*", NULL, ATA_QUIRK_NO_NCQ_TRIM | ATA_QUIRK_ZERO_AFTER_TRIM | ATA_QUIRK_NO_NCQ_ON_ATI }, + { "Samsung SSD 870 QVO*", NULL, ATA_QUIRK_NO_NCQ_TRIM | + ATA_QUIRK_ZERO_AFTER_TRIM | + ATA_QUIRK_NO_NCQ_ON_ATI | + ATA_QUIRK_NOLPM }, { "Samsung SSD 870*", NULL, ATA_QUIRK_NO_NCQ_TRIM | ATA_QUIRK_ZERO_AFTER_TRIM | ATA_QUIRK_NO_NCQ_ON_ATI }, diff --git a/drivers/ata/libata-sff.c b/drivers/ata/libata-sff.c index 67f277e1c3bf..5a46c066abc3 100644 --- a/drivers/ata/libata-sff.c +++ b/drivers/ata/libata-sff.c @@ -601,7 +601,7 @@ static void ata_pio_sector(struct ata_queued_cmd *qc) { struct ata_port *ap = qc->ap; struct page *page; - unsigned int offset; + unsigned int offset, count; if (!qc->cursg) { qc->curbytes = qc->nbytes; @@ -617,25 +617,27 @@ static void ata_pio_sector(struct ata_queued_cmd *qc) page = nth_page(page, (offset >> PAGE_SHIFT)); offset %= PAGE_SIZE; - trace_ata_sff_pio_transfer_data(qc, offset, qc->sect_size); + /* don't overrun current sg */ + count = min(qc->cursg->length - qc->cursg_ofs, qc->sect_size); + + trace_ata_sff_pio_transfer_data(qc, offset, count); /* * Split the transfer when it splits a page boundary. Note that the * split still has to be dword aligned like all ATA data transfers. */ WARN_ON_ONCE(offset % 4); - if (offset + qc->sect_size > PAGE_SIZE) { + if (offset + count > PAGE_SIZE) { unsigned int split_len = PAGE_SIZE - offset; ata_pio_xfer(qc, page, offset, split_len); - ata_pio_xfer(qc, nth_page(page, 1), 0, - qc->sect_size - split_len); + ata_pio_xfer(qc, nth_page(page, 1), 0, count - split_len); } else { - ata_pio_xfer(qc, page, offset, qc->sect_size); + ata_pio_xfer(qc, page, offset, count); } - qc->curbytes += qc->sect_size; - qc->cursg_ofs += qc->sect_size; + qc->curbytes += count; + qc->cursg_ofs += count; if (qc->cursg_ofs == qc->cursg->length) { qc->cursg = sg_next(qc->cursg); diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c index fdaa24bb641a..a7e511849875 100644 --- a/drivers/base/cpu.c +++ b/drivers/base/cpu.c @@ -599,6 +599,7 @@ CPU_SHOW_VULN_FALLBACK(retbleed); CPU_SHOW_VULN_FALLBACK(spec_rstack_overflow); CPU_SHOW_VULN_FALLBACK(gds); CPU_SHOW_VULN_FALLBACK(reg_file_data_sampling); +CPU_SHOW_VULN_FALLBACK(ghostwrite); static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL); static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL); @@ -614,6 +615,7 @@ static DEVICE_ATTR(retbleed, 0444, cpu_show_retbleed, NULL); static DEVICE_ATTR(spec_rstack_overflow, 0444, cpu_show_spec_rstack_overflow, NULL); static DEVICE_ATTR(gather_data_sampling, 0444, cpu_show_gds, NULL); static DEVICE_ATTR(reg_file_data_sampling, 0444, cpu_show_reg_file_data_sampling, NULL); +static DEVICE_ATTR(ghostwrite, 0444, cpu_show_ghostwrite, NULL); static struct attribute *cpu_root_vulnerabilities_attrs[] = { &dev_attr_meltdown.attr, @@ -630,6 +632,7 @@ static struct attribute *cpu_root_vulnerabilities_attrs[] = { &dev_attr_spec_rstack_overflow.attr, &dev_attr_gather_data_sampling.attr, &dev_attr_reg_file_data_sampling.attr, + &dev_attr_ghostwrite.attr, NULL }; diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index cbc9a7a75def..d497d448e4b2 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -656,13 +656,15 @@ 
static void device_resume_noirq(struct device *dev, pm_message_t state, bool asy * so change its status accordingly. * * Otherwise, the device is going to be resumed, so set its PM-runtime - * status to "active", but do that only if DPM_FLAG_SMART_SUSPEND is set - * to avoid confusing drivers that don't use it. + * status to "active" unless its power.set_active flag is clear, in + * which case it is not necessary to update its PM-runtime status. */ - if (skip_resume) + if (skip_resume) { pm_runtime_set_suspended(dev); - else if (dev_pm_skip_suspend(dev)) + } else if (dev->power.set_active) { pm_runtime_set_active(dev); + dev->power.set_active = false; + } if (dev->pm_domain) { info = "noirq power domain "; @@ -1189,18 +1191,24 @@ static pm_message_t resume_event(pm_message_t sleep_state) return PMSG_ON; } -static void dpm_superior_set_must_resume(struct device *dev) +static void dpm_superior_set_must_resume(struct device *dev, bool set_active) { struct device_link *link; int idx; - if (dev->parent) + if (dev->parent) { dev->parent->power.must_resume = true; + if (set_active) + dev->parent->power.set_active = true; + } idx = device_links_read_lock(); - list_for_each_entry_rcu_locked(link, &dev->links.suppliers, c_node) + list_for_each_entry_rcu_locked(link, &dev->links.suppliers, c_node) { link->supplier->power.must_resume = true; + if (set_active) + link->supplier->power.set_active = true; + } device_links_read_unlock(idx); } @@ -1278,8 +1286,11 @@ Skip: dev->power.may_skip_resume)) dev->power.must_resume = true; - if (dev->power.must_resume) - dpm_superior_set_must_resume(dev); + if (dev->power.must_resume) { + dev->power.set_active = dev->power.set_active || + dev_pm_test_driver_flags(dev, DPM_FLAG_SMART_SUSPEND); + dpm_superior_set_must_resume(dev, dev->power.set_active); + } Complete: complete_all(&dev->power.completion); diff --git a/drivers/block/aoe/aoedev.c b/drivers/block/aoe/aoedev.c index 3523dd82d7a0..4db7f6ce8ade 100644 --- a/drivers/block/aoe/aoedev.c +++ b/drivers/block/aoe/aoedev.c @@ -226,10 +226,11 @@ aoedev_downdev(struct aoedev *d) /* fast fail all pending I/O */ if (d->blkq) { /* UP is cleared, freeze+quiesce to insure all are errored */ - blk_mq_freeze_queue(d->blkq); + unsigned int memflags = blk_mq_freeze_queue(d->blkq); + blk_mq_quiesce_queue(d->blkq); blk_mq_unquiesce_queue(d->blkq); - blk_mq_unfreeze_queue(d->blkq); + blk_mq_unfreeze_queue(d->blkq, memflags); } if (d->gd) diff --git a/drivers/block/ataflop.c b/drivers/block/ataflop.c index 110f9aca2667..a81ade622a01 100644 --- a/drivers/block/ataflop.c +++ b/drivers/block/ataflop.c @@ -746,6 +746,7 @@ static int do_format(int drive, int type, struct atari_format_descr *desc) unsigned char *p; int sect, nsect; unsigned long flags; + unsigned int memflags; int ret; if (type) { @@ -758,7 +759,7 @@ static int do_format(int drive, int type, struct atari_format_descr *desc) } q = unit[drive].disk[type]->queue; - blk_mq_freeze_queue(q); + memflags = blk_mq_freeze_queue(q); blk_mq_quiesce_queue(q); local_irq_save(flags); @@ -817,7 +818,7 @@ static int do_format(int drive, int type, struct atari_format_descr *desc) ret = FormatError ? 
-EIO : 0; out: blk_mq_unquiesce_queue(q); - blk_mq_unfreeze_queue(q); + blk_mq_unfreeze_queue(q, memflags); return ret; } diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 1ec7417c7f00..c05fe27a96b6 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -586,6 +586,7 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev, { struct file *file = fget(arg); struct file *old_file; + unsigned int memflags; int error; bool partscan; bool is_loop; @@ -623,14 +624,14 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev, /* and ... switch */ disk_force_media_change(lo->lo_disk); - blk_mq_freeze_queue(lo->lo_queue); + memflags = blk_mq_freeze_queue(lo->lo_queue); mapping_set_gfp_mask(old_file->f_mapping, lo->old_gfp_mask); lo->lo_backing_file = file; lo->old_gfp_mask = mapping_gfp_mask(file->f_mapping); mapping_set_gfp_mask(file->f_mapping, lo->old_gfp_mask & ~(__GFP_IO|__GFP_FS)); loop_update_dio(lo); - blk_mq_unfreeze_queue(lo->lo_queue); + blk_mq_unfreeze_queue(lo->lo_queue, memflags); partscan = lo->lo_flags & LO_FLAGS_PARTSCAN; loop_global_unlock(lo, is_loop); @@ -1255,6 +1256,7 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info) int err; bool partscan = false; bool size_changed = false; + unsigned int memflags; err = mutex_lock_killable(&lo->lo_mutex); if (err) @@ -1272,7 +1274,7 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info) } /* I/O needs to be drained before changing lo_offset or lo_sizelimit */ - blk_mq_freeze_queue(lo->lo_queue); + memflags = blk_mq_freeze_queue(lo->lo_queue); err = loop_set_status_from_info(lo, info); if (err) @@ -1281,8 +1283,7 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info) partscan = !(lo->lo_flags & LO_FLAGS_PARTSCAN) && (info->lo_flags & LO_FLAGS_PARTSCAN); - lo->lo_flags &= ~(LOOP_SET_STATUS_SETTABLE_FLAGS | - LOOP_SET_STATUS_CLEARABLE_FLAGS); + lo->lo_flags &= ~LOOP_SET_STATUS_CLEARABLE_FLAGS; lo->lo_flags |= (info->lo_flags & LOOP_SET_STATUS_SETTABLE_FLAGS); if (size_changed) { @@ -1295,7 +1296,7 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info) loop_update_dio(lo); out_unfreeze: - blk_mq_unfreeze_queue(lo->lo_queue); + blk_mq_unfreeze_queue(lo->lo_queue, memflags); if (partscan) clear_bit(GD_SUPPRESS_PART_SCAN, &lo->lo_disk->state); out_unlock: @@ -1447,6 +1448,7 @@ static int loop_set_capacity(struct loop_device *lo) static int loop_set_dio(struct loop_device *lo, unsigned long arg) { bool use_dio = !!arg; + unsigned int memflags; if (lo->lo_state != Lo_bound) return -ENXIO; @@ -1460,18 +1462,19 @@ static int loop_set_dio(struct loop_device *lo, unsigned long arg) vfs_fsync(lo->lo_backing_file, 0); } - blk_mq_freeze_queue(lo->lo_queue); + memflags = blk_mq_freeze_queue(lo->lo_queue); if (use_dio) lo->lo_flags |= LO_FLAGS_DIRECT_IO; else lo->lo_flags &= ~LO_FLAGS_DIRECT_IO; - blk_mq_unfreeze_queue(lo->lo_queue); + blk_mq_unfreeze_queue(lo->lo_queue, memflags); return 0; } static int loop_set_block_size(struct loop_device *lo, unsigned long arg) { struct queue_limits lim; + unsigned int memflags; int err = 0; if (lo->lo_state != Lo_bound) @@ -1486,10 +1489,10 @@ static int loop_set_block_size(struct loop_device *lo, unsigned long arg) lim = queue_limits_start_update(lo->lo_queue); loop_update_limits(lo, &lim, arg); - blk_mq_freeze_queue(lo->lo_queue); + memflags = blk_mq_freeze_queue(lo->lo_queue); err = queue_limits_commit_update(lo->lo_queue, &lim); loop_update_dio(lo); - 
blk_mq_unfreeze_queue(lo->lo_queue); + blk_mq_unfreeze_queue(lo->lo_queue, memflags); return err; } diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index b63a0f29a54a..7bdc7eb808ea 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -1234,6 +1234,7 @@ static int nbd_add_socket(struct nbd_device *nbd, unsigned long arg, struct socket *sock; struct nbd_sock **socks; struct nbd_sock *nsock; + unsigned int memflags; int err; /* Arg will be cast to int, check it to avoid overflow */ @@ -1247,7 +1248,7 @@ static int nbd_add_socket(struct nbd_device *nbd, unsigned long arg, * We need to make sure we don't get any errant requests while we're * reallocating the ->socks array. */ - blk_mq_freeze_queue(nbd->disk->queue); + memflags = blk_mq_freeze_queue(nbd->disk->queue); if (!netlink && !nbd->task_setup && !test_bit(NBD_RT_BOUND, &config->runtime_flags)) @@ -1288,12 +1289,12 @@ static int nbd_add_socket(struct nbd_device *nbd, unsigned long arg, INIT_WORK(&nsock->work, nbd_pending_cmd_work); socks[config->num_connections++] = nsock; atomic_inc(&config->live_connections); - blk_mq_unfreeze_queue(nbd->disk->queue); + blk_mq_unfreeze_queue(nbd->disk->queue, memflags); return 0; put_socket: - blk_mq_unfreeze_queue(nbd->disk->queue); + blk_mq_unfreeze_queue(nbd->disk->queue, memflags); sockfd_put(sock); return err; } diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 5b393e4a1ddf..faafd7ff43d6 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -7281,9 +7281,10 @@ static ssize_t do_rbd_remove(const char *buf, size_t count) * Prevent new IO from being queued and wait for existing * IO to complete/fail. */ - blk_mq_freeze_queue(rbd_dev->disk->queue); + unsigned int memflags = blk_mq_freeze_queue(rbd_dev->disk->queue); + blk_mark_disk_dead(rbd_dev->disk); - blk_mq_unfreeze_queue(rbd_dev->disk->queue); + blk_mq_unfreeze_queue(rbd_dev->disk->queue, memflags); } del_gendisk(rbd_dev->disk); diff --git a/drivers/block/sunvdc.c b/drivers/block/sunvdc.c index e4d1e7284dae..33b3bc99d532 100644 --- a/drivers/block/sunvdc.c +++ b/drivers/block/sunvdc.c @@ -1113,6 +1113,7 @@ static void vdc_requeue_inflight(struct vdc_port *port) static void vdc_queue_drain(struct vdc_port *port) { struct request_queue *q = port->disk->queue; + unsigned int memflags; /* * Mark the queue as draining, then freeze/quiesce to ensure @@ -1121,12 +1122,12 @@ static void vdc_queue_drain(struct vdc_port *port) port->drain = 1; spin_unlock_irq(&port->vio.lock); - blk_mq_freeze_queue(q); + memflags = blk_mq_freeze_queue(q); blk_mq_quiesce_queue(q); spin_lock_irq(&port->vio.lock); port->drain = 0; blk_mq_unquiesce_queue(q); - blk_mq_unfreeze_queue(q); + blk_mq_unfreeze_queue(q, memflags); } diff --git a/drivers/block/swim3.c b/drivers/block/swim3.c index 9914153b365b..3aedcb5add61 100644 --- a/drivers/block/swim3.c +++ b/drivers/block/swim3.c @@ -840,6 +840,7 @@ static int grab_drive(struct floppy_state *fs, enum swim_state state, static void release_drive(struct floppy_state *fs) { struct request_queue *q = disks[fs->index]->queue; + unsigned int memflags; unsigned long flags; swim3_dbg("%s", "-> release drive\n"); @@ -848,10 +849,10 @@ static void release_drive(struct floppy_state *fs) fs->state = idle; spin_unlock_irqrestore(&swim3_lock, flags); - blk_mq_freeze_queue(q); + memflags = blk_mq_freeze_queue(q); blk_mq_quiesce_queue(q); blk_mq_unquiesce_queue(q); - blk_mq_unfreeze_queue(q); + blk_mq_unfreeze_queue(q, memflags); } static int fd_eject(struct floppy_state *fs) diff --git 
a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index bfbe391c20fe..6a61ec35f426 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -1583,11 +1583,12 @@ static int virtblk_freeze_priv(struct virtio_device *vdev) { struct virtio_blk *vblk = vdev->priv; struct request_queue *q = vblk->disk->queue; + unsigned int memflags; /* Ensure no requests in virtqueues before deleting vqs. */ - blk_mq_freeze_queue(q); + memflags = blk_mq_freeze_queue(q); blk_mq_quiesce_queue_nowait(q); - blk_mq_unfreeze_queue(q); + blk_mq_unfreeze_queue(q, memflags); /* Ensure we don't receive any more interrupts */ virtio_reset_device(vdev); diff --git a/drivers/bluetooth/btnxpuart.c b/drivers/bluetooth/btnxpuart.c index 1230045d78a5..aa5ec1d444a9 100644 --- a/drivers/bluetooth/btnxpuart.c +++ b/drivers/bluetooth/btnxpuart.c @@ -1381,13 +1381,12 @@ static void btnxpuart_tx_work(struct work_struct *work) while ((skb = nxp_dequeue(nxpdev))) { len = serdev_device_write_buf(serdev, skb->data, skb->len); - serdev_device_wait_until_sent(serdev, 0); hdev->stat.byte_tx += len; skb_pull(skb, len); if (skb->len > 0) { skb_queue_head(&nxpdev->txq, skb); - break; + continue; } switch (hci_skb_pkt_type(skb)) { diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 9aa018d4f6f5..90966dfbd278 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -899,11 +899,6 @@ static void btusb_reset(struct hci_dev *hdev) struct btusb_data *data; int err; - if (hdev->reset) { - hdev->reset(hdev); - return; - } - data = hci_get_drvdata(hdev); /* This is not an unbalanced PM reference since the device will reset */ err = usb_autopm_get_interface(data->intf); @@ -2639,8 +2634,15 @@ static void btusb_mtk_claim_iso_intf(struct btusb_data *data) struct btmtk_data *btmtk_data = hci_get_priv(data->hdev); int err; + /* + * The function usb_driver_claim_interface() is documented to need + * locks held if it's not called from a probe routine. The code here + * is called from the hci_power_on workqueue, so grab the lock. 
+ */ + device_lock(&btmtk_data->isopkt_intf->dev); err = usb_driver_claim_interface(&btusb_driver, btmtk_data->isopkt_intf, data); + device_unlock(&btmtk_data->isopkt_intf->dev); if (err < 0) { btmtk_data->isopkt_intf = NULL; bt_dev_err(data->hdev, "Failed to claim iso interface"); diff --git a/drivers/cpufreq/Kconfig.arm b/drivers/cpufreq/Kconfig.arm index 704e84d00639..0ee5c691fb36 100644 --- a/drivers/cpufreq/Kconfig.arm +++ b/drivers/cpufreq/Kconfig.arm @@ -17,7 +17,7 @@ config ARM_ALLWINNER_SUN50I_CPUFREQ_NVMEM config ARM_AIROHA_SOC_CPUFREQ tristate "Airoha EN7581 SoC CPUFreq support" - depends on ARCH_AIROHA || COMPILE_TEST + depends on (ARCH_AIROHA && OF) || COMPILE_TEST select PM_OPP default ARCH_AIROHA help diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c index 302df42d6887..463b69a2dff5 100644 --- a/drivers/cpufreq/acpi-cpufreq.c +++ b/drivers/cpufreq/acpi-cpufreq.c @@ -909,11 +909,6 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) if (perf->states[0].core_frequency * 1000 != freq_table[0].frequency) pr_warn(FW_WARN "P-state 0 is not max freq\n"); - if (acpi_cpufreq_driver.set_boost) { - set_boost(policy, acpi_cpufreq_driver.boost_enabled); - policy->boost_enabled = acpi_cpufreq_driver.boost_enabled; - } - return result; err_unreg: diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c index 2486a6c5256a..8f512448382f 100644 --- a/drivers/cpufreq/cppc_cpufreq.c +++ b/drivers/cpufreq/cppc_cpufreq.c @@ -611,7 +611,8 @@ static int cppc_cpufreq_cpu_init(struct cpufreq_policy *policy) * Section 8.4.7.1.1.5 of ACPI 6.1 spec) */ policy->min = cppc_perf_to_khz(caps, caps->lowest_nonlinear_perf); - policy->max = cppc_perf_to_khz(caps, caps->nominal_perf); + policy->max = cppc_perf_to_khz(caps, policy->boost_enabled ? + caps->highest_perf : caps->nominal_perf); /* * Set cpuinfo.min_freq to Lowest to make the full range of performance @@ -619,7 +620,7 @@ static int cppc_cpufreq_cpu_init(struct cpufreq_policy *policy) * nonlinear perf */ policy->cpuinfo.min_freq = cppc_perf_to_khz(caps, caps->lowest_perf); - policy->cpuinfo.max_freq = cppc_perf_to_khz(caps, caps->nominal_perf); + policy->cpuinfo.max_freq = policy->max; policy->transition_delay_us = cppc_cpufreq_get_transition_delay_us(cpu); policy->shared_type = cpu_data->shared_type; diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 1076e37a18ad..e0048856ecee 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1410,10 +1410,6 @@ static int cpufreq_online(unsigned int cpu) goto out_free_policy; } - /* Let the per-policy boost flag mirror the cpufreq_driver boost during init */ - if (cpufreq_boost_enabled() && policy_has_boost_freq(policy)) - policy->boost_enabled = true; - /* * The initialization has succeeded and the policy is online. 
* If there is a problem with its frequency table, take it @@ -1476,6 +1472,10 @@ static int cpufreq_online(unsigned int cpu) blocking_notifier_call_chain(&cpufreq_policy_notifier_list, CPUFREQ_CREATE_POLICY, policy); + } else { + ret = freq_qos_update_request(policy->max_freq_req, policy->max); + if (ret < 0) + goto out_destroy_policy; } if (cpufreq_driver->get && has_target()) { @@ -1570,6 +1570,18 @@ static int cpufreq_online(unsigned int cpu) if (new_policy && cpufreq_thermal_control_enabled(cpufreq_driver)) policy->cdev = of_cpufreq_cooling_register(policy); + /* Let the per-policy boost flag mirror the cpufreq_driver boost during init */ + if (policy->boost_enabled != cpufreq_boost_enabled()) { + policy->boost_enabled = cpufreq_boost_enabled(); + ret = cpufreq_driver->set_boost(policy, policy->boost_enabled); + if (ret) { + /* If the set_boost fails, the online operation is not affected */ + pr_info("%s: CPU%d: Cannot %s BOOST\n", __func__, policy->cpu, + policy->boost_enabled ? "enable" : "disable"); + policy->boost_enabled = !policy->boost_enabled; + } + } + pr_debug("initialization complete\n"); return 0; diff --git a/drivers/cpufreq/s3c64xx-cpufreq.c b/drivers/cpufreq/s3c64xx-cpufreq.c index c6bdfc308e99..9cef71528076 100644 --- a/drivers/cpufreq/s3c64xx-cpufreq.c +++ b/drivers/cpufreq/s3c64xx-cpufreq.c @@ -24,6 +24,7 @@ struct s3c64xx_dvfs { unsigned int vddarm_max; }; +#ifdef CONFIG_REGULATOR static struct s3c64xx_dvfs s3c64xx_dvfs_table[] = { [0] = { 1000000, 1150000 }, [1] = { 1050000, 1150000 }, @@ -31,6 +32,7 @@ static struct s3c64xx_dvfs s3c64xx_dvfs_table[] = { [3] = { 1200000, 1350000 }, [4] = { 1300000, 1350000 }, }; +#endif static struct cpufreq_frequency_table s3c64xx_freq_table[] = { { 0, 0, 66000 }, @@ -51,15 +53,16 @@ static struct cpufreq_frequency_table s3c64xx_freq_table[] = { static int s3c64xx_cpufreq_set_target(struct cpufreq_policy *policy, unsigned int index) { - struct s3c64xx_dvfs *dvfs; - unsigned int old_freq, new_freq; + unsigned int new_freq = s3c64xx_freq_table[index].frequency; int ret; +#ifdef CONFIG_REGULATOR + struct s3c64xx_dvfs *dvfs; + unsigned int old_freq; + old_freq = clk_get_rate(policy->clk) / 1000; - new_freq = s3c64xx_freq_table[index].frequency; dvfs = &s3c64xx_dvfs_table[s3c64xx_freq_table[index].driver_data]; -#ifdef CONFIG_REGULATOR if (vddarm && new_freq > old_freq) { ret = regulator_set_voltage(vddarm, dvfs->vddarm_min, diff --git a/drivers/cpuidle/governors/teo.c b/drivers/cpuidle/governors/teo.c index 173ddcac540a..8fe5e1b47ef9 100644 --- a/drivers/cpuidle/governors/teo.c +++ b/drivers/cpuidle/governors/teo.c @@ -41,11 +41,7 @@ * idle state 2, the third bin spans from the target residency of idle state 2 * up to, but not including, the target residency of idle state 3 and so on. * The last bin spans from the target residency of the deepest idle state - * supplied by the driver to the scheduler tick period length or to infinity if - * the tick period length is less than the target residency of that state. In - * the latter case, the governor also counts events with the measured idle - * duration between the tick period length and the target residency of the - * deepest idle state. + * supplied by the driver to infinity. * * Two metrics called "hits" and "intercepts" are associated with each bin. * They are updated every time before selecting an idle state for the given CPU @@ -60,6 +56,10 @@ * into by the sleep length (these events are also referred to as "intercepts" * below). 
* + * The governor also counts "intercepts" with the measured idle duration below + * the tick period length and uses this information when deciding whether or not + * to stop the scheduler tick. + * * In order to select an idle state for a CPU, the governor takes the following * steps (modulo the possible latency constraint that must be taken into account * too): @@ -106,6 +106,12 @@ #include "gov.h" /* + * Idle state exit latency threshold used for deciding whether or not to check + * the time till the closest expected timer event. + */ +#define LATENCY_THRESHOLD_NS (RESIDENCY_THRESHOLD_NS / 2) + +/* * The PULSE value is added to metrics when they grow and the DECAY_SHIFT value * is used for decreasing metrics on a regular basis. */ @@ -124,18 +130,20 @@ struct teo_bin { /** * struct teo_cpu - CPU data used by the TEO cpuidle governor. - * @time_span_ns: Time between idle state selection and post-wakeup update. * @sleep_length_ns: Time till the closest timer event (at the selection time). * @state_bins: Idle state data bins for this CPU. * @total: Grand total of the "intercepts" and "hits" metrics for all bins. - * @tick_hits: Number of "hits" after TICK_NSEC. + * @tick_intercepts: "Intercepts" before TICK_NSEC. + * @short_idles: Wakeups after short idle periods. + * @artificial_wakeup: Set if the wakeup has been triggered by a safety net. */ struct teo_cpu { - s64 time_span_ns; s64 sleep_length_ns; struct teo_bin state_bins[CPUIDLE_STATE_MAX]; unsigned int total; - unsigned int tick_hits; + unsigned int tick_intercepts; + unsigned int short_idles; + bool artificial_wakeup; }; static DEFINE_PER_CPU(struct teo_cpu, teo_cpus); @@ -152,23 +160,17 @@ static void teo_update(struct cpuidle_driver *drv, struct cpuidle_device *dev) s64 target_residency_ns; u64 measured_ns; - if (cpu_data->time_span_ns >= cpu_data->sleep_length_ns) { + cpu_data->short_idles -= cpu_data->short_idles >> DECAY_SHIFT; + + if (cpu_data->artificial_wakeup) { /* - * One of the safety nets has triggered or the wakeup was close - * enough to the closest timer event expected at the idle state - * selection time to be discarded. + * If one of the safety nets has triggered, assume that this + * might have been a long sleep. */ measured_ns = U64_MAX; } else { u64 lat_ns = drv->states[dev->last_state_idx].exit_latency_ns; - /* - * The computations below are to determine whether or not the - * (saved) time till the next timer event and the measured idle - * duration fall into the same "bin", so use last_residency_ns - * for that instead of time_span_ns which includes the cpuidle - * overhead. - */ measured_ns = dev->last_residency_ns; /* * The delay between the wakeup and the first instruction @@ -176,14 +178,16 @@ static void teo_update(struct cpuidle_driver *drv, struct cpuidle_device *dev) * time, so take 1/2 of the exit latency as a very rough * approximation of the average of it. 
*/ - if (measured_ns >= lat_ns) + if (measured_ns >= lat_ns) { measured_ns -= lat_ns / 2; - else + if (measured_ns < RESIDENCY_THRESHOLD_NS) + cpu_data->short_idles += PULSE; + } else { measured_ns /= 2; + cpu_data->short_idles += PULSE; + } } - cpu_data->total = 0; - /* * Decay the "hits" and "intercepts" metrics for all of the bins and * find the bins that the sleep length and the measured idle duration @@ -195,8 +199,6 @@ static void teo_update(struct cpuidle_driver *drv, struct cpuidle_device *dev) bin->hits -= bin->hits >> DECAY_SHIFT; bin->intercepts -= bin->intercepts >> DECAY_SHIFT; - cpu_data->total += bin->hits + bin->intercepts; - target_residency_ns = drv->states[i].target_residency_ns; if (target_residency_ns <= cpu_data->sleep_length_ns) { @@ -206,38 +208,22 @@ static void teo_update(struct cpuidle_driver *drv, struct cpuidle_device *dev) } } - /* - * If the deepest state's target residency is below the tick length, - * make a record of it to help teo_select() decide whether or not - * to stop the tick. This effectively adds an extra hits-only bin - * beyond the last state-related one. - */ - if (target_residency_ns < TICK_NSEC) { - cpu_data->tick_hits -= cpu_data->tick_hits >> DECAY_SHIFT; - - cpu_data->total += cpu_data->tick_hits; - - if (TICK_NSEC <= cpu_data->sleep_length_ns) { - idx_timer = drv->state_count; - if (TICK_NSEC <= measured_ns) { - cpu_data->tick_hits += PULSE; - goto end; - } - } - } - + cpu_data->tick_intercepts -= cpu_data->tick_intercepts >> DECAY_SHIFT; /* * If the measured idle duration falls into the same bin as the sleep * length, this is a "hit", so update the "hits" metric for that bin. * Otherwise, update the "intercepts" metric for the bin fallen into by * the measured idle duration. */ - if (idx_timer == idx_duration) + if (idx_timer == idx_duration) { cpu_data->state_bins[idx_timer].hits += PULSE; - else + } else { cpu_data->state_bins[idx_duration].intercepts += PULSE; + if (TICK_NSEC <= measured_ns) + cpu_data->tick_intercepts += PULSE; + } -end: + cpu_data->total -= cpu_data->total >> DECAY_SHIFT; cpu_data->total += PULSE; } @@ -285,14 +271,12 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, struct teo_cpu *cpu_data = per_cpu_ptr(&teo_cpus, dev->cpu); s64 latency_req = cpuidle_governor_latency_req(dev->cpu); ktime_t delta_tick = TICK_NSEC / 2; - unsigned int tick_intercept_sum = 0; unsigned int idx_intercept_sum = 0; unsigned int intercept_sum = 0; unsigned int idx_hit_sum = 0; unsigned int hit_sum = 0; int constraint_idx = 0; int idx0 = 0, idx = -1; - int prev_intercept_idx; s64 duration_ns; int i; @@ -301,10 +285,14 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, dev->last_state_idx = -1; } - cpu_data->time_span_ns = local_clock(); /* - * Set the expected sleep length to infinity in case of an early - * return. + * Set the sleep length to infinity in case the invocation of + * tick_nohz_get_sleep_length() below is skipped, in which case it won't + * be known whether or not the subsequent wakeup is caused by a timer. + * It is generally fine to count the wakeup as an intercept then, except + * for the cases when the CPU is mostly woken up by timers and there may + * be opportunities to ask for a deeper idle state when no imminent + * timers are scheduled which may be missed. 
*/ cpu_data->sleep_length_ns = KTIME_MAX; @@ -360,17 +348,13 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, goto end; } - tick_intercept_sum = intercept_sum + - cpu_data->state_bins[drv->state_count-1].intercepts; - /* * If the sum of the intercepts metric for all of the idle states * shallower than the current candidate one (idx) is greater than the * sum of the intercepts and hits metrics for the candidate state and - * all of the deeper states a shallower idle state is likely to be a + * all of the deeper states, a shallower idle state is likely to be a * better choice. */ - prev_intercept_idx = idx; if (2 * idx_intercept_sum > cpu_data->total - idx_hit_sum) { int first_suitable_idx = idx; @@ -396,41 +380,38 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, * first enabled state that is deep enough. */ if (teo_state_ok(i, drv) && - !dev->states_usage[i].disable) + !dev->states_usage[i].disable) { idx = i; - else - idx = first_suitable_idx; - + break; + } + idx = first_suitable_idx; break; } if (dev->states_usage[i].disable) continue; - if (!teo_state_ok(i, drv)) { + if (teo_state_ok(i, drv)) { /* - * The current state is too shallow, but if an - * alternative candidate state has been found, - * it may still turn out to be a better choice. + * The current state is deep enough, but still + * there may be a better one. */ - if (first_suitable_idx != idx) - continue; - - break; + first_suitable_idx = i; + continue; } - first_suitable_idx = i; + /* + * The current state is too shallow, so if no suitable + * states other than the initial candidate have been + * found, give up (the remaining states to check are + * shallower still), but otherwise the first suitable + * state other than the initial candidate may turn out + * to be preferable. + */ + if (first_suitable_idx == idx) + break; } } - if (!idx && prev_intercept_idx) { - /* - * We have to query the sleep length here otherwise we don't - * know after wakeup if our guess was correct. - */ - duration_ns = tick_nohz_get_sleep_length(&delta_tick); - cpu_data->sleep_length_ns = duration_ns; - goto out_tick; - } /* * If there is a latency constraint, it may be necessary to select an @@ -440,24 +421,39 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, idx = constraint_idx; /* - * Skip the timers check if state 0 is the current candidate one, - * because an immediate non-timer wakeup is expected in that case. + * If either the candidate state is state 0 or its target residency is + * low enough, there is basically nothing more to do, but if the sleep + * length is not updated, the subsequent wakeup will be counted as an + * "intercept" which may be problematic in the cases when timer wakeups + * are dominant. Namely, it may effectively prevent deeper idle states + * from being selected at one point even if no imminent timers are + * scheduled. + * + * However, frequent timers in the RESIDENCY_THRESHOLD_NS range on one + * CPU are unlikely (user space has a default 50 us slack value for + * hrtimers and there are relatively few timers with a lower deadline + * value in the kernel), and even if they did happen, the potential + * benefit from using a deep idle state in that case would be + * questionable anyway for latency reasons. Thus if the measured idle + * duration falls into that range in the majority of cases, assume + * non-timer wakeups to be dominant and skip updating the sleep length + * to reduce latency. 
+ * + * Also, if the latency constraint is sufficiently low, it will force + * shallow idle states regardless of the wakeup type, so the sleep + * length need not be known in that case. */ - if (!idx) - goto out_tick; - - /* - * If state 0 is a polling one, check if the target residency of - * the current candidate state is low enough and skip the timers - * check in that case too. - */ - if ((drv->states[0].flags & CPUIDLE_FLAG_POLLING) && - drv->states[idx].target_residency_ns < RESIDENCY_THRESHOLD_NS) + if ((!idx || drv->states[idx].target_residency_ns < RESIDENCY_THRESHOLD_NS) && + (2 * cpu_data->short_idles >= cpu_data->total || + latency_req < LATENCY_THRESHOLD_NS)) goto out_tick; duration_ns = tick_nohz_get_sleep_length(&delta_tick); cpu_data->sleep_length_ns = duration_ns; + if (!idx) + goto out_tick; + /* * If the closest expected timer is before the target residency of the * candidate state, a shallower one needs to be found. @@ -474,7 +470,7 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, * total wakeup events, do not stop the tick. */ if (drv->states[idx].target_residency_ns < TICK_NSEC && - tick_intercept_sum > cpu_data->total / 2 + cpu_data->total / 8) + cpu_data->tick_intercepts > cpu_data->total / 2 + cpu_data->total / 8) duration_ns = TICK_NSEC / 2; end: @@ -511,17 +507,16 @@ static void teo_reflect(struct cpuidle_device *dev, int state) struct teo_cpu *cpu_data = per_cpu_ptr(&teo_cpus, dev->cpu); dev->last_state_idx = state; - /* - * If the wakeup was not "natural", but triggered by one of the safety - * nets, assume that the CPU might have been idle for the entire sleep - * length time. - */ if (dev->poll_time_limit || (tick_nohz_idle_got_tick() && cpu_data->sleep_length_ns > TICK_NSEC)) { + /* + * The wakeup was not "genuine", but triggered by one of the + * safety nets. + */ dev->poll_time_limit = false; - cpu_data->time_span_ns = cpu_data->sleep_length_ns; + cpu_data->artificial_wakeup = true; } else { - cpu_data->time_span_ns = local_clock() - cpu_data->time_span_ns; + cpu_data->artificial_wakeup = false; } } diff --git a/drivers/firmware/cirrus/Kconfig b/drivers/firmware/cirrus/Kconfig index ee09269c63b5..0a883091259a 100644 --- a/drivers/firmware/cirrus/Kconfig +++ b/drivers/firmware/cirrus/Kconfig @@ -6,15 +6,13 @@ config FW_CS_DSP config FW_CS_DSP_KUNIT_TEST_UTILS tristate - depends on KUNIT - select REGMAP + depends on KUNIT && REGMAP select FW_CS_DSP config FW_CS_DSP_KUNIT_TEST tristate "KUnit tests for Cirrus Logic cs_dsp" if !KUNIT_ALL_TESTS - depends on KUNIT + depends on KUNIT && REGMAP default KUNIT_ALL_TESTS - select REGMAP select FW_CS_DSP select FW_CS_DSP_KUNIT_TEST_UTILS help diff --git a/drivers/gpio/gpio-mxc.c b/drivers/gpio/gpio-mxc.c index 4cb455b2bdee..619b6fb9d833 100644 --- a/drivers/gpio/gpio-mxc.c +++ b/drivers/gpio/gpio-mxc.c @@ -490,8 +490,7 @@ static int mxc_gpio_probe(struct platform_device *pdev) port->gc.request = mxc_gpio_request; port->gc.free = mxc_gpio_free; port->gc.to_irq = mxc_gpio_to_irq; - port->gc.base = (pdev->id < 0) ? 
of_alias_get_id(np, "gpio") * 32 : - pdev->id * 32; + port->gc.base = of_alias_get_id(np, "gpio") * 32; err = devm_gpiochip_add_data(&pdev->dev, &port->gc, port); if (err) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c index c1772f44b1d7..2523221a2519 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c @@ -4021,17 +4021,6 @@ static void gfx_v12_0_update_coarse_grain_clock_gating(struct amdgpu_device *ade if (def != data) WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D, data); - - data = RREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL); - data &= ~SDMA0_RLC_CGCG_CTRL__CGCG_INT_ENABLE_MASK; - WREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL, data); - - /* Some ASICs only have one SDMA instance, not need to configure SDMA1 */ - if (adev->sdma.num_instances > 1) { - data = RREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL); - data &= ~SDMA1_RLC_CGCG_CTRL__CGCG_INT_ENABLE_MASK; - WREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL, data); - } } } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 1405e8affd48..d4593374e7a1 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -2325,9 +2325,9 @@ static int unmap_queues_cpsch(struct device_queue_manager *dqm, */ mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]; if (mqd_mgr->check_preemption_failed(mqd_mgr, dqm->packet_mgr.priv_queue->queue->mqd)) { + while (halt_if_hws_hang) + schedule(); if (reset_queues_on_hws_hang(dqm)) { - while (halt_if_hws_hang) - schedule(); dqm->is_hws_hang = true; kfd_hws_hang(dqm); retval = -ETIME; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c index 9df56f8e09f9..bcddd989c7f3 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c @@ -86,9 +86,12 @@ void kfd_process_dequeue_from_device(struct kfd_process_device *pdd) if (pdd->already_dequeued) return; - + /* The MES context flush needs to filter out the case which the + * KFD process is created without setting up the MES context and + * queue for creating a compute queue. 
+ */ dev->dqm->ops.process_termination(dev->dqm, &pdd->qpd); - if (dev->kfd->shared_resources.enable_mes && + if (dev->kfd->shared_resources.enable_mes && !!pdd->proc_ctx_gpu_addr && down_read_trylock(&dev->adev->reset_domain->sem)) { amdgpu_mes_flush_shader_debugger(dev->adev, pdd->proc_ctx_gpu_addr); diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index b0e66c05d811..ac3fd81fecef 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -12326,10 +12326,14 @@ void amdgpu_dm_update_freesync_caps(struct drm_connector *connector, if (edid && (sink->sink_signal == SIGNAL_TYPE_DISPLAY_PORT || sink->sink_signal == SIGNAL_TYPE_EDP)) { - amdgpu_dm_connector->min_vfreq = connector->display_info.monitor_range.min_vfreq; - amdgpu_dm_connector->max_vfreq = connector->display_info.monitor_range.max_vfreq; - if (amdgpu_dm_connector->max_vfreq - amdgpu_dm_connector->min_vfreq > 10) - freesync_capable = true; + if (amdgpu_dm_connector->dc_link && + amdgpu_dm_connector->dc_link->dpcd_caps.allow_invalid_MSA_timing_param) { + amdgpu_dm_connector->min_vfreq = connector->display_info.monitor_range.min_vfreq; + amdgpu_dm_connector->max_vfreq = connector->display_info.monitor_range.max_vfreq; + if (amdgpu_dm_connector->max_vfreq - amdgpu_dm_connector->min_vfreq > 10) + freesync_capable = true; + } + parse_amd_vsdb(amdgpu_dm_connector, edid, &vsdb_info); if (vsdb_info.replay_mode) { diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c index 1f974ea3b0c6..1648226586e2 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c @@ -89,7 +89,7 @@ #define mmCLK1_CLK4_ALLOW_DS 0x16EA8 #define mmCLK1_CLK5_ALLOW_DS 0x16EB1 -#define mmCLK5_spll_field_8 0x1B04B +#define mmCLK5_spll_field_8 0x1B24B #define mmDENTIST_DISPCLK_CNTL 0x0124 #define regDENTIST_DISPCLK_CNTL 0x0064 #define regDENTIST_DISPCLK_CNTL_BASE_IDX 1 diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c index f6b029354327..83163d7c7f00 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c @@ -1732,7 +1732,6 @@ static ssize_t aldebaran_get_gpu_metrics(struct smu_context *smu, gpu_metrics->average_gfx_activity = metrics.AverageGfxActivity; gpu_metrics->average_umc_activity = metrics.AverageUclkActivity; - gpu_metrics->average_mm_activity = 0; /* Valid power data is available only from primary die */ if (aldebaran_is_primary(smu)) { diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig index eec95c724b25..fc438f445771 100644 --- a/drivers/i2c/busses/Kconfig +++ b/drivers/i2c/busses/Kconfig @@ -756,6 +756,7 @@ config I2C_IMX config I2C_IMX_LPI2C tristate "IMX Low Power I2C interface" depends on ARCH_MXC || COMPILE_TEST + select I2C_SLAVE help Say Y here if you want to use the Low Power IIC bus controller on the Freescale i.MX processors. 
diff --git a/drivers/i2c/i2c-core-base.c b/drivers/i2c/i2c-core-base.c index e2c2a2ef1c12..5546184df05f 100644 --- a/drivers/i2c/i2c-core-base.c +++ b/drivers/i2c/i2c-core-base.c @@ -583,6 +583,9 @@ static int i2c_device_probe(struct device *dev) goto err_detach_pm_domain; } + client->debugfs = debugfs_create_dir(dev_name(&client->dev), + client->adapter->debugfs); + if (driver->probe) status = driver->probe(client); else @@ -602,6 +605,7 @@ static int i2c_device_probe(struct device *dev) return 0; err_release_driver_resources: + debugfs_remove_recursive(client->debugfs); devres_release_group(&client->dev, client->devres_group_id); err_detach_pm_domain: dev_pm_domain_detach(&client->dev, do_power_on); @@ -627,6 +631,8 @@ static void i2c_device_remove(struct device *dev) driver->remove(client); } + debugfs_remove_recursive(client->debugfs); + devres_release_group(&client->dev, client->devres_group_id); dev_pm_domain_detach(&client->dev, true); @@ -1015,8 +1021,6 @@ i2c_new_client_device(struct i2c_adapter *adap, struct i2c_board_info const *inf if (status) goto out_remove_swnode; - client->debugfs = debugfs_create_dir(dev_name(&client->dev), adap->debugfs); - dev_dbg(&adap->dev, "client [%s] registered with bus id %s\n", client->name, dev_name(&client->dev)); @@ -1061,7 +1065,6 @@ void i2c_unregister_device(struct i2c_client *client) if (ACPI_COMPANION(&client->dev)) acpi_device_clear_enumerated(ACPI_COMPANION(&client->dev)); - debugfs_remove_recursive(client->debugfs); device_remove_software_node(&client->dev); device_unregister(&client->dev); } diff --git a/drivers/md/md-bitmap.c b/drivers/md/md-bitmap.c index ec4ecd96e6b1..23c09d22fcdb 100644 --- a/drivers/md/md-bitmap.c +++ b/drivers/md/md-bitmap.c @@ -2355,7 +2355,10 @@ static int bitmap_get_stats(void *data, struct md_bitmap_stats *stats) if (!bitmap) return -ENOENT; - + if (bitmap->mddev->bitmap_info.external) + return -ENOENT; + if (!bitmap->storage.sb_page) /* no superblock */ + return -EINVAL; sb = kmap_local_page(bitmap->storage.sb_page); stats->sync_size = le64_to_cpu(sb->sync_size); kunmap_local(sb); diff --git a/drivers/md/md.c b/drivers/md/md.c index 22f7bd3b94d5..30b3dbbce2d2 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -8376,6 +8376,10 @@ static int md_seq_show(struct seq_file *seq, void *v) return 0; spin_unlock(&all_mddevs_lock); + + /* prevent bitmap to be freed after checking */ + mutex_lock(&mddev->bitmap_info.mutex); + spin_lock(&mddev->lock); if (mddev->pers || mddev->raid_disks || !list_empty(&mddev->disks)) { seq_printf(seq, "%s : ", mdname(mddev)); @@ -8451,6 +8455,7 @@ static int md_seq_show(struct seq_file *seq, void *v) seq_printf(seq, "\n"); } spin_unlock(&mddev->lock); + mutex_unlock(&mddev->bitmap_info.mutex); spin_lock(&all_mddevs_lock); if (mddev == list_last_entry(&all_mddevs, struct mddev, all_mddevs)) diff --git a/drivers/media/usb/uvc/uvc_driver.c b/drivers/media/usb/uvc/uvc_driver.c index a10d4f4d9f95..deadbcea5e22 100644 --- a/drivers/media/usb/uvc/uvc_driver.c +++ b/drivers/media/usb/uvc/uvc_driver.c @@ -790,27 +790,14 @@ static const u8 uvc_media_transport_input_guid[16] = UVC_GUID_UVC_MEDIA_TRANSPORT_INPUT; static const u8 uvc_processing_guid[16] = UVC_GUID_UVC_PROCESSING; -static struct uvc_entity *uvc_alloc_new_entity(struct uvc_device *dev, u16 type, - u16 id, unsigned int num_pads, - unsigned int extra_size) +static struct uvc_entity *uvc_alloc_entity(u16 type, u16 id, + unsigned int num_pads, unsigned int extra_size) { struct uvc_entity *entity; unsigned int num_inputs; unsigned int 
size; unsigned int i; - /* Per UVC 1.1+ spec 3.7.2, the ID should be non-zero. */ - if (id == 0) { - dev_err(&dev->udev->dev, "Found Unit with invalid ID 0.\n"); - return ERR_PTR(-EINVAL); - } - - /* Per UVC 1.1+ spec 3.7.2, the ID is unique. */ - if (uvc_entity_by_id(dev, id)) { - dev_err(&dev->udev->dev, "Found multiple Units with ID %u\n", id); - return ERR_PTR(-EINVAL); - } - extra_size = roundup(extra_size, sizeof(*entity->pads)); if (num_pads) num_inputs = type & UVC_TERM_OUTPUT ? num_pads : num_pads - 1; @@ -820,7 +807,7 @@ static struct uvc_entity *uvc_alloc_new_entity(struct uvc_device *dev, u16 type, + num_inputs; entity = kzalloc(size, GFP_KERNEL); if (entity == NULL) - return ERR_PTR(-ENOMEM); + return NULL; entity->id = id; entity->type = type; @@ -932,10 +919,10 @@ static int uvc_parse_vendor_control(struct uvc_device *dev, break; } - unit = uvc_alloc_new_entity(dev, UVC_VC_EXTENSION_UNIT, - buffer[3], p + 1, 2 * n); - if (IS_ERR(unit)) - return PTR_ERR(unit); + unit = uvc_alloc_entity(UVC_VC_EXTENSION_UNIT, buffer[3], + p + 1, 2*n); + if (unit == NULL) + return -ENOMEM; memcpy(unit->guid, &buffer[4], 16); unit->extension.bNumControls = buffer[20]; @@ -1044,10 +1031,10 @@ static int uvc_parse_standard_control(struct uvc_device *dev, return -EINVAL; } - term = uvc_alloc_new_entity(dev, type | UVC_TERM_INPUT, - buffer[3], 1, n + p); - if (IS_ERR(term)) - return PTR_ERR(term); + term = uvc_alloc_entity(type | UVC_TERM_INPUT, buffer[3], + 1, n + p); + if (term == NULL) + return -ENOMEM; if (UVC_ENTITY_TYPE(term) == UVC_ITT_CAMERA) { term->camera.bControlSize = n; @@ -1103,10 +1090,10 @@ static int uvc_parse_standard_control(struct uvc_device *dev, return 0; } - term = uvc_alloc_new_entity(dev, type | UVC_TERM_OUTPUT, - buffer[3], 1, 0); - if (IS_ERR(term)) - return PTR_ERR(term); + term = uvc_alloc_entity(type | UVC_TERM_OUTPUT, buffer[3], + 1, 0); + if (term == NULL) + return -ENOMEM; memcpy(term->baSourceID, &buffer[7], 1); @@ -1125,10 +1112,9 @@ static int uvc_parse_standard_control(struct uvc_device *dev, return -EINVAL; } - unit = uvc_alloc_new_entity(dev, buffer[2], buffer[3], - p + 1, 0); - if (IS_ERR(unit)) - return PTR_ERR(unit); + unit = uvc_alloc_entity(buffer[2], buffer[3], p + 1, 0); + if (unit == NULL) + return -ENOMEM; memcpy(unit->baSourceID, &buffer[5], p); @@ -1148,9 +1134,9 @@ static int uvc_parse_standard_control(struct uvc_device *dev, return -EINVAL; } - unit = uvc_alloc_new_entity(dev, buffer[2], buffer[3], 2, n); - if (IS_ERR(unit)) - return PTR_ERR(unit); + unit = uvc_alloc_entity(buffer[2], buffer[3], 2, n); + if (unit == NULL) + return -ENOMEM; memcpy(unit->baSourceID, &buffer[4], 1); unit->processing.wMaxMultiplier = @@ -1177,10 +1163,9 @@ static int uvc_parse_standard_control(struct uvc_device *dev, return -EINVAL; } - unit = uvc_alloc_new_entity(dev, buffer[2], buffer[3], - p + 1, n); - if (IS_ERR(unit)) - return PTR_ERR(unit); + unit = uvc_alloc_entity(buffer[2], buffer[3], p + 1, n); + if (unit == NULL) + return -ENOMEM; memcpy(unit->guid, &buffer[4], 16); unit->extension.bNumControls = buffer[20]; @@ -1320,10 +1305,9 @@ static int uvc_gpio_parse(struct uvc_device *dev) return dev_err_probe(&dev->intf->dev, irq, "No IRQ for privacy GPIO\n"); - unit = uvc_alloc_new_entity(dev, UVC_EXT_GPIO_UNIT, - UVC_EXT_GPIO_UNIT_ID, 0, 1); - if (IS_ERR(unit)) - return PTR_ERR(unit); + unit = uvc_alloc_entity(UVC_EXT_GPIO_UNIT, UVC_EXT_GPIO_UNIT_ID, 0, 1); + if (!unit) + return -ENOMEM; unit->gpio.gpio_privacy = gpio_privacy; unit->gpio.irq = irq; diff --git 
a/drivers/mtd/mtd_blkdevs.c b/drivers/mtd/mtd_blkdevs.c index ee7e1d908986..847c11542f02 100644 --- a/drivers/mtd/mtd_blkdevs.c +++ b/drivers/mtd/mtd_blkdevs.c @@ -404,6 +404,7 @@ out_list_del: int del_mtd_blktrans_dev(struct mtd_blktrans_dev *old) { unsigned long flags; + unsigned int memflags; lockdep_assert_held(&mtd_table_mutex); @@ -420,10 +421,10 @@ int del_mtd_blktrans_dev(struct mtd_blktrans_dev *old) spin_unlock_irqrestore(&old->queue_lock, flags); /* freeze+quiesce queue to ensure all requests are flushed */ - blk_mq_freeze_queue(old->rq); + memflags = blk_mq_freeze_queue(old->rq); blk_mq_quiesce_queue(old->rq); blk_mq_unquiesce_queue(old->rq); - blk_mq_unfreeze_queue(old->rq); + blk_mq_unfreeze_queue(old->rq, memflags); /* If the device is currently open, tell trans driver to close it, then put mtd device, and don't touch it again */ diff --git a/drivers/mtd/ubi/build.c b/drivers/mtd/ubi/build.c index 30be4ed68fad..ef6a22f372f9 100644 --- a/drivers/mtd/ubi/build.c +++ b/drivers/mtd/ubi/build.c @@ -1537,7 +1537,7 @@ static int ubi_mtd_param_parse(const char *val, const struct kernel_param *kp) if (token) { int err = kstrtoint(token, 10, &p->ubi_num); - if (err) { + if (err || p->ubi_num < UBI_DEV_NUM_AUTO) { pr_err("UBI error: bad value for ubi_num parameter: %s\n", token); return -EINVAL; diff --git a/drivers/mtd/ubi/cdev.c b/drivers/mtd/ubi/cdev.c index 6bb80d7714bc..b700a0efaa93 100644 --- a/drivers/mtd/ubi/cdev.c +++ b/drivers/mtd/ubi/cdev.c @@ -828,6 +828,70 @@ out_free: return err; } +static int ubi_get_ec_info(struct ubi_device *ubi, struct ubi_ecinfo_req __user *ureq) +{ + struct ubi_ecinfo_req req; + struct ubi_wl_entry *wl; + int read_cnt; + int peb; + int end_peb; + + /* Copy the input arguments */ + if (copy_from_user(&req, ureq, sizeof(struct ubi_ecinfo_req))) + return -EFAULT; + + /* Check input arguments */ + if (req.length <= 0 || req.start < 0 || req.start >= ubi->peb_count) + return -EINVAL; + + if (check_add_overflow(req.start, req.length, &end_peb)) + return -EINVAL; + + if (end_peb > ubi->peb_count) + end_peb = ubi->peb_count; + + /* Check access rights before filling erase_counters array */ + if (!access_ok((void __user *)ureq->erase_counters, + (end_peb-req.start) * sizeof(int32_t))) + return -EFAULT; + + /* Fill erase counter array */ + read_cnt = 0; + for (peb = req.start; peb < end_peb; read_cnt++, peb++) { + int ec; + + if (ubi_io_is_bad(ubi, peb)) { + if (__put_user(UBI_UNKNOWN, ureq->erase_counters+read_cnt)) + return -EFAULT; + + continue; + } + + spin_lock(&ubi->wl_lock); + + wl = ubi->lookuptbl[peb]; + if (wl) + ec = wl->ec; + else + ec = UBI_UNKNOWN; + + spin_unlock(&ubi->wl_lock); + + if (__put_user(ec, ureq->erase_counters+read_cnt)) + return -EFAULT; + + } + + /* Return actual read length */ + req.read_length = read_cnt; + + /* Copy everything except erase counter array */ + if (copy_to_user(ureq, &req, sizeof(struct ubi_ecinfo_req))) + return -EFAULT; + + return 0; +} + static long ubi_cdev_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { @@ -991,6 +1055,12 @@ static long ubi_cdev_ioctl(struct file *file, unsigned int cmd, break; } + case UBI_IOCECNFO: + { + err = ubi_get_ec_info(ubi, argp); + break; + } + default: err = -ENOTTY; break; diff --git a/drivers/mtd/ubi/ubi.h b/drivers/mtd/ubi/ubi.h index 26cc53ad34ec..c792b9bcab9b 100644 --- a/drivers/mtd/ubi/ubi.h +++ b/drivers/mtd/ubi/ubi.h @@ -549,7 +549,6 @@ struct ubi_debug_info { * @peb_buf: a buffer of PEB size used for different purposes * @buf_mutex: protects @peb_buf * 
@ckvol_mutex: serializes static volume checking when opening - * @wl_reboot_notifier: close all wear-leveling work before reboot * * @dbg: debugging information for this UBI device */ @@ -652,7 +651,6 @@ struct ubi_device { void *peb_buf; struct mutex buf_mutex; struct mutex ckvol_mutex; - struct notifier_block wl_reboot_notifier; struct ubi_debug_info dbg; }; diff --git a/drivers/mtd/ubi/wl.c b/drivers/mtd/ubi/wl.c index 4f6f339d8fb8..fbd399cf6503 100644 --- a/drivers/mtd/ubi/wl.c +++ b/drivers/mtd/ubi/wl.c @@ -89,7 +89,6 @@ #include <linux/crc32.h> #include <linux/freezer.h> #include <linux/kthread.h> -#include <linux/reboot.h> #include "ubi.h" #include "wl.h" @@ -128,8 +127,6 @@ static int self_check_in_wl_tree(const struct ubi_device *ubi, struct ubi_wl_entry *e, struct rb_root *root); static int self_check_in_pq(const struct ubi_device *ubi, struct ubi_wl_entry *e); -static int ubi_wl_reboot_notifier(struct notifier_block *n, - unsigned long state, void *cmd); /** * wl_tree_add - add a wear-leveling entry to a WL RB-tree. @@ -1953,13 +1950,6 @@ int ubi_wl_init(struct ubi_device *ubi, struct ubi_attach_info *ai) if (!ubi->ro_mode && !ubi->fm_disabled) ubi_ensure_anchor_pebs(ubi); #endif - - if (!ubi->wl_reboot_notifier.notifier_call) { - ubi->wl_reboot_notifier.notifier_call = ubi_wl_reboot_notifier; - ubi->wl_reboot_notifier.priority = 1; /* Higher than MTD */ - register_reboot_notifier(&ubi->wl_reboot_notifier); - } - return 0; out_free: @@ -2005,17 +1995,6 @@ void ubi_wl_close(struct ubi_device *ubi) kfree(ubi->lookuptbl); } -static int ubi_wl_reboot_notifier(struct notifier_block *n, - unsigned long state, void *cmd) -{ - struct ubi_device *ubi; - - ubi = container_of(n, struct ubi_device, wl_reboot_notifier); - ubi_wl_close(ubi); - - return NOTIFY_DONE; -} - /** * self_check_ec - make sure that the erase counter of a PEB is correct. 
* @ubi: UBI device description object diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 7b78c2bada81..e45bba240cbc 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1538,17 +1538,20 @@ static netdev_features_t bond_fix_features(struct net_device *dev, NETIF_F_HIGHDMA | NETIF_F_LRO) #define BOND_ENC_FEATURES (NETIF_F_HW_CSUM | NETIF_F_SG | \ - NETIF_F_RXCSUM | NETIF_F_GSO_SOFTWARE) + NETIF_F_RXCSUM | NETIF_F_GSO_SOFTWARE | \ + NETIF_F_GSO_PARTIAL) #define BOND_MPLS_FEATURES (NETIF_F_HW_CSUM | NETIF_F_SG | \ NETIF_F_GSO_SOFTWARE) +#define BOND_GSO_PARTIAL_FEATURES (NETIF_F_GSO_ESP) + static void bond_compute_features(struct bonding *bond) { + netdev_features_t gso_partial_features = BOND_GSO_PARTIAL_FEATURES; unsigned int dst_release_flag = IFF_XMIT_DST_RELEASE | IFF_XMIT_DST_RELEASE_PERM; - netdev_features_t gso_partial_features = NETIF_F_GSO_ESP; netdev_features_t vlan_features = BOND_VLAN_FEATURES; netdev_features_t enc_features = BOND_ENC_FEATURES; #ifdef CONFIG_XFRM_OFFLOAD @@ -1582,8 +1585,9 @@ static void bond_compute_features(struct bonding *bond) BOND_XFRM_FEATURES); #endif /* CONFIG_XFRM_OFFLOAD */ - if (slave->dev->hw_enc_features & NETIF_F_GSO_PARTIAL) - gso_partial_features &= slave->dev->gso_partial_features; + gso_partial_features = netdev_increment_features(gso_partial_features, + slave->dev->gso_partial_features, + BOND_GSO_PARTIAL_FEATURES); mpls_features = netdev_increment_features(mpls_features, slave->dev->mpls_features, @@ -1598,12 +1602,8 @@ static void bond_compute_features(struct bonding *bond) } bond_dev->hard_header_len = max_hard_header_len; - if (gso_partial_features & NETIF_F_GSO_ESP) - bond_dev->gso_partial_features |= NETIF_F_GSO_ESP; - else - bond_dev->gso_partial_features &= ~NETIF_F_GSO_ESP; - done: + bond_dev->gso_partial_features = gso_partial_features; bond_dev->vlan_features = vlan_features; bond_dev->hw_enc_features = enc_features | NETIF_F_GSO_ENCAP_ALL | NETIF_F_HW_VLAN_CTAG_TX | @@ -6046,6 +6046,7 @@ void bond_setup(struct net_device *bond_dev) bond_dev->hw_features |= NETIF_F_GSO_ENCAP_ALL; bond_dev->features |= bond_dev->hw_features; bond_dev->features |= NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX; + bond_dev->features |= NETIF_F_GSO_PARTIAL; #ifdef CONFIG_XFRM_OFFLOAD bond_dev->hw_features |= BOND_XFRM_FEATURES; /* Only enable XFRM features if this is an active-backup config */ diff --git a/drivers/net/ethernet/broadcom/bgmac.h b/drivers/net/ethernet/broadcom/bgmac.h index d73ef262991d..6fee9a41839c 100644 --- a/drivers/net/ethernet/broadcom/bgmac.h +++ b/drivers/net/ethernet/broadcom/bgmac.h @@ -328,8 +328,7 @@ #define BGMAC_RX_FRAME_OFFSET 30 /* There are 2 unused bytes between header and real data */ #define BGMAC_RX_BUF_OFFSET (NET_SKB_PAD + NET_IP_ALIGN - \ BGMAC_RX_FRAME_OFFSET) -/* Jumbo frame size with FCS */ -#define BGMAC_RX_MAX_FRAME_SIZE 9724 +#define BGMAC_RX_MAX_FRAME_SIZE 1536 #define BGMAC_RX_BUF_SIZE (BGMAC_RX_FRAME_OFFSET + BGMAC_RX_MAX_FRAME_SIZE) #define BGMAC_RX_ALLOC_SIZE (SKB_DATA_ALIGN(BGMAC_RX_BUF_SIZE + BGMAC_RX_BUF_OFFSET) + \ SKB_DATA_ALIGN(sizeof(struct skb_shared_info))) diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index 9cc8db10a8d6..1c94bf1db718 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -7424,7 +7424,7 @@ static void tg3_napi_enable(struct tg3 *tp) for (i = 0; i < tp->irq_cnt; i++) { tnapi = &tp->napi[i]; - napi_enable(&tnapi->napi); + 
napi_enable_locked(&tnapi->napi); if (tnapi->tx_buffers) { netif_queue_set_napi(tp->dev, txq_idx, NETDEV_QUEUE_TYPE_TX, @@ -7445,9 +7445,10 @@ static void tg3_napi_init(struct tg3 *tp) int i; for (i = 0; i < tp->irq_cnt; i++) { - netif_napi_add(tp->dev, &tp->napi[i].napi, - i ? tg3_poll_msix : tg3_poll); - netif_napi_set_irq(&tp->napi[i].napi, tp->napi[i].irq_vec); + netif_napi_add_locked(tp->dev, &tp->napi[i].napi, + i ? tg3_poll_msix : tg3_poll); + netif_napi_set_irq_locked(&tp->napi[i].napi, + tp->napi[i].irq_vec); } } @@ -11259,6 +11260,8 @@ static void tg3_timer_stop(struct tg3 *tp) static int tg3_restart_hw(struct tg3 *tp, bool reset_phy) __releases(tp->lock) __acquires(tp->lock) + __releases(tp->dev->lock) + __acquires(tp->dev->lock) { int err; @@ -11271,7 +11274,9 @@ static int tg3_restart_hw(struct tg3 *tp, bool reset_phy) tg3_timer_stop(tp); tp->irq_sync = 0; tg3_napi_enable(tp); + netdev_unlock(tp->dev); dev_close(tp->dev); + netdev_lock(tp->dev); tg3_full_lock(tp, 0); } return err; @@ -11299,6 +11304,7 @@ static void tg3_reset_task(struct work_struct *work) tg3_netif_stop(tp); + netdev_lock(tp->dev); tg3_full_lock(tp, 1); if (tg3_flag(tp, TX_RECOVERY_PENDING)) { @@ -11318,12 +11324,14 @@ static void tg3_reset_task(struct work_struct *work) * call cancel_work_sync() and wait forever. */ tg3_flag_clear(tp, RESET_TASK_PENDING); + netdev_unlock(tp->dev); dev_close(tp->dev); goto out; } tg3_netif_start(tp); tg3_full_unlock(tp); + netdev_unlock(tp->dev); tg3_phy_start(tp); tg3_flag_clear(tp, RESET_TASK_PENDING); out: @@ -11683,9 +11691,11 @@ static int tg3_start(struct tg3 *tp, bool reset_phy, bool test_irq, if (err) goto out_ints_fini; + netdev_lock(dev); tg3_napi_init(tp); tg3_napi_enable(tp); + netdev_unlock(dev); for (i = 0; i < tp->irq_cnt; i++) { err = tg3_request_irq(tp, i); @@ -12569,6 +12579,7 @@ static int tg3_set_ringparam(struct net_device *dev, irq_sync = 1; } + netdev_lock(dev); tg3_full_lock(tp, irq_sync); tp->rx_pending = ering->rx_pending; @@ -12597,6 +12608,7 @@ static int tg3_set_ringparam(struct net_device *dev, } tg3_full_unlock(tp); + netdev_unlock(dev); if (irq_sync && !err) tg3_phy_start(tp); @@ -12678,6 +12690,7 @@ static int tg3_set_pauseparam(struct net_device *dev, struct ethtool_pauseparam irq_sync = 1; } + netdev_lock(dev); tg3_full_lock(tp, irq_sync); if (epause->autoneg) @@ -12707,6 +12720,7 @@ static int tg3_set_pauseparam(struct net_device *dev, struct ethtool_pauseparam } tg3_full_unlock(tp); + netdev_unlock(dev); } tp->phy_flags |= TG3_PHYFLG_USER_CONFIGURED; @@ -13911,6 +13925,7 @@ static void tg3_self_test(struct net_device *dev, struct ethtool_test *etest, data[TG3_INTERRUPT_TEST] = 1; } + netdev_lock(dev); tg3_full_lock(tp, 0); tg3_halt(tp, RESET_KIND_SHUTDOWN, 1); @@ -13922,6 +13937,7 @@ static void tg3_self_test(struct net_device *dev, struct ethtool_test *etest, } tg3_full_unlock(tp); + netdev_unlock(dev); if (irq_sync && !err2) tg3_phy_start(tp); @@ -14365,6 +14381,7 @@ static int tg3_change_mtu(struct net_device *dev, int new_mtu) tg3_set_mtu(dev, tp, new_mtu); + netdev_lock(dev); tg3_full_lock(tp, 1); tg3_halt(tp, RESET_KIND_SHUTDOWN, 1); @@ -14384,6 +14401,7 @@ static int tg3_change_mtu(struct net_device *dev, int new_mtu) tg3_netif_start(tp); tg3_full_unlock(tp); + netdev_unlock(dev); if (!err) tg3_phy_start(tp); @@ -18164,6 +18182,7 @@ static int tg3_resume(struct device *device) netif_device_attach(dev); + netdev_lock(dev); tg3_full_lock(tp, 0); tg3_ape_driver_state_change(tp, RESET_KIND_INIT); @@ -18180,6 +18199,7 @@ static int 
tg3_resume(struct device *device) out: tg3_full_unlock(tp); + netdev_unlock(dev); if (!err) tg3_phy_start(tp); @@ -18260,7 +18280,9 @@ static pci_ers_result_t tg3_io_error_detected(struct pci_dev *pdev, done: if (state == pci_channel_io_perm_failure) { if (netdev) { + netdev_lock(netdev); tg3_napi_enable(tp); + netdev_unlock(netdev); dev_close(netdev); } err = PCI_ERS_RESULT_DISCONNECT; @@ -18314,7 +18336,9 @@ static pci_ers_result_t tg3_io_slot_reset(struct pci_dev *pdev) done: if (rc != PCI_ERS_RESULT_RECOVERED && netdev && netif_running(netdev)) { + netdev_lock(netdev); tg3_napi_enable(tp); + netdev_unlock(netdev); dev_close(netdev); } rtnl_unlock(); @@ -18340,12 +18364,14 @@ static void tg3_io_resume(struct pci_dev *pdev) if (!netdev || !netif_running(netdev)) goto done; + netdev_lock(netdev); tg3_full_lock(tp, 0); tg3_ape_driver_state_change(tp, RESET_KIND_INIT); tg3_flag_set(tp, INIT_COMPLETE); err = tg3_restart_hw(tp, true); if (err) { tg3_full_unlock(tp); + netdev_unlock(netdev); netdev_err(netdev, "Cannot restart hardware after reset.\n"); goto done; } @@ -18357,6 +18383,7 @@ static void tg3_io_resume(struct pci_dev *pdev) tg3_netif_start(tp); tg3_full_unlock(tp); + netdev_unlock(netdev); tg3_phy_start(tp); diff --git a/drivers/net/ethernet/davicom/dm9000.c b/drivers/net/ethernet/davicom/dm9000.c index 8735e333034c..b87eaf0c250c 100644 --- a/drivers/net/ethernet/davicom/dm9000.c +++ b/drivers/net/ethernet/davicom/dm9000.c @@ -1777,10 +1777,11 @@ static void dm9000_drv_remove(struct platform_device *pdev) unregister_netdev(ndev); dm9000_release_board(pdev, dm); - free_netdev(ndev); /* free device structure */ if (dm->power_supply) regulator_disable(dm->power_supply); + free_netdev(ndev); /* free device structure */ + dev_dbg(&pdev->dev, "released and freed device\n"); } diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 68725506a095..f7c4ce8e9a26 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -840,6 +840,8 @@ static int fec_enet_txq_submit_tso(struct fec_enet_priv_tx_q *txq, struct fec_enet_private *fep = netdev_priv(ndev); int hdr_len, total_len, data_left; struct bufdesc *bdp = txq->bd.cur; + struct bufdesc *tmp_bdp; + struct bufdesc_ex *ebdp; struct tso_t tso; unsigned int index = 0; int ret; @@ -913,7 +915,34 @@ static int fec_enet_txq_submit_tso(struct fec_enet_priv_tx_q *txq, return 0; err_release: - /* TODO: Release all used data descriptors for TSO */ + /* Release all used data descriptors for TSO */ + tmp_bdp = txq->bd.cur; + + while (tmp_bdp != bdp) { + /* Unmap data buffers */ + if (tmp_bdp->cbd_bufaddr && + !IS_TSO_HEADER(txq, fec32_to_cpu(tmp_bdp->cbd_bufaddr))) + dma_unmap_single(&fep->pdev->dev, + fec32_to_cpu(tmp_bdp->cbd_bufaddr), + fec16_to_cpu(tmp_bdp->cbd_datlen), + DMA_TO_DEVICE); + + /* Clear standard buffer descriptor fields */ + tmp_bdp->cbd_sc = 0; + tmp_bdp->cbd_datlen = 0; + tmp_bdp->cbd_bufaddr = 0; + + /* Handle extended descriptor if enabled */ + if (fep->bufdesc_ex) { + ebdp = (struct bufdesc_ex *)tmp_bdp; + ebdp->cbd_esc = 0; + } + + tmp_bdp = fec_enet_get_nextdesc(tmp_bdp, &txq->bd); + } + + dev_kfree_skb_any(skb); + return ret; } diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.c b/drivers/net/ethernet/hisilicon/hns3/hnae3.c index 9a63fbc69408..b25fb400f476 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hnae3.c +++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.c @@ -40,6 +40,21 @@ EXPORT_SYMBOL(hnae3_unregister_ae_algo_prepare); */ 
static DEFINE_MUTEX(hnae3_common_lock); +/* ensure the drivers being unloaded one by one */ +static DEFINE_MUTEX(hnae3_unload_lock); + +void hnae3_acquire_unload_lock(void) +{ + mutex_lock(&hnae3_unload_lock); +} +EXPORT_SYMBOL(hnae3_acquire_unload_lock); + +void hnae3_release_unload_lock(void) +{ + mutex_unlock(&hnae3_unload_lock); +} +EXPORT_SYMBOL(hnae3_release_unload_lock); + static bool hnae3_client_match(enum hnae3_client_type client_type) { if (client_type == HNAE3_CLIENT_KNIC || diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h index 12ba380eb701..4e44f28288f9 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h +++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h @@ -963,4 +963,6 @@ int hnae3_register_client(struct hnae3_client *client); void hnae3_set_client_init_flag(struct hnae3_client *client, struct hnae3_ae_dev *ae_dev, unsigned int inited); +void hnae3_acquire_unload_lock(void); +void hnae3_release_unload_lock(void); #endif diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c index a7e3b22f641c..9ff797fb36c4 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c @@ -6002,9 +6002,11 @@ module_init(hns3_init_module); */ static void __exit hns3_exit_module(void) { + hnae3_acquire_unload_lock(); pci_unregister_driver(&hns3_driver); hnae3_unregister_client(&client); hns3_dbg_unregister_debugfs(); + hnae3_release_unload_lock(); } module_exit(hns3_exit_module); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index db7845009252..3f17b3073e50 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -12919,9 +12919,11 @@ static int __init hclge_init(void) static void __exit hclge_exit(void) { + hnae3_acquire_unload_lock(); hnae3_unregister_ae_algo_prepare(&ae_algo); hnae3_unregister_ae_algo(&ae_algo); destroy_workqueue(hclge_wq); + hnae3_release_unload_lock(); } module_init(hclge_init); module_exit(hclge_exit); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c index 163c6e59ea4c..9ba767740a04 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c @@ -3410,8 +3410,10 @@ static int __init hclgevf_init(void) static void __exit hclgevf_exit(void) { + hnae3_acquire_unload_lock(); hnae3_unregister_ae_algo(&ae_algovf); destroy_workqueue(hclgevf_wq); + hnae3_release_unload_lock(); } module_init(hclgevf_init); module_exit(hclgevf_exit); diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index cbfaaa5b7d02..2d7a18fcc3be 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -773,6 +773,11 @@ iavf_vlan_filter *iavf_add_vlan(struct iavf_adapter *adapter, f->state = IAVF_VLAN_ADD; adapter->num_vlan_filters++; iavf_schedule_aq_request(adapter, IAVF_FLAG_AQ_ADD_VLAN_FILTER); + } else if (f->state == IAVF_VLAN_REMOVE) { + /* IAVF_VLAN_REMOVE means that VLAN wasn't yet removed. + * We can safely only change the state here. 
+ */ + f->state = IAVF_VLAN_ACTIVE; } clearout: @@ -793,8 +798,18 @@ static void iavf_del_vlan(struct iavf_adapter *adapter, struct iavf_vlan vlan) f = iavf_find_vlan(adapter, vlan); if (f) { - f->state = IAVF_VLAN_REMOVE; - iavf_schedule_aq_request(adapter, IAVF_FLAG_AQ_DEL_VLAN_FILTER); + /* IAVF_ADD_VLAN means that VLAN wasn't even added yet. + * Remove it from the list. + */ + if (f->state == IAVF_VLAN_ADD) { + list_del(&f->list); + kfree(f); + adapter->num_vlan_filters--; + } else { + f->state = IAVF_VLAN_REMOVE; + iavf_schedule_aq_request(adapter, + IAVF_FLAG_AQ_DEL_VLAN_FILTER); + } } spin_unlock_bh(&adapter->mac_vlan_list_lock); diff --git a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h index 01536a382e54..bdee499f991a 100644 --- a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h +++ b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h @@ -1498,7 +1498,6 @@ struct ice_aqc_dnl_equa_param { #define ICE_AQC_RX_EQU_POST1 (0x12 << ICE_AQC_RX_EQU_SHIFT) #define ICE_AQC_RX_EQU_BFLF (0x13 << ICE_AQC_RX_EQU_SHIFT) #define ICE_AQC_RX_EQU_BFHF (0x14 << ICE_AQC_RX_EQU_SHIFT) -#define ICE_AQC_RX_EQU_DRATE (0x15 << ICE_AQC_RX_EQU_SHIFT) #define ICE_AQC_RX_EQU_CTLE_GAINHF (0x20 << ICE_AQC_RX_EQU_SHIFT) #define ICE_AQC_RX_EQU_CTLE_GAINLF (0x21 << ICE_AQC_RX_EQU_SHIFT) #define ICE_AQC_RX_EQU_CTLE_GAINDC (0x22 << ICE_AQC_RX_EQU_SHIFT) diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c index 3072634bf049..f241493a6ac8 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c @@ -710,7 +710,6 @@ static int ice_get_tx_rx_equa(struct ice_hw *hw, u8 serdes_num, { ICE_AQC_RX_EQU_POST1, rx, &ptr->rx_equ_post1 }, { ICE_AQC_RX_EQU_BFLF, rx, &ptr->rx_equ_bflf }, { ICE_AQC_RX_EQU_BFHF, rx, &ptr->rx_equ_bfhf }, - { ICE_AQC_RX_EQU_DRATE, rx, &ptr->rx_equ_drate }, { ICE_AQC_RX_EQU_CTLE_GAINHF, rx, &ptr->rx_equ_ctle_gainhf }, { ICE_AQC_RX_EQU_CTLE_GAINLF, rx, &ptr->rx_equ_ctle_gainlf }, { ICE_AQC_RX_EQU_CTLE_GAINDC, rx, &ptr->rx_equ_ctle_gaindc }, diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.h b/drivers/net/ethernet/intel/ice/ice_ethtool.h index 8f2ad1c172c0..23b2cfbc9684 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool.h +++ b/drivers/net/ethernet/intel/ice/ice_ethtool.h @@ -15,7 +15,6 @@ struct ice_serdes_equalization_to_ethtool { int rx_equ_post1; int rx_equ_bflf; int rx_equ_bfhf; - int rx_equ_drate; int rx_equ_ctle_gainhf; int rx_equ_ctle_gainlf; int rx_equ_ctle_gaindc; diff --git a/drivers/net/ethernet/intel/ice/ice_parser.h b/drivers/net/ethernet/intel/ice/ice_parser.h index 6509d807627c..4f56d53d56b9 100644 --- a/drivers/net/ethernet/intel/ice/ice_parser.h +++ b/drivers/net/ethernet/intel/ice/ice_parser.h @@ -257,7 +257,6 @@ ice_pg_nm_cam_match(struct ice_pg_nm_cam_item *table, int size, /*** ICE_SID_RXPARSER_BOOST_TCAM and ICE_SID_LBL_RXPARSER_TMEM sections ***/ #define ICE_BST_TCAM_TABLE_SIZE 256 #define ICE_BST_TCAM_KEY_SIZE 20 -#define ICE_BST_KEY_TCAM_SIZE 19 /* Boost TCAM item */ struct ice_bst_tcam_item { @@ -401,7 +400,6 @@ u16 ice_xlt_kb_flag_get(struct ice_xlt_kb *kb, u64 pkt_flag); #define ICE_PARSER_GPR_NUM 128 #define ICE_PARSER_FLG_NUM 64 #define ICE_PARSER_ERR_NUM 16 -#define ICE_BST_KEY_SIZE 10 #define ICE_MARKER_ID_SIZE 9 #define ICE_MARKER_MAX_SIZE \ (ICE_MARKER_ID_SIZE * BITS_PER_BYTE - 1) @@ -431,13 +429,13 @@ struct ice_parser_rt { u8 pkt_buf[ICE_PARSER_MAX_PKT_LEN + ICE_PARSER_PKT_REV]; u16 pkt_len; u16 po; - u8 
bst_key[ICE_BST_KEY_SIZE]; + u8 bst_key[ICE_BST_TCAM_KEY_SIZE]; struct ice_pg_cam_key pg_key; + u8 pg_prio; struct ice_alu *alu0; struct ice_alu *alu1; struct ice_alu *alu2; struct ice_pg_cam_action *action; - u8 pg_prio; struct ice_gpr_pu pu; u8 markers[ICE_MARKER_ID_SIZE]; bool protocols[ICE_PO_PAIR_SIZE]; diff --git a/drivers/net/ethernet/intel/ice/ice_parser_rt.c b/drivers/net/ethernet/intel/ice/ice_parser_rt.c index dedf5e854e4b..3995d662e050 100644 --- a/drivers/net/ethernet/intel/ice/ice_parser_rt.c +++ b/drivers/net/ethernet/intel/ice/ice_parser_rt.c @@ -125,22 +125,20 @@ static void ice_bst_key_init(struct ice_parser_rt *rt, else key[idd] = imem->b_kb.prio; - idd = ICE_BST_KEY_TCAM_SIZE - 1; + idd = ICE_BST_TCAM_KEY_SIZE - 2; for (i = idd; i >= 0; i--) { int j; j = ho + idd - i; if (j < ICE_PARSER_MAX_PKT_LEN) - key[i] = rt->pkt_buf[ho + idd - i]; + key[i] = rt->pkt_buf[j]; else key[i] = 0; } - ice_debug(rt->psr->hw, ICE_DBG_PARSER, "Generated Boost TCAM Key:\n"); - ice_debug(rt->psr->hw, ICE_DBG_PARSER, "%02X %02X %02X %02X %02X %02X %02X %02X %02X %02X\n", - key[0], key[1], key[2], key[3], key[4], - key[5], key[6], key[7], key[8], key[9]); - ice_debug(rt->psr->hw, ICE_DBG_PARSER, "\n"); + ice_debug_array_w_prefix(rt->psr->hw, ICE_DBG_PARSER, + KBUILD_MODNAME ": Generated Boost TCAM Key", + key, ICE_BST_TCAM_KEY_SIZE); } static u16 ice_bit_rev_u16(u16 v, int len) diff --git a/drivers/net/ethernet/intel/idpf/idpf_controlq.c b/drivers/net/ethernet/intel/idpf/idpf_controlq.c index 4849590a5591..b28991dd1870 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_controlq.c +++ b/drivers/net/ethernet/intel/idpf/idpf_controlq.c @@ -376,6 +376,9 @@ int idpf_ctlq_clean_sq(struct idpf_ctlq_info *cq, u16 *clean_count, if (!(le16_to_cpu(desc->flags) & IDPF_CTLQ_FLAG_DD)) break; + /* Ensure no other fields are read until DD flag is checked */ + dma_rmb(); + /* strip off FW internal code */ desc_err = le16_to_cpu(desc->ret_val) & 0xff; @@ -563,6 +566,9 @@ int idpf_ctlq_recv(struct idpf_ctlq_info *cq, u16 *num_q_msg, if (!(flags & IDPF_CTLQ_FLAG_DD)) break; + /* Ensure no other fields are read until DD flag is checked */ + dma_rmb(); + q_msg[i].vmvf_type = (flags & (IDPF_CTLQ_FLAG_FTYPE_VM | IDPF_CTLQ_FLAG_FTYPE_PF)) >> diff --git a/drivers/net/ethernet/intel/idpf/idpf_main.c b/drivers/net/ethernet/intel/idpf/idpf_main.c index f71d3182580b..b6c515d14cbf 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_main.c +++ b/drivers/net/ethernet/intel/idpf/idpf_main.c @@ -174,7 +174,8 @@ static int idpf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) pci_set_master(pdev); pci_set_drvdata(pdev, adapter); - adapter->init_wq = alloc_workqueue("%s-%s-init", 0, 0, + adapter->init_wq = alloc_workqueue("%s-%s-init", + WQ_UNBOUND | WQ_MEM_RECLAIM, 0, dev_driver_string(dev), dev_name(dev)); if (!adapter->init_wq) { @@ -183,7 +184,8 @@ static int idpf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) goto err_free; } - adapter->serv_wq = alloc_workqueue("%s-%s-service", 0, 0, + adapter->serv_wq = alloc_workqueue("%s-%s-service", + WQ_UNBOUND | WQ_MEM_RECLAIM, 0, dev_driver_string(dev), dev_name(dev)); if (!adapter->serv_wq) { @@ -192,7 +194,8 @@ static int idpf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) goto err_serv_wq_alloc; } - adapter->mbx_wq = alloc_workqueue("%s-%s-mbx", 0, 0, + adapter->mbx_wq = alloc_workqueue("%s-%s-mbx", + WQ_UNBOUND | WQ_MEM_RECLAIM, 0, dev_driver_string(dev), dev_name(dev)); if (!adapter->mbx_wq) { @@ -201,7 +204,8 @@ static int idpf_probe(struct 
pci_dev *pdev, const struct pci_device_id *ent) goto err_mbx_wq_alloc; } - adapter->stats_wq = alloc_workqueue("%s-%s-stats", 0, 0, + adapter->stats_wq = alloc_workqueue("%s-%s-stats", + WQ_UNBOUND | WQ_MEM_RECLAIM, 0, dev_driver_string(dev), dev_name(dev)); if (!adapter->stats_wq) { @@ -210,7 +214,8 @@ static int idpf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) goto err_stats_wq_alloc; } - adapter->vc_event_wq = alloc_workqueue("%s-%s-vc_event", 0, 0, + adapter->vc_event_wq = alloc_workqueue("%s-%s-vc_event", + WQ_UNBOUND | WQ_MEM_RECLAIM, 0, dev_driver_string(dev), dev_name(dev)); if (!adapter->vc_event_wq) { diff --git a/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c b/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c index d46c95f91b0d..3d2413b8684f 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c +++ b/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c @@ -517,8 +517,10 @@ static ssize_t idpf_vc_xn_exec(struct idpf_adapter *adapter, retval = -ENXIO; goto only_unlock; case IDPF_VC_XN_WAITING: - dev_notice_ratelimited(&adapter->pdev->dev, "Transaction timed-out (op %d, %dms)\n", - params->vc_op, params->timeout_ms); + dev_notice_ratelimited(&adapter->pdev->dev, + "Transaction timed-out (op:%d cookie:%04x vc_op:%d salt:%02x timeout:%dms)\n", + params->vc_op, cookie, xn->vc_op, + xn->salt, params->timeout_ms); retval = -ETIME; break; case IDPF_VC_XN_COMPLETED_SUCCESS: @@ -612,14 +614,16 @@ idpf_vc_xn_forward_reply(struct idpf_adapter *adapter, return -EINVAL; } xn = &adapter->vcxn_mngr->ring[xn_idx]; + idpf_vc_xn_lock(xn); salt = FIELD_GET(IDPF_VC_XN_SALT_M, msg_info); if (xn->salt != salt) { - dev_err_ratelimited(&adapter->pdev->dev, "Transaction salt does not match (%02x != %02x)\n", - xn->salt, salt); + dev_err_ratelimited(&adapter->pdev->dev, "Transaction salt does not match (exp:%d@%02x(%d) != got:%d@%02x)\n", + xn->vc_op, xn->salt, xn->state, + ctlq_msg->cookie.mbx.chnl_opcode, salt); + idpf_vc_xn_unlock(xn); return -EINVAL; } - idpf_vc_xn_lock(xn); switch (xn->state) { case IDPF_VC_XN_WAITING: /* success */ @@ -3077,12 +3081,21 @@ init_failed: */ void idpf_vc_core_deinit(struct idpf_adapter *adapter) { + bool remove_in_prog; + if (!test_bit(IDPF_VC_CORE_INIT, adapter->flags)) return; + /* Avoid transaction timeouts when called during reset */ + remove_in_prog = test_bit(IDPF_REMOVE_IN_PROG, adapter->flags); + if (!remove_in_prog) + idpf_vc_xn_shutdown(adapter->vcxn_mngr); + idpf_deinit_task(adapter); idpf_intr_rel(adapter); - idpf_vc_xn_shutdown(adapter->vcxn_mngr); + + if (remove_in_prog) + idpf_vc_xn_shutdown(adapter->vcxn_mngr); cancel_delayed_work_sync(&adapter->serv_task); cancel_delayed_work_sync(&adapter->mbx_task); diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index 82f4333fb426..4fe121b9f94b 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -4432,6 +4432,7 @@ static int mvneta_cpu_online(unsigned int cpu, struct hlist_node *node) */ if (pp->is_stopped) { spin_unlock(&pp->lock); + netdev_unlock(port->napi.dev); return 0; } netif_tx_stop_all_queues(pp->dev); diff --git a/drivers/net/ethernet/mediatek/airoha_eth.c b/drivers/net/ethernet/mediatek/airoha_eth.c index 415d784de741..09f448f29124 100644 --- a/drivers/net/ethernet/mediatek/airoha_eth.c +++ b/drivers/net/ethernet/mediatek/airoha_eth.c @@ -266,11 +266,11 @@ #define REG_GDM3_FWD_CFG GDM3_BASE #define GDM3_PAD_EN_MASK BIT(28) -#define REG_GDM4_FWD_CFG (GDM4_BASE + 0x100) +#define REG_GDM4_FWD_CFG 
GDM4_BASE #define GDM4_PAD_EN_MASK BIT(28) #define GDM4_SPORT_OFFSET0_MASK GENMASK(11, 8) -#define REG_GDM4_SRC_PORT_SET (GDM4_BASE + 0x33c) +#define REG_GDM4_SRC_PORT_SET (GDM4_BASE + 0x23c) #define GDM4_SPORT_OFF2_MASK GENMASK(19, 16) #define GDM4_SPORT_OFF1_MASK GENMASK(15, 12) #define GDM4_SPORT_OFF0_MASK GENMASK(11, 8) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index bd41b75d246e..a814b63ed97e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -2087,7 +2087,7 @@ static struct mlx5e_xdpsq *mlx5e_open_xdpredirect_sq(struct mlx5e_channel *c, struct mlx5e_xdpsq *xdpsq; int err; - xdpsq = kvzalloc_node(sizeof(*xdpsq), GFP_KERNEL, c->cpu); + xdpsq = kvzalloc_node(sizeof(*xdpsq), GFP_KERNEL, cpu_to_node(c->cpu)); if (!xdpsq) return ERR_PTR(-ENOMEM); diff --git a/drivers/net/ethernet/nvidia/forcedeth.c b/drivers/net/ethernet/nvidia/forcedeth.c index 720f577929db..499e5e39d513 100644 --- a/drivers/net/ethernet/nvidia/forcedeth.c +++ b/drivers/net/ethernet/nvidia/forcedeth.c @@ -1120,20 +1120,6 @@ static void nv_disable_hw_interrupts(struct net_device *dev, u32 mask) } } -static void nv_napi_enable(struct net_device *dev) -{ - struct fe_priv *np = get_nvpriv(dev); - - napi_enable(&np->napi); -} - -static void nv_napi_disable(struct net_device *dev) -{ - struct fe_priv *np = get_nvpriv(dev); - - napi_disable(&np->napi); -} - #define MII_READ (-1) /* mii_rw: read/write a register on the PHY. * @@ -3114,7 +3100,7 @@ static int nv_change_mtu(struct net_device *dev, int new_mtu) * Changing the MTU is a rare event, it shouldn't matter. */ nv_disable_irq(dev); - nv_napi_disable(dev); + napi_disable(&np->napi); netif_tx_lock_bh(dev); netif_addr_lock(dev); spin_lock(&np->lock); @@ -3143,7 +3129,7 @@ static int nv_change_mtu(struct net_device *dev, int new_mtu) spin_unlock(&np->lock); netif_addr_unlock(dev); netif_tx_unlock_bh(dev); - nv_napi_enable(dev); + napi_enable(&np->napi); nv_enable_irq(dev); } return 0; @@ -4731,7 +4717,7 @@ static int nv_set_ringparam(struct net_device *dev, if (netif_running(dev)) { nv_disable_irq(dev); - nv_napi_disable(dev); + napi_disable(&np->napi); netif_tx_lock_bh(dev); netif_addr_lock(dev); spin_lock(&np->lock); @@ -4784,7 +4770,7 @@ static int nv_set_ringparam(struct net_device *dev, spin_unlock(&np->lock); netif_addr_unlock(dev); netif_tx_unlock_bh(dev); - nv_napi_enable(dev); + napi_enable(&np->napi); nv_enable_irq(dev); } return 0; @@ -5277,7 +5263,7 @@ static void nv_self_test(struct net_device *dev, struct ethtool_test *test, u64 if (test->flags & ETH_TEST_FL_OFFLINE) { if (netif_running(dev)) { netif_stop_queue(dev); - nv_napi_disable(dev); + napi_disable(&np->napi); netif_tx_lock_bh(dev); netif_addr_lock(dev); spin_lock_irq(&np->lock); @@ -5334,7 +5320,7 @@ static void nv_self_test(struct net_device *dev, struct ethtool_test *test, u64 /* restart rx engine */ nv_start_rxtx(dev); netif_start_queue(dev); - nv_napi_enable(dev); + napi_enable(&np->napi); nv_enable_hw_interrupts(dev, np->irqmask); } } @@ -5576,6 +5562,7 @@ static int nv_open(struct net_device *dev) /* ask for interrupts */ nv_enable_hw_interrupts(dev, np->irqmask); + netdev_lock(dev); spin_lock_irq(&np->lock); writel(NVREG_MCASTADDRA_FORCE, base + NvRegMulticastAddrA); writel(0, base + NvRegMulticastAddrB); @@ -5594,7 +5581,7 @@ static int nv_open(struct net_device *dev) ret = nv_update_linkspeed(dev); nv_start_rxtx(dev); netif_start_queue(dev); - 
nv_napi_enable(dev); + napi_enable_locked(&np->napi); if (ret) { netif_carrier_on(dev); @@ -5611,6 +5598,7 @@ static int nv_open(struct net_device *dev) round_jiffies(jiffies + STATS_INTERVAL)); spin_unlock_irq(&np->lock); + netdev_unlock(dev); /* If the loopback feature was set while the device was down, make sure * that it's set correctly now. @@ -5632,7 +5620,7 @@ static int nv_close(struct net_device *dev) spin_lock_irq(&np->lock); np->in_shutdown = 1; spin_unlock_irq(&np->lock); - nv_napi_disable(dev); + napi_disable(&np->napi); synchronize_irq(np->pci_dev->irq); del_timer_sync(&np->oom_kick); diff --git a/drivers/net/ethernet/realtek/8139too.c b/drivers/net/ethernet/realtek/8139too.c index 9ce0e8a64ba8..a73dcaffa8c5 100644 --- a/drivers/net/ethernet/realtek/8139too.c +++ b/drivers/net/ethernet/realtek/8139too.c @@ -1684,6 +1684,7 @@ static void rtl8139_tx_timeout_task (struct work_struct *work) if (tmp8 & CmdTxEnb) RTL_W8 (ChipCmd, CmdRxEnb); + netdev_lock(dev); spin_lock_bh(&tp->rx_lock); /* Disable interrupts by clearing the interrupt mask. */ RTL_W16 (IntrMask, 0x0000); @@ -1694,11 +1695,12 @@ static void rtl8139_tx_timeout_task (struct work_struct *work) spin_unlock_irq(&tp->lock); /* ...and finally, reset everything */ - napi_enable(&tp->napi); + napi_enable_locked(&tp->napi); rtl8139_hw_start(dev); netif_wake_queue(dev); spin_unlock_bh(&tp->rx_lock); + netdev_unlock(dev); } static void rtl8139_tx_timeout(struct net_device *dev, unsigned int txqueue) diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index bc395294a32d..c9f4976a3527 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -3217,10 +3217,15 @@ static int ravb_suspend(struct device *dev) netif_device_detach(ndev); - if (priv->wol_enabled) - return ravb_wol_setup(ndev); + rtnl_lock(); + if (priv->wol_enabled) { + ret = ravb_wol_setup(ndev); + rtnl_unlock(); + return ret; + } ret = ravb_close(ndev); + rtnl_unlock(); if (ret) return ret; @@ -3245,19 +3250,20 @@ static int ravb_resume(struct device *dev) if (!netif_running(ndev)) return 0; + rtnl_lock(); /* If WoL is enabled restore the interface. */ - if (priv->wol_enabled) { + if (priv->wol_enabled) ret = ravb_wol_restore(ndev); - if (ret) - return ret; - } else { + else ret = pm_runtime_force_resume(dev); - if (ret) - return ret; + if (ret) { + rtnl_unlock(); + return ret; } /* Reopening the interface will restore the device to the working state. 
*/ ret = ravb_open(ndev); + rtnl_unlock(); if (ret < 0) goto out_rpm_put; diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c index 8887b8921009..5fc8027c92c7 100644 --- a/drivers/net/ethernet/renesas/sh_eth.c +++ b/drivers/net/ethernet/renesas/sh_eth.c @@ -3494,10 +3494,12 @@ static int sh_eth_suspend(struct device *dev) netif_device_detach(ndev); + rtnl_lock(); if (mdp->wol_enabled) ret = sh_eth_wol_setup(ndev); else ret = sh_eth_close(ndev); + rtnl_unlock(); return ret; } @@ -3511,10 +3513,12 @@ static int sh_eth_resume(struct device *dev) if (!netif_running(ndev)) return 0; + rtnl_lock(); if (mdp->wol_enabled) ret = sh_eth_wol_restore(ndev); else ret = sh_eth_open(ndev); + rtnl_unlock(); if (ret < 0) return ret; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 5212dc439b1d..d04543e5697b 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -2424,11 +2424,6 @@ static void stmmac_dma_operation_mode(struct stmmac_priv *priv) u32 chan = 0; u8 qmode = 0; - if (rxfifosz == 0) - rxfifosz = priv->dma_cap.rx_fifo_size; - if (txfifosz == 0) - txfifosz = priv->dma_cap.tx_fifo_size; - /* Split up the shared Tx/Rx FIFO memory on DW QoS Eth and DW XGMAC */ if (priv->plat->has_gmac4 || priv->plat->has_xgmac) { rxfifosz /= rx_channels_count; @@ -2897,11 +2892,6 @@ static void stmmac_set_dma_operation_mode(struct stmmac_priv *priv, u32 txmode, int rxfifosz = priv->plat->rx_fifo_size; int txfifosz = priv->plat->tx_fifo_size; - if (rxfifosz == 0) - rxfifosz = priv->dma_cap.rx_fifo_size; - if (txfifosz == 0) - txfifosz = priv->dma_cap.tx_fifo_size; - /* Adjust for real per queue fifo size */ rxfifosz /= rx_channels_count; txfifosz /= tx_channels_count; @@ -5878,9 +5868,6 @@ static int stmmac_change_mtu(struct net_device *dev, int new_mtu) const int mtu = new_mtu; int ret; - if (txfifosz == 0) - txfifosz = priv->dma_cap.tx_fifo_size; - txfifosz /= priv->plat->tx_queues_to_use; if (stmmac_xdp_is_enabled(priv) && new_mtu > ETH_DATA_LEN) { @@ -7217,6 +7204,50 @@ static int stmmac_hw_init(struct stmmac_priv *priv) if (priv->dma_cap.tsoen) dev_info(priv->device, "TSO supported\n"); + if (priv->dma_cap.number_rx_queues && + priv->plat->rx_queues_to_use > priv->dma_cap.number_rx_queues) { + dev_warn(priv->device, + "Number of Rx queues (%u) exceeds dma capability\n", + priv->plat->rx_queues_to_use); + priv->plat->rx_queues_to_use = priv->dma_cap.number_rx_queues; + } + if (priv->dma_cap.number_tx_queues && + priv->plat->tx_queues_to_use > priv->dma_cap.number_tx_queues) { + dev_warn(priv->device, + "Number of Tx queues (%u) exceeds dma capability\n", + priv->plat->tx_queues_to_use); + priv->plat->tx_queues_to_use = priv->dma_cap.number_tx_queues; + } + + if (!priv->plat->rx_fifo_size) { + if (priv->dma_cap.rx_fifo_size) { + priv->plat->rx_fifo_size = priv->dma_cap.rx_fifo_size; + } else { + dev_err(priv->device, "Can't specify Rx FIFO size\n"); + return -ENODEV; + } + } else if (priv->dma_cap.rx_fifo_size && + priv->plat->rx_fifo_size > priv->dma_cap.rx_fifo_size) { + dev_warn(priv->device, + "Rx FIFO size (%u) exceeds dma capability\n", + priv->plat->rx_fifo_size); + priv->plat->rx_fifo_size = priv->dma_cap.rx_fifo_size; + } + if (!priv->plat->tx_fifo_size) { + if (priv->dma_cap.tx_fifo_size) { + priv->plat->tx_fifo_size = priv->dma_cap.tx_fifo_size; + } else { + dev_err(priv->device, "Can't specify Tx FIFO size\n"); + return -ENODEV; + 
} + } else if (priv->dma_cap.tx_fifo_size && + priv->plat->tx_fifo_size > priv->dma_cap.tx_fifo_size) { + dev_warn(priv->device, + "Tx FIFO size (%u) exceeds dma capability\n", + priv->plat->tx_fifo_size); + priv->plat->tx_fifo_size = priv->dma_cap.tx_fifo_size; + } + priv->hw->vlan_fail_q_en = (priv->plat->flags & STMMAC_FLAG_VLAN_FAIL_Q_EN); priv->hw->vlan_fail_q = priv->plat->vlan_fail_q; diff --git a/drivers/net/ethernet/sun/niu.c b/drivers/net/ethernet/sun/niu.c index d7459866d24c..72177fea1cfb 100644 --- a/drivers/net/ethernet/sun/niu.c +++ b/drivers/net/ethernet/sun/niu.c @@ -6086,7 +6086,7 @@ static void niu_enable_napi(struct niu *np) int i; for (i = 0; i < np->num_ldg; i++) - napi_enable(&np->ldg[i].napi); + napi_enable_locked(&np->ldg[i].napi); } static void niu_disable_napi(struct niu *np) @@ -6116,7 +6116,9 @@ static int niu_open(struct net_device *dev) if (err) goto out_free_channels; + netdev_lock(dev); niu_enable_napi(np); + netdev_unlock(dev); spin_lock_irq(&np->lock); @@ -6521,6 +6523,7 @@ static void niu_reset_task(struct work_struct *work) niu_reset_buffers(np); + netdev_lock(np->dev); spin_lock_irqsave(&np->lock, flags); err = niu_init_hw(np); @@ -6531,6 +6534,7 @@ static void niu_reset_task(struct work_struct *work) } spin_unlock_irqrestore(&np->lock, flags); + netdev_unlock(np->dev); } static void niu_tx_timeout(struct net_device *dev, unsigned int txqueue) @@ -6761,7 +6765,9 @@ static int niu_change_mtu(struct net_device *dev, int new_mtu) niu_free_channels(np); + netdev_lock(dev); niu_enable_napi(np); + netdev_unlock(dev); err = niu_alloc_channels(np); if (err) @@ -9937,6 +9943,7 @@ static int __maybe_unused niu_resume(struct device *dev_d) spin_lock_irqsave(&np->lock, flags); + netdev_lock(dev); err = niu_init_hw(np); if (!err) { np->timer.expires = jiffies + HZ; @@ -9945,6 +9952,7 @@ static int __maybe_unused niu_resume(struct device *dev_d) } spin_unlock_irqrestore(&np->lock, flags); + netdev_unlock(dev); return err; } diff --git a/drivers/net/ethernet/via/via-rhine.c b/drivers/net/ethernet/via/via-rhine.c index 894911f3d560..e56ebbdd428d 100644 --- a/drivers/net/ethernet/via/via-rhine.c +++ b/drivers/net/ethernet/via/via-rhine.c @@ -1568,7 +1568,7 @@ static void init_registers(struct net_device *dev) if (rp->quirks & rqMgmt) rhine_init_cam_filter(dev); - napi_enable(&rp->napi); + napi_enable_locked(&rp->napi); iowrite16(RHINE_EVENT & 0xffff, ioaddr + IntrEnable); @@ -1696,7 +1696,10 @@ static int rhine_open(struct net_device *dev) rhine_power_init(dev); rhine_chip_reset(dev); rhine_task_enable(rp); + + netdev_lock(dev); init_registers(dev); + netdev_unlock(dev); netif_dbg(rp, ifup, dev, "%s() Done - status %04x MII status: %04x\n", __func__, ioread16(ioaddr + ChipCmd), @@ -1727,6 +1730,8 @@ static void rhine_reset_task(struct work_struct *work) napi_disable(&rp->napi); netif_tx_disable(dev); + + netdev_lock(dev); spin_lock_bh(&rp->lock); /* clear all descriptors */ @@ -1740,6 +1745,7 @@ static void rhine_reset_task(struct work_struct *work) init_registers(dev); spin_unlock_bh(&rp->lock); + netdev_unlock(dev); netif_trans_update(dev); /* prevent tx timeout */ dev->stats.tx_errors++; @@ -2541,9 +2547,12 @@ static int rhine_resume(struct device *device) alloc_tbufs(dev); rhine_reset_rbufs(rp); rhine_task_enable(rp); + + netdev_lock(dev); spin_lock_bh(&rp->lock); init_registers(dev); spin_unlock_bh(&rp->lock); + netdev_unlock(dev); netif_device_attach(dev); diff --git a/drivers/net/netdevsim/ethtool.c b/drivers/net/netdevsim/ethtool.c index 
3b23f3d3ca2b..5c80fbee7913 100644 --- a/drivers/net/netdevsim/ethtool.c +++ b/drivers/net/netdevsim/ethtool.c @@ -74,7 +74,7 @@ static void nsim_get_ringparam(struct net_device *dev, memcpy(ring, &ns->ethtool.ring, sizeof(ns->ethtool.ring)); kernel_ring->hds_thresh_max = NSIM_HDS_THRESHOLD_MAX; - if (kernel_ring->tcp_data_split == ETHTOOL_TCP_DATA_SPLIT_UNKNOWN) + if (dev->cfg->hds_config == ETHTOOL_TCP_DATA_SPLIT_UNKNOWN) kernel_ring->tcp_data_split = ETHTOOL_TCP_DATA_SPLIT_ENABLED; } diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h index dcf073bc4802..96d54c08043d 100644 --- a/drivers/net/netdevsim/netdevsim.h +++ b/drivers/net/netdevsim/netdevsim.h @@ -134,6 +134,7 @@ struct netdevsim { u32 sleep; u32 __ports[2][NSIM_UDP_TUNNEL_N_PORTS]; u32 (*ports)[NSIM_UDP_TUNNEL_N_PORTS]; + struct dentry *ddir; struct debugfs_u32_array dfs_ports[2]; } udp_ports; diff --git a/drivers/net/netdevsim/udp_tunnels.c b/drivers/net/netdevsim/udp_tunnels.c index 02dc3123eb6c..640b4983a9a0 100644 --- a/drivers/net/netdevsim/udp_tunnels.c +++ b/drivers/net/netdevsim/udp_tunnels.c @@ -112,9 +112,11 @@ nsim_udp_tunnels_info_reset_write(struct file *file, const char __user *data, struct net_device *dev = file->private_data; struct netdevsim *ns = netdev_priv(dev); - memset(ns->udp_ports.ports, 0, sizeof(ns->udp_ports.__ports)); rtnl_lock(); - udp_tunnel_nic_reset_ntf(dev); + if (dev->reg_state == NETREG_REGISTERED) { + memset(ns->udp_ports.ports, 0, sizeof(ns->udp_ports.__ports)); + udp_tunnel_nic_reset_ntf(dev); + } rtnl_unlock(); return count; @@ -144,23 +146,23 @@ int nsim_udp_tunnels_info_create(struct nsim_dev *nsim_dev, else ns->udp_ports.ports = nsim_dev->udp_ports.__ports; - debugfs_create_u32("udp_ports_inject_error", 0600, - ns->nsim_dev_port->ddir, + ns->udp_ports.ddir = debugfs_create_dir("udp_ports", + ns->nsim_dev_port->ddir); + + debugfs_create_u32("inject_error", 0600, ns->udp_ports.ddir, &ns->udp_ports.inject_error); ns->udp_ports.dfs_ports[0].array = ns->udp_ports.ports[0]; ns->udp_ports.dfs_ports[0].n_elements = NSIM_UDP_TUNNEL_N_PORTS; - debugfs_create_u32_array("udp_ports_table0", 0400, - ns->nsim_dev_port->ddir, + debugfs_create_u32_array("table0", 0400, ns->udp_ports.ddir, &ns->udp_ports.dfs_ports[0]); ns->udp_ports.dfs_ports[1].array = ns->udp_ports.ports[1]; ns->udp_ports.dfs_ports[1].n_elements = NSIM_UDP_TUNNEL_N_PORTS; - debugfs_create_u32_array("udp_ports_table1", 0400, - ns->nsim_dev_port->ddir, + debugfs_create_u32_array("table1", 0400, ns->udp_ports.ddir, &ns->udp_ports.dfs_ports[1]); - debugfs_create_file("udp_ports_reset", 0200, ns->nsim_dev_port->ddir, + debugfs_create_file("reset", 0200, ns->udp_ports.ddir, dev, &nsim_udp_tunnels_info_reset_fops); /* Note: it's not normal to allocate the info struct like this! 
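The netdevsim hunk above moves the udp_ports debugfs entries into a per-port "udp_ports" subdirectory so teardown can remove them with a single recursive call. A minimal sketch of that debugfs grouping pattern follows; the example_ports struct and its fields are illustrative placeholders, not the driver's real layout:

#include <linux/debugfs.h>

struct example_ports {
	struct dentry *ddir;	/* "udp_ports"-style subdirectory */
	u32 inject_error;
};

static void example_ports_debugfs_init(struct example_ports *ep,
				       struct dentry *parent)
{
	/* group related entries under one directory off the port's dir */
	ep->ddir = debugfs_create_dir("udp_ports", parent);
	debugfs_create_u32("inject_error", 0600, ep->ddir,
			   &ep->inject_error);
}

static void example_ports_debugfs_destroy(struct example_ports *ep)
{
	/* one call removes the directory and every entry inside it */
	debugfs_remove_recursive(ep->ddir);
}

Hanging everything off one dentry keeps the destroy path correct even if individual files are later added or renamed.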
@@ -196,6 +198,9 @@ int nsim_udp_tunnels_info_create(struct nsim_dev *nsim_dev, void nsim_udp_tunnels_info_destroy(struct net_device *dev) { + struct netdevsim *ns = netdev_priv(dev); + + debugfs_remove_recursive(ns->udp_ports.ddir); kfree(dev->udp_tunnel_nic_info); dev->udp_tunnel_nic_info = NULL; } diff --git a/drivers/net/phy/marvell-88q2xxx.c b/drivers/net/phy/marvell-88q2xxx.c index 4494b3e39ce2..a3996471a1c9 100644 --- a/drivers/net/phy/marvell-88q2xxx.c +++ b/drivers/net/phy/marvell-88q2xxx.c @@ -95,6 +95,10 @@ #define MDIO_MMD_PCS_MV_TDR_OFF_CUTOFF 65246 +struct mv88q2xxx_priv { + bool enable_temp; +}; + struct mmd_val { int devad; u32 regnum; @@ -710,17 +714,12 @@ static const struct hwmon_chip_info mv88q2xxx_hwmon_chip_info = { static int mv88q2xxx_hwmon_probe(struct phy_device *phydev) { + struct mv88q2xxx_priv *priv = phydev->priv; struct device *dev = &phydev->mdio.dev; struct device *hwmon; char *hwmon_name; - int ret; - - /* Enable temperature sense */ - ret = phy_modify_mmd(phydev, MDIO_MMD_PCS, MDIO_MMD_PCS_MV_TEMP_SENSOR2, - MDIO_MMD_PCS_MV_TEMP_SENSOR2_DIS_MASK, 0); - if (ret < 0) - return ret; + priv->enable_temp = true; hwmon_name = devm_hwmon_sanitize_name(dev, dev_name(dev)); if (IS_ERR(hwmon_name)) return PTR_ERR(hwmon_name); @@ -743,6 +742,14 @@ static int mv88q2xxx_hwmon_probe(struct phy_device *phydev) static int mv88q2xxx_probe(struct phy_device *phydev) { + struct mv88q2xxx_priv *priv; + + priv = devm_kzalloc(&phydev->mdio.dev, sizeof(*priv), GFP_KERNEL); + if (!priv) + return -ENOMEM; + + phydev->priv = priv; + return mv88q2xxx_hwmon_probe(phydev); } @@ -810,6 +817,18 @@ static int mv88q222x_revb1_revb2_config_init(struct phy_device *phydev) static int mv88q222x_config_init(struct phy_device *phydev) { + struct mv88q2xxx_priv *priv = phydev->priv; + int ret; + + /* Enable temperature sense */ + if (priv->enable_temp) { + ret = phy_modify_mmd(phydev, MDIO_MMD_PCS, + MDIO_MMD_PCS_MV_TEMP_SENSOR2, + MDIO_MMD_PCS_MV_TEMP_SENSOR2_DIS_MASK, 0); + if (ret < 0) + return ret; + } + if (phydev->c45_ids.device_ids[MDIO_MMD_PMAPMD] == PHY_ID_88Q2220_REVB0) return mv88q222x_revb0_config_init(phydev); else diff --git a/drivers/net/phy/nxp-c45-tja11xx.c b/drivers/net/phy/nxp-c45-tja11xx.c index 323717a4821f..34231b5b9175 100644 --- a/drivers/net/phy/nxp-c45-tja11xx.c +++ b/drivers/net/phy/nxp-c45-tja11xx.c @@ -1297,6 +1297,8 @@ static int nxp_c45_soft_reset(struct phy_device *phydev) if (ret) return ret; + usleep_range(2000, 2050); + return phy_read_mmd_poll_timeout(phydev, MDIO_MMD_VEND1, VEND1_DEVICE_CONTROL, ret, !(ret & DEVICE_CONTROL_RESET), 20000, diff --git a/drivers/net/usb/ipheth.c b/drivers/net/usb/ipheth.c index 46afb95ffabe..a19789b57190 100644 --- a/drivers/net/usb/ipheth.c +++ b/drivers/net/usb/ipheth.c @@ -61,7 +61,18 @@ #define IPHETH_USBINTF_PROTO 1 #define IPHETH_IP_ALIGN 2 /* padding at front of URB */ -#define IPHETH_NCM_HEADER_SIZE (12 + 96) /* NCMH + NCM0 */ +/* On iOS devices, NCM headers in RX have a fixed size regardless of DPE count: + * - NTH16 (NCMH): 12 bytes, as per CDC NCM 1.0 spec + * - NDP16 (NCM0): 96 bytes, of which + * - NDP16 fixed header: 8 bytes + * - maximum of 22 DPEs (21 datagrams + trailer), 4 bytes each + */ +#define IPHETH_NDP16_MAX_DPE 22 +#define IPHETH_NDP16_HEADER_SIZE (sizeof(struct usb_cdc_ncm_ndp16) + \ + IPHETH_NDP16_MAX_DPE * \ + sizeof(struct usb_cdc_ncm_dpe16)) +#define IPHETH_NCM_HEADER_SIZE (sizeof(struct usb_cdc_ncm_nth16) + \ + IPHETH_NDP16_HEADER_SIZE) #define IPHETH_TX_BUF_SIZE ETH_FRAME_LEN #define 
IPHETH_RX_BUF_SIZE_LEGACY (IPHETH_IP_ALIGN + ETH_FRAME_LEN) #define IPHETH_RX_BUF_SIZE_NCM 65536 @@ -207,15 +218,23 @@ static int ipheth_rcvbulk_callback_legacy(struct urb *urb) return ipheth_consume_skb(buf, len, dev); } +/* In "NCM mode", the iOS device encapsulates RX (phone->computer) traffic + * in NCM Transfer Blocks (similarly to CDC NCM). However, unlike reverse + * tethering (handled by the `cdc_ncm` driver), regular tethering is not + * compliant with the CDC NCM spec, as the device is missing the necessary + * descriptors, and TX (computer->phone) traffic is not encapsulated + * at all. Thus `ipheth` implements a very limited subset of the spec with + * the sole purpose of parsing RX URBs. + */ static int ipheth_rcvbulk_callback_ncm(struct urb *urb) { struct usb_cdc_ncm_nth16 *ncmh; struct usb_cdc_ncm_ndp16 *ncm0; struct usb_cdc_ncm_dpe16 *dpe; struct ipheth_device *dev; + u16 dg_idx, dg_len; int retval = -EINVAL; char *buf; - int len; dev = urb->context; @@ -226,40 +245,42 @@ static int ipheth_rcvbulk_callback_ncm(struct urb *urb) ncmh = urb->transfer_buffer; if (ncmh->dwSignature != cpu_to_le32(USB_CDC_NCM_NTH16_SIGN) || - le16_to_cpu(ncmh->wNdpIndex) >= urb->actual_length) { - dev->net->stats.rx_errors++; - return retval; - } + /* On iOS, NDP16 directly follows NTH16 */ + ncmh->wNdpIndex != cpu_to_le16(sizeof(struct usb_cdc_ncm_nth16))) + goto rx_error; - ncm0 = urb->transfer_buffer + le16_to_cpu(ncmh->wNdpIndex); - if (ncm0->dwSignature != cpu_to_le32(USB_CDC_NCM_NDP16_NOCRC_SIGN) || - le16_to_cpu(ncmh->wHeaderLength) + le16_to_cpu(ncm0->wLength) >= - urb->actual_length) { - dev->net->stats.rx_errors++; - return retval; - } + ncm0 = urb->transfer_buffer + sizeof(struct usb_cdc_ncm_nth16); + if (ncm0->dwSignature != cpu_to_le32(USB_CDC_NCM_NDP16_NOCRC_SIGN)) + goto rx_error; dpe = ncm0->dpe16; - while (le16_to_cpu(dpe->wDatagramIndex) != 0 && - le16_to_cpu(dpe->wDatagramLength) != 0) { - if (le16_to_cpu(dpe->wDatagramIndex) >= urb->actual_length || - le16_to_cpu(dpe->wDatagramIndex) + - le16_to_cpu(dpe->wDatagramLength) > urb->actual_length) { + for (int dpe_i = 0; dpe_i < IPHETH_NDP16_MAX_DPE; ++dpe_i, ++dpe) { + dg_idx = le16_to_cpu(dpe->wDatagramIndex); + dg_len = le16_to_cpu(dpe->wDatagramLength); + + /* Null DPE must be present after last datagram pointer entry + * (3.3.1 USB CDC NCM spec v1.0) + */ + if (dg_idx == 0 && dg_len == 0) + return 0; + + if (dg_idx < IPHETH_NCM_HEADER_SIZE || + dg_idx >= urb->actual_length || + dg_len > urb->actual_length - dg_idx) { dev->net->stats.rx_length_errors++; return retval; } - buf = urb->transfer_buffer + le16_to_cpu(dpe->wDatagramIndex); - len = le16_to_cpu(dpe->wDatagramLength); + buf = urb->transfer_buffer + dg_idx; - retval = ipheth_consume_skb(buf, len, dev); + retval = ipheth_consume_skb(buf, dg_len, dev); if (retval != 0) return retval; - - dpe++; } - return 0; +rx_error: + dev->net->stats.rx_errors++; + return retval; } static void ipheth_rcvbulk_callback(struct urb *urb) diff --git a/drivers/net/usb/rtl8150.c b/drivers/net/usb/rtl8150.c index 01a3b2417a54..ddff6f19ff98 100644 --- a/drivers/net/usb/rtl8150.c +++ b/drivers/net/usb/rtl8150.c @@ -71,6 +71,14 @@ #define MSR_SPEED (1<<3) #define MSR_LINK (1<<2) +/* USB endpoints */ +enum rtl8150_usb_ep { + RTL8150_USB_EP_CONTROL = 0, + RTL8150_USB_EP_BULK_IN = 1, + RTL8150_USB_EP_BULK_OUT = 2, + RTL8150_USB_EP_INT_IN = 3, +}; + /* Interrupt pipe data */ #define INT_TSR 0x00 #define INT_RSR 0x01 @@ -867,6 +875,13 @@ static int rtl8150_probe(struct usb_interface *intf, struct 
usb_device *udev = interface_to_usbdev(intf); rtl8150_t *dev; struct net_device *netdev; + static const u8 bulk_ep_addr[] = { + RTL8150_USB_EP_BULK_IN | USB_DIR_IN, + RTL8150_USB_EP_BULK_OUT | USB_DIR_OUT, + 0}; + static const u8 int_ep_addr[] = { + RTL8150_USB_EP_INT_IN | USB_DIR_IN, + 0}; netdev = alloc_etherdev(sizeof(rtl8150_t)); if (!netdev) @@ -880,6 +895,13 @@ static int rtl8150_probe(struct usb_interface *intf, return -ENOMEM; } + /* Verify that all required endpoints are present */ + if (!usb_check_bulk_endpoints(intf, bulk_ep_addr) || + !usb_check_int_endpoints(intf, int_ep_addr)) { + dev_err(&intf->dev, "couldn't find required endpoints\n"); + goto out; + } + tasklet_setup(&dev->tl, rx_fixup); spin_lock_init(&dev->rx_pool_lock); diff --git a/drivers/net/vxlan/vxlan_vnifilter.c b/drivers/net/vxlan/vxlan_vnifilter.c index d2023e7131bd..6e6e9f05509a 100644 --- a/drivers/net/vxlan/vxlan_vnifilter.c +++ b/drivers/net/vxlan/vxlan_vnifilter.c @@ -411,6 +411,11 @@ static int vxlan_vnifilter_dump(struct sk_buff *skb, struct netlink_callback *cb struct tunnel_msg *tmsg; struct net_device *dev; + if (cb->nlh->nlmsg_len < nlmsg_msg_size(sizeof(struct tunnel_msg))) { + NL_SET_ERR_MSG(cb->extack, "Invalid msg length"); + return -EINVAL; + } + tmsg = nlmsg_data(cb->nlh); if (tmsg->flags & ~TUNNEL_MSG_VALID_USER_FLAGS) { diff --git a/drivers/net/wireless/mediatek/mt76/mt7603/mac.c b/drivers/net/wireless/mediatek/mt76/mt7603/mac.c index a259f4dd9540..413973d05b43 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7603/mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt7603/mac.c @@ -1479,14 +1479,13 @@ static void mt7603_mac_watchdog_reset(struct mt7603_dev *dev) tasklet_enable(&dev->mt76.pre_tbtt_tasklet); mt7603_beacon_set_timer(dev, -1, beacon_int); - local_bh_disable(); napi_enable(&dev->mt76.tx_napi); - napi_schedule(&dev->mt76.tx_napi); - napi_enable(&dev->mt76.napi[0]); - napi_schedule(&dev->mt76.napi[0]); - napi_enable(&dev->mt76.napi[1]); + + local_bh_disable(); + napi_schedule(&dev->mt76.tx_napi); + napi_schedule(&dev->mt76.napi[0]); napi_schedule(&dev->mt76.napi[1]); local_bh_enable(); diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/pci.c b/drivers/net/wireless/mediatek/mt76/mt7615/pci.c index 9a278589df4e..68010e27f065 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7615/pci.c +++ b/drivers/net/wireless/mediatek/mt76/mt7615/pci.c @@ -164,12 +164,16 @@ static int mt7615_pci_resume(struct pci_dev *pdev) dev_err(mdev->dev, "PDMA engine must be reinitialized\n"); mt76_worker_enable(&mdev->tx_worker); - local_bh_disable(); + mt76_for_each_q_rx(mdev, i) { napi_enable(&mdev->napi[i]); - napi_schedule(&mdev->napi[i]); } napi_enable(&mdev->tx_napi); + + local_bh_disable(); + mt76_for_each_q_rx(mdev, i) { + napi_schedule(&mdev->napi[i]); + } napi_schedule(&mdev->tx_napi); local_bh_enable(); diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/pci_mac.c b/drivers/net/wireless/mediatek/mt76/mt7615/pci_mac.c index a0ca3bbdfcaf..c2e4e6aabd9f 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7615/pci_mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt7615/pci_mac.c @@ -262,12 +262,14 @@ void mt7615_mac_reset_work(struct work_struct *work) mt76_worker_enable(&dev->mt76.tx_worker); - local_bh_disable(); napi_enable(&dev->mt76.tx_napi); - napi_schedule(&dev->mt76.tx_napi); - mt76_for_each_q_rx(&dev->mt76, i) { napi_enable(&dev->mt76.napi[i]); + } + + local_bh_disable(); + napi_schedule(&dev->mt76.tx_napi); + mt76_for_each_q_rx(&dev->mt76, i) { napi_schedule(&dev->mt76.napi[i]); } 
local_bh_enable(); diff --git a/drivers/net/wireless/mediatek/mt76/mt76x0/pci.c b/drivers/net/wireless/mediatek/mt76/mt76x0/pci.c index 1eb955f3ca13..b456ccd00d58 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x0/pci.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x0/pci.c @@ -282,14 +282,16 @@ static int mt76x0e_resume(struct pci_dev *pdev) mt76_worker_enable(&mdev->tx_worker); - local_bh_disable(); mt76_for_each_q_rx(mdev, i) { mt76_queue_rx_reset(dev, i); napi_enable(&mdev->napi[i]); - napi_schedule(&mdev->napi[i]); } - napi_enable(&mdev->tx_napi); + + local_bh_disable(); + mt76_for_each_q_rx(mdev, i) { + napi_schedule(&mdev->napi[i]); + } napi_schedule(&mdev->tx_napi); local_bh_enable(); diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02_mmio.c b/drivers/net/wireless/mediatek/mt76/mt76x02_mmio.c index 7d840ad4ae65..a82c75ba26e6 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x02_mmio.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x02_mmio.c @@ -504,12 +504,14 @@ static void mt76x02_watchdog_reset(struct mt76x02_dev *dev) mt76_worker_enable(&dev->mt76.tx_worker); tasklet_enable(&dev->mt76.pre_tbtt_tasklet); - local_bh_disable(); napi_enable(&dev->mt76.tx_napi); - napi_schedule(&dev->mt76.tx_napi); - mt76_for_each_q_rx(&dev->mt76, i) { napi_enable(&dev->mt76.napi[i]); + } + + local_bh_disable(); + napi_schedule(&dev->mt76.tx_napi); + mt76_for_each_q_rx(&dev->mt76, i) { napi_schedule(&dev->mt76.napi[i]); } local_bh_enable(); diff --git a/drivers/net/wireless/mediatek/mt76/mt76x2/pci.c b/drivers/net/wireless/mediatek/mt76/mt76x2/pci.c index 67c9d1caa0bd..727bfdd00b40 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x2/pci.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x2/pci.c @@ -151,12 +151,15 @@ mt76x2e_resume(struct pci_dev *pdev) mt76_worker_enable(&mdev->tx_worker); - local_bh_disable(); mt76_for_each_q_rx(mdev, i) { napi_enable(&mdev->napi[i]); - napi_schedule(&mdev->napi[i]); } napi_enable(&mdev->tx_napi); + + local_bh_disable(); + mt76_for_each_q_rx(mdev, i) { + napi_schedule(&mdev->napi[i]); + } napi_schedule(&mdev->tx_napi); local_bh_enable(); diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mac.c b/drivers/net/wireless/mediatek/mt76/mt7915/mac.c index 13bdc0a7174c..2ba6eb3038ce 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7915/mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt7915/mac.c @@ -1356,10 +1356,15 @@ mt7915_mac_restart(struct mt7915_dev *dev) mt7915_dma_reset(dev, true); - local_bh_disable(); mt76_for_each_q_rx(mdev, i) { if (mdev->q_rx[i].ndesc) { napi_enable(&dev->mt76.napi[i]); + } + } + + local_bh_disable(); + mt76_for_each_q_rx(mdev, i) { + if (mdev->q_rx[i].ndesc) { napi_schedule(&dev->mt76.napi[i]); } } @@ -1419,8 +1424,9 @@ out: if (phy2) clear_bit(MT76_RESET, &phy2->mt76->state); - local_bh_disable(); napi_enable(&dev->mt76.tx_napi); + + local_bh_disable(); napi_schedule(&dev->mt76.tx_napi); local_bh_enable(); @@ -1570,9 +1576,12 @@ void mt7915_mac_reset_work(struct work_struct *work) if (phy2) clear_bit(MT76_RESET, &phy2->mt76->state); - local_bh_disable(); mt76_for_each_q_rx(&dev->mt76, i) { napi_enable(&dev->mt76.napi[i]); + } + + local_bh_disable(); + mt76_for_each_q_rx(&dev->mt76, i) { napi_schedule(&dev->mt76.napi[i]); } local_bh_enable(); @@ -1581,8 +1590,8 @@ void mt7915_mac_reset_work(struct work_struct *work) mt76_worker_enable(&dev->mt76.tx_worker); - local_bh_disable(); napi_enable(&dev->mt76.tx_napi); + local_bh_disable(); napi_schedule(&dev->mt76.tx_napi); local_bh_enable(); diff --git 
a/drivers/net/wireless/mediatek/mt76/mt7921/pci.c b/drivers/net/wireless/mediatek/mt76/mt7921/pci.c index ba870e1b05fb..a0c9df3c2cc7 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7921/pci.c +++ b/drivers/net/wireless/mediatek/mt76/mt7921/pci.c @@ -523,12 +523,15 @@ static int mt7921_pci_resume(struct device *device) mt76_worker_enable(&mdev->tx_worker); - local_bh_disable(); mt76_for_each_q_rx(mdev, i) { napi_enable(&mdev->napi[i]); - napi_schedule(&mdev->napi[i]); } napi_enable(&mdev->tx_napi); + + local_bh_disable(); + mt76_for_each_q_rx(mdev, i) { + napi_schedule(&mdev->napi[i]); + } napi_schedule(&mdev->tx_napi); local_bh_enable(); diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/pci_mac.c b/drivers/net/wireless/mediatek/mt76/mt7921/pci_mac.c index 2452b1a2d118..881812ba03ff 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7921/pci_mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt7921/pci_mac.c @@ -81,9 +81,12 @@ int mt7921e_mac_reset(struct mt792x_dev *dev) mt792x_wpdma_reset(dev, true); - local_bh_disable(); mt76_for_each_q_rx(&dev->mt76, i) { napi_enable(&dev->mt76.napi[i]); + } + + local_bh_disable(); + mt76_for_each_q_rx(&dev->mt76, i) { napi_schedule(&dev->mt76.napi[i]); } local_bh_enable(); @@ -115,8 +118,8 @@ int mt7921e_mac_reset(struct mt792x_dev *dev) err = __mt7921_start(&dev->phy); out: - local_bh_disable(); napi_enable(&dev->mt76.tx_napi); + local_bh_disable(); napi_schedule(&dev->mt76.tx_napi); local_bh_enable(); diff --git a/drivers/net/wireless/mediatek/mt76/mt7925/pci.c b/drivers/net/wireless/mediatek/mt76/mt7925/pci.c index f36893e20c61..c7b5dc1dbb34 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7925/pci.c +++ b/drivers/net/wireless/mediatek/mt76/mt7925/pci.c @@ -556,12 +556,15 @@ static int mt7925_pci_resume(struct device *device) mt76_worker_enable(&mdev->tx_worker); - local_bh_disable(); mt76_for_each_q_rx(mdev, i) { napi_enable(&mdev->napi[i]); - napi_schedule(&mdev->napi[i]); } napi_enable(&mdev->tx_napi); + + local_bh_disable(); + mt76_for_each_q_rx(mdev, i) { + napi_schedule(&mdev->napi[i]); + } napi_schedule(&mdev->tx_napi); local_bh_enable(); diff --git a/drivers/net/wireless/mediatek/mt76/mt7925/pci_mac.c b/drivers/net/wireless/mediatek/mt76/mt7925/pci_mac.c index faedbf766d1a..4578d16bf456 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7925/pci_mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt7925/pci_mac.c @@ -101,12 +101,15 @@ int mt7925e_mac_reset(struct mt792x_dev *dev) mt792x_wpdma_reset(dev, true); - local_bh_disable(); mt76_for_each_q_rx(&dev->mt76, i) { napi_enable(&dev->mt76.napi[i]); - napi_schedule(&dev->mt76.napi[i]); } napi_enable(&dev->mt76.tx_napi); + + local_bh_disable(); + mt76_for_each_q_rx(&dev->mt76, i) { + napi_schedule(&dev->mt76.napi[i]); + } napi_schedule(&dev->mt76.tx_napi); local_bh_enable(); diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/mac.c b/drivers/net/wireless/mediatek/mt76/mt7996/mac.c index bc8cba4dca47..019c925ae600 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/mac.c @@ -1695,7 +1695,6 @@ mt7996_mac_restart(struct mt7996_dev *dev) mt7996_dma_reset(dev, true); - local_bh_disable(); mt76_for_each_q_rx(mdev, i) { if (mtk_wed_device_active(&dev->mt76.mmio.wed) && mt76_queue_is_wed_rro(&mdev->q_rx[i])) @@ -1703,10 +1702,11 @@ mt7996_mac_restart(struct mt7996_dev *dev) if (mdev->q_rx[i].ndesc) { napi_enable(&dev->mt76.napi[i]); + local_bh_disable(); napi_schedule(&dev->mt76.napi[i]); + local_bh_enable(); } } - local_bh_enable(); 
clear_bit(MT76_MCU_RESET, &dev->mphy.state); clear_bit(MT76_STATE_MCU_RUNNING, &dev->mphy.state); @@ -1764,8 +1764,8 @@ out: if (phy3) clear_bit(MT76_RESET, &phy3->mt76->state); - local_bh_disable(); napi_enable(&dev->mt76.tx_napi); + local_bh_disable(); napi_schedule(&dev->mt76.tx_napi); local_bh_enable(); @@ -1958,23 +1958,23 @@ void mt7996_mac_reset_work(struct work_struct *work) if (phy3) clear_bit(MT76_RESET, &phy3->mt76->state); - local_bh_disable(); mt76_for_each_q_rx(&dev->mt76, i) { if (mtk_wed_device_active(&dev->mt76.mmio.wed) && mt76_queue_is_wed_rro(&dev->mt76.q_rx[i])) continue; napi_enable(&dev->mt76.napi[i]); + local_bh_disable(); napi_schedule(&dev->mt76.napi[i]); + local_bh_enable(); } - local_bh_enable(); tasklet_schedule(&dev->mt76.irq_tasklet); mt76_worker_enable(&dev->mt76.tx_worker); - local_bh_disable(); napi_enable(&dev->mt76.tx_napi); + local_bh_disable(); napi_schedule(&dev->mt76.tx_napi); local_bh_enable(); diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 76b615d4d5b9..40046770f1bf 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2132,15 +2132,16 @@ static int nvme_update_ns_info_generic(struct nvme_ns *ns, struct nvme_ns_info *info) { struct queue_limits lim; + unsigned int memflags; int ret; lim = queue_limits_start_update(ns->disk->queue); nvme_set_ctrl_limits(ns->ctrl, &lim); - blk_mq_freeze_queue(ns->disk->queue); + memflags = blk_mq_freeze_queue(ns->disk->queue); ret = queue_limits_commit_update(ns->disk->queue, &lim); set_disk_ro(ns->disk, nvme_ns_is_readonly(ns, info)); - blk_mq_unfreeze_queue(ns->disk->queue); + blk_mq_unfreeze_queue(ns->disk->queue, memflags); /* Hide the block-interface for these devices */ if (!ret) @@ -2155,6 +2156,7 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns, struct nvme_id_ns_nvm *nvm = NULL; struct nvme_zone_info zi = {}; struct nvme_id_ns *id; + unsigned int memflags; sector_t capacity; unsigned lbaf; int ret; @@ -2186,7 +2188,7 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns, lim = queue_limits_start_update(ns->disk->queue); - blk_mq_freeze_queue(ns->disk->queue); + memflags = blk_mq_freeze_queue(ns->disk->queue); ns->head->lba_shift = id->lbaf[lbaf].ds; ns->head->nuse = le64_to_cpu(id->nuse); capacity = nvme_lba_to_sect(ns->head, le64_to_cpu(id->nsze)); @@ -2219,7 +2221,7 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns, ret = queue_limits_commit_update(ns->disk->queue, &lim); if (ret) { - blk_mq_unfreeze_queue(ns->disk->queue); + blk_mq_unfreeze_queue(ns->disk->queue, memflags); goto out; } @@ -2235,7 +2237,7 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns, ns->head->features |= NVME_NS_DEAC; set_disk_ro(ns->disk, nvme_ns_is_readonly(ns, info)); set_bit(NVME_NS_READY, &ns->flags); - blk_mq_unfreeze_queue(ns->disk->queue); + blk_mq_unfreeze_queue(ns->disk->queue, memflags); if (blk_queue_is_zoned(ns->queue)) { ret = blk_revalidate_disk_zones(ns->disk); @@ -2291,9 +2293,10 @@ static int nvme_update_ns_info(struct nvme_ns *ns, struct nvme_ns_info *info) if (!ret && nvme_ns_head_multipath(ns->head)) { struct queue_limits *ns_lim = &ns->disk->queue->limits; struct queue_limits lim; + unsigned int memflags; lim = queue_limits_start_update(ns->head->disk->queue); - blk_mq_freeze_queue(ns->head->disk->queue); + memflags = blk_mq_freeze_queue(ns->head->disk->queue); /* * queue_limits mixes values that are the hardware limitations * for bio splitting with what is the device configuration. 
@@ -2325,7 +2328,7 @@ static int nvme_update_ns_info(struct nvme_ns *ns, struct nvme_ns_info *info) set_disk_ro(ns->head->disk, nvme_ns_is_readonly(ns, info)); nvme_mpath_revalidate_paths(ns); - blk_mq_unfreeze_queue(ns->head->disk->queue); + blk_mq_unfreeze_queue(ns->head->disk->queue, memflags); } return ret; diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index a85d190942bd..2a7635565083 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -60,7 +60,7 @@ void nvme_mpath_unfreeze(struct nvme_subsystem *subsys) lockdep_assert_held(&subsys->lock); list_for_each_entry(h, &subsys->nsheads, entry) if (h->disk) - blk_mq_unfreeze_queue(h->disk->queue); + blk_mq_unfreeze_queue_nomemrestore(h->disk->queue); } void nvme_mpath_wait_freeze(struct nvme_subsystem *subsys) diff --git a/drivers/pci/devres.c b/drivers/pci/devres.c index d1d97a4bb36d..3431a7df3e0d 100644 --- a/drivers/pci/devres.c +++ b/drivers/pci/devres.c @@ -419,19 +419,12 @@ static void pcim_intx_restore(struct device *dev, void *data) pci_intx(pdev, res->orig_intx); } -static struct pcim_intx_devres *get_or_create_intx_devres(struct device *dev) +static void save_orig_intx(struct pci_dev *pdev, struct pcim_intx_devres *res) { - struct pcim_intx_devres *res; - - res = devres_find(dev, pcim_intx_restore, NULL, NULL); - if (res) - return res; + u16 pci_command; - res = devres_alloc(pcim_intx_restore, sizeof(*res), GFP_KERNEL); - if (res) - devres_add(dev, res); - - return res; + pci_read_config_word(pdev, PCI_COMMAND, &pci_command); + res->orig_intx = !(pci_command & PCI_COMMAND_INTX_DISABLE); } /** @@ -447,12 +440,23 @@ static struct pcim_intx_devres *get_or_create_intx_devres(struct device *dev) int pcim_intx(struct pci_dev *pdev, int enable) { struct pcim_intx_devres *res; + struct device *dev = &pdev->dev; - res = get_or_create_intx_devres(&pdev->dev); - if (!res) - return -ENOMEM; + /* + * pcim_intx() must only restore the INTx value that existed before the + * driver was loaded, i.e., before it called pcim_intx() for the + * first time. 
+ */ + res = devres_find(dev, pcim_intx_restore, NULL, NULL); + if (!res) { + res = devres_alloc(pcim_intx_restore, sizeof(*res), GFP_KERNEL); + if (!res) + return -ENOMEM; + + save_orig_intx(pdev, res); + devres_add(dev, res); + } - res->orig_intx = !enable; pci_intx(pdev, enable); return 0; diff --git a/drivers/ptp/ptp_chardev.c b/drivers/ptp/ptp_chardev.c index ea96a14d72d1..bf6468c56419 100644 --- a/drivers/ptp/ptp_chardev.c +++ b/drivers/ptp/ptp_chardev.c @@ -4,6 +4,7 @@ * * Copyright (C) 2010 OMICRON electronics GmbH */ +#include <linux/compat.h> #include <linux/module.h> #include <linux/posix-clock.h> #include <linux/poll.h> @@ -176,6 +177,9 @@ long ptp_ioctl(struct posix_clock_context *pccontext, unsigned int cmd, struct timespec64 ts; int enable, err = 0; + if (in_compat_syscall() && cmd != PTP_ENABLE_PPS && cmd != PTP_ENABLE_PPS2) + arg = (unsigned long)compat_ptr(arg); + tsevq = pccontext->private_clkdata; switch (cmd) { diff --git a/drivers/ptp/ptp_clock.c b/drivers/ptp/ptp_clock.c index b932425ddc6a..35a5994bf64f 100644 --- a/drivers/ptp/ptp_clock.c +++ b/drivers/ptp/ptp_clock.c @@ -217,6 +217,11 @@ static int ptp_getcycles64(struct ptp_clock_info *info, struct timespec64 *ts) return info->gettime64(info, ts); } +static int ptp_enable(struct ptp_clock_info *ptp, struct ptp_clock_request *request, int on) +{ + return -EOPNOTSUPP; +} + static void ptp_aux_kworker(struct kthread_work *work) { struct ptp_clock *ptp = container_of(work, struct ptp_clock, @@ -294,6 +299,9 @@ struct ptp_clock *ptp_clock_register(struct ptp_clock_info *info, ptp->info->getcrosscycles = ptp->info->getcrosststamp; } + if (!ptp->info->enable) + ptp->info->enable = ptp_enable; + if (ptp->info->do_aux_work) { kthread_init_delayed_work(&ptp->aux_work, ptp_aux_kworker); ptp->kworker = kthread_run_worker(0, "ptp%d", ptp->index); diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig index a60bcc791a48..0bbbf778ecfa 100644 --- a/drivers/rtc/Kconfig +++ b/drivers/rtc/Kconfig @@ -1316,7 +1316,7 @@ config RTC_DRV_SC27XX config RTC_DRV_SPEAR tristate "SPEAR ST RTC" depends on PLAT_SPEAR || COMPILE_TEST - default y + default PLAT_SPEAR help If you say Y here you will get support for the RTC found on spear diff --git a/drivers/rtc/rtc-88pm80x.c b/drivers/rtc/rtc-88pm80x.c index 5c39cf252392..a3e52a5a708f 100644 --- a/drivers/rtc/rtc-88pm80x.c +++ b/drivers/rtc/rtc-88pm80x.c @@ -308,7 +308,7 @@ static int pm80x_rtc_probe(struct platform_device *pdev) /* remember whether this power up is caused by PMIC RTC or not */ info->rtc_dev->dev.platform_data = &pdata->rtc_wakeup; - device_init_wakeup(&pdev->dev, 1); + device_init_wakeup(&pdev->dev, true); return 0; out_rtc: diff --git a/drivers/rtc/rtc-88pm860x.c b/drivers/rtc/rtc-88pm860x.c index 814230d61842..964cd048fcdb 100644 --- a/drivers/rtc/rtc-88pm860x.c +++ b/drivers/rtc/rtc-88pm860x.c @@ -326,7 +326,7 @@ static int pm860x_rtc_probe(struct platform_device *pdev) schedule_delayed_work(&info->calib_work, VRTC_CALIB_INTERVAL); #endif /* VRTC_CALIBRATION */ - device_init_wakeup(&pdev->dev, 1); + device_init_wakeup(&pdev->dev, true); return 0; } diff --git a/drivers/rtc/rtc-amlogic-a4.c b/drivers/rtc/rtc-amlogic-a4.c index 2278b4c98a71..09d78c2cc691 100644 --- a/drivers/rtc/rtc-amlogic-a4.c +++ b/drivers/rtc/rtc-amlogic-a4.c @@ -361,7 +361,7 @@ static int aml_rtc_probe(struct platform_device *pdev) "failed to get_enable rtc sys clk\n"); aml_rtc_init(rtc); - device_init_wakeup(dev, 1); + device_init_wakeup(dev, true); platform_set_drvdata(pdev, rtc); rtc->rtc_dev = 
devm_rtc_allocate_device(dev); @@ -391,7 +391,7 @@ static int aml_rtc_probe(struct platform_device *pdev) return 0; err_clk: clk_disable_unprepare(rtc->sys_clk); - device_init_wakeup(dev, 0); + device_init_wakeup(dev, false); return ret; } @@ -426,7 +426,7 @@ static void aml_rtc_remove(struct platform_device *pdev) struct aml_rtc_data *rtc = dev_get_drvdata(&pdev->dev); clk_disable_unprepare(rtc->sys_clk); - device_init_wakeup(&pdev->dev, 0); + device_init_wakeup(&pdev->dev, false); } static const struct aml_rtc_config a5_rtc_config = { diff --git a/drivers/rtc/rtc-armada38x.c b/drivers/rtc/rtc-armada38x.c index 569c1054d6b0..713fa0d077cd 100644 --- a/drivers/rtc/rtc-armada38x.c +++ b/drivers/rtc/rtc-armada38x.c @@ -527,7 +527,7 @@ static __init int armada38x_rtc_probe(struct platform_device *pdev) platform_set_drvdata(pdev, rtc); if (rtc->irq != -1) - device_init_wakeup(&pdev->dev, 1); + device_init_wakeup(&pdev->dev, true); else clear_bit(RTC_FEATURE_ALARM, rtc->rtc_dev->features); diff --git a/drivers/rtc/rtc-as3722.c b/drivers/rtc/rtc-as3722.c index 0f21af27f4cf..9682d6457b7f 100644 --- a/drivers/rtc/rtc-as3722.c +++ b/drivers/rtc/rtc-as3722.c @@ -187,7 +187,7 @@ static int as3722_rtc_probe(struct platform_device *pdev) return ret; } - device_init_wakeup(&pdev->dev, 1); + device_init_wakeup(&pdev->dev, true); as3722_rtc->rtc = devm_rtc_device_register(&pdev->dev, "as3722-rtc", &as3722_rtc_ops, THIS_MODULE); diff --git a/drivers/rtc/rtc-at91rm9200.c b/drivers/rtc/rtc-at91rm9200.c index 9b3898b8de7c..f6b0102a843a 100644 --- a/drivers/rtc/rtc-at91rm9200.c +++ b/drivers/rtc/rtc-at91rm9200.c @@ -528,7 +528,7 @@ static int __init at91_rtc_probe(struct platform_device *pdev) * being wake-capable; if it didn't, do that here. */ if (!device_can_wakeup(&pdev->dev)) - device_init_wakeup(&pdev->dev, 1); + device_init_wakeup(&pdev->dev, true); if (at91_rtc_config->has_correction) rtc->ops = &sama5d4_rtc_ops; diff --git a/drivers/rtc/rtc-at91sam9.c b/drivers/rtc/rtc-at91sam9.c index 15b21da2788f..38991cca5930 100644 --- a/drivers/rtc/rtc-at91sam9.c +++ b/drivers/rtc/rtc-at91sam9.c @@ -353,7 +353,7 @@ static int at91_rtc_probe(struct platform_device *pdev) /* platform setup code should have handled this; sigh */ if (!device_can_wakeup(&pdev->dev)) - device_init_wakeup(&pdev->dev, 1); + device_init_wakeup(&pdev->dev, true); platform_set_drvdata(pdev, rtc); diff --git a/drivers/rtc/rtc-cadence.c b/drivers/rtc/rtc-cadence.c index bf2a9a1fdea7..8634eea799ab 100644 --- a/drivers/rtc/rtc-cadence.c +++ b/drivers/rtc/rtc-cadence.c @@ -359,7 +359,7 @@ static void cdns_rtc_remove(struct platform_device *pdev) struct cdns_rtc *crtc = platform_get_drvdata(pdev); cdns_rtc_alarm_irq_enable(&pdev->dev, 0); - device_init_wakeup(&pdev->dev, 0); + device_init_wakeup(&pdev->dev, false); clk_disable_unprepare(crtc->pclk); clk_disable_unprepare(crtc->ref_clk); diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c index 78f2ce12c75a..8172869bd3d7 100644 --- a/drivers/rtc/rtc-cmos.c +++ b/drivers/rtc/rtc-cmos.c @@ -151,11 +151,6 @@ static inline int hpet_set_periodic_freq(unsigned long freq) return 0; } -static inline int hpet_rtc_dropped_irq(void) -{ - return 0; -} - static inline int hpet_rtc_timer_init(void) { return 0; @@ -864,7 +859,7 @@ static void acpi_cmos_wake_setup(struct device *dev) dev_info(dev, "RTC can wake from S4\n"); /* RTC always wakes from S1/S2/S3, and often S4/STD */ - device_init_wakeup(dev, 1); + device_init_wakeup(dev, true); } static void cmos_check_acpi_rtc_status(struct device *dev, diff 
--git a/drivers/rtc/rtc-cpcap.c b/drivers/rtc/rtc-cpcap.c index afc8fcba8f88..568a89e79c11 100644 --- a/drivers/rtc/rtc-cpcap.c +++ b/drivers/rtc/rtc-cpcap.c @@ -295,7 +295,7 @@ static int cpcap_rtc_probe(struct platform_device *pdev) } disable_irq(rtc->update_irq); - err = device_init_wakeup(dev, 1); + err = device_init_wakeup(dev, true); if (err) { dev_err(dev, "wakeup initialization failed (%d)\n", err); /* ignore error and continue without wakeup support */ diff --git a/drivers/rtc/rtc-cros-ec.c b/drivers/rtc/rtc-cros-ec.c index 60a48c3ba3ca..865c2e82c7a5 100644 --- a/drivers/rtc/rtc-cros-ec.c +++ b/drivers/rtc/rtc-cros-ec.c @@ -337,7 +337,7 @@ static int cros_ec_rtc_probe(struct platform_device *pdev) return ret; } - ret = device_init_wakeup(&pdev->dev, 1); + ret = device_init_wakeup(&pdev->dev, true); if (ret) { dev_err(&pdev->dev, "failed to initialize wakeup\n"); return ret; diff --git a/drivers/rtc/rtc-da9055.c b/drivers/rtc/rtc-da9055.c index 844168fcae1e..05adec6b77bf 100644 --- a/drivers/rtc/rtc-da9055.c +++ b/drivers/rtc/rtc-da9055.c @@ -288,7 +288,7 @@ static int da9055_rtc_probe(struct platform_device *pdev) if (ret & DA9055_RTC_ALM_EN) rtc->alarm_enable = 1; - device_init_wakeup(&pdev->dev, 1); + device_init_wakeup(&pdev->dev, true); rtc->rtc = devm_rtc_device_register(&pdev->dev, pdev->name, &da9055_rtc_ops, THIS_MODULE); diff --git a/drivers/rtc/rtc-ds3232.c b/drivers/rtc/rtc-ds3232.c index dd37b055693c..19c09c418746 100644 --- a/drivers/rtc/rtc-ds3232.c +++ b/drivers/rtc/rtc-ds3232.c @@ -508,7 +508,7 @@ static int ds3232_probe(struct device *dev, struct regmap *regmap, int irq, return ret; if (ds3232->irq > 0) - device_init_wakeup(dev, 1); + device_init_wakeup(dev, true); ds3232_hwmon_register(dev, name); diff --git a/drivers/rtc/rtc-isl1208.c b/drivers/rtc/rtc-isl1208.c index 7b82e4a14b7a..f71a6bb77b2a 100644 --- a/drivers/rtc/rtc-isl1208.c +++ b/drivers/rtc/rtc-isl1208.c @@ -830,7 +830,7 @@ static int isl1208_setup_irq(struct i2c_client *client, int irq) isl1208_driver.driver.name, client); if (!rc) { - device_init_wakeup(&client->dev, 1); + device_init_wakeup(&client->dev, true); enable_irq_wake(irq); } else { dev_err(&client->dev, diff --git a/drivers/rtc/rtc-jz4740.c b/drivers/rtc/rtc-jz4740.c index bafa7d1b9b88..44bba356268c 100644 --- a/drivers/rtc/rtc-jz4740.c +++ b/drivers/rtc/rtc-jz4740.c @@ -367,7 +367,7 @@ static int jz4740_rtc_probe(struct platform_device *pdev) platform_set_drvdata(pdev, rtc); - device_init_wakeup(dev, 1); + device_init_wakeup(dev, true); ret = dev_pm_set_wake_irq(dev, irq); if (ret) diff --git a/drivers/rtc/rtc-loongson.c b/drivers/rtc/rtc-loongson.c index 8d713e563d7c..97e5625c064c 100644 --- a/drivers/rtc/rtc-loongson.c +++ b/drivers/rtc/rtc-loongson.c @@ -114,6 +114,13 @@ static irqreturn_t loongson_rtc_isr(int irq, void *id) struct loongson_rtc_priv *priv = (struct loongson_rtc_priv *)id; rtc_update_irq(priv->rtcdev, 1, RTC_AF | RTC_IRQF); + + /* + * The TOY_MATCH0_REG should be cleared 0 here, + * otherwise the interrupt cannot be cleared. + */ + regmap_write(priv->regmap, TOY_MATCH0_REG, 0); + return IRQ_HANDLED; } @@ -131,11 +138,7 @@ static u32 loongson_rtc_handler(void *id) writel(RTC_STS, priv->pm_base + PM1_STS_REG); spin_unlock(&priv->lock); - /* - * The TOY_MATCH0_REG should be cleared 0 here, - * otherwise the interrupt cannot be cleared. 
- */ - return regmap_write(priv->regmap, TOY_MATCH0_REG, 0); + return ACPI_INTERRUPT_HANDLED; } static int loongson_rtc_set_enabled(struct device *dev) @@ -329,7 +332,7 @@ static int loongson_rtc_probe(struct platform_device *pdev) alarm_irq); priv->pm_base = regs - priv->config->pm_offset; - device_init_wakeup(dev, 1); + device_init_wakeup(dev, true); if (has_acpi_companion(dev)) acpi_install_fixed_event_handler(ACPI_EVENT_RTC, @@ -360,7 +363,7 @@ static void loongson_rtc_remove(struct platform_device *pdev) acpi_remove_fixed_event_handler(ACPI_EVENT_RTC, loongson_rtc_handler); - device_init_wakeup(dev, 0); + device_init_wakeup(dev, false); loongson_rtc_alarm_irq_enable(dev, 0); } diff --git a/drivers/rtc/rtc-lp8788.c b/drivers/rtc/rtc-lp8788.c index c0b8fbce1082..0793d70507f7 100644 --- a/drivers/rtc/rtc-lp8788.c +++ b/drivers/rtc/rtc-lp8788.c @@ -293,7 +293,7 @@ static int lp8788_rtc_probe(struct platform_device *pdev) rtc->alarm = lp->pdata ? lp->pdata->alarm_sel : DEFAULT_ALARM_SEL; platform_set_drvdata(pdev, rtc); - device_init_wakeup(dev, 1); + device_init_wakeup(dev, true); rtc->rdev = devm_rtc_device_register(dev, "lp8788_rtc", &lp8788_rtc_ops, THIS_MODULE); diff --git a/drivers/rtc/rtc-lpc32xx.c b/drivers/rtc/rtc-lpc32xx.c index 76ad7031a13d..74280bffe1b0 100644 --- a/drivers/rtc/rtc-lpc32xx.c +++ b/drivers/rtc/rtc-lpc32xx.c @@ -257,7 +257,7 @@ static int lpc32xx_rtc_probe(struct platform_device *pdev) dev_warn(&pdev->dev, "Can't request interrupt.\n"); rtc->irq = -1; } else { - device_init_wakeup(&pdev->dev, 1); + device_init_wakeup(&pdev->dev, true); } } diff --git a/drivers/rtc/rtc-max77686.c b/drivers/rtc/rtc-max77686.c index a8f4b645c09d..7bb044d2ac25 100644 --- a/drivers/rtc/rtc-max77686.c +++ b/drivers/rtc/rtc-max77686.c @@ -770,7 +770,7 @@ static int max77686_rtc_probe(struct platform_device *pdev) goto err_rtc; } - device_init_wakeup(&pdev->dev, 1); + device_init_wakeup(&pdev->dev, true); info->rtc_dev = devm_rtc_device_register(&pdev->dev, id->name, &max77686_rtc_ops, THIS_MODULE); diff --git a/drivers/rtc/rtc-max8925.c b/drivers/rtc/rtc-max8925.c index 64bb8ac6ef62..6ce8afbeac68 100644 --- a/drivers/rtc/rtc-max8925.c +++ b/drivers/rtc/rtc-max8925.c @@ -270,7 +270,7 @@ static int max8925_rtc_probe(struct platform_device *pdev) /* XXX - isn't this redundant? 
*/ platform_set_drvdata(pdev, info); - device_init_wakeup(&pdev->dev, 1); + device_init_wakeup(&pdev->dev, true); info->rtc_dev = devm_rtc_device_register(&pdev->dev, "max8925-rtc", &max8925_rtc_ops, THIS_MODULE); diff --git a/drivers/rtc/rtc-max8997.c b/drivers/rtc/rtc-max8997.c index 20e50d9fdf88..e7618d715bd8 100644 --- a/drivers/rtc/rtc-max8997.c +++ b/drivers/rtc/rtc-max8997.c @@ -473,7 +473,7 @@ static int max8997_rtc_probe(struct platform_device *pdev) max8997_rtc_enable_wtsr(info, true); max8997_rtc_enable_smpl(info, true); - device_init_wakeup(&pdev->dev, 1); + device_init_wakeup(&pdev->dev, true); info->rtc_dev = devm_rtc_device_register(&pdev->dev, "max8997-rtc", &max8997_rtc_ops, THIS_MODULE); diff --git a/drivers/rtc/rtc-meson-vrtc.c b/drivers/rtc/rtc-meson-vrtc.c index 648fa362ec44..5849729f7d01 100644 --- a/drivers/rtc/rtc-meson-vrtc.c +++ b/drivers/rtc/rtc-meson-vrtc.c @@ -74,7 +74,7 @@ static int meson_vrtc_probe(struct platform_device *pdev) if (IS_ERR(vrtc->io_alarm)) return PTR_ERR(vrtc->io_alarm); - device_init_wakeup(&pdev->dev, 1); + device_init_wakeup(&pdev->dev, true); platform_set_drvdata(pdev, vrtc); diff --git a/drivers/rtc/rtc-mpc5121.c b/drivers/rtc/rtc-mpc5121.c index 600328131603..b90f8337a7e6 100644 --- a/drivers/rtc/rtc-mpc5121.c +++ b/drivers/rtc/rtc-mpc5121.c @@ -303,7 +303,7 @@ static int mpc5121_rtc_probe(struct platform_device *op) return PTR_ERR(rtc->regs); } - device_init_wakeup(&op->dev, 1); + device_init_wakeup(&op->dev, true); platform_set_drvdata(op, rtc); diff --git a/drivers/rtc/rtc-mt6397.c b/drivers/rtc/rtc-mt6397.c index 152699219a2b..6979d225a78e 100644 --- a/drivers/rtc/rtc-mt6397.c +++ b/drivers/rtc/rtc-mt6397.c @@ -286,7 +286,7 @@ static int mtk_rtc_probe(struct platform_device *pdev) return ret; } - device_init_wakeup(&pdev->dev, 1); + device_init_wakeup(&pdev->dev, true); rtc->rtc_dev->ops = &mtk_rtc_ops; rtc->rtc_dev->range_min = RTC_TIMESTAMP_BEGIN_1900; diff --git a/drivers/rtc/rtc-mv.c b/drivers/rtc/rtc-mv.c index 51029c536244..c27ad626d09f 100644 --- a/drivers/rtc/rtc-mv.c +++ b/drivers/rtc/rtc-mv.c @@ -264,7 +264,7 @@ static int __init mv_rtc_probe(struct platform_device *pdev) } if (pdata->irq >= 0) - device_init_wakeup(&pdev->dev, 1); + device_init_wakeup(&pdev->dev, true); else clear_bit(RTC_FEATURE_ALARM, pdata->rtc->features); @@ -287,7 +287,7 @@ static void __exit mv_rtc_remove(struct platform_device *pdev) struct rtc_plat_data *pdata = platform_get_drvdata(pdev); if (pdata->irq >= 0) - device_init_wakeup(&pdev->dev, 0); + device_init_wakeup(&pdev->dev, false); if (!IS_ERR(pdata->clk)) clk_disable_unprepare(pdata->clk); diff --git a/drivers/rtc/rtc-mxc.c b/drivers/rtc/rtc-mxc.c index dbb935dbbd8a..608db97d450c 100644 --- a/drivers/rtc/rtc-mxc.c +++ b/drivers/rtc/rtc-mxc.c @@ -377,7 +377,7 @@ static int mxc_rtc_probe(struct platform_device *pdev) } if (pdata->irq >= 0) { - device_init_wakeup(&pdev->dev, 1); + device_init_wakeup(&pdev->dev, true); ret = dev_pm_set_wake_irq(&pdev->dev, pdata->irq); if (ret) dev_err(&pdev->dev, "failed to enable irq wake\n"); diff --git a/drivers/rtc/rtc-mxc_v2.c b/drivers/rtc/rtc-mxc_v2.c index 13c041bb79f1..570f27af4732 100644 --- a/drivers/rtc/rtc-mxc_v2.c +++ b/drivers/rtc/rtc-mxc_v2.c @@ -302,7 +302,7 @@ static int mxc_rtc_probe(struct platform_device *pdev) if (pdata->irq < 0) return pdata->irq; - device_init_wakeup(&pdev->dev, 1); + device_init_wakeup(&pdev->dev, true); ret = dev_pm_set_wake_irq(&pdev->dev, pdata->irq); if (ret) dev_err(&pdev->dev, "failed to enable irq wake\n"); diff 
--git a/drivers/rtc/rtc-omap.c b/drivers/rtc/rtc-omap.c index c123778e2d9b..0f90065e352c 100644 --- a/drivers/rtc/rtc-omap.c +++ b/drivers/rtc/rtc-omap.c @@ -920,7 +920,7 @@ static void omap_rtc_remove(struct platform_device *pdev) omap_rtc_power_off_rtc = NULL; } - device_init_wakeup(&pdev->dev, 0); + device_init_wakeup(&pdev->dev, false); if (!IS_ERR(rtc->clk)) clk_disable_unprepare(rtc->clk); diff --git a/drivers/rtc/rtc-palmas.c b/drivers/rtc/rtc-palmas.c index 7256a88b490c..aecada6bcf8b 100644 --- a/drivers/rtc/rtc-palmas.c +++ b/drivers/rtc/rtc-palmas.c @@ -287,7 +287,7 @@ static int palmas_rtc_probe(struct platform_device *pdev) palmas_rtc->irq = platform_get_irq(pdev, 0); - device_init_wakeup(&pdev->dev, 1); + device_init_wakeup(&pdev->dev, true); palmas_rtc->rtc = devm_rtc_device_register(&pdev->dev, pdev->name, &palmas_rtc_ops, THIS_MODULE); if (IS_ERR(palmas_rtc->rtc)) { diff --git a/drivers/rtc/rtc-pcf2127.c b/drivers/rtc/rtc-pcf2127.c index 9c04c4e1a49c..31c7dca8f469 100644 --- a/drivers/rtc/rtc-pcf2127.c +++ b/drivers/rtc/rtc-pcf2127.c @@ -20,6 +20,7 @@ #include <linux/i2c.h> #include <linux/spi/spi.h> #include <linux/bcd.h> +#include <linux/bitfield.h> #include <linux/rtc.h> #include <linux/slab.h> #include <linux/module.h> @@ -48,6 +49,7 @@ #define PCF2127_BIT_CTRL3_BLF BIT(2) #define PCF2127_BIT_CTRL3_BF BIT(3) #define PCF2127_BIT_CTRL3_BTSE BIT(4) +#define PCF2127_CTRL3_PM GENMASK(7, 5) /* Time and date registers */ #define PCF2127_REG_TIME_BASE 0x03 #define PCF2127_BIT_SC_OSF BIT(7) @@ -331,6 +333,84 @@ static int pcf2127_rtc_set_time(struct device *dev, struct rtc_time *tm) return 0; } +static int pcf2127_param_get(struct device *dev, struct rtc_param *param) +{ + struct pcf2127 *pcf2127 = dev_get_drvdata(dev); + u32 value; + int ret; + + switch (param->param) { + case RTC_PARAM_BACKUP_SWITCH_MODE: + ret = regmap_read(pcf2127->regmap, PCF2127_REG_CTRL3, &value); + if (ret < 0) + return ret; + + value = FIELD_GET(PCF2127_CTRL3_PM, value); + + if (value < 0x3) + param->uvalue = RTC_BSM_LEVEL; + else if (value < 0x6) + param->uvalue = RTC_BSM_DIRECT; + else + param->uvalue = RTC_BSM_DISABLED; + + break; + + default: + return -EINVAL; + } + + return 0; +} + +static int pcf2127_param_set(struct device *dev, struct rtc_param *param) +{ + struct pcf2127 *pcf2127 = dev_get_drvdata(dev); + u8 mode = 0; + u32 value; + int ret; + + switch (param->param) { + case RTC_PARAM_BACKUP_SWITCH_MODE: + ret = regmap_read(pcf2127->regmap, PCF2127_REG_CTRL3, &value); + if (ret < 0) + return ret; + + value = FIELD_GET(PCF2127_CTRL3_PM, value); + + if (value > 5) + value -= 5; + else if (value > 2) + value -= 3; + + switch (param->uvalue) { + case RTC_BSM_LEVEL: + break; + case RTC_BSM_DIRECT: + mode = 3; + break; + case RTC_BSM_DISABLED: + if (value == 0) + value = 1; + mode = 5; + break; + default: + return -EINVAL; + } + + return regmap_update_bits(pcf2127->regmap, PCF2127_REG_CTRL3, + PCF2127_CTRL3_PM, + FIELD_PREP(PCF2127_CTRL3_PM, mode + value)); + + break; + + default: + return -EINVAL; + } + + return 0; +} + static int pcf2127_rtc_ioctl(struct device *dev, unsigned int cmd, unsigned long arg) { @@ -741,6 +821,8 @@ static const struct rtc_class_ops pcf2127_rtc_ops = { .read_alarm = pcf2127_rtc_read_alarm, .set_alarm = pcf2127_rtc_set_alarm, .alarm_irq_enable = pcf2127_rtc_alarm_irq_enable, + .param_get = pcf2127_param_get, + .param_set = pcf2127_param_set, }; /* sysfs interface */ diff --git a/drivers/rtc/rtc-pcf85063.c b/drivers/rtc/rtc-pcf85063.c index fdbc07f14036..905986c61655 100644 
--- a/drivers/rtc/rtc-pcf85063.c +++ b/drivers/rtc/rtc-pcf85063.c @@ -322,7 +322,16 @@ static const struct rtc_class_ops pcf85063_rtc_ops = { static int pcf85063_nvmem_read(void *priv, unsigned int offset, void *val, size_t bytes) { - return regmap_read(priv, PCF85063_REG_RAM, val); + unsigned int tmp; + int ret; + + ret = regmap_read(priv, PCF85063_REG_RAM, &tmp); + if (ret < 0) + return ret; + + *(u8 *)val = tmp; + + return 0; } static int pcf85063_nvmem_write(void *priv, unsigned int offset, diff --git a/drivers/rtc/rtc-pic32.c b/drivers/rtc/rtc-pic32.c index bed3c27e665f..2812da2c50c5 100644 --- a/drivers/rtc/rtc-pic32.c +++ b/drivers/rtc/rtc-pic32.c @@ -330,7 +330,7 @@ static int pic32_rtc_probe(struct platform_device *pdev) pic32_rtc_enable(pdata, 1); - device_init_wakeup(&pdev->dev, 1); + device_init_wakeup(&pdev->dev, true); pdata->rtc->ops = &pic32_rtcops; pdata->rtc->range_min = RTC_TIMESTAMP_BEGIN_2000; diff --git a/drivers/rtc/rtc-pm8xxx.c b/drivers/rtc/rtc-pm8xxx.c index 2f32187ecc8d..b2518aea4218 100644 --- a/drivers/rtc/rtc-pm8xxx.c +++ b/drivers/rtc/rtc-pm8xxx.c @@ -503,7 +503,7 @@ static int pm8xxx_rtc_probe(struct platform_device *pdev) platform_set_drvdata(pdev, rtc_dd); - device_init_wakeup(&pdev->dev, 1); + device_init_wakeup(&pdev->dev, true); rtc_dd->rtc = devm_rtc_allocate_device(&pdev->dev); if (IS_ERR(rtc_dd->rtc)) diff --git a/drivers/rtc/rtc-pxa.c b/drivers/rtc/rtc-pxa.c index 34d8545c8e15..62ee6b8f9bcd 100644 --- a/drivers/rtc/rtc-pxa.c +++ b/drivers/rtc/rtc-pxa.c @@ -360,7 +360,7 @@ static int __init pxa_rtc_probe(struct platform_device *pdev) return ret; } - device_init_wakeup(dev, 1); + device_init_wakeup(dev, true); return 0; } diff --git a/drivers/rtc/rtc-rc5t583.c b/drivers/rtc/rtc-rc5t583.c index eecb49bab56a..8ba9cda74acf 100644 --- a/drivers/rtc/rtc-rc5t583.c +++ b/drivers/rtc/rtc-rc5t583.c @@ -245,7 +245,7 @@ static int rc5t583_rtc_probe(struct platform_device *pdev) dev_err(&pdev->dev, "IRQ is not free.\n"); return ret; } - device_init_wakeup(&pdev->dev, 1); + device_init_wakeup(&pdev->dev, true); ricoh_rtc->rtc = devm_rtc_device_register(&pdev->dev, pdev->name, &rc5t583_rtc_ops, THIS_MODULE); diff --git a/drivers/rtc/rtc-rc5t619.c b/drivers/rtc/rtc-rc5t619.c index 711f62eecd79..74d169102074 100644 --- a/drivers/rtc/rtc-rc5t619.c +++ b/drivers/rtc/rtc-rc5t619.c @@ -414,7 +414,7 @@ static int rc5t619_rtc_probe(struct platform_device *pdev) } else { /* enable wake */ - device_init_wakeup(&pdev->dev, 1); + device_init_wakeup(&pdev->dev, true); enable_irq_wake(rtc->irq); } } else { diff --git a/drivers/rtc/rtc-renesas-rtca3.c b/drivers/rtc/rtc-renesas-rtca3.c index d127933bfc8a..a056291d3887 100644 --- a/drivers/rtc/rtc-renesas-rtca3.c +++ b/drivers/rtc/rtc-renesas-rtca3.c @@ -768,7 +768,7 @@ static int rtca3_probe(struct platform_device *pdev) if (ret) return ret; - device_init_wakeup(&pdev->dev, 1); + device_init_wakeup(&pdev->dev, true); priv->rtc_dev = devm_rtc_allocate_device(&pdev->dev); if (IS_ERR(priv->rtc_dev)) diff --git a/drivers/rtc/rtc-rk808.c b/drivers/rtc/rtc-rk808.c index 2d9bcb3ce1e3..59b8e9a30fe6 100644 --- a/drivers/rtc/rtc-rk808.c +++ b/drivers/rtc/rtc-rk808.c @@ -418,7 +418,7 @@ static int rk808_rtc_probe(struct platform_device *pdev) return ret; } - device_init_wakeup(&pdev->dev, 1); + device_init_wakeup(&pdev->dev, true); rk808_rtc->rtc = devm_rtc_allocate_device(&pdev->dev); if (IS_ERR(rk808_rtc->rtc)) diff --git a/drivers/rtc/rtc-s3c.c b/drivers/rtc/rtc-s3c.c index c0ac3bdb2f42..58c957eb753d 100644 --- a/drivers/rtc/rtc-s3c.c +++ 
b/drivers/rtc/rtc-s3c.c @@ -456,7 +456,7 @@ static int s3c_rtc_probe(struct platform_device *pdev) dev_dbg(&pdev->dev, "s3c2410_rtc: RTCCON=%02x\n", readw(info->base + S3C2410_RTCCON)); - device_init_wakeup(&pdev->dev, 1); + device_init_wakeup(&pdev->dev, true); info->rtc = devm_rtc_allocate_device(&pdev->dev); if (IS_ERR(info->rtc)) { diff --git a/drivers/rtc/rtc-s5m.c b/drivers/rtc/rtc-s5m.c index dad294a0ce2a..36acca5b2639 100644 --- a/drivers/rtc/rtc-s5m.c +++ b/drivers/rtc/rtc-s5m.c @@ -729,7 +729,7 @@ static int s5m_rtc_probe(struct platform_device *pdev) info->irq, ret); return ret; } - device_init_wakeup(&pdev->dev, 1); + device_init_wakeup(&pdev->dev, true); } return devm_rtc_register_device(info->rtc_dev); diff --git a/drivers/rtc/rtc-sa1100.c b/drivers/rtc/rtc-sa1100.c index 13799b1abca1..1ad93648d69c 100644 --- a/drivers/rtc/rtc-sa1100.c +++ b/drivers/rtc/rtc-sa1100.c @@ -292,7 +292,7 @@ static int sa1100_rtc_probe(struct platform_device *pdev) } platform_set_drvdata(pdev, info); - device_init_wakeup(&pdev->dev, 1); + device_init_wakeup(&pdev->dev, true); return sa1100_rtc_init(pdev, info); } diff --git a/drivers/rtc/rtc-sc27xx.c b/drivers/rtc/rtc-sc27xx.c index ce7a2ddbbc16..2b83561d4d28 100644 --- a/drivers/rtc/rtc-sc27xx.c +++ b/drivers/rtc/rtc-sc27xx.c @@ -613,14 +613,14 @@ static int sprd_rtc_probe(struct platform_device *pdev) return ret; } - device_init_wakeup(&pdev->dev, 1); + device_init_wakeup(&pdev->dev, true); rtc->rtc->ops = &sprd_rtc_ops; rtc->rtc->range_min = 0; rtc->rtc->range_max = 5662310399LL; ret = devm_rtc_register_device(rtc->rtc); if (ret) { - device_init_wakeup(&pdev->dev, 0); + device_init_wakeup(&pdev->dev, false); return ret; } diff --git a/drivers/rtc/rtc-sh.c b/drivers/rtc/rtc-sh.c index a5df521876ba..9ea40f40188f 100644 --- a/drivers/rtc/rtc-sh.c +++ b/drivers/rtc/rtc-sh.c @@ -611,7 +611,7 @@ static int __init sh_rtc_probe(struct platform_device *pdev) if (ret) goto err_unmap; - device_init_wakeup(&pdev->dev, 1); + device_init_wakeup(&pdev->dev, true); return 0; err_unmap: diff --git a/drivers/rtc/rtc-spear.c b/drivers/rtc/rtc-spear.c index 26eed927f8b3..959acff8faff 100644 --- a/drivers/rtc/rtc-spear.c +++ b/drivers/rtc/rtc-spear.c @@ -395,7 +395,7 @@ static int spear_rtc_probe(struct platform_device *pdev) goto err_disable_clock; if (!device_can_wakeup(&pdev->dev)) - device_init_wakeup(&pdev->dev, 1); + device_init_wakeup(&pdev->dev, true); return 0; @@ -411,7 +411,7 @@ static void spear_rtc_remove(struct platform_device *pdev) spear_rtc_disable_interrupt(config); clk_disable_unprepare(config->clk); - device_init_wakeup(&pdev->dev, 0); + device_init_wakeup(&pdev->dev, false); } #ifdef CONFIG_PM_SLEEP diff --git a/drivers/rtc/rtc-stm32.c b/drivers/rtc/rtc-stm32.c index 9f1a019ec8af..a0564d443569 100644 --- a/drivers/rtc/rtc-stm32.c +++ b/drivers/rtc/rtc-stm32.c @@ -1074,26 +1074,18 @@ static int stm32_rtc_probe(struct platform_device *pdev) regs = &rtc->data->regs; if (rtc->data->need_dbp) { - rtc->dbp = syscon_regmap_lookup_by_phandle(pdev->dev.of_node, - "st,syscfg"); + unsigned int args[2]; + + rtc->dbp = syscon_regmap_lookup_by_phandle_args(pdev->dev.of_node, + "st,syscfg", + 2, args); if (IS_ERR(rtc->dbp)) { dev_err(&pdev->dev, "no st,syscfg\n"); return PTR_ERR(rtc->dbp); } - ret = of_property_read_u32_index(pdev->dev.of_node, "st,syscfg", - 1, &rtc->dbp_reg); - if (ret) { - dev_err(&pdev->dev, "can't read DBP register offset\n"); - return ret; - } - - ret = of_property_read_u32_index(pdev->dev.of_node, "st,syscfg", - 2, &rtc->dbp_mask); - if 
(ret) { - dev_err(&pdev->dev, "can't read DBP register mask\n"); - return ret; - } + rtc->dbp_reg = args[0]; + rtc->dbp_mask = args[1]; } if (!rtc->data->has_pclk) { diff --git a/drivers/rtc/rtc-sun6i.c b/drivers/rtc/rtc-sun6i.c index e681c1745866..e5e6013d080e 100644 --- a/drivers/rtc/rtc-sun6i.c +++ b/drivers/rtc/rtc-sun6i.c @@ -826,7 +826,7 @@ static int sun6i_rtc_probe(struct platform_device *pdev) clk_prepare_enable(chip->losc); - device_init_wakeup(&pdev->dev, 1); + device_init_wakeup(&pdev->dev, true); chip->rtc = devm_rtc_allocate_device(&pdev->dev); if (IS_ERR(chip->rtc)) diff --git a/drivers/rtc/rtc-sunplus.c b/drivers/rtc/rtc-sunplus.c index 9b1ce0e8ba27..519a06e728d6 100644 --- a/drivers/rtc/rtc-sunplus.c +++ b/drivers/rtc/rtc-sunplus.c @@ -269,7 +269,7 @@ static int sp_rtc_probe(struct platform_device *plat_dev) if (ret) goto free_reset_assert; - device_init_wakeup(&plat_dev->dev, 1); + device_init_wakeup(&plat_dev->dev, true); dev_set_drvdata(&plat_dev->dev, sp_rtc); sp_rtc->rtc = devm_rtc_allocate_device(&plat_dev->dev); @@ -307,7 +307,7 @@ static void sp_rtc_remove(struct platform_device *plat_dev) { struct sunplus_rtc *sp_rtc = dev_get_drvdata(&plat_dev->dev); - device_init_wakeup(&plat_dev->dev, 0); + device_init_wakeup(&plat_dev->dev, false); reset_control_assert(sp_rtc->rstc); clk_disable_unprepare(sp_rtc->rtcclk); } diff --git a/drivers/rtc/rtc-tegra.c b/drivers/rtc/rtc-tegra.c index 79a3102c8354..46788db89953 100644 --- a/drivers/rtc/rtc-tegra.c +++ b/drivers/rtc/rtc-tegra.c @@ -319,7 +319,7 @@ static int tegra_rtc_probe(struct platform_device *pdev) writel(0xffffffff, info->base + TEGRA_RTC_REG_INTR_STATUS); writel(0, info->base + TEGRA_RTC_REG_INTR_MASK); - device_init_wakeup(&pdev->dev, 1); + device_init_wakeup(&pdev->dev, true); ret = devm_request_irq(&pdev->dev, info->irq, tegra_rtc_irq_handler, IRQF_TRIGGER_HIGH, dev_name(&pdev->dev), diff --git a/drivers/rtc/rtc-test.c b/drivers/rtc/rtc-test.c index 7e0d8fb26465..a68b8c884102 100644 --- a/drivers/rtc/rtc-test.c +++ b/drivers/rtc/rtc-test.c @@ -132,7 +132,7 @@ static int test_probe(struct platform_device *plat_dev) break; default: rtd->rtc->ops = &test_rtc_ops; - device_init_wakeup(&plat_dev->dev, 1); + device_init_wakeup(&plat_dev->dev, true); } timer_setup(&rtd->alarm, test_rtc_alarm_handler, 0); diff --git a/drivers/rtc/rtc-tps6586x.c b/drivers/rtc/rtc-tps6586x.c index e796729fc817..54c8429b16bf 100644 --- a/drivers/rtc/rtc-tps6586x.c +++ b/drivers/rtc/rtc-tps6586x.c @@ -241,7 +241,7 @@ static int tps6586x_rtc_probe(struct platform_device *pdev) return ret; } - device_init_wakeup(&pdev->dev, 1); + device_init_wakeup(&pdev->dev, true); platform_set_drvdata(pdev, rtc); rtc->rtc = devm_rtc_allocate_device(&pdev->dev); diff --git a/drivers/rtc/rtc-tps65910.c b/drivers/rtc/rtc-tps65910.c index 2ea1bbfbbc2a..284aa2f0392b 100644 --- a/drivers/rtc/rtc-tps65910.c +++ b/drivers/rtc/rtc-tps65910.c @@ -418,7 +418,7 @@ static int tps65910_rtc_probe(struct platform_device *pdev) tps_rtc->irq = irq; if (irq != -1) { if (device_property_present(tps65910->dev, "wakeup-source")) - device_init_wakeup(&pdev->dev, 1); + device_init_wakeup(&pdev->dev, true); else device_set_wakeup_capable(&pdev->dev, 1); } else { diff --git a/drivers/rtc/rtc-tps6594.c b/drivers/rtc/rtc-tps6594.c index e69667634137..7c6246e3f029 100644 --- a/drivers/rtc/rtc-tps6594.c +++ b/drivers/rtc/rtc-tps6594.c @@ -37,7 +37,7 @@ #define MAX_OFFSET (277774) // Number of ticks per hour -#define TICKS_PER_HOUR (32768 * 3600) +#define TICKS_PER_HOUR (32768 * 
3600LL) // Multiplier for ppb conversions #define PPB_MULT NANO diff --git a/drivers/rtc/rtc-twl.c b/drivers/rtc/rtc-twl.c index 794429182b34..e6106e67e1f4 100644 --- a/drivers/rtc/rtc-twl.c +++ b/drivers/rtc/rtc-twl.c @@ -572,7 +572,7 @@ static int twl_rtc_probe(struct platform_device *pdev) return ret; platform_set_drvdata(pdev, twl_rtc); - device_init_wakeup(&pdev->dev, 1); + device_init_wakeup(&pdev->dev, true); twl_rtc->rtc = devm_rtc_device_register(&pdev->dev, pdev->name, &twl_rtc_ops, THIS_MODULE); diff --git a/drivers/rtc/rtc-wm831x.c b/drivers/rtc/rtc-wm831x.c index 640833e21057..218316be942a 100644 --- a/drivers/rtc/rtc-wm831x.c +++ b/drivers/rtc/rtc-wm831x.c @@ -420,7 +420,7 @@ static int wm831x_rtc_probe(struct platform_device *pdev) if (ret & WM831X_RTC_ALM_ENA) wm831x_rtc->alarm_enabled = 1; - device_init_wakeup(&pdev->dev, 1); + device_init_wakeup(&pdev->dev, true); wm831x_rtc->rtc = devm_rtc_allocate_device(&pdev->dev); if (IS_ERR(wm831x_rtc->rtc)) diff --git a/drivers/rtc/rtc-wm8350.c b/drivers/rtc/rtc-wm8350.c index 6797eb4d2e49..3bd60d067a5e 100644 --- a/drivers/rtc/rtc-wm8350.c +++ b/drivers/rtc/rtc-wm8350.c @@ -420,7 +420,7 @@ static int wm8350_rtc_probe(struct platform_device *pdev) } } - device_init_wakeup(&pdev->dev, 1); + device_init_wakeup(&pdev->dev, true); wm_rtc->rtc = devm_rtc_device_register(&pdev->dev, "wm8350", &wm8350_rtc_ops, THIS_MODULE); diff --git a/drivers/rtc/rtc-xgene.c b/drivers/rtc/rtc-xgene.c index 0813ea1a03c2..6660b664e8dd 100644 --- a/drivers/rtc/rtc-xgene.c +++ b/drivers/rtc/rtc-xgene.c @@ -174,7 +174,7 @@ static int xgene_rtc_probe(struct platform_device *pdev) /* Turn on the clock and the crystal */ writel(RTC_CCR_EN, pdata->csr_base + RTC_CCR); - ret = device_init_wakeup(&pdev->dev, 1); + ret = device_init_wakeup(&pdev->dev, true); if (ret) { clk_disable_unprepare(pdata->clk); return ret; @@ -197,7 +197,7 @@ static void xgene_rtc_remove(struct platform_device *pdev) struct xgene_rtc_dev *pdata = platform_get_drvdata(pdev); xgene_rtc_alarm_irq_enable(&pdev->dev, 0); - device_init_wakeup(&pdev->dev, 0); + device_init_wakeup(&pdev->dev, false); clk_disable_unprepare(pdata->clk); } diff --git a/drivers/rtc/rtc-zynqmp.c b/drivers/rtc/rtc-zynqmp.c index af1abb69d1e3..f39102b66eac 100644 --- a/drivers/rtc/rtc-zynqmp.c +++ b/drivers/rtc/rtc-zynqmp.c @@ -318,8 +318,8 @@ static int xlnx_rtc_probe(struct platform_device *pdev) return ret; } - /* Getting the rtc_clk info */ - xrtcdev->rtc_clk = devm_clk_get_optional(&pdev->dev, "rtc_clk"); + /* Getting the rtc info */ + xrtcdev->rtc_clk = devm_clk_get_optional(&pdev->dev, "rtc"); if (IS_ERR(xrtcdev->rtc_clk)) { if (PTR_ERR(xrtcdev->rtc_clk) != -EPROBE_DEFER) dev_warn(&pdev->dev, "Device clock not found.\n"); @@ -337,7 +337,7 @@ static int xlnx_rtc_probe(struct platform_device *pdev) xlnx_init_rtc(xrtcdev); - device_init_wakeup(&pdev->dev, 1); + device_init_wakeup(&pdev->dev, true); return devm_rtc_register_device(xrtcdev->rtc); } @@ -345,7 +345,7 @@ static int xlnx_rtc_probe(struct platform_device *pdev) static void xlnx_rtc_remove(struct platform_device *pdev) { xlnx_rtc_alarm_irq_enable(&pdev->dev, 0); - device_init_wakeup(&pdev->dev, 0); + device_init_wakeup(&pdev->dev, false); } static int __maybe_unused xlnx_rtc_suspend(struct device *dev) diff --git a/drivers/s390/char/sclp.c b/drivers/s390/char/sclp.c index fbffd451031f..45bd001206a2 100644 --- a/drivers/s390/char/sclp.c +++ b/drivers/s390/char/sclp.c @@ -245,7 +245,6 @@ static void sclp_request_timeout(bool force_restart); static void 
sclp_process_queue(void); static void __sclp_make_read_req(void); static int sclp_init_mask(int calculate); -static int sclp_init(void); static void __sclp_queue_read_req(void) @@ -1251,8 +1250,7 @@ static struct platform_driver sclp_pdrv = { /* Initialize SCLP driver. Return zero if driver is operational, non-zero * otherwise. */ -static int -sclp_init(void) +int sclp_init(void) { unsigned long flags; int rc = 0; @@ -1305,13 +1303,7 @@ fail_unlock: static __init int sclp_initcall(void) { - int rc; - - rc = platform_driver_register(&sclp_pdrv); - if (rc) - return rc; - - return sclp_init(); + return platform_driver_register(&sclp_pdrv); } arch_initcall(sclp_initcall); diff --git a/drivers/s390/char/vmlogrdr.c b/drivers/s390/char/vmlogrdr.c index 3dd50ac9c5b0..b2d93a6e36c4 100644 --- a/drivers/s390/char/vmlogrdr.c +++ b/drivers/s390/char/vmlogrdr.c @@ -123,7 +123,7 @@ static DECLARE_WAIT_QUEUE_HEAD(read_wait_queue); */ static struct vmlogrdr_priv_t sys_ser[] = { - { .system_service = "*LOGREC ", + { .system_service = { '*', 'L', 'O', 'G', 'R', 'E', 'C', ' ' }, .internal_name = "logrec", .recording_name = "EREP", .minor_num = 0, @@ -132,7 +132,7 @@ static struct vmlogrdr_priv_t sys_ser[] = { .autorecording = 1, .autopurge = 1, }, - { .system_service = "*ACCOUNT", + { .system_service = { '*', 'A', 'C', 'C', 'O', 'U', 'N', 'T' }, .internal_name = "account", .recording_name = "ACCOUNT", .minor_num = 1, @@ -141,7 +141,7 @@ static struct vmlogrdr_priv_t sys_ser[] = { .autorecording = 1, .autopurge = 1, }, - { .system_service = "*SYMPTOM", + { .system_service = { '*', 'S', 'Y', 'M', 'P', 'T', 'O', 'M' }, .internal_name = "symptom", .recording_name = "SYMPTOM", .minor_num = 2, @@ -356,7 +356,7 @@ static int vmlogrdr_open (struct inode *inode, struct file *filp) if (connect_rc) { pr_err("vmlogrdr: iucv connection to %s " "failed with rc %i \n", - logptr->system_service, connect_rc); + logptr->internal_name, connect_rc); goto out_path; } diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index e7ea1f04164a..d776f13cd160 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -2736,6 +2736,7 @@ int scsi_device_quiesce(struct scsi_device *sdev) { struct request_queue *q = sdev->request_queue; + unsigned int memflags; int err; /* @@ -2750,7 +2751,7 @@ scsi_device_quiesce(struct scsi_device *sdev) blk_set_pm_only(q); - blk_mq_freeze_queue(q); + memflags = blk_mq_freeze_queue(q); /* * Ensure that the effect of blk_set_pm_only() will be visible * for percpu_ref_tryget() callers that occur after the queue @@ -2758,7 +2759,7 @@ scsi_device_quiesce(struct scsi_device *sdev) * was called. See also https://lwn.net/Articles/573497/. 
*/ synchronize_rcu(); - blk_mq_unfreeze_queue(q); + blk_mq_unfreeze_queue(q, memflags); mutex_lock(&sdev->state_mutex); err = scsi_device_set_state(sdev, SDEV_QUIESCE); diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c index f2093982b3db..087fcbfc9aaa 100644 --- a/drivers/scsi/scsi_scan.c +++ b/drivers/scsi/scsi_scan.c @@ -220,6 +220,7 @@ static int scsi_realloc_sdev_budget_map(struct scsi_device *sdev, int new_shift = sbitmap_calculate_shift(depth); bool need_alloc = !sdev->budget_map.map; bool need_free = false; + unsigned int memflags; int ret; struct sbitmap sb_backup; @@ -240,7 +241,7 @@ static int scsi_realloc_sdev_budget_map(struct scsi_device *sdev, * and here disk isn't added yet, so freezing is pretty fast */ if (need_free) { - blk_mq_freeze_queue(sdev->request_queue); + memflags = blk_mq_freeze_queue(sdev->request_queue); sb_backup = sdev->budget_map; } ret = sbitmap_init_node(&sdev->budget_map, @@ -256,7 +257,7 @@ static int scsi_realloc_sdev_budget_map(struct scsi_device *sdev, else sbitmap_free(&sb_backup); ret = 0; - blk_mq_unfreeze_queue(sdev->request_queue); + blk_mq_unfreeze_queue(sdev->request_queue, memflags); } return ret; } diff --git a/drivers/ufs/core/ufs-sysfs.c b/drivers/ufs/core/ufs-sysfs.c index 796e37a1d859..3438269a5440 100644 --- a/drivers/ufs/core/ufs-sysfs.c +++ b/drivers/ufs/core/ufs-sysfs.c @@ -1439,6 +1439,7 @@ static ssize_t max_number_of_rtt_store(struct device *dev, struct ufs_hba *hba = dev_get_drvdata(dev); struct ufs_dev_info *dev_info = &hba->dev_info; struct scsi_device *sdev; + unsigned int memflags; unsigned int rtt; int ret; @@ -1458,14 +1459,16 @@ static ssize_t max_number_of_rtt_store(struct device *dev, ufshcd_rpm_get_sync(hba); + memflags = memalloc_noio_save(); shost_for_each_device(sdev, hba->host) - blk_mq_freeze_queue(sdev->request_queue); + blk_mq_freeze_queue_nomemsave(sdev->request_queue); ret = ufshcd_query_attr(hba, UPIU_QUERY_OPCODE_WRITE_ATTR, QUERY_ATTR_IDN_MAX_NUM_OF_RTT, 0, 0, &rtt); shost_for_each_device(sdev, hba->host) - blk_mq_unfreeze_queue(sdev->request_queue); + blk_mq_unfreeze_queue_nomemrestore(sdev->request_queue); + memalloc_noio_restore(memflags); ufshcd_rpm_put_sync(hba); diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h index 698c43dd5dc8..f28bc763847a 100644 --- a/fs/9p/v9fs.h +++ b/fs/9p/v9fs.h @@ -202,7 +202,7 @@ static inline struct v9fs_session_info *v9fs_inode2v9ses(struct inode *inode) return inode->i_sb->s_fs_info; } -static inline struct v9fs_session_info *v9fs_dentry2v9ses(struct dentry *dentry) +static inline struct v9fs_session_info *v9fs_dentry2v9ses(const struct dentry *dentry) { return dentry->d_sb->s_fs_info; } diff --git a/fs/9p/vfs_dentry.c b/fs/9p/vfs_dentry.c index 01338d4c2d9e..5061f192eafd 100644 --- a/fs/9p/vfs_dentry.c +++ b/fs/9p/vfs_dentry.c @@ -61,7 +61,7 @@ static void v9fs_dentry_release(struct dentry *dentry) p9_fid_put(hlist_entry(p, struct p9_fid, dlist)); } -static int v9fs_lookup_revalidate(struct dentry *dentry, unsigned int flags) +static int __v9fs_lookup_revalidate(struct dentry *dentry, unsigned int flags) { struct p9_fid *fid; struct inode *inode; @@ -99,14 +99,36 @@ out_valid: return 1; } +static int v9fs_lookup_revalidate(struct inode *dir, const struct qstr *name, + struct dentry *dentry, unsigned int flags) +{ + return __v9fs_lookup_revalidate(dentry, flags); +} + +static bool v9fs_dentry_unalias_trylock(const struct dentry *dentry) +{ + struct v9fs_session_info *v9ses = v9fs_dentry2v9ses(dentry); + return down_write_trylock(&v9ses->rename_sem); +} + +static void 
v9fs_dentry_unalias_unlock(const struct dentry *dentry) +{ + struct v9fs_session_info *v9ses = v9fs_dentry2v9ses(dentry); + up_write(&v9ses->rename_sem); +} + const struct dentry_operations v9fs_cached_dentry_operations = { .d_revalidate = v9fs_lookup_revalidate, - .d_weak_revalidate = v9fs_lookup_revalidate, + .d_weak_revalidate = __v9fs_lookup_revalidate, .d_delete = v9fs_cached_dentry_delete, .d_release = v9fs_dentry_release, + .d_unalias_trylock = v9fs_dentry_unalias_trylock, + .d_unalias_unlock = v9fs_dentry_unalias_unlock, }; const struct dentry_operations v9fs_dentry_operations = { .d_delete = always_delete_dentry, .d_release = v9fs_dentry_release, + .d_unalias_trylock = v9fs_dentry_unalias_trylock, + .d_unalias_unlock = v9fs_dentry_unalias_unlock, }; diff --git a/fs/afs/dir.c b/fs/afs/dir.c index a843c36fc471..02cbf38e1a77 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -23,7 +23,8 @@ static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags); static int afs_dir_open(struct inode *inode, struct file *file); static int afs_readdir(struct file *file, struct dir_context *ctx); -static int afs_d_revalidate(struct dentry *dentry, unsigned int flags); +static int afs_d_revalidate(struct inode *dir, const struct qstr *name, + struct dentry *dentry, unsigned int flags); static int afs_d_delete(const struct dentry *dentry); static void afs_d_iput(struct dentry *dentry, struct inode *inode); static bool afs_lookup_one_filldir(struct dir_context *ctx, const char *name, int nlen, @@ -597,19 +598,19 @@ static bool afs_lookup_one_filldir(struct dir_context *ctx, const char *name, * Do a lookup of a single name in a directory * - just returns the FID the dentry name maps to if found */ -static int afs_do_lookup_one(struct inode *dir, struct dentry *dentry, +static int afs_do_lookup_one(struct inode *dir, const struct qstr *name, struct afs_fid *fid, afs_dataversion_t *_dir_version) { struct afs_super_info *as = dir->i_sb->s_fs_info; struct afs_lookup_one_cookie cookie = { .ctx.actor = afs_lookup_one_filldir, - .name = dentry->d_name, + .name = *name, .fid.vid = as->volume->vid }; int ret; - _enter("{%lu},%p{%pd},", dir->i_ino, dentry, dentry); + _enter("{%lu},{%.*s},", dir->i_ino, name->len, name->name); /* search the directory */ ret = afs_dir_iterate(dir, &cookie.ctx, NULL, _dir_version); @@ -1023,21 +1024,12 @@ static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry, /* * Check the validity of a dentry under RCU conditions. */ -static int afs_d_revalidate_rcu(struct dentry *dentry) +static int afs_d_revalidate_rcu(struct afs_vnode *dvnode, struct dentry *dentry) { - struct afs_vnode *dvnode; - struct dentry *parent; - struct inode *dir; long dir_version, de_version; _enter("%p", dentry); - /* Check the parent directory is still valid first. */ - parent = READ_ONCE(dentry->d_parent); - dir = d_inode_rcu(parent); - if (!dir) - return -ECHILD; - dvnode = AFS_FS_I(dir); if (test_bit(AFS_VNODE_DELETED, &dvnode->flags)) return -ECHILD; @@ -1065,11 +1057,11 @@ static int afs_d_revalidate_rcu(struct dentry *dentry) * - NOTE! 
the hit can be a negative hit too, so we can't assume we have an * inode */ -static int afs_d_revalidate(struct dentry *dentry, unsigned int flags) +static int afs_d_revalidate(struct inode *parent_dir, const struct qstr *name, + struct dentry *dentry, unsigned int flags) { - struct afs_vnode *vnode, *dir; + struct afs_vnode *vnode, *dir = AFS_FS_I(parent_dir); struct afs_fid fid; - struct dentry *parent; struct inode *inode; struct key *key; afs_dataversion_t dir_version, invalid_before; @@ -1077,7 +1069,7 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags) int ret; if (flags & LOOKUP_RCU) - return afs_d_revalidate_rcu(dentry); + return afs_d_revalidate_rcu(dir, dentry); if (d_really_is_positive(dentry)) { vnode = AFS_FS_I(d_inode(dentry)); @@ -1092,14 +1084,9 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags) if (IS_ERR(key)) key = NULL; - /* Hold the parent dentry so we can peer at it */ - parent = dget_parent(dentry); - dir = AFS_FS_I(d_inode(parent)); - /* validate the parent directory */ ret = afs_validate(dir, key); if (ret == -ERESTARTSYS) { - dput(parent); key_put(key); return ret; } @@ -1127,7 +1114,7 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags) afs_stat_v(dir, n_reval); /* search the directory for this vnode */ - ret = afs_do_lookup_one(&dir->netfs.inode, dentry, &fid, &dir_version); + ret = afs_do_lookup_one(&dir->netfs.inode, name, &fid, &dir_version); switch (ret) { case 0: /* the filename maps to something */ @@ -1171,22 +1158,19 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags) goto out_valid; default: - _debug("failed to iterate dir %pd: %d", - parent, ret); + _debug("failed to iterate parent %pd2: %d", dentry, ret); goto not_found; } out_valid: dentry->d_fsdata = (void *)(unsigned long)dir_version; out_valid_noupdate: - dput(parent); key_put(key); _leave(" = 1 [valid]"); return 1; not_found: _debug("dropping dentry %pd2", dentry); - dput(parent); key_put(key); _leave(" = 0 [bad]"); diff --git a/fs/bcachefs/btree_cache.c b/fs/bcachefs/btree_cache.c index 672ca2c1d37d..ca755e8d1a37 100644 --- a/fs/bcachefs/btree_cache.c +++ b/fs/bcachefs/btree_cache.c @@ -24,7 +24,10 @@ do { \ } while (0) const char * const bch2_btree_node_flags[] = { -#define x(f) #f, + "typebit", + "typebit", + "typebit", +#define x(f) [BTREE_NODE_##f] = #f, BTREE_FLAGS() #undef x NULL diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c index 367231ab1980..5988219c6908 100644 --- a/fs/bcachefs/btree_iter.c +++ b/fs/bcachefs/btree_iter.c @@ -2239,8 +2239,6 @@ struct bkey_s_c btree_trans_peek_key_cache(struct btree_iter *iter, struct bpos if (unlikely(ret)) return bkey_s_c_err(ret); - btree_path_set_should_be_locked(trans, trans->paths + iter->key_cache_path); - k = bch2_btree_path_peek_slot(trans->paths + iter->key_cache_path, &u); if (!k.k) return k; @@ -2251,6 +2249,7 @@ struct bkey_s_c btree_trans_peek_key_cache(struct btree_iter *iter, struct bpos iter->k = u; k.k = &iter->k; + btree_path_set_should_be_locked(trans, trans->paths + iter->key_cache_path); return k; } diff --git a/fs/bcachefs/btree_key_cache.c b/fs/bcachefs/btree_key_cache.c index 3b62296c3100..c378b97ebeca 100644 --- a/fs/bcachefs/btree_key_cache.c +++ b/fs/bcachefs/btree_key_cache.c @@ -291,8 +291,10 @@ static noinline int btree_key_cache_fill(struct btree_trans *trans, struct btree_path *ck_path, unsigned flags) { - if (flags & BTREE_ITER_cached_nofill) + if (flags & BTREE_ITER_cached_nofill) { + ck_path->l[0].b = NULL; return 
0; + } struct bch_fs *c = trans->c; struct btree_iter iter; diff --git a/fs/bcachefs/btree_trans_commit.c b/fs/bcachefs/btree_trans_commit.c index 6b79b672e0b1..2760dd9569ed 100644 --- a/fs/bcachefs/btree_trans_commit.c +++ b/fs/bcachefs/btree_trans_commit.c @@ -348,7 +348,7 @@ static __always_inline int bch2_trans_journal_res_get(struct btree_trans *trans, unsigned flags) { return bch2_journal_res_get(&trans->c->journal, &trans->journal_res, - trans->journal_u64s, flags); + trans->journal_u64s, flags, trans); } #define JSET_ENTRY_LOG_U64s 4 diff --git a/fs/bcachefs/compress.c b/fs/bcachefs/compress.c index f99ff1819597..114bf2f3879f 100644 --- a/fs/bcachefs/compress.c +++ b/fs/bcachefs/compress.c @@ -4,6 +4,7 @@ #include "compress.h" #include "error.h" #include "extents.h" +#include "io_write.h" #include "opts.h" #include "super-io.h" @@ -254,11 +255,14 @@ err: goto out; } -int bch2_bio_uncompress_inplace(struct bch_fs *c, struct bio *bio, - struct bch_extent_crc_unpacked *crc) +int bch2_bio_uncompress_inplace(struct bch_write_op *op, + struct bio *bio) { + struct bch_fs *c = op->c; + struct bch_extent_crc_unpacked *crc = &op->crc; struct bbuf data = { NULL }; size_t dst_len = crc->uncompressed_size << 9; + int ret = 0; /* bio must own its pages: */ BUG_ON(!bio->bi_vcnt); @@ -266,17 +270,26 @@ int bch2_bio_uncompress_inplace(struct bch_fs *c, struct bio *bio, if (crc->uncompressed_size << 9 > c->opts.encoded_extent_max || crc->compressed_size << 9 > c->opts.encoded_extent_max) { - bch_err(c, "error rewriting existing data: extent too big"); + struct printbuf buf = PRINTBUF; + bch2_write_op_error(&buf, op); + prt_printf(&buf, "error rewriting existing data: extent too big"); + bch_err_ratelimited(c, "%s", buf.buf); + printbuf_exit(&buf); return -EIO; } data = __bounce_alloc(c, dst_len, WRITE); if (__bio_uncompress(c, bio, data.b, *crc)) { - if (!c->opts.no_data_io) - bch_err(c, "error rewriting existing data: decompression error"); - bio_unmap_or_unbounce(c, data); - return -EIO; + if (!c->opts.no_data_io) { + struct printbuf buf = PRINTBUF; + bch2_write_op_error(&buf, op); + prt_printf(&buf, "error rewriting existing data: decompression error"); + bch_err_ratelimited(c, "%s", buf.buf); + printbuf_exit(&buf); + } + ret = -EIO; + goto err; } /* @@ -293,9 +306,9 @@ int bch2_bio_uncompress_inplace(struct bch_fs *c, struct bio *bio, crc->uncompressed_size = crc->live_size; crc->offset = 0; crc->csum = (struct bch_csum) { 0, 0 }; - +err: bio_unmap_or_unbounce(c, data); - return 0; + return ret; } int bch2_bio_uncompress(struct bch_fs *c, struct bio *src, diff --git a/fs/bcachefs/compress.h b/fs/bcachefs/compress.h index 607fd5e232c9..bec2f05bfd52 100644 --- a/fs/bcachefs/compress.h +++ b/fs/bcachefs/compress.h @@ -47,8 +47,8 @@ static inline enum bch_compression_type bch2_compression_opt_to_type(unsigned v) return __bch2_compression_opt_to_type[bch2_compression_decode(v).type]; } -int bch2_bio_uncompress_inplace(struct bch_fs *, struct bio *, - struct bch_extent_crc_unpacked *); +struct bch_write_op; +int bch2_bio_uncompress_inplace(struct bch_write_op *, struct bio *); int bch2_bio_uncompress(struct bch_fs *, struct bio *, struct bio *, struct bvec_iter, struct bch_extent_crc_unpacked); unsigned bch2_bio_compress(struct bch_fs *, struct bio *, size_t *, diff --git a/fs/bcachefs/data_update.c b/fs/bcachefs/data_update.c index 585214931e05..337494facac6 100644 --- a/fs/bcachefs/data_update.c +++ b/fs/bcachefs/data_update.c @@ -91,15 +91,28 @@ static bool bkey_nocow_lock(struct bch_fs *c, struct 
moving_context *ctxt, struc return true; } -static void trace_move_extent_finish2(struct bch_fs *c, struct bkey_s_c k) +static noinline void trace_move_extent_finish2(struct data_update *u, + struct bkey_i *new, + struct bkey_i *insert) { - if (trace_move_extent_finish_enabled()) { - struct printbuf buf = PRINTBUF; + struct bch_fs *c = u->op.c; + struct printbuf buf = PRINTBUF; - bch2_bkey_val_to_text(&buf, c, k); - trace_move_extent_finish(c, buf.buf); - printbuf_exit(&buf); - } + prt_newline(&buf); + + bch2_data_update_to_text(&buf, u); + prt_newline(&buf); + + prt_str_indented(&buf, "new replicas:\t"); + bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(new)); + prt_newline(&buf); + + prt_str_indented(&buf, "insert:\t"); + bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(insert)); + prt_newline(&buf); + + trace_move_extent_finish(c, buf.buf); + printbuf_exit(&buf); } static void trace_move_extent_fail2(struct data_update *m, @@ -372,7 +385,8 @@ restart_drop_extra_replicas: bch2_btree_iter_set_pos(&iter, next_pos); this_cpu_add(c->counters[BCH_COUNTER_move_extent_finish], new->k.size); - trace_move_extent_finish2(c, bkey_i_to_s_c(&new->k_i)); + if (trace_move_extent_finish_enabled()) + trace_move_extent_finish2(m, &new->k_i, insert); } err: if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) @@ -525,34 +539,38 @@ void bch2_data_update_opts_to_text(struct printbuf *out, struct bch_fs *c, struct data_update_opts *data_opts) { printbuf_tabstop_push(out, 20); - prt_str(out, "rewrite ptrs:\t"); + + prt_str_indented(out, "rewrite ptrs:\t"); bch2_prt_u64_base2(out, data_opts->rewrite_ptrs); prt_newline(out); - prt_str(out, "kill ptrs:\t"); + prt_str_indented(out, "kill ptrs:\t"); bch2_prt_u64_base2(out, data_opts->kill_ptrs); prt_newline(out); - prt_str(out, "target:\t"); + prt_str_indented(out, "target:\t"); bch2_target_to_text(out, c, data_opts->target); prt_newline(out); - prt_str(out, "compression:\t"); + prt_str_indented(out, "compression:\t"); bch2_compression_opt_to_text(out, io_opts->background_compression); prt_newline(out); - prt_str(out, "opts.replicas:\t"); + prt_str_indented(out, "opts.replicas:\t"); prt_u64(out, io_opts->data_replicas); + prt_newline(out); - prt_str(out, "extra replicas:\t"); + prt_str_indented(out, "extra replicas:\t"); prt_u64(out, data_opts->extra_replicas); } void bch2_data_update_to_text(struct printbuf *out, struct data_update *m) { - bch2_bkey_val_to_text(out, m->op.c, bkey_i_to_s_c(m->k.k)); - prt_newline(out); bch2_data_update_opts_to_text(out, m->op.c, &m->op.opts, &m->data_opts); + prt_newline(out); + + prt_str_indented(out, "old key:\t"); + bch2_bkey_val_to_text(out, m->op.c, bkey_i_to_s_c(m->k.k)); } int bch2_extent_drop_ptrs(struct btree_trans *trans, diff --git a/fs/bcachefs/debug.c b/fs/bcachefs/debug.c index b5de52a50d10..55333e82d1fe 100644 --- a/fs/bcachefs/debug.c +++ b/fs/bcachefs/debug.c @@ -20,6 +20,7 @@ #include "extents.h" #include "fsck.h" #include "inode.h" +#include "journal_reclaim.h" #include "super.h" #include <linux/console.h> diff --git a/fs/bcachefs/io_write.c b/fs/bcachefs/io_write.c index 3e71860f66b9..dd508d93e9fc 100644 --- a/fs/bcachefs/io_write.c +++ b/fs/bcachefs/io_write.c @@ -406,7 +406,7 @@ static void __bch2_write_op_error(struct printbuf *out, struct bch_write_op *op, op->flags & BCH_WRITE_MOVE ? 
"(internal move)" : ""); } -static void bch2_write_op_error(struct printbuf *out, struct bch_write_op *op) +void bch2_write_op_error(struct printbuf *out, struct bch_write_op *op) { __bch2_write_op_error(out, op, op->pos.offset); } @@ -873,7 +873,7 @@ static enum prep_encoded_ret { if (bch2_crc_cmp(op->crc.csum, csum) && !c->opts.no_data_io) return PREP_ENCODED_CHECKSUM_ERR; - if (bch2_bio_uncompress_inplace(c, bio, &op->crc)) + if (bch2_bio_uncompress_inplace(op, bio)) return PREP_ENCODED_ERR; } diff --git a/fs/bcachefs/io_write.h b/fs/bcachefs/io_write.h index 5400ce94ee57..b4626013abc8 100644 --- a/fs/bcachefs/io_write.h +++ b/fs/bcachefs/io_write.h @@ -20,6 +20,8 @@ static inline void bch2_latency_acct(struct bch_dev *ca, u64 submit_time, int rw void bch2_submit_wbio_replicas(struct bch_write_bio *, struct bch_fs *, enum bch_data_type, const struct bkey_i *, bool); +void bch2_write_op_error(struct printbuf *out, struct bch_write_op *op); + #define BCH_WRITE_FLAGS() \ x(ALLOC_NOWAIT) \ x(CACHED) \ diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c index 2cd20114b74b..cb2c3722f674 100644 --- a/fs/bcachefs/journal.c +++ b/fs/bcachefs/journal.c @@ -113,11 +113,10 @@ journal_seq_to_buf(struct journal *j, u64 seq) static void journal_pin_list_init(struct journal_entry_pin_list *p, int count) { - unsigned i; - - for (i = 0; i < ARRAY_SIZE(p->list); i++) - INIT_LIST_HEAD(&p->list[i]); - INIT_LIST_HEAD(&p->flushed); + for (unsigned i = 0; i < ARRAY_SIZE(p->unflushed); i++) + INIT_LIST_HEAD(&p->unflushed[i]); + for (unsigned i = 0; i < ARRAY_SIZE(p->flushed); i++) + INIT_LIST_HEAD(&p->flushed[i]); atomic_set(&p->count, count); p->devs.nr = 0; } @@ -601,6 +600,16 @@ out: : -BCH_ERR_journal_res_get_blocked; } +static unsigned max_dev_latency(struct bch_fs *c) +{ + u64 nsecs = 0; + + for_each_rw_member(c, ca) + nsecs = max(nsecs, ca->io_latency[WRITE].stats.max_duration); + + return nsecs_to_jiffies(nsecs); +} + /* * Essentially the entry function to the journaling code. When bcachefs is doing * a btree insert, it calls this function to get the current journal write. @@ -612,17 +621,31 @@ out: * btree node write locks. */ int bch2_journal_res_get_slowpath(struct journal *j, struct journal_res *res, - unsigned flags) + unsigned flags, + struct btree_trans *trans) { int ret; if (closure_wait_event_timeout(&j->async_wait, (ret = __journal_res_get(j, res, flags)) != -BCH_ERR_journal_res_get_blocked || (flags & JOURNAL_RES_GET_NONBLOCK), - HZ * 10)) + HZ)) return ret; + if (trans) + bch2_trans_unlock_long(trans); + struct bch_fs *c = container_of(j, struct bch_fs, journal); + int remaining_wait = max(max_dev_latency(c) * 2, HZ * 10); + + remaining_wait = max(0, remaining_wait - HZ); + + if (closure_wait_event_timeout(&j->async_wait, + (ret = __journal_res_get(j, res, flags)) != -BCH_ERR_journal_res_get_blocked || + (flags & JOURNAL_RES_GET_NONBLOCK), + remaining_wait)) + return ret; + struct printbuf buf = PRINTBUF; bch2_journal_debug_to_text(&buf, j); bch_err(c, "Journal stuck? 
Waited for 10 seconds...\n%s", @@ -727,7 +750,7 @@ recheck_need_open: * livelock: */ sched_annotate_sleep(); - ret = bch2_journal_res_get(j, &res, jset_u64s(0), 0); + ret = bch2_journal_res_get(j, &res, jset_u64s(0), 0, NULL); if (ret) return ret; @@ -848,7 +871,7 @@ out: static int __bch2_journal_meta(struct journal *j) { struct journal_res res = {}; - int ret = bch2_journal_res_get(j, &res, jset_u64s(0), 0); + int ret = bch2_journal_res_get(j, &res, jset_u64s(0), 0, NULL); if (ret) return ret; @@ -1602,54 +1625,3 @@ void bch2_journal_debug_to_text(struct printbuf *out, struct journal *j) __bch2_journal_debug_to_text(out, j); spin_unlock(&j->lock); } - -bool bch2_journal_seq_pins_to_text(struct printbuf *out, struct journal *j, u64 *seq) -{ - struct journal_entry_pin_list *pin_list; - struct journal_entry_pin *pin; - - spin_lock(&j->lock); - if (!test_bit(JOURNAL_running, &j->flags)) { - spin_unlock(&j->lock); - return true; - } - - *seq = max(*seq, j->pin.front); - - if (*seq >= j->pin.back) { - spin_unlock(&j->lock); - return true; - } - - out->atomic++; - - pin_list = journal_seq_pin(j, *seq); - - prt_printf(out, "%llu: count %u\n", *seq, atomic_read(&pin_list->count)); - printbuf_indent_add(out, 2); - - for (unsigned i = 0; i < ARRAY_SIZE(pin_list->list); i++) - list_for_each_entry(pin, &pin_list->list[i], list) - prt_printf(out, "\t%px %ps\n", pin, pin->flush); - - if (!list_empty(&pin_list->flushed)) - prt_printf(out, "flushed:\n"); - - list_for_each_entry(pin, &pin_list->flushed, list) - prt_printf(out, "\t%px %ps\n", pin, pin->flush); - - printbuf_indent_sub(out, 2); - - --out->atomic; - spin_unlock(&j->lock); - - return false; -} - -void bch2_journal_pins_to_text(struct printbuf *out, struct journal *j) -{ - u64 seq = 0; - - while (!bch2_journal_seq_pins_to_text(out, j, &seq)) - seq++; -} diff --git a/fs/bcachefs/journal.h b/fs/bcachefs/journal.h index cb0df0663946..dccddd5420ad 100644 --- a/fs/bcachefs/journal.h +++ b/fs/bcachefs/journal.h @@ -312,7 +312,7 @@ static inline void bch2_journal_res_put(struct journal *j, } int bch2_journal_res_get_slowpath(struct journal *, struct journal_res *, - unsigned); + unsigned, struct btree_trans *); /* First bits for BCH_WATERMARK: */ enum journal_res_flags { @@ -368,7 +368,8 @@ static inline int journal_res_get_fast(struct journal *j, } static inline int bch2_journal_res_get(struct journal *j, struct journal_res *res, - unsigned u64s, unsigned flags) + unsigned u64s, unsigned flags, + struct btree_trans *trans) { int ret; @@ -380,7 +381,7 @@ static inline int bch2_journal_res_get(struct journal *j, struct journal_res *re if (journal_res_get_fast(j, res, flags)) goto out; - ret = bch2_journal_res_get_slowpath(j, res, flags); + ret = bch2_journal_res_get_slowpath(j, res, flags, trans); if (ret) return ret; out: @@ -429,8 +430,6 @@ struct journal_buf *bch2_next_write_buffer_flush_journal_buf(struct journal *, u void __bch2_journal_debug_to_text(struct printbuf *, struct journal *); void bch2_journal_debug_to_text(struct printbuf *, struct journal *); -void bch2_journal_pins_to_text(struct printbuf *, struct journal *); -bool bch2_journal_seq_pins_to_text(struct printbuf *, struct journal *, u64 *); int bch2_set_nr_journal_buckets(struct bch_fs *, struct bch_dev *, unsigned nr); diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c index 7f2efe85a805..11c39e0c34f4 100644 --- a/fs/bcachefs/journal_io.c +++ b/fs/bcachefs/journal_io.c @@ -17,6 +17,7 @@ #include "sb-clean.h" #include "trace.h" +#include <linux/ioprio.h> #include 
<linux/string_choices.h> void bch2_journal_pos_from_member_info_set(struct bch_fs *c) @@ -1763,6 +1764,7 @@ static CLOSURE_CALLBACK(journal_write_submit) bio->bi_iter.bi_sector = ptr->offset; bio->bi_end_io = journal_write_endio; bio->bi_private = ca; + bio->bi_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_RT, 0); BUG_ON(bio->bi_iter.bi_sector == ca->prev_journal_sector); ca->prev_journal_sector = bio->bi_iter.bi_sector; diff --git a/fs/bcachefs/journal_reclaim.c b/fs/bcachefs/journal_reclaim.c index 3c8242606da7..6a9cefb635d6 100644 --- a/fs/bcachefs/journal_reclaim.c +++ b/fs/bcachefs/journal_reclaim.c @@ -327,8 +327,10 @@ void bch2_journal_reclaim_fast(struct journal *j) popped = true; } - if (popped) + if (popped) { bch2_journal_space_available(j); + __closure_wake_up(&j->reclaim_flush_wait); + } } bool __bch2_journal_pin_put(struct journal *j, u64 seq) @@ -362,6 +364,9 @@ static inline bool __journal_pin_drop(struct journal *j, pin->seq = 0; list_del_init(&pin->list); + if (j->reclaim_flush_wait.list.first) + __closure_wake_up(&j->reclaim_flush_wait); + /* * Unpinning a journal entry may make journal_next_bucket() succeed, if * writing a new last_seq will now make another bucket available: @@ -383,11 +388,11 @@ static enum journal_pin_type journal_pin_type(journal_pin_flush_fn fn) { if (fn == bch2_btree_node_flush0 || fn == bch2_btree_node_flush1) - return JOURNAL_PIN_btree; + return JOURNAL_PIN_TYPE_btree; else if (fn == bch2_btree_key_cache_journal_flush) - return JOURNAL_PIN_key_cache; + return JOURNAL_PIN_TYPE_key_cache; else - return JOURNAL_PIN_other; + return JOURNAL_PIN_TYPE_other; } static inline void bch2_journal_pin_set_locked(struct journal *j, u64 seq, @@ -406,7 +411,12 @@ static inline void bch2_journal_pin_set_locked(struct journal *j, u64 seq, atomic_inc(&pin_list->count); pin->seq = seq; pin->flush = flush_fn; - list_add(&pin->list, &pin_list->list[type]); + + if (list_empty(&pin_list->unflushed[type]) && + j->reclaim_flush_wait.list.first) + __closure_wake_up(&j->reclaim_flush_wait); + + list_add(&pin->list, &pin_list->unflushed[type]); } void bch2_journal_pin_copy(struct journal *j, @@ -499,16 +509,15 @@ journal_get_next_pin(struct journal *j, { struct journal_entry_pin_list *pin_list; struct journal_entry_pin *ret = NULL; - unsigned i; fifo_for_each_entry_ptr(pin_list, &j->pin, *seq) { if (*seq > seq_to_flush && !allowed_above_seq) break; - for (i = 0; i < JOURNAL_PIN_NR; i++) - if ((((1U << i) & allowed_below_seq) && *seq <= seq_to_flush) || - ((1U << i) & allowed_above_seq)) { - ret = list_first_entry_or_null(&pin_list->list[i], + for (unsigned i = 0; i < JOURNAL_PIN_TYPE_NR; i++) + if (((BIT(i) & allowed_below_seq) && *seq <= seq_to_flush) || + (BIT(i) & allowed_above_seq)) { + ret = list_first_entry_or_null(&pin_list->unflushed[i], struct journal_entry_pin, list); if (ret) return ret; @@ -544,8 +553,8 @@ static size_t journal_flush_pins(struct journal *j, } if (min_key_cache) { - allowed_above |= 1U << JOURNAL_PIN_key_cache; - allowed_below |= 1U << JOURNAL_PIN_key_cache; + allowed_above |= BIT(JOURNAL_PIN_TYPE_key_cache); + allowed_below |= BIT(JOURNAL_PIN_TYPE_key_cache); } cond_resched(); @@ -553,7 +562,9 @@ static size_t journal_flush_pins(struct journal *j, j->last_flushed = jiffies; spin_lock(&j->lock); - pin = journal_get_next_pin(j, seq_to_flush, allowed_below, allowed_above, &seq); + pin = journal_get_next_pin(j, seq_to_flush, + allowed_below, + allowed_above, &seq); if (pin) { BUG_ON(j->flush_in_progress); j->flush_in_progress = pin; @@ -576,7 +587,7 @@ 
static size_t journal_flush_pins(struct journal *j, spin_lock(&j->lock); /* Pin might have been dropped or rearmed: */ if (likely(!err && !j->flush_in_progress_dropped)) - list_move(&pin->list, &journal_seq_pin(j, seq)->flushed); + list_move(&pin->list, &journal_seq_pin(j, seq)->flushed[journal_pin_type(flush_fn)]); j->flush_in_progress = NULL; j->flush_in_progress_dropped = false; spin_unlock(&j->lock); @@ -816,10 +827,41 @@ int bch2_journal_reclaim_start(struct journal *j) return 0; } +static bool journal_pins_still_flushing(struct journal *j, u64 seq_to_flush, + unsigned types) +{ + struct journal_entry_pin_list *pin_list; + u64 seq; + + spin_lock(&j->lock); + fifo_for_each_entry_ptr(pin_list, &j->pin, seq) { + if (seq > seq_to_flush) + break; + + for (unsigned i = 0; i < JOURNAL_PIN_TYPE_NR; i++) + if ((BIT(i) & types) && + (!list_empty(&pin_list->unflushed[i]) || + !list_empty(&pin_list->flushed[i]))) { + spin_unlock(&j->lock); + return true; + } + } + spin_unlock(&j->lock); + + return false; +} + +static bool journal_flush_pins_or_still_flushing(struct journal *j, u64 seq_to_flush, + unsigned types) +{ + return journal_flush_pins(j, seq_to_flush, types, 0, 0, 0) || + journal_pins_still_flushing(j, seq_to_flush, types); +} + static int journal_flush_done(struct journal *j, u64 seq_to_flush, bool *did_work) { - int ret; + int ret = 0; ret = bch2_journal_error(j); if (ret) @@ -827,12 +869,18 @@ static int journal_flush_done(struct journal *j, u64 seq_to_flush, mutex_lock(&j->reclaim_lock); - if (journal_flush_pins(j, seq_to_flush, - (1U << JOURNAL_PIN_key_cache)| - (1U << JOURNAL_PIN_other), 0, 0, 0) || - journal_flush_pins(j, seq_to_flush, - (1U << JOURNAL_PIN_btree), 0, 0, 0)) + if (journal_flush_pins_or_still_flushing(j, seq_to_flush, + BIT(JOURNAL_PIN_TYPE_key_cache)| + BIT(JOURNAL_PIN_TYPE_other))) { + *did_work = true; + goto unlock; + } + + if (journal_flush_pins_or_still_flushing(j, seq_to_flush, + BIT(JOURNAL_PIN_TYPE_btree))) { *did_work = true; + goto unlock; + } if (seq_to_flush > journal_cur_seq(j)) bch2_journal_entry_close(j); @@ -847,6 +895,7 @@ static int journal_flush_done(struct journal *j, u64 seq_to_flush, !fifo_used(&j->pin); spin_unlock(&j->lock); +unlock: mutex_unlock(&j->reclaim_lock); return ret; @@ -860,7 +909,7 @@ bool bch2_journal_flush_pins(struct journal *j, u64 seq_to_flush) if (!test_bit(JOURNAL_running, &j->flags)) return false; - closure_wait_event(&j->async_wait, + closure_wait_event(&j->reclaim_flush_wait, journal_flush_done(j, seq_to_flush, &did_work)); return did_work; @@ -926,3 +975,54 @@ err: return ret; } + +bool bch2_journal_seq_pins_to_text(struct printbuf *out, struct journal *j, u64 *seq) +{ + struct journal_entry_pin_list *pin_list; + struct journal_entry_pin *pin; + + spin_lock(&j->lock); + if (!test_bit(JOURNAL_running, &j->flags)) { + spin_unlock(&j->lock); + return true; + } + + *seq = max(*seq, j->pin.front); + + if (*seq >= j->pin.back) { + spin_unlock(&j->lock); + return true; + } + + out->atomic++; + + pin_list = journal_seq_pin(j, *seq); + + prt_printf(out, "%llu: count %u\n", *seq, atomic_read(&pin_list->count)); + printbuf_indent_add(out, 2); + + prt_printf(out, "unflushed:\n"); + for (unsigned i = 0; i < ARRAY_SIZE(pin_list->unflushed); i++) + list_for_each_entry(pin, &pin_list->unflushed[i], list) + prt_printf(out, "\t%px %ps\n", pin, pin->flush); + + prt_printf(out, "flushed:\n"); + for (unsigned i = 0; i < ARRAY_SIZE(pin_list->flushed); i++) + list_for_each_entry(pin, &pin_list->flushed[i], list) + prt_printf(out, "\t%px 
%ps\n", pin, pin->flush); + + printbuf_indent_sub(out, 2); + + --out->atomic; + spin_unlock(&j->lock); + + return false; +} + +void bch2_journal_pins_to_text(struct printbuf *out, struct journal *j) +{ + u64 seq = 0; + + while (!bch2_journal_seq_pins_to_text(out, j, &seq)) + seq++; +} diff --git a/fs/bcachefs/journal_reclaim.h b/fs/bcachefs/journal_reclaim.h index ec84c3345281..0a73d7134e1c 100644 --- a/fs/bcachefs/journal_reclaim.h +++ b/fs/bcachefs/journal_reclaim.h @@ -78,4 +78,7 @@ static inline bool bch2_journal_flush_all_pins(struct journal *j) int bch2_journal_flush_device_pins(struct journal *, int); +void bch2_journal_pins_to_text(struct printbuf *, struct journal *); +bool bch2_journal_seq_pins_to_text(struct printbuf *, struct journal *, u64 *); + #endif /* _BCACHEFS_JOURNAL_RECLAIM_H */ diff --git a/fs/bcachefs/journal_types.h b/fs/bcachefs/journal_types.h index e9bd716fbb71..3ba433a48eb8 100644 --- a/fs/bcachefs/journal_types.h +++ b/fs/bcachefs/journal_types.h @@ -53,15 +53,15 @@ struct journal_buf { */ enum journal_pin_type { - JOURNAL_PIN_btree, - JOURNAL_PIN_key_cache, - JOURNAL_PIN_other, - JOURNAL_PIN_NR, + JOURNAL_PIN_TYPE_btree, + JOURNAL_PIN_TYPE_key_cache, + JOURNAL_PIN_TYPE_other, + JOURNAL_PIN_TYPE_NR, }; struct journal_entry_pin_list { - struct list_head list[JOURNAL_PIN_NR]; - struct list_head flushed; + struct list_head unflushed[JOURNAL_PIN_TYPE_NR]; + struct list_head flushed[JOURNAL_PIN_TYPE_NR]; atomic_t count; struct bch_devs_list devs; }; @@ -226,6 +226,7 @@ struct journal { /* Used when waiting because the journal was full */ wait_queue_head_t wait; struct closure_waitlist async_wait; + struct closure_waitlist reclaim_flush_wait; struct delayed_work write_work; struct workqueue_struct *wq; diff --git a/fs/bcachefs/movinggc.c b/fs/bcachefs/movinggc.c index 85c361e78ba5..21805509ab9e 100644 --- a/fs/bcachefs/movinggc.c +++ b/fs/bcachefs/movinggc.c @@ -215,7 +215,8 @@ static int bch2_copygc(struct moving_context *ctxt, }; move_buckets buckets = { 0 }; struct move_bucket_in_flight *f; - u64 moved = atomic64_read(&ctxt->stats->sectors_moved); + u64 sectors_seen = atomic64_read(&ctxt->stats->sectors_seen); + u64 sectors_moved = atomic64_read(&ctxt->stats->sectors_moved); int ret = 0; ret = bch2_copygc_get_buckets(ctxt, buckets_in_flight, &buckets); @@ -245,7 +246,6 @@ static int bch2_copygc(struct moving_context *ctxt, *did_work = true; } err: - darray_exit(&buckets); /* no entries in LRU btree found, or got to end: */ if (bch2_err_matches(ret, ENOENT)) @@ -254,8 +254,11 @@ err: if (ret < 0 && !bch2_err_matches(ret, EROFS)) bch_err_msg(c, ret, "from bch2_move_data()"); - moved = atomic64_read(&ctxt->stats->sectors_moved) - moved; - trace_and_count(c, copygc, c, moved, 0, 0, 0); + sectors_seen = atomic64_read(&ctxt->stats->sectors_seen) - sectors_seen; + sectors_moved = atomic64_read(&ctxt->stats->sectors_moved) - sectors_moved; + trace_and_count(c, copygc, c, buckets.nr, sectors_seen, sectors_moved); + + darray_exit(&buckets); return ret; } diff --git a/fs/bcachefs/opts.h b/fs/bcachefs/opts.h index e763d52e0f38..a182b5d454ba 100644 --- a/fs/bcachefs/opts.h +++ b/fs/bcachefs/opts.h @@ -476,13 +476,13 @@ enum fsck_err_opts { NULL, "Enable nocow mode: enables runtime locking in\n"\ "data move path needed if nocow will ever be in use\n")\ x(copygc_enabled, u8, \ - OPT_FS|OPT_MOUNT, \ + OPT_FS|OPT_MOUNT|OPT_RUNTIME, \ OPT_BOOL(), \ BCH2_NO_SB_OPT, true, \ NULL, "Enable copygc: disable for debugging, or to\n"\ "quiet the system when doing performance testing\n")\ 
x(rebalance_enabled, u8, \ - OPT_FS|OPT_MOUNT, \ + OPT_FS|OPT_MOUNT|OPT_RUNTIME, \ OPT_BOOL(), \ BCH2_NO_SB_OPT, true, \ NULL, "Enable rebalance: disable for debugging, or to\n"\ diff --git a/fs/bcachefs/sb-errors_format.h b/fs/bcachefs/sb-errors_format.h index 0b4fe899209b..ea0a18364751 100644 --- a/fs/bcachefs/sb-errors_format.h +++ b/fs/bcachefs/sb-errors_format.h @@ -57,7 +57,7 @@ enum bch_fsck_flags { x(bset_wrong_sector_offset, 44, 0) \ x(bset_empty, 45, 0) \ x(bset_bad_seq, 46, 0) \ - x(bset_blacklisted_journal_seq, 47, 0) \ + x(bset_blacklisted_journal_seq, 47, FSCK_AUTOFIX) \ x(first_bset_blacklisted_journal_seq, 48, FSCK_AUTOFIX) \ x(btree_node_bad_btree, 49, 0) \ x(btree_node_bad_level, 50, 0) \ diff --git a/fs/bcachefs/str_hash.c b/fs/bcachefs/str_hash.c index 8c2c5539de2e..d78451c2a0c6 100644 --- a/fs/bcachefs/str_hash.c +++ b/fs/bcachefs/str_hash.c @@ -31,11 +31,11 @@ static int bch2_dirent_has_target(struct btree_trans *trans, struct bkey_s_c_dir } } -static int fsck_rename_dirent(struct btree_trans *trans, - struct snapshots_seen *s, - const struct bch_hash_desc desc, - struct bch_hash_info *hash_info, - struct bkey_s_c_dirent old) +static noinline int fsck_rename_dirent(struct btree_trans *trans, + struct snapshots_seen *s, + const struct bch_hash_desc desc, + struct bch_hash_info *hash_info, + struct bkey_s_c_dirent old) { struct qstr old_name = bch2_dirent_get_name(old); struct bkey_i_dirent *new = bch2_trans_kmalloc(trans, bkey_bytes(old.k) + 32); @@ -71,11 +71,11 @@ static int fsck_rename_dirent(struct btree_trans *trans, return bch2_fsck_update_backpointers(trans, s, desc, hash_info, &new->k_i); } -static int hash_pick_winner(struct btree_trans *trans, - const struct bch_hash_desc desc, - struct bch_hash_info *hash_info, - struct bkey_s_c k1, - struct bkey_s_c k2) +static noinline int hash_pick_winner(struct btree_trans *trans, + const struct bch_hash_desc desc, + struct bch_hash_info *hash_info, + struct bkey_s_c k1, + struct bkey_s_c k2) { if (bkey_val_bytes(k1.k) == bkey_val_bytes(k2.k) && !memcmp(k1.v, k2.v, bkey_val_bytes(k1.k))) @@ -142,8 +142,8 @@ fsck_err: * All versions of the same inode in different snapshots must have the same hash * seed/type: verify that the hash info we're using matches the root */ -static int check_inode_hash_info_matches_root(struct btree_trans *trans, u64 inum, - struct bch_hash_info *hash_info) +static noinline int check_inode_hash_info_matches_root(struct btree_trans *trans, u64 inum, + struct bch_hash_info *hash_info) { struct bch_fs *c = trans->c; struct btree_iter iter; diff --git a/fs/bcachefs/trace.h b/fs/bcachefs/trace.h index 9d40b7d4ea29..56a5a7fbc0fd 100644 --- a/fs/bcachefs/trace.h +++ b/fs/bcachefs/trace.h @@ -902,32 +902,30 @@ TRACE_EVENT(evacuate_bucket, TRACE_EVENT(copygc, TP_PROTO(struct bch_fs *c, - u64 sectors_moved, u64 sectors_not_moved, - u64 buckets_moved, u64 buckets_not_moved), - TP_ARGS(c, - sectors_moved, sectors_not_moved, - buckets_moved, buckets_not_moved), + u64 buckets, + u64 sectors_seen, + u64 sectors_moved), + TP_ARGS(c, buckets, sectors_seen, sectors_moved), TP_STRUCT__entry( __field(dev_t, dev ) + __field(u64, buckets ) + __field(u64, sectors_seen ) __field(u64, sectors_moved ) - __field(u64, sectors_not_moved ) - __field(u64, buckets_moved ) - __field(u64, buckets_not_moved ) ), TP_fast_assign( __entry->dev = c->dev; + __entry->buckets = buckets; + __entry->sectors_seen = sectors_seen; __entry->sectors_moved = sectors_moved; - __entry->sectors_not_moved = sectors_not_moved; - 
__entry->buckets_moved = buckets_moved; - __entry->buckets_not_moved = buckets_moved; ), - TP_printk("%d,%d sectors moved %llu remain %llu buckets moved %llu remain %llu", + TP_printk("%d,%d buckets %llu sectors seen %llu moved %llu", MAJOR(__entry->dev), MINOR(__entry->dev), - __entry->sectors_moved, __entry->sectors_not_moved, - __entry->buckets_moved, __entry->buckets_not_moved) + __entry->buckets, + __entry->sectors_seen, + __entry->sectors_moved) ); TRACE_EVENT(copygc_wait, diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c index fdf9dc15eafa..fdd404fc8112 100644 --- a/fs/ceph/debugfs.c +++ b/fs/ceph/debugfs.c @@ -412,7 +412,7 @@ void ceph_fs_debugfs_cleanup(struct ceph_fs_client *fsc) void ceph_fs_debugfs_init(struct ceph_fs_client *fsc) { - char name[100]; + char name[NAME_MAX]; doutc(fsc->client, "begin\n"); fsc->debugfs_congestion_kb = diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 0bf388e07a02..62e99e65250d 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -1940,29 +1940,19 @@ static int dir_lease_is_valid(struct inode *dir, struct dentry *dentry, /* * Check if cached dentry can be trusted. */ -static int ceph_d_revalidate(struct dentry *dentry, unsigned int flags) +static int ceph_d_revalidate(struct inode *dir, const struct qstr *name, + struct dentry *dentry, unsigned int flags) { struct ceph_mds_client *mdsc = ceph_sb_to_fs_client(dentry->d_sb)->mdsc; struct ceph_client *cl = mdsc->fsc->client; int valid = 0; - struct dentry *parent; - struct inode *dir, *inode; + struct inode *inode; - valid = fscrypt_d_revalidate(dentry, flags); + valid = fscrypt_d_revalidate(dir, name, dentry, flags); if (valid <= 0) return valid; - if (flags & LOOKUP_RCU) { - parent = READ_ONCE(dentry->d_parent); - dir = d_inode_rcu(parent); - if (!dir) - return -ECHILD; - inode = d_inode_rcu(dentry); - } else { - parent = dget_parent(dentry); - dir = d_inode(parent); - inode = d_inode(dentry); - } + inode = d_inode_rcu(dentry); doutc(cl, "%p '%pd' inode %p offset 0x%llx nokey %d\n", dentry, dentry, inode, ceph_dentry(dentry)->offset, @@ -2008,6 +1998,8 @@ static int ceph_d_revalidate(struct dentry *dentry, unsigned int flags) req->r_parent = dir; ihold(dir); + req->r_dname = name; + mask = CEPH_STAT_CAP_INODE | CEPH_CAP_AUTH_SHARED; if (ceph_security_xattr_wanted(dir)) mask |= CEPH_CAP_XATTR_SHARED; @@ -2038,9 +2030,6 @@ static int ceph_d_revalidate(struct dentry *dentry, unsigned int flags) doutc(cl, "%p '%pd' %s\n", dentry, dentry, valid ? 
"valid" : "invalid"); if (!valid) ceph_dir_clear_complete(dir); - - if (!(flags & LOOKUP_RCU)) - dput(parent); return valid; } diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 785fe489ef4b..54b3421501e9 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -2621,6 +2621,7 @@ static u8 *get_fscrypt_altname(const struct ceph_mds_request *req, u32 *plen) { struct inode *dir = req->r_parent; struct dentry *dentry = req->r_dentry; + const struct qstr *name = req->r_dname; u8 *cryptbuf = NULL; u32 len = 0; int ret = 0; @@ -2641,8 +2642,10 @@ static u8 *get_fscrypt_altname(const struct ceph_mds_request *req, u32 *plen) if (!fscrypt_has_encryption_key(dir)) goto success; - if (!fscrypt_fname_encrypted_size(dir, dentry->d_name.len, NAME_MAX, - &len)) { + if (!name) + name = &dentry->d_name; + + if (!fscrypt_fname_encrypted_size(dir, name->len, NAME_MAX, &len)) { WARN_ON_ONCE(1); return ERR_PTR(-ENAMETOOLONG); } @@ -2657,7 +2660,7 @@ static u8 *get_fscrypt_altname(const struct ceph_mds_request *req, u32 *plen) if (!cryptbuf) return ERR_PTR(-ENOMEM); - ret = fscrypt_fname_encrypt(dir, &dentry->d_name, cryptbuf, len); + ret = fscrypt_fname_encrypt(dir, name, cryptbuf, len); if (ret) { kfree(cryptbuf); return ERR_PTR(ret); @@ -2945,12 +2948,12 @@ static struct ceph_mds_request_head_legacy * find_legacy_request_head(void *p, u64 features) { bool legacy = !(features & CEPH_FEATURE_FS_BTIME); - struct ceph_mds_request_head_old *ohead; + struct ceph_mds_request_head *head; if (legacy) return (struct ceph_mds_request_head_legacy *)p; - ohead = (struct ceph_mds_request_head_old *)p; - return (struct ceph_mds_request_head_legacy *)&ohead->oldest_client_tid; + head = (struct ceph_mds_request_head *)p; + return (struct ceph_mds_request_head_legacy *)&head->oldest_client_tid; } /* @@ -3020,7 +3023,7 @@ static struct ceph_msg *create_request_message(struct ceph_mds_session *session, if (legacy) len = sizeof(struct ceph_mds_request_head_legacy); else if (request_head_version == 1) - len = sizeof(struct ceph_mds_request_head_old); + len = offsetofend(struct ceph_mds_request_head, args); else if (request_head_version == 2) len = offsetofend(struct ceph_mds_request_head, ext_num_fwd); else @@ -3104,11 +3107,11 @@ static struct ceph_msg *create_request_message(struct ceph_mds_session *session, msg->hdr.version = cpu_to_le16(3); p = msg->front.iov_base + sizeof(*lhead); } else if (request_head_version == 1) { - struct ceph_mds_request_head_old *ohead = msg->front.iov_base; + struct ceph_mds_request_head *nhead = msg->front.iov_base; msg->hdr.version = cpu_to_le16(4); - ohead->version = cpu_to_le16(1); - p = msg->front.iov_base + sizeof(*ohead); + nhead->version = cpu_to_le16(1); + p = msg->front.iov_base + offsetofend(struct ceph_mds_request_head, args); } else if (request_head_version == 2) { struct ceph_mds_request_head *nhead = msg->front.iov_base; @@ -3265,7 +3268,7 @@ static int __prepare_send_request(struct ceph_mds_session *session, * so we limit to retry at most 256 times. 
*/ if (req->r_attempts) { - old_max_retry = sizeof_field(struct ceph_mds_request_head_old, + old_max_retry = sizeof_field(struct ceph_mds_request_head, num_retry); old_max_retry = 1 << (old_max_retry * BITS_PER_BYTE); if ((old_version && req->r_attempts >= old_max_retry) || @@ -5690,18 +5693,18 @@ static int ceph_mds_auth_match(struct ceph_mds_client *mdsc, * * All the other cases --> mismatch */ + bool path_matched = true; char *first = strstr(_tpath, auth->match.path); - if (first != _tpath) { - if (free_tpath) - kfree(_tpath); - return 0; + if (first != _tpath || + (tlen > len && _tpath[len] != '/')) { + path_matched = false; } - if (tlen > len && _tpath[len] != '/') { - if (free_tpath) - kfree(_tpath); + if (free_tpath) + kfree(_tpath); + + if (!path_matched) return 0; - } } } diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index 38bb7e0d2d79..7c9fee9e80d4 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h @@ -299,6 +299,8 @@ struct ceph_mds_request { struct inode *r_target_inode; /* resulting inode */ struct inode *r_new_inode; /* new inode (for creates) */ + const struct qstr *r_dname; /* stable name (for ->d_revalidate) */ + #define CEPH_MDS_R_DIRECT_IS_HASH (1) /* r_direct_hash is valid */ #define CEPH_MDS_R_ABORTED (2) /* call was aborted */ #define CEPH_MDS_R_GOT_UNSAFE (3) /* got an unsafe reply */ diff --git a/fs/coda/dir.c b/fs/coda/dir.c index 4e552ba7bd43..a3e2dfeedfbf 100644 --- a/fs/coda/dir.c +++ b/fs/coda/dir.c @@ -445,7 +445,8 @@ static int coda_readdir(struct file *coda_file, struct dir_context *ctx) } /* called when a cache lookup succeeds */ -static int coda_dentry_revalidate(struct dentry *de, unsigned int flags) +static int coda_dentry_revalidate(struct inode *dir, const struct qstr *name, + struct dentry *de, unsigned int flags) { struct inode *inode; struct coda_inode_info *cii; diff --git a/fs/crypto/fname.c b/fs/crypto/fname.c index 0ad52fbe51c9..010f9c0a4c2f 100644 --- a/fs/crypto/fname.c +++ b/fs/crypto/fname.c @@ -574,11 +574,10 @@ EXPORT_SYMBOL_GPL(fscrypt_fname_siphash); * Validate dentries in encrypted directories to make sure we aren't potentially * caching stale dentries after a key has been added. */ -int fscrypt_d_revalidate(struct dentry *dentry, unsigned int flags) +int fscrypt_d_revalidate(struct inode *dir, const struct qstr *name, + struct dentry *dentry, unsigned int flags) { - struct dentry *dir; int err; - int valid; /* * Plaintext names are always valid, since fscrypt doesn't support @@ -591,30 +590,21 @@ int fscrypt_d_revalidate(struct dentry *dentry, unsigned int flags) /* * No-key name; valid if the directory's key is still unavailable. * - * Although fscrypt forbids rename() on no-key names, we still must use - * dget_parent() here rather than use ->d_parent directly. That's - * because a corrupted fs image may contain directory hard links, which - * the VFS handles by moving the directory's dentry tree in the dcache - * each time ->lookup() finds the directory and it already has a dentry - * elsewhere. Thus ->d_parent can be changing, and we must safely grab - * a reference to some ->d_parent to prevent it from being freed. + * Note in RCU mode we have to bail if we get here - + * fscrypt_get_encryption_info() may block. */ if (flags & LOOKUP_RCU) return -ECHILD; - dir = dget_parent(dentry); /* * Pass allow_unsupported=true, so that files with an unsupported * encryption policy can be deleted. 
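/*
 * Editor's illustration (not from the patch): the ceph_mds_auth_match() hunk
 * above accepts an MDS auth path only when it is a whole-component prefix of
 * the target path, i.e. "foo" matches "foo" and "foo/bar" but not "foobar".
 * A minimal userspace version of that check, with hypothetical names:
 */
#include <assert.h>
#include <stdbool.h>
#include <string.h>

static bool path_prefix_matches(const char *path, const char *prefix)
{
	size_t plen = strlen(prefix);
	size_t tlen = strlen(path);

	/* The prefix must match at the very start of the path... */
	if (tlen < plen || strncmp(path, prefix, plen) != 0)
		return false;
	/* ...and must end exactly at a path-component boundary. */
	return tlen == plen || path[plen] == '/';
}

int main(void)
{
	assert(path_prefix_matches("foo", "foo"));
	assert(path_prefix_matches("foo/bar", "foo"));
	assert(!path_prefix_matches("foobar", "foo"));
	return 0;
}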
*/ - err = fscrypt_get_encryption_info(d_inode(dir), true); - valid = !fscrypt_has_encryption_key(d_inode(dir)); - dput(dir); - + err = fscrypt_get_encryption_info(dir, true); if (err < 0) return err; - return valid; + return !fscrypt_has_encryption_key(dir); } EXPORT_SYMBOL_GPL(fscrypt_d_revalidate); diff --git a/fs/dcache.c b/fs/dcache.c index 1cd929f17eec..9cc0d47da321 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -295,12 +295,16 @@ static inline int dentry_cmp(const struct dentry *dentry, const unsigned char *c return dentry_string_cmp(cs, ct, tcount); } +/* + * long names are allocated separately from dentry and never modified. + * Refcounted, freeing is RCU-delayed. See take_dentry_name_snapshot() + * for the reason why ->count and ->head can't be combined into a union. + * dentry_string_cmp() relies upon ->name[] being word-aligned. + */ struct external_name { - union { - atomic_t count; - struct rcu_head head; - } u; - unsigned char name[]; + atomic_t count; + struct rcu_head head; + unsigned char name[] __aligned(sizeof(unsigned long)); }; static inline struct external_name *external_name(struct dentry *dentry) @@ -324,31 +328,45 @@ static void __d_free_external(struct rcu_head *head) static inline int dname_external(const struct dentry *dentry) { - return dentry->d_name.name != dentry->d_iname; + return dentry->d_name.name != dentry->d_shortname.string; } void take_dentry_name_snapshot(struct name_snapshot *name, struct dentry *dentry) { - spin_lock(&dentry->d_lock); - name->name = dentry->d_name; - if (unlikely(dname_external(dentry))) { - atomic_inc(&external_name(dentry)->u.count); + unsigned seq; + const unsigned char *s; + + rcu_read_lock(); +retry: + seq = read_seqcount_begin(&dentry->d_seq); + s = READ_ONCE(dentry->d_name.name); + name->name.hash_len = dentry->d_name.hash_len; + name->name.name = name->inline_name.string; + if (likely(s == dentry->d_shortname.string)) { + name->inline_name = dentry->d_shortname; } else { - memcpy(name->inline_name, dentry->d_iname, - dentry->d_name.len + 1); - name->name.name = name->inline_name; + struct external_name *p; + p = container_of(s, struct external_name, name[0]); + // get a valid reference + if (unlikely(!atomic_inc_not_zero(&p->count))) + goto retry; + name->name.name = s; } - spin_unlock(&dentry->d_lock); + if (read_seqcount_retry(&dentry->d_seq, seq)) { + release_dentry_name_snapshot(name); + goto retry; + } + rcu_read_unlock(); } EXPORT_SYMBOL(take_dentry_name_snapshot); void release_dentry_name_snapshot(struct name_snapshot *name) { - if (unlikely(name->name.name != name->inline_name)) { + if (unlikely(name->name.name != name->inline_name.string)) { struct external_name *p; p = container_of(name->name.name, struct external_name, name[0]); - if (unlikely(atomic_dec_and_test(&p->u.count))) - kfree_rcu(p, u.head); + if (unlikely(atomic_dec_and_test(&p->count))) + kfree_rcu(p, head); } } EXPORT_SYMBOL(release_dentry_name_snapshot); @@ -386,7 +404,7 @@ static void dentry_free(struct dentry *dentry) WARN_ON(!hlist_unhashed(&dentry->d_u.d_alias)); if (unlikely(dname_external(dentry))) { struct external_name *p = external_name(dentry); - if (likely(atomic_dec_and_test(&p->u.count))) { + if (likely(atomic_dec_and_test(&p->count))) { call_rcu(&dentry->d_u.d_rcu, __d_free_external); return; } @@ -1654,10 +1672,10 @@ static struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name) * will still always have a NUL at the end, even if we might * be overwriting an internal NUL character */ - 
dentry->d_iname[DNAME_INLINE_LEN-1] = 0; + dentry->d_shortname.string[DNAME_INLINE_LEN-1] = 0; if (unlikely(!name)) { name = &slash_name; - dname = dentry->d_iname; + dname = dentry->d_shortname.string; } else if (name->len > DNAME_INLINE_LEN-1) { size_t size = offsetof(struct external_name, name[1]); struct external_name *p = kmalloc(size + name->len, @@ -1667,10 +1685,10 @@ static struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name) kmem_cache_free(dentry_cache, dentry); return NULL; } - atomic_set(&p->u.count, 1); + atomic_set(&p->count, 1); dname = p->name; } else { - dname = dentry->d_iname; + dname = dentry->d_shortname.string; } dentry->d_name.len = name->len; @@ -2728,10 +2746,9 @@ static void swap_names(struct dentry *dentry, struct dentry *target) * dentry:internal, target:external. Steal target's * storage and make target internal. */ - memcpy(target->d_iname, dentry->d_name.name, - dentry->d_name.len + 1); dentry->d_name.name = target->d_name.name; - target->d_name.name = target->d_iname; + target->d_shortname = dentry->d_shortname; + target->d_name.name = target->d_shortname.string; } } else { if (unlikely(dname_external(dentry))) { @@ -2739,20 +2756,16 @@ static void swap_names(struct dentry *dentry, struct dentry *target) * dentry:external, target:internal. Give dentry's * storage to target and make dentry internal */ - memcpy(dentry->d_iname, target->d_name.name, - target->d_name.len + 1); target->d_name.name = dentry->d_name.name; - dentry->d_name.name = dentry->d_iname; + dentry->d_shortname = target->d_shortname; + dentry->d_name.name = dentry->d_shortname.string; } else { /* * Both are internal. */ - unsigned int i; - BUILD_BUG_ON(!IS_ALIGNED(DNAME_INLINE_LEN, sizeof(long))); - for (i = 0; i < DNAME_INLINE_LEN / sizeof(long); i++) { - swap(((long *) &dentry->d_iname)[i], - ((long *) &target->d_iname)[i]); - } + for (int i = 0; i < DNAME_INLINE_WORDS; i++) + swap(dentry->d_shortname.words[i], + target->d_shortname.words[i]); } } swap(dentry->d_name.hash_len, target->d_name.hash_len); @@ -2764,16 +2777,15 @@ static void copy_name(struct dentry *dentry, struct dentry *target) if (unlikely(dname_external(dentry))) old_name = external_name(dentry); if (unlikely(dname_external(target))) { - atomic_inc(&external_name(target)->u.count); + atomic_inc(&external_name(target)->count); dentry->d_name = target->d_name; } else { - memcpy(dentry->d_iname, target->d_name.name, - target->d_name.len + 1); - dentry->d_name.name = dentry->d_iname; + dentry->d_shortname = target->d_shortname; + dentry->d_name.name = dentry->d_shortname.string; dentry->d_name.hash_len = target->d_name.hash_len; } - if (old_name && likely(atomic_dec_and_test(&old_name->u.count))) - kfree_rcu(old_name, u.head); + if (old_name && likely(atomic_dec_and_test(&old_name->count))) + kfree_rcu(old_name, head); } /* @@ -2954,7 +2966,12 @@ static int __d_unalias(struct dentry *dentry, struct dentry *alias) goto out_err; m2 = &alias->d_parent->d_inode->i_rwsem; out_unalias: + if (alias->d_op->d_unalias_trylock && + !alias->d_op->d_unalias_trylock(alias)) + goto out_err; __d_move(alias, dentry, false); + if (alias->d_op->d_unalias_unlock) + alias->d_op->d_unalias_unlock(alias); ret = 0; out_err: if (m2) @@ -3102,12 +3119,12 @@ void d_mark_tmpfile(struct file *file, struct inode *inode) { struct dentry *dentry = file->f_path.dentry; - BUG_ON(dentry->d_name.name != dentry->d_iname || + BUG_ON(dname_external(dentry) || !hlist_unhashed(&dentry->d_u.d_alias) || !d_unlinked(dentry)); 
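/*
 * Editor's illustration (not from the patch): the take_dentry_name_snapshot()
 * rewrite above pins an externally allocated name under RCU and the d_seq
 * seqcount by taking a reference only while the refcount is still non-zero
 * (atomic_inc_not_zero), retrying if the name was concurrently freed.  Below
 * is a self-contained userspace sketch of that "increment unless zero"
 * primitive using C11 atomics; it is a simplified model, not dcache code.
 */
#include <assert.h>
#include <stdatomic.h>
#include <stdbool.h>

/* Return true and take a reference iff the object is still live (count > 0). */
static bool refcount_inc_not_zero(atomic_int *count)
{
	int old = atomic_load(count);

	while (old != 0) {
		if (atomic_compare_exchange_weak(count, &old, old + 1))
			return true;
		/* old was reloaded by the failed CAS; loop and retry. */
	}
	return false;
}

int main(void)
{
	atomic_int live = 1, dying = 0;

	assert(refcount_inc_not_zero(&live));   /* reference taken, count is now 2 */
	assert(!refcount_inc_not_zero(&dying)); /* caller must retry or fall back  */
	return 0;
}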
spin_lock(&dentry->d_parent->d_lock); spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); - dentry->d_name.len = sprintf(dentry->d_iname, "#%llu", + dentry->d_name.len = sprintf(dentry->d_shortname.string, "#%llu", (unsigned long long)inode->i_ino); spin_unlock(&dentry->d_lock); spin_unlock(&dentry->d_parent->d_lock); @@ -3195,7 +3212,7 @@ static void __init dcache_init(void) */ dentry_cache = KMEM_CACHE_USERCOPY(dentry, SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|SLAB_ACCOUNT, - d_iname); + d_shortname.string); /* Hash may have been set up in dcache_init_early */ if (!hashdist) diff --git a/fs/ecryptfs/dentry.c b/fs/ecryptfs/dentry.c index acaa0825e9bb..1dfd5b81d831 100644 --- a/fs/ecryptfs/dentry.c +++ b/fs/ecryptfs/dentry.c @@ -17,7 +17,9 @@ /** * ecryptfs_d_revalidate - revalidate an ecryptfs dentry - * @dentry: The ecryptfs dentry + * @dir: inode of expected parent + * @name: expected name + * @dentry: dentry to revalidate * @flags: lookup flags * * Called when the VFS needs to revalidate a dentry. This @@ -28,7 +30,8 @@ * Returns 1 if valid, 0 otherwise. * */ -static int ecryptfs_d_revalidate(struct dentry *dentry, unsigned int flags) +static int ecryptfs_d_revalidate(struct inode *dir, const struct qstr *name, + struct dentry *dentry, unsigned int flags) { struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry); int rc = 1; @@ -36,8 +39,15 @@ static int ecryptfs_d_revalidate(struct dentry *dentry, unsigned int flags) if (flags & LOOKUP_RCU) return -ECHILD; - if (lower_dentry->d_flags & DCACHE_OP_REVALIDATE) - rc = lower_dentry->d_op->d_revalidate(lower_dentry, flags); + if (lower_dentry->d_flags & DCACHE_OP_REVALIDATE) { + struct inode *lower_dir = ecryptfs_inode_to_lower(dir); + struct name_snapshot n; + + take_dentry_name_snapshot(&n, lower_dentry); + rc = lower_dentry->d_op->d_revalidate(lower_dir, &n.name, + lower_dentry, flags); + release_dentry_name_snapshot(&n); + } if (d_really_is_positive(dentry)) { struct inode *inode = d_inode(dentry); diff --git a/fs/exfat/namei.c b/fs/exfat/namei.c index 099f80645072..691dd77b6ab5 100644 --- a/fs/exfat/namei.c +++ b/fs/exfat/namei.c @@ -31,10 +31,9 @@ static inline void exfat_d_version_set(struct dentry *dentry, * If it happened, the negative dentry isn't actually negative anymore. So, * drop it. 
*/ -static int exfat_d_revalidate(struct dentry *dentry, unsigned int flags) +static int exfat_d_revalidate(struct inode *dir, const struct qstr *name, + struct dentry *dentry, unsigned int flags) { - int ret; - if (flags & LOOKUP_RCU) return -ECHILD; @@ -58,11 +57,7 @@ static int exfat_d_revalidate(struct dentry *dentry, unsigned int flags) if (flags & (LOOKUP_CREATE | LOOKUP_RENAME_TARGET)) return 0; - spin_lock(&dentry->d_lock); - ret = inode_eq_iversion(d_inode(dentry->d_parent), - exfat_d_version(dentry)); - spin_unlock(&dentry->d_lock); - return ret; + return inode_eq_iversion(dir, exfat_d_version(dentry)); } /* returns the length of a struct qstr, ignoring trailing dots if necessary */ diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c index 26c4fc37edcf..da4263a14a20 100644 --- a/fs/ext4/fast_commit.c +++ b/fs/ext4/fast_commit.c @@ -322,9 +322,7 @@ restart: WARN_ON(!list_empty(&ei->i_fc_dilist)); spin_unlock(&sbi->s_fc_lock); - if (fc_dentry->fcd_name.name && - fc_dentry->fcd_name.len > DNAME_INLINE_LEN) - kfree(fc_dentry->fcd_name.name); + release_dentry_name_snapshot(&fc_dentry->fcd_name); kmem_cache_free(ext4_fc_dentry_cachep, fc_dentry); return; @@ -449,22 +447,7 @@ static int __track_dentry_update(handle_t *handle, struct inode *inode, node->fcd_op = dentry_update->op; node->fcd_parent = dir->i_ino; node->fcd_ino = inode->i_ino; - if (dentry->d_name.len > DNAME_INLINE_LEN) { - node->fcd_name.name = kmalloc(dentry->d_name.len, GFP_NOFS); - if (!node->fcd_name.name) { - kmem_cache_free(ext4_fc_dentry_cachep, node); - ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_NOMEM, handle); - mutex_lock(&ei->i_fc_lock); - return -ENOMEM; - } - memcpy((u8 *)node->fcd_name.name, dentry->d_name.name, - dentry->d_name.len); - } else { - memcpy(node->fcd_iname, dentry->d_name.name, - dentry->d_name.len); - node->fcd_name.name = node->fcd_iname; - } - node->fcd_name.len = dentry->d_name.len; + take_dentry_name_snapshot(&node->fcd_name, dentry); INIT_LIST_HEAD(&node->fcd_dilist); spin_lock(&sbi->s_fc_lock); if (sbi->s_journal->j_flags & JBD2_FULL_COMMIT_ONGOING || @@ -832,7 +815,7 @@ static bool ext4_fc_add_dentry_tlv(struct super_block *sb, u32 *crc, { struct ext4_fc_dentry_info fcd; struct ext4_fc_tl tl; - int dlen = fc_dentry->fcd_name.len; + int dlen = fc_dentry->fcd_name.name.len; u8 *dst = ext4_fc_reserve_space(sb, EXT4_FC_TAG_BASE_LEN + sizeof(fcd) + dlen, crc); @@ -847,7 +830,7 @@ static bool ext4_fc_add_dentry_tlv(struct super_block *sb, u32 *crc, dst += EXT4_FC_TAG_BASE_LEN; memcpy(dst, &fcd, sizeof(fcd)); dst += sizeof(fcd); - memcpy(dst, fc_dentry->fcd_name.name, dlen); + memcpy(dst, fc_dentry->fcd_name.name.name, dlen); return true; } @@ -1328,9 +1311,7 @@ static void ext4_fc_cleanup(journal_t *journal, int full, tid_t tid) list_del_init(&fc_dentry->fcd_dilist); spin_unlock(&sbi->s_fc_lock); - if (fc_dentry->fcd_name.name && - fc_dentry->fcd_name.len > DNAME_INLINE_LEN) - kfree(fc_dentry->fcd_name.name); + release_dentry_name_snapshot(&fc_dentry->fcd_name); kmem_cache_free(ext4_fc_dentry_cachep, fc_dentry); spin_lock(&sbi->s_fc_lock); } diff --git a/fs/ext4/fast_commit.h b/fs/ext4/fast_commit.h index 2fadb2c4780c..3bd534e4dbbf 100644 --- a/fs/ext4/fast_commit.h +++ b/fs/ext4/fast_commit.h @@ -109,8 +109,7 @@ struct ext4_fc_dentry_update { int fcd_op; /* Type of update create / unlink / link */ int fcd_parent; /* Parent inode number */ int fcd_ino; /* Inode number */ - struct qstr fcd_name; /* Dirent name */ - unsigned char fcd_iname[DNAME_INLINE_LEN]; /* Dirent name string */ + 
struct name_snapshot fcd_name; /* Dirent name */ struct list_head fcd_list; struct list_head fcd_dilist; }; diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c index 15bf32c21ac0..926c26e90ef8 100644 --- a/fs/fat/namei_vfat.c +++ b/fs/fat/namei_vfat.c @@ -43,17 +43,13 @@ static inline void vfat_d_version_set(struct dentry *dentry, * If it happened, the negative dentry isn't actually negative * anymore. So, drop it. */ -static int vfat_revalidate_shortname(struct dentry *dentry) +static bool vfat_revalidate_shortname(struct dentry *dentry, struct inode *dir) { - int ret = 1; - spin_lock(&dentry->d_lock); - if (!inode_eq_iversion(d_inode(dentry->d_parent), vfat_d_version(dentry))) - ret = 0; - spin_unlock(&dentry->d_lock); - return ret; + return inode_eq_iversion(dir, vfat_d_version(dentry)); } -static int vfat_revalidate(struct dentry *dentry, unsigned int flags) +static int vfat_revalidate(struct inode *dir, const struct qstr *name, + struct dentry *dentry, unsigned int flags) { if (flags & LOOKUP_RCU) return -ECHILD; @@ -61,10 +57,11 @@ static int vfat_revalidate(struct dentry *dentry, unsigned int flags) /* This is not negative dentry. Always valid. */ if (d_really_is_positive(dentry)) return 1; - return vfat_revalidate_shortname(dentry); + return vfat_revalidate_shortname(dentry, dir); } -static int vfat_revalidate_ci(struct dentry *dentry, unsigned int flags) +static int vfat_revalidate_ci(struct inode *dir, const struct qstr *name, + struct dentry *dentry, unsigned int flags) { if (flags & LOOKUP_RCU) return -ECHILD; @@ -97,7 +94,7 @@ static int vfat_revalidate_ci(struct dentry *dentry, unsigned int flags) if (flags & (LOOKUP_CREATE | LOOKUP_RENAME_TARGET)) return 0; - return vfat_revalidate_shortname(dentry); + return vfat_revalidate_shortname(dentry, dir); } /* returns the length of a struct qstr, ignoring trailing dots */ diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index be693a8a1010..198862b086ff 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -175,10 +175,12 @@ static void fuse_lookup_init(struct fuse_conn *fc, struct fuse_args *args, memset(outarg, 0, sizeof(struct fuse_entry_out)); args->opcode = FUSE_LOOKUP; args->nodeid = nodeid; - args->in_numargs = 2; + args->in_numargs = 3; fuse_set_zero_arg0(args); - args->in_args[1].size = name->len + 1; + args->in_args[1].size = name->len; args->in_args[1].value = name->name; + args->in_args[2].size = 1; + args->in_args[2].value = ""; args->out_numargs = 1; args->out_args[0].size = sizeof(struct fuse_entry_out); args->out_args[0].value = outarg; @@ -193,10 +195,10 @@ static void fuse_lookup_init(struct fuse_conn *fc, struct fuse_args *args, * the lookup once more. If the lookup results in the same inode, * then refresh the attributes, timeouts and mark the dentry valid. 
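/*
 * Editor's illustration (not from the patch): in the exfat/vfat hunks above, a
 * negative dentry records the directory's change count (iversion) at lookup
 * time and is revalidated simply by comparing that value against the directory
 * inode now passed to ->d_revalidate(), with no d_lock or ->d_parent access.
 * A tiny userspace model of the idea (all names hypothetical):
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct demo_dir    { uint64_t version; };       /* bumped on create/unlink/rename */
struct demo_dentry { uint64_t seen_version; };  /* version captured at lookup     */

static bool negative_dentry_valid(const struct demo_dir *dir,
				  const struct demo_dentry *dentry)
{
	/* Still valid only if the directory has not changed since the miss. */
	return dir->version == dentry->seen_version;
}

int main(void)
{
	struct demo_dir dir = { .version = 7 };
	struct demo_dentry negative = { .seen_version = dir.version };

	printf("before change: %svalid\n", negative_dentry_valid(&dir, &negative) ? "" : "in");
	dir.version++;	/* e.g. a file was created in the directory */
	printf("after change:  %svalid\n", negative_dentry_valid(&dir, &negative) ? "" : "in");
	return 0;
}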
*/ -static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags) +static int fuse_dentry_revalidate(struct inode *dir, const struct qstr *name, + struct dentry *entry, unsigned int flags) { struct inode *inode; - struct dentry *parent; struct fuse_mount *fm; struct fuse_inode *fi; int ret; @@ -228,11 +230,9 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags) attr_version = fuse_get_attr_version(fm->fc); - parent = dget_parent(entry); - fuse_lookup_init(fm->fc, &args, get_node_id(d_inode(parent)), - &entry->d_name, &outarg); + fuse_lookup_init(fm->fc, &args, get_node_id(dir), + name, &outarg); ret = fuse_simple_request(fm, &args); - dput(parent); /* Zero nodeid is same as -ENOENT */ if (!ret && !outarg.nodeid) ret = -ENOENT; @@ -266,9 +266,7 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags) if (test_bit(FUSE_I_INIT_RDPLUS, &fi->state)) return -ECHILD; } else if (test_and_clear_bit(FUSE_I_INIT_RDPLUS, &fi->state)) { - parent = dget_parent(entry); - fuse_advise_use_readdirplus(d_inode(parent)); - dput(parent); + fuse_advise_use_readdirplus(dir); } } ret = 1; diff --git a/fs/gfs2/dentry.c b/fs/gfs2/dentry.c index 2e215e8c3c88..95050e719233 100644 --- a/fs/gfs2/dentry.c +++ b/fs/gfs2/dentry.c @@ -21,7 +21,9 @@ /** * gfs2_drevalidate - Check directory lookup consistency - * @dentry: the mapping to check + * @dir: expected parent directory inode + * @name: expexted name + * @dentry: dentry to check * @flags: lookup flags * * Check to make sure the lookup necessary to arrive at this inode from its @@ -30,50 +32,43 @@ * Returns: 1 if the dentry is ok, 0 if it isn't */ -static int gfs2_drevalidate(struct dentry *dentry, unsigned int flags) +static int gfs2_drevalidate(struct inode *dir, const struct qstr *name, + struct dentry *dentry, unsigned int flags) { - struct dentry *parent; - struct gfs2_sbd *sdp; - struct gfs2_inode *dip; + struct gfs2_sbd *sdp = GFS2_SB(dir); + struct gfs2_inode *dip = GFS2_I(dir); struct inode *inode; struct gfs2_holder d_gh; struct gfs2_inode *ip = NULL; - int error, valid = 0; + int error, valid; int had_lock = 0; if (flags & LOOKUP_RCU) return -ECHILD; - parent = dget_parent(dentry); - sdp = GFS2_SB(d_inode(parent)); - dip = GFS2_I(d_inode(parent)); inode = d_inode(dentry); if (inode) { if (is_bad_inode(inode)) - goto out; + return 0; ip = GFS2_I(inode); } - if (sdp->sd_lockstruct.ls_ops->lm_mount == NULL) { - valid = 1; - goto out; - } + if (sdp->sd_lockstruct.ls_ops->lm_mount == NULL) + return 1; had_lock = (gfs2_glock_is_locked_by_me(dip->i_gl) != NULL); if (!had_lock) { error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, &d_gh); if (error) - goto out; + return 0; } - error = gfs2_dir_check(d_inode(parent), &dentry->d_name, ip); + error = gfs2_dir_check(dir, name, ip); valid = inode ? 
!error : (error == -ENOENT); if (!had_lock) gfs2_glock_dq_uninit(&d_gh); -out: - dput(parent); return valid; } diff --git a/fs/hfs/sysdep.c b/fs/hfs/sysdep.c index 76fa02e3835b..ef54fc8093cf 100644 --- a/fs/hfs/sysdep.c +++ b/fs/hfs/sysdep.c @@ -13,7 +13,8 @@ /* dentry case-handling: just lowercase everything */ -static int hfs_revalidate_dentry(struct dentry *dentry, unsigned int flags) +static int hfs_revalidate_dentry(struct inode *dir, const struct qstr *name, + struct dentry *dentry, unsigned int flags) { struct inode *inode; int diff; diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c index 7e51d2cec64b..e0741e468956 100644 --- a/fs/hostfs/hostfs_kern.c +++ b/fs/hostfs/hostfs_kern.c @@ -95,32 +95,17 @@ __uml_setup("hostfs=", hostfs_args, static char *__dentry_name(struct dentry *dentry, char *name) { char *p = dentry_path_raw(dentry, name, PATH_MAX); - char *root; - size_t len; - struct hostfs_fs_info *fsi; - - fsi = dentry->d_sb->s_fs_info; - root = fsi->host_root_path; - len = strlen(root); - if (IS_ERR(p)) { - __putname(name); - return NULL; - } + struct hostfs_fs_info *fsi = dentry->d_sb->s_fs_info; + char *root = fsi->host_root_path; + size_t len = strlen(root); - /* - * This function relies on the fact that dentry_path_raw() will place - * the path name at the end of the provided buffer. - */ - BUG_ON(p + strlen(p) + 1 != name + PATH_MAX); - - strscpy(name, root, PATH_MAX); - if (len > p - name) { + if (IS_ERR(p) || len > p - name) { __putname(name); return NULL; } - if (p > name + len) - strcpy(name + len, p); + memcpy(name, root, len); + memmove(name + len, p, name + PATH_MAX - p); return name; } @@ -410,38 +395,33 @@ static const struct file_operations hostfs_dir_fops = { .fsync = hostfs_fsync, }; -static int hostfs_writepage(struct page *page, struct writeback_control *wbc) +static int hostfs_writepages(struct address_space *mapping, + struct writeback_control *wbc) { - struct address_space *mapping = page->mapping; struct inode *inode = mapping->host; - char *buffer; - loff_t base = page_offset(page); - int count = PAGE_SIZE; - int end_index = inode->i_size >> PAGE_SHIFT; - int err; - - if (page->index >= end_index) - count = inode->i_size & (PAGE_SIZE-1); - - buffer = kmap_local_page(page); - - err = write_file(HOSTFS_I(inode)->fd, &base, buffer, count); - if (err != count) { - if (err >= 0) - err = -EIO; - mapping_set_error(mapping, err); - goto out; + struct folio *folio = NULL; + loff_t i_size = i_size_read(inode); + int err = 0; + + while ((folio = writeback_iter(mapping, wbc, folio, &err))) { + loff_t pos = folio_pos(folio); + size_t count = folio_size(folio); + char *buffer; + int ret; + + if (count > i_size - pos) + count = i_size - pos; + + buffer = kmap_local_folio(folio, 0); + ret = write_file(HOSTFS_I(inode)->fd, &pos, buffer, count); + kunmap_local(buffer); + folio_unlock(folio); + if (ret != count) { + err = ret < 0 ? 
ret : -EIO; + mapping_set_error(mapping, err); + } } - if (base > inode->i_size) - inode->i_size = base; - - err = 0; - - out: - kunmap_local(buffer); - unlock_page(page); - return err; } @@ -506,11 +486,12 @@ static int hostfs_write_end(struct file *file, struct address_space *mapping, } static const struct address_space_operations hostfs_aops = { - .writepage = hostfs_writepage, + .writepages = hostfs_writepages, .read_folio = hostfs_read_folio, .dirty_folio = filemap_dirty_folio, .write_begin = hostfs_write_begin, .write_end = hostfs_write_end, + .migrate_folio = filemap_migrate_folio, }; static int hostfs_inode_update(struct inode *ino, const struct hostfs_stat *st) diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c index d68a4e6ac345..fc8ede43afde 100644 --- a/fs/jfs/namei.c +++ b/fs/jfs/namei.c @@ -1576,7 +1576,8 @@ out: return result; } -static int jfs_ci_revalidate(struct dentry *dentry, unsigned int flags) +static int jfs_ci_revalidate(struct inode *dir, const struct qstr *name, + struct dentry *dentry, unsigned int flags) { /* * This is not negative dentry. Always valid. diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c index 458519e416fe..5f0f8b95f44c 100644 --- a/fs/kernfs/dir.c +++ b/fs/kernfs/dir.c @@ -1109,7 +1109,8 @@ struct kernfs_node *kernfs_create_empty_dir(struct kernfs_node *parent, return ERR_PTR(rc); } -static int kernfs_dop_revalidate(struct dentry *dentry, unsigned int flags) +static int kernfs_dop_revalidate(struct inode *dir, const struct qstr *name, + struct dentry *dentry, unsigned int flags) { struct kernfs_node *kn; struct kernfs_root *root; diff --git a/fs/libfs.c b/fs/libfs.c index 5b6120b19e99..8444f5cc4064 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -1782,7 +1782,7 @@ int generic_ci_d_compare(const struct dentry *dentry, unsigned int len, { const struct dentry *parent; const struct inode *dir; - char strbuf[DNAME_INLINE_LEN]; + union shortname_store strbuf; struct qstr qstr; /* @@ -1802,22 +1802,23 @@ int generic_ci_d_compare(const struct dentry *dentry, unsigned int len, if (!dir || !IS_CASEFOLDED(dir)) return 1; + qstr.len = len; + qstr.name = str; /* * If the dentry name is stored in-line, then it may be concurrently * modified by a rename. If this happens, the VFS will eventually retry * the lookup, so it doesn't matter what ->d_compare() returns. * However, it's unsafe to call utf8_strncasecmp() with an unstable * string. Therefore, we have to copy the name into a temporary buffer. + * As above, len is guaranteed to match str, so the shortname case + * is exactly when str points to ->d_shortname. 
*/ - if (len <= DNAME_INLINE_LEN - 1) { - memcpy(strbuf, str, len); - strbuf[len] = 0; - str = strbuf; + if (qstr.name == dentry->d_shortname.string) { + strbuf = dentry->d_shortname; // NUL is guaranteed to be in there + qstr.name = strbuf.string; /* prevent compiler from optimizing out the temporary buffer */ barrier(); } - qstr.len = len; - qstr.name = str; return utf8_strncasecmp(dentry->d_sb->s_encoding, name, &qstr); } diff --git a/fs/namei.c b/fs/namei.c index 8c82afddd2ad..3ab9440c5b93 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -921,10 +921,11 @@ out_dput: return false; } -static inline int d_revalidate(struct dentry *dentry, unsigned int flags) +static inline int d_revalidate(struct inode *dir, const struct qstr *name, + struct dentry *dentry, unsigned int flags) { if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE)) - return dentry->d_op->d_revalidate(dentry, flags); + return dentry->d_op->d_revalidate(dir, name, dentry, flags); else return 1; } @@ -1652,7 +1653,7 @@ static struct dentry *lookup_dcache(const struct qstr *name, { struct dentry *dentry = d_lookup(dir, name); if (dentry) { - int error = d_revalidate(dentry, flags); + int error = d_revalidate(dir->d_inode, name, dentry, flags); if (unlikely(error <= 0)) { if (!error) d_invalidate(dentry); @@ -1737,19 +1738,20 @@ static struct dentry *lookup_fast(struct nameidata *nd) if (read_seqcount_retry(&parent->d_seq, nd->seq)) return ERR_PTR(-ECHILD); - status = d_revalidate(dentry, nd->flags); + status = d_revalidate(nd->inode, &nd->last, dentry, nd->flags); if (likely(status > 0)) return dentry; if (!try_to_unlazy_next(nd, dentry)) return ERR_PTR(-ECHILD); if (status == -ECHILD) /* we'd been told to redo it in non-rcu mode */ - status = d_revalidate(dentry, nd->flags); + status = d_revalidate(nd->inode, &nd->last, + dentry, nd->flags); } else { dentry = __d_lookup(parent, &nd->last); if (unlikely(!dentry)) return NULL; - status = d_revalidate(dentry, nd->flags); + status = d_revalidate(nd->inode, &nd->last, dentry, nd->flags); } if (unlikely(status <= 0)) { if (!status) @@ -1777,7 +1779,7 @@ again: if (IS_ERR(dentry)) return dentry; if (unlikely(!d_in_lookup(dentry))) { - int error = d_revalidate(dentry, flags); + int error = d_revalidate(inode, name, dentry, flags); if (unlikely(error <= 0)) { if (!error) { d_invalidate(dentry); @@ -3575,7 +3577,7 @@ static struct dentry *lookup_open(struct nameidata *nd, struct file *file, if (d_in_lookup(dentry)) break; - error = d_revalidate(dentry, nd->flags); + error = d_revalidate(dir_inode, &nd->last, dentry, nd->flags); if (likely(error > 0)) break; if (error) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 492cffd9d3d8..2b04038b0e40 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1672,7 +1672,7 @@ nfs_lookup_revalidate_delegated(struct inode *dir, struct dentry *dentry, return nfs_lookup_revalidate_done(dir, dentry, inode, 1); } -static int nfs_lookup_revalidate_dentry(struct inode *dir, +static int nfs_lookup_revalidate_dentry(struct inode *dir, const struct qstr *name, struct dentry *dentry, struct inode *inode, unsigned int flags) { @@ -1690,7 +1690,7 @@ static int nfs_lookup_revalidate_dentry(struct inode *dir, goto out; dir_verifier = nfs_save_change_attribute(dir); - ret = NFS_PROTO(dir)->lookup(dir, dentry, fhandle, fattr); + ret = NFS_PROTO(dir)->lookup(dir, dentry, name, fhandle, fattr); if (ret < 0) goto out; @@ -1732,8 +1732,8 @@ out: * cached dentry and do a new lookup. 
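/*
 * Editor's illustration (not from the patch): the fs/namei.c hunk above shows
 * the new ->d_revalidate() calling convention threaded through this series:
 * the VFS now hands the hook the parent directory inode and the name being
 * looked up, so filesystems no longer derive them via dget_parent()/->d_parent.
 * The sketch below models that dispatch with stand-in userspace types; it is
 * not VFS code.
 */
#include <stdio.h>

struct demo_inode  { unsigned long ino; };
struct demo_qstr   { const char *name; unsigned int len; };
struct demo_dentry {
	/* Optional per-fs hook, now given the parent inode and a stable name. */
	int (*d_revalidate)(struct demo_inode *dir, const struct demo_qstr *name,
			    struct demo_dentry *dentry, unsigned int flags);
};

static int demo_revalidate(struct demo_inode *dir, const struct demo_qstr *name,
			   struct demo_dentry *dentry, unsigned int flags)
{
	(void)dentry; (void)flags;
	printf("revalidating \"%.*s\" in dir %lu\n", (int)name->len, name->name, dir->ino);
	return 1;	/* 1 = still valid, 0 = invalidate, <0 = error */
}

/* Mirrors the d_revalidate() wrapper in fs/namei.c: no hook means "valid". */
static int d_revalidate(struct demo_inode *dir, const struct demo_qstr *name,
			struct demo_dentry *dentry, unsigned int flags)
{
	return dentry->d_revalidate ? dentry->d_revalidate(dir, name, dentry, flags) : 1;
}

int main(void)
{
	struct demo_inode dir = { .ino = 42 };
	struct demo_qstr name = { .name = "README", .len = 6 };
	struct demo_dentry dentry = { .d_revalidate = demo_revalidate };

	return d_revalidate(&dir, &name, &dentry, 0) == 1 ? 0 : 1;
}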
*/ static int -nfs_do_lookup_revalidate(struct inode *dir, struct dentry *dentry, - unsigned int flags) +nfs_do_lookup_revalidate(struct inode *dir, const struct qstr *name, + struct dentry *dentry, unsigned int flags) { struct inode *inode; int error = 0; @@ -1775,7 +1775,7 @@ nfs_do_lookup_revalidate(struct inode *dir, struct dentry *dentry, if (NFS_STALE(inode)) goto out_bad; - return nfs_lookup_revalidate_dentry(dir, dentry, inode, flags); + return nfs_lookup_revalidate_dentry(dir, name, dentry, inode, flags); out_valid: return nfs_lookup_revalidate_done(dir, dentry, inode, 1); out_bad: @@ -1785,38 +1785,26 @@ out_bad: } static int -__nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags, - int (*reval)(struct inode *, struct dentry *, unsigned int)) +__nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags) { - struct dentry *parent; - struct inode *dir; - int ret; - if (flags & LOOKUP_RCU) { if (dentry->d_fsdata == NFS_FSDATA_BLOCKED) return -ECHILD; - parent = READ_ONCE(dentry->d_parent); - dir = d_inode_rcu(parent); - if (!dir) - return -ECHILD; - ret = reval(dir, dentry, flags); - if (parent != READ_ONCE(dentry->d_parent)) - return -ECHILD; } else { /* Wait for unlink to complete - see unblock_revalidate() */ wait_var_event(&dentry->d_fsdata, smp_load_acquire(&dentry->d_fsdata) != NFS_FSDATA_BLOCKED); - parent = dget_parent(dentry); - ret = reval(d_inode(parent), dentry, flags); - dput(parent); } - return ret; + return 0; } -static int nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags) +static int nfs_lookup_revalidate(struct inode *dir, const struct qstr *name, + struct dentry *dentry, unsigned int flags) { - return __nfs_lookup_revalidate(dentry, flags, nfs_do_lookup_revalidate); + if (__nfs_lookup_revalidate(dentry, flags)) + return -ECHILD; + return nfs_do_lookup_revalidate(dir, name, dentry, flags); } static void block_revalidate(struct dentry *dentry) @@ -1982,7 +1970,8 @@ struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, unsigned in dir_verifier = nfs_save_change_attribute(dir); trace_nfs_lookup_enter(dir, dentry, flags); - error = NFS_PROTO(dir)->lookup(dir, dentry, fhandle, fattr); + error = NFS_PROTO(dir)->lookup(dir, dentry, &dentry->d_name, + fhandle, fattr); if (error == -ENOENT) { if (nfs_server_capable(dir, NFS_CAP_CASE_INSENSITIVE)) dir_verifier = inode_peek_iversion_raw(dir); @@ -2025,7 +2014,8 @@ void nfs_d_prune_case_insensitive_aliases(struct inode *inode) EXPORT_SYMBOL_GPL(nfs_d_prune_case_insensitive_aliases); #if IS_ENABLED(CONFIG_NFS_V4) -static int nfs4_lookup_revalidate(struct dentry *, unsigned int); +static int nfs4_lookup_revalidate(struct inode *, const struct qstr *, + struct dentry *, unsigned int); const struct dentry_operations nfs4_dentry_operations = { .d_revalidate = nfs4_lookup_revalidate, @@ -2214,11 +2204,14 @@ no_open: EXPORT_SYMBOL_GPL(nfs_atomic_open); static int -nfs4_do_lookup_revalidate(struct inode *dir, struct dentry *dentry, - unsigned int flags) +nfs4_lookup_revalidate(struct inode *dir, const struct qstr *name, + struct dentry *dentry, unsigned int flags) { struct inode *inode; + if (__nfs_lookup_revalidate(dentry, flags)) + return -ECHILD; + trace_nfs_lookup_revalidate_enter(dir, dentry, flags); if (!(flags & LOOKUP_OPEN) || (flags & LOOKUP_DIRECTORY)) @@ -2254,16 +2247,10 @@ nfs4_do_lookup_revalidate(struct inode *dir, struct dentry *dentry, reval_dentry: if (flags & LOOKUP_RCU) return -ECHILD; - return nfs_lookup_revalidate_dentry(dir, dentry, inode, flags); + return 
nfs_lookup_revalidate_dentry(dir, name, dentry, inode, flags); full_reval: - return nfs_do_lookup_revalidate(dir, dentry, flags); -} - -static int nfs4_lookup_revalidate(struct dentry *dentry, unsigned int flags) -{ - return __nfs_lookup_revalidate(dentry, flags, - nfs4_do_lookup_revalidate); + return nfs_do_lookup_revalidate(dir, name, dentry, flags); } #endif /* CONFIG_NFSV4 */ @@ -2319,7 +2306,8 @@ nfs_add_or_obtain(struct dentry *dentry, struct nfs_fh *fhandle, d_drop(dentry); if (fhandle->size == 0) { - error = NFS_PROTO(dir)->lookup(dir, dentry, fhandle, fattr); + error = NFS_PROTO(dir)->lookup(dir, dentry, &dentry->d_name, + fhandle, fattr); if (error) goto out_error; } diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c index 2d53574da605..973aed9cc5fe 100644 --- a/fs/nfs/namespace.c +++ b/fs/nfs/namespace.c @@ -308,7 +308,7 @@ int nfs_submount(struct fs_context *fc, struct nfs_server *server) int err; /* Look it up again to get its attributes */ - err = server->nfs_client->rpc_ops->lookup(d_inode(parent), dentry, + err = server->nfs_client->rpc_ops->lookup(d_inode(parent), dentry, &dentry->d_name, ctx->mntfh, ctx->clone_data.fattr); dput(parent); if (err != 0) diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index 7359e1a3bd84..0c3bc98cd999 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -192,7 +192,7 @@ __nfs3_proc_lookup(struct inode *dir, const char *name, size_t len, } static int -nfs3_proc_lookup(struct inode *dir, struct dentry *dentry, +nfs3_proc_lookup(struct inode *dir, struct dentry *dentry, const struct qstr *name, struct nfs_fh *fhandle, struct nfs_fattr *fattr) { unsigned short task_flags = 0; @@ -202,8 +202,7 @@ nfs3_proc_lookup(struct inode *dir, struct dentry *dentry, task_flags |= RPC_TASK_TIMEOUT; dprintk("NFS call lookup %pd2\n", dentry); - return __nfs3_proc_lookup(dir, dentry->d_name.name, - dentry->d_name.len, fhandle, fattr, + return __nfs3_proc_lookup(dir, name->name, name->len, fhandle, fattr, task_flags); } diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index d615d520f8cf..df9669d4ded7 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -4544,15 +4544,15 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr, } static int _nfs4_proc_lookup(struct rpc_clnt *clnt, struct inode *dir, - struct dentry *dentry, struct nfs_fh *fhandle, - struct nfs_fattr *fattr) + struct dentry *dentry, const struct qstr *name, + struct nfs_fh *fhandle, struct nfs_fattr *fattr) { struct nfs_server *server = NFS_SERVER(dir); int status; struct nfs4_lookup_arg args = { .bitmask = server->attr_bitmask, .dir_fh = NFS_FH(dir), - .name = &dentry->d_name, + .name = name, }; struct nfs4_lookup_res res = { .server = server, @@ -4594,17 +4594,16 @@ static void nfs_fixup_secinfo_attributes(struct nfs_fattr *fattr) } static int nfs4_proc_lookup_common(struct rpc_clnt **clnt, struct inode *dir, - struct dentry *dentry, struct nfs_fh *fhandle, - struct nfs_fattr *fattr) + struct dentry *dentry, const struct qstr *name, + struct nfs_fh *fhandle, struct nfs_fattr *fattr) { struct nfs4_exception exception = { .interruptible = true, }; struct rpc_clnt *client = *clnt; - const struct qstr *name = &dentry->d_name; int err; do { - err = _nfs4_proc_lookup(client, dir, dentry, fhandle, fattr); + err = _nfs4_proc_lookup(client, dir, dentry, name, fhandle, fattr); trace_nfs4_lookup(dir, name, err); switch (err) { case -NFS4ERR_BADNAME: @@ -4639,13 +4638,13 @@ out: return err; } -static int nfs4_proc_lookup(struct inode *dir, struct dentry *dentry, +static int 
nfs4_proc_lookup(struct inode *dir, struct dentry *dentry, const struct qstr *name, struct nfs_fh *fhandle, struct nfs_fattr *fattr) { int status; struct rpc_clnt *client = NFS_CLIENT(dir); - status = nfs4_proc_lookup_common(&client, dir, dentry, fhandle, fattr); + status = nfs4_proc_lookup_common(&client, dir, dentry, name, fhandle, fattr); if (client != NFS_CLIENT(dir)) { rpc_shutdown_client(client); nfs_fixup_secinfo_attributes(fattr); @@ -4660,7 +4659,8 @@ nfs4_proc_lookup_mountpoint(struct inode *dir, struct dentry *dentry, struct rpc_clnt *client = NFS_CLIENT(dir); int status; - status = nfs4_proc_lookup_common(&client, dir, dentry, fhandle, fattr); + status = nfs4_proc_lookup_common(&client, dir, dentry, &dentry->d_name, + fhandle, fattr); if (status < 0) return ERR_PTR(status); return (client == NFS_CLIENT(dir)) ? rpc_clone_client(client) : client; diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index 6c09cd090c34..77920a2e3cef 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -153,13 +153,13 @@ nfs_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr, } static int -nfs_proc_lookup(struct inode *dir, struct dentry *dentry, +nfs_proc_lookup(struct inode *dir, struct dentry *dentry, const struct qstr *name, struct nfs_fh *fhandle, struct nfs_fattr *fattr) { struct nfs_diropargs arg = { .fh = NFS_FH(dir), - .name = dentry->d_name.name, - .len = dentry->d_name.len + .name = name->name, + .len = name->len }; struct nfs_diropok res = { .fh = fhandle, diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c index e8015d24a82c..6613b8fcceb0 100644 --- a/fs/nilfs2/inode.c +++ b/fs/nilfs2/inode.c @@ -1186,7 +1186,7 @@ int nilfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, if (size) { if (phys && blkphy << blkbits == phys + size) { /* The current extent goes on */ - size += n << blkbits; + size += (u64)n << blkbits; } else { /* Terminate the current extent */ ret = fiemap_fill_next_extent( @@ -1199,14 +1199,14 @@ int nilfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, flags = FIEMAP_EXTENT_MERGED; logical = blkoff << blkbits; phys = blkphy << blkbits; - size = n << blkbits; + size = (u64)n << blkbits; } } else { /* Start a new extent */ flags = FIEMAP_EXTENT_MERGED; logical = blkoff << blkbits; phys = blkphy << blkbits; - size = n << blkbits; + size = (u64)n << blkbits; } blkoff += n; } diff --git a/fs/ntfs3/attrib.c b/fs/ntfs3/attrib.c index 8d789b017fa9..af94e3737470 100644 --- a/fs/ntfs3/attrib.c +++ b/fs/ntfs3/attrib.c @@ -787,7 +787,8 @@ pack_runs: if (err) goto out; - attr = mi_find_attr(mi, NULL, type, name, name_len, &le->id); + attr = mi_find_attr(ni, mi, NULL, type, name, name_len, + &le->id); if (!attr) { err = -EINVAL; goto bad_inode; @@ -1181,7 +1182,7 @@ repack: goto out; } - attr = mi_find_attr(mi, NULL, ATTR_DATA, NULL, 0, &le->id); + attr = mi_find_attr(ni, mi, NULL, ATTR_DATA, NULL, 0, &le->id); if (!attr) { err = -EINVAL; goto out; @@ -1406,7 +1407,7 @@ int attr_wof_frame_info(struct ntfs_inode *ni, struct ATTRIB *attr, */ if (!attr->non_res) { if (vbo[1] + bytes_per_off > le32_to_cpu(attr->res.data_size)) { - ntfs_inode_err(&ni->vfs_inode, "is corrupted"); + _ntfs_bad_inode(&ni->vfs_inode); return -EINVAL; } addr = resident_data(attr); @@ -1796,7 +1797,7 @@ repack: goto out; } - attr = mi_find_attr(mi, NULL, ATTR_DATA, NULL, 0, + attr = mi_find_attr(ni, mi, NULL, ATTR_DATA, NULL, 0, &le->id); if (!attr) { err = -EINVAL; @@ -2041,8 +2042,8 @@ int attr_collapse_range(struct ntfs_inode *ni, u64 vbo, u64 bytes) } /* Look for required attribute. 
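/*
 * Editor's illustration (not from the patch): the nilfs_fiemap() hunk above
 * casts the 32-bit block count to u64 *before* shifting, because "n << blkbits"
 * is evaluated in 32-bit arithmetic and silently wraps for large extents even
 * though the result is stored in a 64-bit variable.  A minimal standalone
 * reproduction of that pitfall:
 */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t n = 1u << 22;		/* 4M blocks in the current extent */
	unsigned int blkbits = 12;	/* 4 KiB block size                */

	uint64_t truncated = n << blkbits;		/* 32-bit shift wraps to 0  */
	uint64_t correct   = (uint64_t)n << blkbits;	/* 16 GiB, as intended      */

	printf("without cast: %" PRIu64 "\n", truncated);
	printf("with cast:    %" PRIu64 "\n", correct);
	return 0;
}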
*/ - attr = mi_find_attr(mi, NULL, ATTR_DATA, NULL, - 0, &le->id); + attr = mi_find_attr(ni, mi, NULL, ATTR_DATA, + NULL, 0, &le->id); if (!attr) { err = -EINVAL; goto out; @@ -2587,7 +2588,7 @@ int attr_force_nonresident(struct ntfs_inode *ni) attr = ni_find_attr(ni, NULL, &le, ATTR_DATA, NULL, 0, NULL, &mi); if (!attr) { - ntfs_bad_inode(&ni->vfs_inode, "no data attribute"); + _ntfs_bad_inode(&ni->vfs_inode); return -ENOENT; } diff --git a/fs/ntfs3/dir.c b/fs/ntfs3/dir.c index fc6a8aa29e3a..b6da80c69ca6 100644 --- a/fs/ntfs3/dir.c +++ b/fs/ntfs3/dir.c @@ -512,7 +512,7 @@ out: ctx->pos = pos; } else if (err < 0) { if (err == -EINVAL) - ntfs_inode_err(dir, "directory corrupted"); + _ntfs_bad_inode(dir); ctx->pos = eod; } diff --git a/fs/ntfs3/frecord.c b/fs/ntfs3/frecord.c index 8b39d0ce5f28..5df6a0b5add9 100644 --- a/fs/ntfs3/frecord.c +++ b/fs/ntfs3/frecord.c @@ -75,7 +75,7 @@ struct ATTR_STD_INFO *ni_std(struct ntfs_inode *ni) { const struct ATTRIB *attr; - attr = mi_find_attr(&ni->mi, NULL, ATTR_STD, NULL, 0, NULL); + attr = mi_find_attr(ni, &ni->mi, NULL, ATTR_STD, NULL, 0, NULL); return attr ? resident_data_ex(attr, sizeof(struct ATTR_STD_INFO)) : NULL; } @@ -89,7 +89,7 @@ struct ATTR_STD_INFO5 *ni_std5(struct ntfs_inode *ni) { const struct ATTRIB *attr; - attr = mi_find_attr(&ni->mi, NULL, ATTR_STD, NULL, 0, NULL); + attr = mi_find_attr(ni, &ni->mi, NULL, ATTR_STD, NULL, 0, NULL); return attr ? resident_data_ex(attr, sizeof(struct ATTR_STD_INFO5)) : NULL; @@ -148,8 +148,10 @@ int ni_load_mi_ex(struct ntfs_inode *ni, CLST rno, struct mft_inode **mi) goto out; err = mi_get(ni->mi.sbi, rno, &r); - if (err) + if (err) { + _ntfs_bad_inode(&ni->vfs_inode); return err; + } ni_add_mi(ni, r); @@ -201,7 +203,8 @@ struct ATTRIB *ni_find_attr(struct ntfs_inode *ni, struct ATTRIB *attr, *mi = &ni->mi; /* Look for required attribute in primary record. */ - return mi_find_attr(&ni->mi, attr, type, name, name_len, NULL); + return mi_find_attr(ni, &ni->mi, attr, type, name, name_len, + NULL); } /* First look for list entry of required type. */ @@ -217,7 +220,7 @@ struct ATTRIB *ni_find_attr(struct ntfs_inode *ni, struct ATTRIB *attr, return NULL; /* Look for required attribute. */ - attr = mi_find_attr(m, NULL, type, name, name_len, &le->id); + attr = mi_find_attr(ni, m, NULL, type, name, name_len, &le->id); if (!attr) goto out; @@ -238,8 +241,7 @@ struct ATTRIB *ni_find_attr(struct ntfs_inode *ni, struct ATTRIB *attr, return attr; out: - ntfs_inode_err(&ni->vfs_inode, "failed to parse mft record"); - ntfs_set_state(ni->mi.sbi, NTFS_DIRTY_ERROR); + _ntfs_bad_inode(&ni->vfs_inode); return NULL; } @@ -259,7 +261,7 @@ struct ATTRIB *ni_enum_attr_ex(struct ntfs_inode *ni, struct ATTRIB *attr, if (mi) *mi = &ni->mi; /* Enum attributes in primary record. */ - return mi_enum_attr(&ni->mi, attr); + return mi_enum_attr(ni, &ni->mi, attr); } /* Get next list entry. */ @@ -275,7 +277,7 @@ struct ATTRIB *ni_enum_attr_ex(struct ntfs_inode *ni, struct ATTRIB *attr, *mi = mi2; /* Find attribute in loaded record. 
*/ - return rec_find_attr_le(mi2, le2); + return rec_find_attr_le(ni, mi2, le2); } /* @@ -293,7 +295,8 @@ struct ATTRIB *ni_load_attr(struct ntfs_inode *ni, enum ATTR_TYPE type, if (!ni->attr_list.size) { if (pmi) *pmi = &ni->mi; - return mi_find_attr(&ni->mi, NULL, type, name, name_len, NULL); + return mi_find_attr(ni, &ni->mi, NULL, type, name, name_len, + NULL); } le = al_find_ex(ni, NULL, type, name, name_len, NULL); @@ -319,7 +322,7 @@ struct ATTRIB *ni_load_attr(struct ntfs_inode *ni, enum ATTR_TYPE type, if (pmi) *pmi = mi; - attr = mi_find_attr(mi, NULL, type, name, name_len, &le->id); + attr = mi_find_attr(ni, mi, NULL, type, name, name_len, &le->id); if (!attr) return NULL; @@ -330,6 +333,7 @@ struct ATTRIB *ni_load_attr(struct ntfs_inode *ni, enum ATTR_TYPE type, vcn <= le64_to_cpu(attr->nres.evcn)) return attr; + _ntfs_bad_inode(&ni->vfs_inode); return NULL; } @@ -398,7 +402,8 @@ int ni_remove_attr(struct ntfs_inode *ni, enum ATTR_TYPE type, int diff; if (base_only || type == ATTR_LIST || !ni->attr_list.size) { - attr = mi_find_attr(&ni->mi, NULL, type, name, name_len, id); + attr = mi_find_attr(ni, &ni->mi, NULL, type, name, name_len, + id); if (!attr) return -ENOENT; @@ -437,7 +442,7 @@ next_le2: al_remove_le(ni, le); - attr = mi_find_attr(mi, NULL, type, name, name_len, id); + attr = mi_find_attr(ni, mi, NULL, type, name, name_len, id); if (!attr) return -ENOENT; @@ -485,7 +490,7 @@ ni_ins_new_attr(struct ntfs_inode *ni, struct mft_inode *mi, name = le->name; } - attr = mi_insert_attr(mi, type, name, name_len, asize, name_off); + attr = mi_insert_attr(ni, mi, type, name, name_len, asize, name_off); if (!attr) { if (le_added) al_remove_le(ni, le); @@ -673,7 +678,7 @@ static int ni_try_remove_attr_list(struct ntfs_inode *ni) if (err) return err; - attr_list = mi_find_attr(&ni->mi, NULL, ATTR_LIST, NULL, 0, NULL); + attr_list = mi_find_attr(ni, &ni->mi, NULL, ATTR_LIST, NULL, 0, NULL); if (!attr_list) return 0; @@ -695,7 +700,7 @@ static int ni_try_remove_attr_list(struct ntfs_inode *ni) if (!mi) return 0; - attr = mi_find_attr(mi, NULL, le->type, le_name(le), + attr = mi_find_attr(ni, mi, NULL, le->type, le_name(le), le->name_len, &le->id); if (!attr) return 0; @@ -731,7 +736,7 @@ static int ni_try_remove_attr_list(struct ntfs_inode *ni) goto out; } - attr = mi_find_attr(mi, NULL, le->type, le_name(le), + attr = mi_find_attr(ni, mi, NULL, le->type, le_name(le), le->name_len, &le->id); if (!attr) { /* Should never happened, 'cause already checked. */ @@ -740,7 +745,7 @@ static int ni_try_remove_attr_list(struct ntfs_inode *ni) asize = le32_to_cpu(attr->size); /* Insert into primary record. 
*/ - attr_ins = mi_insert_attr(&ni->mi, le->type, le_name(le), + attr_ins = mi_insert_attr(ni, &ni->mi, le->type, le_name(le), le->name_len, asize, le16_to_cpu(attr->name_off)); if (!attr_ins) { @@ -768,7 +773,7 @@ static int ni_try_remove_attr_list(struct ntfs_inode *ni) if (!mi) continue; - attr = mi_find_attr(mi, NULL, le->type, le_name(le), + attr = mi_find_attr(ni, mi, NULL, le->type, le_name(le), le->name_len, &le->id); if (!attr) continue; @@ -831,7 +836,7 @@ int ni_create_attr_list(struct ntfs_inode *ni) free_b = 0; attr = NULL; - for (; (attr = mi_enum_attr(&ni->mi, attr)); le = Add2Ptr(le, sz)) { + for (; (attr = mi_enum_attr(ni, &ni->mi, attr)); le = Add2Ptr(le, sz)) { sz = le_size(attr->name_len); le->type = attr->type; le->size = cpu_to_le16(sz); @@ -886,7 +891,7 @@ int ni_create_attr_list(struct ntfs_inode *ni) u32 asize = le32_to_cpu(b->size); u16 name_off = le16_to_cpu(b->name_off); - attr = mi_insert_attr(mi, b->type, Add2Ptr(b, name_off), + attr = mi_insert_attr(ni, mi, b->type, Add2Ptr(b, name_off), b->name_len, asize, name_off); if (!attr) goto out; @@ -909,7 +914,7 @@ int ni_create_attr_list(struct ntfs_inode *ni) goto out; } - attr = mi_insert_attr(&ni->mi, ATTR_LIST, NULL, 0, + attr = mi_insert_attr(ni, &ni->mi, ATTR_LIST, NULL, 0, lsize + SIZEOF_RESIDENT, SIZEOF_RESIDENT); if (!attr) goto out; @@ -993,13 +998,13 @@ static int ni_ins_attr_ext(struct ntfs_inode *ni, struct ATTR_LIST_ENTRY *le, mi = rb_entry(node, struct mft_inode, node); if (is_mft_data && - (mi_enum_attr(mi, NULL) || + (mi_enum_attr(ni, mi, NULL) || vbo <= ((u64)mi->rno << sbi->record_bits))) { /* We can't accept this record 'cause MFT's bootstrapping. */ continue; } if (is_mft && - mi_find_attr(mi, NULL, ATTR_DATA, NULL, 0, NULL)) { + mi_find_attr(ni, mi, NULL, ATTR_DATA, NULL, 0, NULL)) { /* * This child record already has a ATTR_DATA. * So it can't accept any other records. @@ -1008,7 +1013,7 @@ static int ni_ins_attr_ext(struct ntfs_inode *ni, struct ATTR_LIST_ENTRY *le, } if ((type != ATTR_NAME || name_len) && - mi_find_attr(mi, NULL, type, name, name_len, NULL)) { + mi_find_attr(ni, mi, NULL, type, name, name_len, NULL)) { /* Only indexed attributes can share same record. */ continue; } @@ -1157,7 +1162,7 @@ static int ni_insert_attr(struct ntfs_inode *ni, enum ATTR_TYPE type, /* Estimate the result of moving all possible attributes away. */ attr = NULL; - while ((attr = mi_enum_attr(&ni->mi, attr))) { + while ((attr = mi_enum_attr(ni, &ni->mi, attr))) { if (attr->type == ATTR_STD) continue; if (attr->type == ATTR_LIST) @@ -1175,7 +1180,7 @@ static int ni_insert_attr(struct ntfs_inode *ni, enum ATTR_TYPE type, attr = NULL; for (;;) { - attr = mi_enum_attr(&ni->mi, attr); + attr = mi_enum_attr(ni, &ni->mi, attr); if (!attr) { /* We should never be here 'cause we have already check this case. */ err = -EINVAL; @@ -1259,7 +1264,7 @@ static int ni_expand_mft_list(struct ntfs_inode *ni) for (node = rb_first(&ni->mi_tree); node; node = rb_next(node)) { mi = rb_entry(node, struct mft_inode, node); - attr = mi_enum_attr(mi, NULL); + attr = mi_enum_attr(ni, mi, NULL); if (!attr) { mft_min = mi->rno; @@ -1280,7 +1285,7 @@ static int ni_expand_mft_list(struct ntfs_inode *ni) ni_remove_mi(ni, mi_new); } - attr = mi_find_attr(&ni->mi, NULL, ATTR_DATA, NULL, 0, NULL); + attr = mi_find_attr(ni, &ni->mi, NULL, ATTR_DATA, NULL, 0, NULL); if (!attr) { err = -EINVAL; goto out; @@ -1397,7 +1402,7 @@ int ni_expand_list(struct ntfs_inode *ni) continue; /* Find attribute in primary record. 
*/ - attr = rec_find_attr_le(&ni->mi, le); + attr = rec_find_attr_le(ni, &ni->mi, le); if (!attr) { err = -EINVAL; goto out; @@ -1604,8 +1609,8 @@ int ni_delete_all(struct ntfs_inode *ni) roff = le16_to_cpu(attr->nres.run_off); if (roff > asize) { - _ntfs_bad_inode(&ni->vfs_inode); - return -EINVAL; + /* ni_enum_attr_ex checks this case. */ + continue; } /* run==1 means unpack and deallocate. */ @@ -2726,9 +2731,10 @@ int ni_write_frame(struct ntfs_inode *ni, struct page **pages, { int err; struct ntfs_sb_info *sbi = ni->mi.sbi; + struct folio *folio = page_folio(pages[0]); u8 frame_bits = NTFS_LZNT_CUNIT + sbi->cluster_bits; u32 frame_size = sbi->cluster_size << NTFS_LZNT_CUNIT; - u64 frame_vbo = (u64)pages[0]->index << PAGE_SHIFT; + u64 frame_vbo = folio_pos(folio); CLST frame = frame_vbo >> frame_bits; char *frame_ondisk = NULL; struct page **pages_disk = NULL; @@ -3343,7 +3349,7 @@ int ni_write_inode(struct inode *inode, int sync, const char *hint) if (!mi->dirty) continue; - is_empty = !mi_enum_attr(mi, NULL); + is_empty = !mi_enum_attr(ni, mi, NULL); if (is_empty) clear_rec_inuse(mi->mrec); diff --git a/fs/ntfs3/fsntfs.c b/fs/ntfs3/fsntfs.c index 03471bc9371c..938d351ebac7 100644 --- a/fs/ntfs3/fsntfs.c +++ b/fs/ntfs3/fsntfs.c @@ -908,7 +908,11 @@ void ntfs_bad_inode(struct inode *inode, const char *hint) ntfs_inode_err(inode, "%s", hint); make_bad_inode(inode); - ntfs_set_state(sbi, NTFS_DIRTY_ERROR); + /* Avoid recursion if bad inode is $Volume. */ + if (inode->i_ino != MFT_REC_VOL && + !(sbi->flags & NTFS_FLAGS_LOG_REPLAYING)) { + ntfs_set_state(sbi, NTFS_DIRTY_ERROR); + } } /* diff --git a/fs/ntfs3/index.c b/fs/ntfs3/index.c index 9089c58a005c..7eb9fae22f8d 100644 --- a/fs/ntfs3/index.c +++ b/fs/ntfs3/index.c @@ -1094,8 +1094,7 @@ int indx_read(struct ntfs_index *indx, struct ntfs_inode *ni, CLST vbn, ok: if (!index_buf_check(ib, bytes, &vbn)) { - ntfs_inode_err(&ni->vfs_inode, "directory corrupted"); - ntfs_set_state(ni->mi.sbi, NTFS_DIRTY_ERROR); + _ntfs_bad_inode(&ni->vfs_inode); err = -EINVAL; goto out; } @@ -1117,8 +1116,7 @@ ok: out: if (err == -E_NTFS_CORRUPT) { - ntfs_inode_err(&ni->vfs_inode, "directory corrupted"); - ntfs_set_state(ni->mi.sbi, NTFS_DIRTY_ERROR); + _ntfs_bad_inode(&ni->vfs_inode); err = -EINVAL; } diff --git a/fs/ntfs3/inode.c b/fs/ntfs3/inode.c index be04d2845bb7..a1e11228dafd 100644 --- a/fs/ntfs3/inode.c +++ b/fs/ntfs3/inode.c @@ -410,6 +410,9 @@ end_enum: if (!std5) goto out; + if (is_bad_inode(inode)) + goto out; + if (!is_match && name) { err = -ENOENT; goto out; diff --git a/fs/ntfs3/ntfs_fs.h b/fs/ntfs3/ntfs_fs.h index cd8e8374bb5a..382820464dee 100644 --- a/fs/ntfs3/ntfs_fs.h +++ b/fs/ntfs3/ntfs_fs.h @@ -745,23 +745,24 @@ int mi_get(struct ntfs_sb_info *sbi, CLST rno, struct mft_inode **mi); void mi_put(struct mft_inode *mi); int mi_init(struct mft_inode *mi, struct ntfs_sb_info *sbi, CLST rno); int mi_read(struct mft_inode *mi, bool is_mft); -struct ATTRIB *mi_enum_attr(struct mft_inode *mi, struct ATTRIB *attr); -// TODO: id? 
-struct ATTRIB *mi_find_attr(struct mft_inode *mi, struct ATTRIB *attr, - enum ATTR_TYPE type, const __le16 *name, - u8 name_len, const __le16 *id); -static inline struct ATTRIB *rec_find_attr_le(struct mft_inode *rec, +struct ATTRIB *mi_enum_attr(struct ntfs_inode *ni, struct mft_inode *mi, + struct ATTRIB *attr); +struct ATTRIB *mi_find_attr(struct ntfs_inode *ni, struct mft_inode *mi, + struct ATTRIB *attr, enum ATTR_TYPE type, + const __le16 *name, u8 name_len, const __le16 *id); +static inline struct ATTRIB *rec_find_attr_le(struct ntfs_inode *ni, + struct mft_inode *rec, struct ATTR_LIST_ENTRY *le) { - return mi_find_attr(rec, NULL, le->type, le_name(le), le->name_len, + return mi_find_attr(ni, rec, NULL, le->type, le_name(le), le->name_len, &le->id); } int mi_write(struct mft_inode *mi, int wait); int mi_format_new(struct mft_inode *mi, struct ntfs_sb_info *sbi, CLST rno, __le16 flags, bool is_mft); -struct ATTRIB *mi_insert_attr(struct mft_inode *mi, enum ATTR_TYPE type, - const __le16 *name, u8 name_len, u32 asize, - u16 name_off); +struct ATTRIB *mi_insert_attr(struct ntfs_inode *ni, struct mft_inode *mi, + enum ATTR_TYPE type, const __le16 *name, + u8 name_len, u32 asize, u16 name_off); bool mi_remove_attr(struct ntfs_inode *ni, struct mft_inode *mi, struct ATTRIB *attr); diff --git a/fs/ntfs3/record.c b/fs/ntfs3/record.c index 61d53d39f3b9..714c7ecedca8 100644 --- a/fs/ntfs3/record.c +++ b/fs/ntfs3/record.c @@ -31,7 +31,7 @@ static inline int compare_attr(const struct ATTRIB *left, enum ATTR_TYPE type, * * Return: Unused attribute id that is less than mrec->next_attr_id. */ -static __le16 mi_new_attt_id(struct mft_inode *mi) +static __le16 mi_new_attt_id(struct ntfs_inode *ni, struct mft_inode *mi) { u16 free_id, max_id, t16; struct MFT_REC *rec = mi->mrec; @@ -52,7 +52,7 @@ static __le16 mi_new_attt_id(struct mft_inode *mi) attr = NULL; for (;;) { - attr = mi_enum_attr(mi, attr); + attr = mi_enum_attr(ni, mi, attr); if (!attr) { rec->next_attr_id = cpu_to_le16(max_id + 1); mi->dirty = true; @@ -195,7 +195,8 @@ out: * NOTE: mi->mrec - memory of size sbi->record_size * here we sure that mi->mrec->total == sbi->record_size (see mi_read) */ -struct ATTRIB *mi_enum_attr(struct mft_inode *mi, struct ATTRIB *attr) +struct ATTRIB *mi_enum_attr(struct ntfs_inode *ni, struct mft_inode *mi, + struct ATTRIB *attr) { const struct MFT_REC *rec = mi->mrec; u32 used = le32_to_cpu(rec->used); @@ -209,11 +210,11 @@ struct ATTRIB *mi_enum_attr(struct mft_inode *mi, struct ATTRIB *attr) off = le16_to_cpu(rec->attr_off); if (used > total) - return NULL; + goto out; if (off >= used || off < MFTRECORD_FIXUP_OFFSET_1 || !IS_ALIGNED(off, 8)) { - return NULL; + goto out; } /* Skip non-resident records. */ @@ -243,7 +244,7 @@ struct ATTRIB *mi_enum_attr(struct mft_inode *mi, struct ATTRIB *attr) */ if (off + 8 > used) { static_assert(ALIGN(sizeof(enum ATTR_TYPE), 8) == 8); - return NULL; + goto out; } if (attr->type == ATTR_END) { @@ -254,112 +255,116 @@ struct ATTRIB *mi_enum_attr(struct mft_inode *mi, struct ATTRIB *attr) /* 0x100 is last known attribute for now. */ t32 = le32_to_cpu(attr->type); if (!t32 || (t32 & 0xf) || (t32 > 0x100)) - return NULL; + goto out; /* attributes in record must be ordered by type */ if (t32 < prev_type) - return NULL; + goto out; asize = le32_to_cpu(attr->size); if (!IS_ALIGNED(asize, 8)) - return NULL; + goto out; /* Check overflow and boundary. */ if (off + asize < off || off + asize > used) - return NULL; + goto out; /* Can we use the field attr->non_res. 
*/ if (off + 9 > used) - return NULL; + goto out; /* Check size of attribute. */ if (!attr->non_res) { /* Check resident fields. */ if (asize < SIZEOF_RESIDENT) - return NULL; + goto out; t16 = le16_to_cpu(attr->res.data_off); if (t16 > asize) - return NULL; + goto out; if (le32_to_cpu(attr->res.data_size) > asize - t16) - return NULL; + goto out; t32 = sizeof(short) * attr->name_len; if (t32 && le16_to_cpu(attr->name_off) + t32 > t16) - return NULL; + goto out; return attr; } /* Check nonresident fields. */ if (attr->non_res != 1) - return NULL; + goto out; /* Can we use memory including attr->nres.valid_size? */ if (asize < SIZEOF_NONRESIDENT) - return NULL; + goto out; t16 = le16_to_cpu(attr->nres.run_off); if (t16 > asize) - return NULL; + goto out; t32 = sizeof(short) * attr->name_len; if (t32 && le16_to_cpu(attr->name_off) + t32 > t16) - return NULL; + goto out; /* Check start/end vcn. */ if (le64_to_cpu(attr->nres.svcn) > le64_to_cpu(attr->nres.evcn) + 1) - return NULL; + goto out; data_size = le64_to_cpu(attr->nres.data_size); if (le64_to_cpu(attr->nres.valid_size) > data_size) - return NULL; + goto out; alloc_size = le64_to_cpu(attr->nres.alloc_size); if (data_size > alloc_size) - return NULL; + goto out; t32 = mi->sbi->cluster_mask; if (alloc_size & t32) - return NULL; + goto out; if (!attr->nres.svcn && is_attr_ext(attr)) { /* First segment of sparse/compressed attribute */ /* Can we use memory including attr->nres.total_size? */ if (asize < SIZEOF_NONRESIDENT_EX) - return NULL; + goto out; tot_size = le64_to_cpu(attr->nres.total_size); if (tot_size & t32) - return NULL; + goto out; if (tot_size > alloc_size) - return NULL; + goto out; } else { if (attr->nres.c_unit) - return NULL; + goto out; if (alloc_size > mi->sbi->volume.size) - return NULL; + goto out; } return attr; + +out: + _ntfs_bad_inode(&ni->vfs_inode); + return NULL; } /* * mi_find_attr - Find the attribute by type and name and id. */ -struct ATTRIB *mi_find_attr(struct mft_inode *mi, struct ATTRIB *attr, - enum ATTR_TYPE type, const __le16 *name, - u8 name_len, const __le16 *id) +struct ATTRIB *mi_find_attr(struct ntfs_inode *ni, struct mft_inode *mi, + struct ATTRIB *attr, enum ATTR_TYPE type, + const __le16 *name, u8 name_len, const __le16 *id) { u32 type_in = le32_to_cpu(type); u32 atype; next_attr: - attr = mi_enum_attr(mi, attr); + attr = mi_enum_attr(ni, mi, attr); if (!attr) return NULL; @@ -467,9 +472,9 @@ int mi_format_new(struct mft_inode *mi, struct ntfs_sb_info *sbi, CLST rno, * * Return: Not full constructed attribute or NULL if not possible to create. */ -struct ATTRIB *mi_insert_attr(struct mft_inode *mi, enum ATTR_TYPE type, - const __le16 *name, u8 name_len, u32 asize, - u16 name_off) +struct ATTRIB *mi_insert_attr(struct ntfs_inode *ni, struct mft_inode *mi, + enum ATTR_TYPE type, const __le16 *name, + u8 name_len, u32 asize, u16 name_off) { size_t tail; struct ATTRIB *attr; @@ -488,7 +493,7 @@ struct ATTRIB *mi_insert_attr(struct mft_inode *mi, enum ATTR_TYPE type, * at which we should insert it. 
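The ntfs3 hunks above thread the owning struct ntfs_inode through mi_find_attr(), mi_enum_attr() and mi_insert_attr(), so a malformed MFT record now marks the inode bad at the point of detection instead of at each caller. A hedged sketch of the resulting calling pattern (assumes the ntfs3 internal headers; count_attrs is illustrative and not part of the patch):

#include "ntfs_fs.h"

/* Count the attributes of one MFT record with the new three-argument
 * enumerator. On a corrupt record mi_enum_attr() calls
 * _ntfs_bad_inode(&ni->vfs_inode) itself and returns NULL, so callers
 * simply see the end of the iteration. */
static unsigned int count_attrs(struct ntfs_inode *ni, struct mft_inode *mi)
{
	struct ATTRIB *attr = NULL;
	unsigned int n = 0;

	while ((attr = mi_enum_attr(ni, mi, attr)))
		n++;

	return n;
}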
*/ attr = NULL; - while ((attr = mi_enum_attr(mi, attr))) { + while ((attr = mi_enum_attr(ni, mi, attr))) { int diff = compare_attr(attr, type, name, name_len, upcase); if (diff < 0) @@ -508,7 +513,7 @@ struct ATTRIB *mi_insert_attr(struct mft_inode *mi, enum ATTR_TYPE type, tail = used - PtrOffset(rec, attr); } - id = mi_new_attt_id(mi); + id = mi_new_attt_id(ni, mi); memmove(Add2Ptr(attr, asize), attr, tail); memset(attr, 0, asize); diff --git a/fs/ocfs2/dcache.c b/fs/ocfs2/dcache.c index a9b8688aaf30..1873bbbb7e5b 100644 --- a/fs/ocfs2/dcache.c +++ b/fs/ocfs2/dcache.c @@ -32,7 +32,8 @@ void ocfs2_dentry_attach_gen(struct dentry *dentry) } -static int ocfs2_dentry_revalidate(struct dentry *dentry, unsigned int flags) +static int ocfs2_dentry_revalidate(struct inode *dir, const struct qstr *name, + struct dentry *dentry, unsigned int flags) { struct inode *inode; int ret = 0; /* if all else fails, just return false */ @@ -44,8 +45,7 @@ static int ocfs2_dentry_revalidate(struct dentry *dentry, unsigned int flags) inode = d_inode(dentry); osb = OCFS2_SB(dentry->d_sb); - trace_ocfs2_dentry_revalidate(dentry, dentry->d_name.len, - dentry->d_name.name); + trace_ocfs2_dentry_revalidate(dentry, name->len, name->name); /* For a negative dentry - * check the generation number of the parent and compare with the @@ -53,12 +53,8 @@ static int ocfs2_dentry_revalidate(struct dentry *dentry, unsigned int flags) */ if (inode == NULL) { unsigned long gen = (unsigned long) dentry->d_fsdata; - unsigned long pgen; - spin_lock(&dentry->d_lock); - pgen = OCFS2_I(d_inode(dentry->d_parent))->ip_dir_lock_gen; - spin_unlock(&dentry->d_lock); - trace_ocfs2_dentry_revalidate_negative(dentry->d_name.len, - dentry->d_name.name, + unsigned long pgen = OCFS2_I(dir)->ip_dir_lock_gen; + trace_ocfs2_dentry_revalidate_negative(name->len, name->name, pgen, gen); if (gen != pgen) goto bail; diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index e0b91dbaa0ac..8bb5022f3082 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -2285,7 +2285,7 @@ static int ocfs2_verify_volume(struct ocfs2_dinode *di, mlog(ML_ERROR, "found superblock with incorrect block " "size bits: found %u, should be 9, 10, 11, or 12\n", blksz_bits); - } else if ((1 << le32_to_cpu(blksz_bits)) != blksz) { + } else if ((1 << blksz_bits) != blksz) { mlog(ML_ERROR, "found superblock with incorrect block " "size: found %u, should be %u\n", 1 << blksz_bits, blksz); } else if (le16_to_cpu(di->id2.i_super.s_major_rev_level) != diff --git a/fs/orangefs/dcache.c b/fs/orangefs/dcache.c index 395a00ed8ac7..a19d1ad705db 100644 --- a/fs/orangefs/dcache.c +++ b/fs/orangefs/dcache.c @@ -13,10 +13,9 @@ #include "orangefs-kernel.h" /* Returns 1 if dentry can still be trusted, else 0. 
*/ -static int orangefs_revalidate_lookup(struct dentry *dentry) +static int orangefs_revalidate_lookup(struct inode *parent_inode, const struct qstr *name, + struct dentry *dentry) { - struct dentry *parent_dentry = dget_parent(dentry); - struct inode *parent_inode = parent_dentry->d_inode; struct orangefs_inode_s *parent = ORANGEFS_I(parent_inode); struct inode *inode = dentry->d_inode; struct orangefs_kernel_op_s *new_op; @@ -26,14 +25,14 @@ static int orangefs_revalidate_lookup(struct dentry *dentry) gossip_debug(GOSSIP_DCACHE_DEBUG, "%s: attempting lookup.\n", __func__); new_op = op_alloc(ORANGEFS_VFS_OP_LOOKUP); - if (!new_op) { - ret = -ENOMEM; - goto out_put_parent; - } + if (!new_op) + return -ENOMEM; new_op->upcall.req.lookup.sym_follow = ORANGEFS_LOOKUP_LINK_NO_FOLLOW; new_op->upcall.req.lookup.parent_refn = parent->refn; - strscpy(new_op->upcall.req.lookup.d_name, dentry->d_name.name); + /* op_alloc() leaves ->upcall zeroed */ + memcpy(new_op->upcall.req.lookup.d_name, name->name, + min(name->len, ORANGEFS_NAME_MAX - 1)); gossip_debug(GOSSIP_DCACHE_DEBUG, "%s:%s:%d interrupt flag [%d]\n", @@ -78,8 +77,6 @@ static int orangefs_revalidate_lookup(struct dentry *dentry) ret = 1; out_release_op: op_release(new_op); -out_put_parent: - dput(parent_dentry); return ret; out_drop: gossip_debug(GOSSIP_DCACHE_DEBUG, "%s:%s:%d revalidate failed\n", @@ -92,7 +89,8 @@ out_drop: * * Should return 1 if dentry can still be trusted, else 0. */ -static int orangefs_d_revalidate(struct dentry *dentry, unsigned int flags) +static int orangefs_d_revalidate(struct inode *dir, const struct qstr *name, + struct dentry *dentry, unsigned int flags) { int ret; unsigned long time = (unsigned long) dentry->d_fsdata; @@ -114,7 +112,7 @@ static int orangefs_d_revalidate(struct dentry *dentry, unsigned int flags) * If this passes, the positive dentry still exists or the negative * dentry still does not exist. */ - if (!orangefs_revalidate_lookup(dentry)) + if (!orangefs_revalidate_lookup(dir, name, dentry)) return 0; /* We do not need to continue with negative dentries. */ diff --git a/fs/orangefs/orangefs-debugfs.c b/fs/orangefs/orangefs-debugfs.c index 9729f071c5aa..f52073022fae 100644 --- a/fs/orangefs/orangefs-debugfs.c +++ b/fs/orangefs/orangefs-debugfs.c @@ -392,9 +392,9 @@ static ssize_t orangefs_debug_write(struct file *file, * Thwart users who try to jamb a ridiculous number * of bytes into the debug file... 
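The orangefs change above replaces strscpy() on dentry->d_name with a length-bounded memcpy() of the qstr handed in by the VFS, relying on the pre-zeroed upcall buffer for NUL termination: a qstr carries an explicit length and its ->name is not guaranteed to be NUL-terminated in every caller. A hedged sketch of the same pattern with an illustrative buffer size (EXAMPLE_NAME_MAX is hypothetical and stands in for ORANGEFS_NAME_MAX):

#include <linux/string.h>
#include <linux/dcache.h>
#include <linux/minmax.h>

#define EXAMPLE_NAME_MAX 256	/* hypothetical buffer size */

/* Copy a length-delimited qstr into a fixed buffer; zeroing first keeps
 * the result NUL-terminated even when name->name is not. */
static void copy_qstr(char dst[EXAMPLE_NAME_MAX], const struct qstr *name)
{
	memset(dst, 0, EXAMPLE_NAME_MAX);
	memcpy(dst, name->name, min_t(size_t, name->len, EXAMPLE_NAME_MAX - 1));
}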
*/ - if (count > ORANGEFS_MAX_DEBUG_STRING_LEN + 1) { + if (count > ORANGEFS_MAX_DEBUG_STRING_LEN) { silly = count; - count = ORANGEFS_MAX_DEBUG_STRING_LEN + 1; + count = ORANGEFS_MAX_DEBUG_STRING_LEN; } buf = kzalloc(ORANGEFS_MAX_DEBUG_STRING_LEN, GFP_KERNEL); diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index fe511192f83c..86ae6f6da36b 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -91,7 +91,24 @@ static int ovl_revalidate_real(struct dentry *d, unsigned int flags, bool weak) if (d->d_flags & DCACHE_OP_WEAK_REVALIDATE) ret = d->d_op->d_weak_revalidate(d, flags); } else if (d->d_flags & DCACHE_OP_REVALIDATE) { - ret = d->d_op->d_revalidate(d, flags); + struct dentry *parent; + struct inode *dir; + struct name_snapshot n; + + if (flags & LOOKUP_RCU) { + parent = READ_ONCE(d->d_parent); + dir = d_inode_rcu(parent); + if (!dir) + return -ECHILD; + } else { + parent = dget_parent(d); + dir = d_inode(parent); + } + take_dentry_name_snapshot(&n, d); + ret = d->d_op->d_revalidate(dir, &n.name, d, flags); + release_dentry_name_snapshot(&n); + if (!(flags & LOOKUP_RCU)) + dput(parent); if (!ret) { if (!(flags & LOOKUP_RCU)) d_invalidate(d); @@ -127,7 +144,8 @@ static int ovl_dentry_revalidate_common(struct dentry *dentry, return ret; } -static int ovl_dentry_revalidate(struct dentry *dentry, unsigned int flags) +static int ovl_dentry_revalidate(struct inode *dir, const struct qstr *name, + struct dentry *dentry, unsigned int flags) { return ovl_dentry_revalidate_common(dentry, flags, false); } diff --git a/fs/proc/base.c b/fs/proc/base.c index a50b222a5917..cd89e956c322 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -2058,7 +2058,8 @@ void pid_update_inode(struct task_struct *task, struct inode *inode) * performed a setuid(), etc. 
* */ -static int pid_revalidate(struct dentry *dentry, unsigned int flags) +static int pid_revalidate(struct inode *dir, const struct qstr *name, + struct dentry *dentry, unsigned int flags) { struct inode *inode; struct task_struct *task; @@ -2191,7 +2192,8 @@ static int dname_to_vma_addr(struct dentry *dentry, return 0; } -static int map_files_d_revalidate(struct dentry *dentry, unsigned int flags) +static int map_files_d_revalidate(struct inode *dir, const struct qstr *name, + struct dentry *dentry, unsigned int flags) { unsigned long vm_start, vm_end; bool exact_vma_exists = false; diff --git a/fs/proc/fd.c b/fs/proc/fd.c index 24baf23e864f..37aa778d1af7 100644 --- a/fs/proc/fd.c +++ b/fs/proc/fd.c @@ -140,7 +140,8 @@ static void tid_fd_update_inode(struct task_struct *task, struct inode *inode, security_task_to_inode(task, inode); } -static int tid_fd_revalidate(struct dentry *dentry, unsigned int flags) +static int tid_fd_revalidate(struct inode *dir, const struct qstr *name, + struct dentry *dentry, unsigned int flags) { struct task_struct *task; struct inode *inode; diff --git a/fs/proc/generic.c b/fs/proc/generic.c index dbe82cf23ee4..8ec90826a49e 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -216,7 +216,8 @@ void proc_free_inum(unsigned int inum) ida_free(&proc_inum_ida, inum - PROC_DYNAMIC_FIRST); } -static int proc_misc_d_revalidate(struct dentry *dentry, unsigned int flags) +static int proc_misc_d_revalidate(struct inode *dir, const struct qstr *name, + struct dentry *dentry, unsigned int flags) { if (flags & LOOKUP_RCU) return -ECHILD; @@ -343,7 +344,8 @@ static const struct file_operations proc_dir_operations = { .iterate_shared = proc_readdir, }; -static int proc_net_d_revalidate(struct dentry *dentry, unsigned int flags) +static int proc_net_d_revalidate(struct inode *dir, const struct qstr *name, + struct dentry *dentry, unsigned int flags) { return 0; } diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 27a283d85a6e..cc9d74a06ff0 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -884,7 +884,8 @@ static const struct inode_operations proc_sys_dir_operations = { .getattr = proc_sys_getattr, }; -static int proc_sys_revalidate(struct dentry *dentry, unsigned int flags) +static int proc_sys_revalidate(struct inode *dir, const struct qstr *name, + struct dentry *dentry, unsigned int flags) { if (flags & LOOKUP_RCU) return -ECHILD; diff --git a/fs/smb/client/dir.c b/fs/smb/client/dir.c index 1822493dd084..d1e95632ac54 100644 --- a/fs/smb/client/dir.c +++ b/fs/smb/client/dir.c @@ -737,7 +737,8 @@ again: } static int -cifs_d_revalidate(struct dentry *direntry, unsigned int flags) +cifs_d_revalidate(struct inode *dir, const struct qstr *name, + struct dentry *direntry, unsigned int flags) { struct inode *inode; int rc; diff --git a/fs/tracefs/inode.c b/fs/tracefs/inode.c index cfc614c638da..53214499e384 100644 --- a/fs/tracefs/inode.c +++ b/fs/tracefs/inode.c @@ -457,7 +457,8 @@ static void tracefs_d_release(struct dentry *dentry) eventfs_d_release(dentry); } -static int tracefs_d_revalidate(struct dentry *dentry, unsigned int flags) +static int tracefs_d_revalidate(struct inode *inode, const struct qstr *name, + struct dentry *dentry, unsigned int flags) { struct eventfs_inode *ei = dentry->d_fsdata; diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c index 5cc69beaa62e..b01f382ce8db 100644 --- a/fs/ubifs/debug.c +++ b/fs/ubifs/debug.c @@ -863,7 +863,6 @@ void ubifs_dump_leb(const struct ubifs_info *c, int lnum) out: vfree(buf); - return; } 
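The hunks above (ocfs2, orangefs, overlayfs, proc, cifs, tracefs) all adapt to the new d_revalidate prototype, which receives the parent directory inode and the name being validated instead of deriving them from the dentry. A hedged sketch of an implementation against the new signature (the example_* names are illustrative and not from any of the patched filesystems):

#include <linux/dcache.h>
#include <linux/namei.h>
#include <linux/fs.h>

/* Hypothetical backend query: does "name" still resolve to "inode"
 * inside "dir"? Stands in for a network round trip or table lookup. */
static bool example_backend_matches(struct inode *dir, const struct qstr *name,
				    struct inode *inode)
{
	return true;
}

static int example_d_revalidate(struct inode *dir, const struct qstr *name,
				struct dentry *dentry, unsigned int flags)
{
	/* The backend query may sleep, so bail out of RCU-walk. */
	if (flags & LOOKUP_RCU)
		return -ECHILD;

	/* Negative dentries: nothing to verify in this sketch. */
	if (d_really_is_negative(dentry))
		return 1;

	return example_backend_matches(dir, name, d_inode(dentry)) ? 1 : 0;
}

static const struct dentry_operations example_dentry_ops = {
	.d_revalidate	= example_d_revalidate,
};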
void ubifs_dump_znode(const struct ubifs_info *c, @@ -946,16 +945,20 @@ void ubifs_dump_tnc(struct ubifs_info *c) pr_err("\n"); pr_err("(pid %d) start dumping TNC tree\n", current->pid); - znode = ubifs_tnc_levelorder_next(c, c->zroot.znode, NULL); - level = znode->level; - pr_err("== Level %d ==\n", level); - while (znode) { - if (level != znode->level) { - level = znode->level; - pr_err("== Level %d ==\n", level); + if (c->zroot.znode) { + znode = ubifs_tnc_levelorder_next(c, c->zroot.znode, NULL); + level = znode->level; + pr_err("== Level %d ==\n", level); + while (znode) { + if (level != znode->level) { + level = znode->level; + pr_err("== Level %d ==\n", level); + } + ubifs_dump_znode(c, znode); + znode = ubifs_tnc_levelorder_next(c, c->zroot.znode, znode); } - ubifs_dump_znode(c, znode); - znode = ubifs_tnc_levelorder_next(c, c->zroot.znode, znode); + } else { + pr_err("empty TNC tree in memory\n"); } pr_err("(pid %d) finish dumping TNC tree\n", current->pid); } diff --git a/fs/ubifs/lpt_commit.c b/fs/ubifs/lpt_commit.c index aa8837e6247c..f2cb214581fd 100644 --- a/fs/ubifs/lpt_commit.c +++ b/fs/ubifs/lpt_commit.c @@ -1932,7 +1932,6 @@ static void dump_lpt_leb(const struct ubifs_info *c, int lnum) pr_err("(pid %d) finish dumping LEB %d\n", current->pid, lnum); out: vfree(buf); - return; } /** diff --git a/fs/vboxsf/dir.c b/fs/vboxsf/dir.c index 5f1a14d5b927..a859ac9b74ba 100644 --- a/fs/vboxsf/dir.c +++ b/fs/vboxsf/dir.c @@ -192,7 +192,8 @@ const struct file_operations vboxsf_dir_fops = { * This is called during name resolution/lookup to check if the @dentry in * the cache is still valid. the job is handled by vboxsf_inode_revalidate. */ -static int vboxsf_dentry_revalidate(struct dentry *dentry, unsigned int flags) +static int vboxsf_dentry_revalidate(struct inode *dir, const struct qstr *name, + struct dentry *dentry, unsigned int flags) { if (flags & LOOKUP_RCU) return -ECHILD; diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index a0a9007cc1e3..9ebb53f031cd 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -900,8 +900,22 @@ void blk_mq_delay_run_hw_queues(struct request_queue *q, unsigned long msecs); void blk_mq_tagset_busy_iter(struct blk_mq_tag_set *tagset, busy_tag_iter_fn *fn, void *priv); void blk_mq_tagset_wait_completed_request(struct blk_mq_tag_set *tagset); -void blk_mq_freeze_queue(struct request_queue *q); -void blk_mq_unfreeze_queue(struct request_queue *q); +void blk_mq_freeze_queue_nomemsave(struct request_queue *q); +void blk_mq_unfreeze_queue_nomemrestore(struct request_queue *q); +static inline unsigned int __must_check +blk_mq_freeze_queue(struct request_queue *q) +{ + unsigned int memflags = memalloc_noio_save(); + + blk_mq_freeze_queue_nomemsave(q); + return memflags; +} +static inline void +blk_mq_unfreeze_queue(struct request_queue *q, unsigned int memflags) +{ + blk_mq_unfreeze_queue_nomemrestore(q); + memalloc_noio_restore(memflags); +} void blk_freeze_queue_start(struct request_queue *q); void blk_mq_freeze_queue_wait(struct request_queue *q); int blk_mq_freeze_queue_wait_timeout(struct request_queue *q, diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 76f0a4e7c2e5..248416ecd01c 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -561,7 +561,6 @@ struct request_queue { struct list_head flush_list; struct mutex sysfs_lock; - struct mutex sysfs_dir_lock; struct mutex limits_lock; /* @@ -605,8 +604,6 @@ struct request_queue { * Serializes all debugfs metadata operations using the above 
dentries. */ struct mutex debugfs_mutex; - - bool mq_sysfs_init_done; }; /* Keep blk_queue_flag_name[] in sync with the definitions below */ diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h index 2d7d86f0290d..c7f2c63b3bc3 100644 --- a/include/linux/ceph/ceph_fs.h +++ b/include/linux/ceph/ceph_fs.h @@ -504,20 +504,6 @@ struct ceph_mds_request_head_legacy { #define CEPH_MDS_REQUEST_HEAD_VERSION 3 -struct ceph_mds_request_head_old { - __le16 version; /* struct version */ - __le64 oldest_client_tid; - __le32 mdsmap_epoch; /* on client */ - __le32 flags; /* CEPH_MDS_FLAG_* */ - __u8 num_retry, num_fwd; /* count retry, fwd attempts */ - __le16 num_releases; /* # include cap/lease release records */ - __le32 op; /* mds op code */ - __le32 caller_uid, caller_gid; - __le64 ino; /* use this ino for openc, mkdir, mknod, - etc. (if replaying) */ - union ceph_mds_request_args_ext args; -} __attribute__ ((packed)); - struct ceph_mds_request_head { __le16 version; /* struct version */ __le64 oldest_client_tid; diff --git a/include/linux/cpu.h b/include/linux/cpu.h index bdcec1732445..6a0a8f1c7c90 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -77,6 +77,7 @@ extern ssize_t cpu_show_gds(struct device *dev, struct device_attribute *attr, char *buf); extern ssize_t cpu_show_reg_file_data_sampling(struct device *dev, struct device_attribute *attr, char *buf); +extern ssize_t cpu_show_ghostwrite(struct device *dev, struct device_attribute *attr, char *buf); extern __printf(4, 5) struct device *cpu_device_create(struct device *parent, void *drvdata, diff --git a/include/linux/dcache.h b/include/linux/dcache.h index bff956f7b2b9..9a1a30857763 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -68,16 +68,24 @@ extern const struct qstr dotdot_name; * large memory footprint increase). 
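A little further up, the blk-mq.h hunk makes blk_mq_freeze_queue() return the memalloc_noio cookie that blk_mq_unfreeze_queue() now consumes, and marks it __must_check so callers cannot drop it. A hedged sketch of the updated calling pattern:

#include <linux/blk-mq.h>

/* Freeze around a queue-state update; the noio cookie returned by the
 * freeze must be handed back on unfreeze so memory-allocation flags are
 * restored correctly. */
static void example_update_queue(struct request_queue *q)
{
	unsigned int memflags;

	memflags = blk_mq_freeze_queue(q);
	/* ... modify queue state while no requests are in flight ... */
	blk_mq_unfreeze_queue(q, memflags);
}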
*/ #ifdef CONFIG_64BIT -# define DNAME_INLINE_LEN 40 /* 192 bytes */ +# define DNAME_INLINE_WORDS 5 /* 192 bytes */ #else # ifdef CONFIG_SMP -# define DNAME_INLINE_LEN 36 /* 128 bytes */ +# define DNAME_INLINE_WORDS 9 /* 128 bytes */ # else -# define DNAME_INLINE_LEN 44 /* 128 bytes */ +# define DNAME_INLINE_WORDS 11 /* 128 bytes */ # endif #endif +#define DNAME_INLINE_LEN (DNAME_INLINE_WORDS*sizeof(unsigned long)) + +union shortname_store { + unsigned char string[DNAME_INLINE_LEN]; + unsigned long words[DNAME_INLINE_WORDS]; +}; + #define d_lock d_lockref.lock +#define d_iname d_shortname.string struct dentry { /* RCU lookup touched fields */ @@ -88,7 +96,7 @@ struct dentry { struct qstr d_name; struct inode *d_inode; /* Where the name belongs to - NULL is * negative */ - unsigned char d_iname[DNAME_INLINE_LEN]; /* small names */ + union shortname_store d_shortname; /* --- cacheline 1 boundary (64 bytes) was 32 bytes ago --- */ /* Ref lookup also touches following */ @@ -136,7 +144,8 @@ enum d_real_type { }; struct dentry_operations { - int (*d_revalidate)(struct dentry *, unsigned int); + int (*d_revalidate)(struct inode *, const struct qstr *, + struct dentry *, unsigned int); int (*d_weak_revalidate)(struct dentry *, unsigned int); int (*d_hash)(const struct dentry *, struct qstr *); int (*d_compare)(const struct dentry *, @@ -150,6 +159,8 @@ struct dentry_operations { struct vfsmount *(*d_automount)(struct path *); int (*d_manage)(const struct path *, bool); struct dentry *(*d_real)(struct dentry *, enum d_real_type type); + bool (*d_unalias_trylock)(const struct dentry *); + void (*d_unalias_unlock)(const struct dentry *); } ____cacheline_aligned; /* @@ -589,7 +600,7 @@ static inline struct inode *d_real_inode(const struct dentry *dentry) struct name_snapshot { struct qstr name; - unsigned char inline_name[DNAME_INLINE_LEN]; + union shortname_store inline_name; }; void take_dentry_name_snapshot(struct name_snapshot *, struct dentry *); void release_dentry_name_snapshot(struct name_snapshot *); diff --git a/include/linux/export.h b/include/linux/export.h index 2633df4d31e6..a8c23d945634 100644 --- a/include/linux/export.h +++ b/include/linux/export.h @@ -52,9 +52,24 @@ #else +#ifdef CONFIG_GENDWARFKSYMS +/* + * With CONFIG_GENDWARFKSYMS, ensure the compiler emits debugging + * information for all exported symbols, including those defined in + * different TUs, by adding a __gendwarfksyms_ptr_<symbol> pointer + * that's discarded during the final link. 
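The dcache.h hunk above re-expresses the inline-name buffer in machine words (union shortname_store) while keeping the historical byte sizes: 5*8 = 40 bytes on 64-bit, 9*4 = 36 and 11*4 = 44 bytes on 32-bit SMP and !SMP. A hedged compile-time check that follows directly from those definitions:

#include <linux/dcache.h>
#include <linux/build_bug.h>

/* DNAME_INLINE_LEN is now derived from DNAME_INLINE_WORDS, and both
 * members of union shortname_store cover the same number of bytes. */
static_assert(DNAME_INLINE_LEN == DNAME_INLINE_WORDS * sizeof(unsigned long));
static_assert(sizeof(union shortname_store) == DNAME_INLINE_LEN);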
+ */ +#define __GENDWARFKSYMS_EXPORT(sym) \ + static typeof(sym) *__gendwarfksyms_ptr_##sym __used \ + __section(".discard.gendwarfksyms") = &sym; +#else +#define __GENDWARFKSYMS_EXPORT(sym) +#endif + #define __EXPORT_SYMBOL(sym, license, ns) \ extern typeof(sym) sym; \ __ADDRESSABLE(sym) \ + __GENDWARFKSYMS_EXPORT(sym) \ asm(__stringify(___EXPORT_SYMBOL(sym, license, ns))) #endif diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index 772f822dc6b8..18855cb44b1c 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -192,7 +192,8 @@ struct fscrypt_operations { unsigned int *num_devs); }; -int fscrypt_d_revalidate(struct dentry *dentry, unsigned int flags); +int fscrypt_d_revalidate(struct inode *dir, const struct qstr *name, + struct dentry *dentry, unsigned int flags); static inline struct fscrypt_inode_info * fscrypt_get_inode_info(const struct inode *inode) @@ -711,8 +712,8 @@ static inline u64 fscrypt_fname_siphash(const struct inode *dir, return 0; } -static inline int fscrypt_d_revalidate(struct dentry *dentry, - unsigned int flags) +static inline int fscrypt_d_revalidate(struct inode *dir, const struct qstr *name, + struct dentry *dentry, unsigned int flags) { return 1; } diff --git a/include/linux/io_uring/cmd.h b/include/linux/io_uring/cmd.h index a3ce553413de..abd0c8bd950b 100644 --- a/include/linux/io_uring/cmd.h +++ b/include/linux/io_uring/cmd.h @@ -19,8 +19,8 @@ struct io_uring_cmd { }; struct io_uring_cmd_data { - struct io_uring_sqe sqes[2]; void *op_data; + struct io_uring_sqe sqes[2]; }; static inline const void *io_uring_sqe_cmd(const struct io_uring_sqe *sqe) diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index 623d8e798a11..3def525a1da3 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -222,7 +222,8 @@ struct io_alloc_cache { void **entries; unsigned int nr_cached; unsigned int max_cached; - size_t elem_size; + unsigned int elem_size; + unsigned int init_clear; }; struct io_ring_ctx { diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 5f1b2dc788e2..6b27db7f9496 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -1406,6 +1406,7 @@ enum tlb_flush_reason { TLB_LOCAL_SHOOTDOWN, TLB_LOCAL_MM_SHOOTDOWN, TLB_REMOTE_SEND_IPI, + TLB_REMOTE_WRONG_CPU, NR_TLB_FLUSH_REASONS, }; diff --git a/include/linux/module.h b/include/linux/module.h index 37eb5d88f6eb..23792d5d7b74 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -430,7 +430,7 @@ struct module { /* Exported symbols */ const struct kernel_symbol *syms; - const s32 *crcs; + const u32 *crcs; unsigned int num_syms; #ifdef CONFIG_ARCH_USES_CFI_TRAPS @@ -448,7 +448,7 @@ struct module { /* GPL-only exported symbols. */ unsigned int num_gpl_syms; const struct kernel_symbol *gpl_syms; - const s32 *gpl_crcs; + const u32 *gpl_crcs; bool using_gplonly_symbols; #ifdef CONFIG_MODULE_SIG diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 8da4c61f97b9..2a59034a5fa2 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1085,8 +1085,8 @@ struct netdev_net_notifier { * * int (*ndo_do_ioctl)(struct net_device *dev, struct ifreq *ifr, int cmd); * Old-style ioctl entry point. This is used internally by the - * appletalk and ieee802154 subsystems but is no longer called by - * the device ioctl handler. + * ieee802154 subsystem but is no longer called by the device + * ioctl handler. 
* * int (*ndo_siocbond)(struct net_device *dev, struct ifreq *ifr, int cmd); * Used by the bonding driver for its device specific ioctls: diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 162b7c0c3555..9155a6ffc370 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1781,7 +1781,7 @@ struct nfs_rpc_ops { struct nfs_fattr *, struct inode *); int (*setattr) (struct dentry *, struct nfs_fattr *, struct iattr *); - int (*lookup) (struct inode *, struct dentry *, + int (*lookup) (struct inode *, struct dentry *, const struct qstr *, struct nfs_fh *, struct nfs_fattr *); int (*lookupp) (struct inode *, struct nfs_fh *, struct nfs_fattr *); diff --git a/include/linux/pm.h b/include/linux/pm.h index 08c37b83fea8..78855d794342 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -384,12 +384,8 @@ const struct dev_pm_ops name = { \ #ifdef CONFIG_PM #define _EXPORT_DEV_PM_OPS(name, license, ns) _EXPORT_PM_OPS(name, license, ns) -#define EXPORT_PM_FN_GPL(name) EXPORT_SYMBOL_GPL(name) -#define EXPORT_PM_FN_NS_GPL(name, ns) EXPORT_SYMBOL_NS_GPL(name, "ns") #else #define _EXPORT_DEV_PM_OPS(name, license, ns) _DISCARD_PM_OPS(name, license, ns) -#define EXPORT_PM_FN_GPL(name) -#define EXPORT_PM_FN_NS_GPL(name, ns) #endif #ifdef CONFIG_PM_SLEEP @@ -684,6 +680,7 @@ struct dev_pm_info { bool no_pm_callbacks:1; /* Owned by the PM core */ bool async_in_progress:1; /* Owned by the PM core */ bool must_resume:1; /* Owned by the PM core */ + bool set_active:1; /* Owned by the PM core */ bool may_skip_resume:1; /* Set by subsystems */ #else bool should_wakeup:1; diff --git a/include/linux/swap.h b/include/linux/swap.h index a5f475335aea..b13b72645db3 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -222,6 +222,7 @@ enum { }; #define SWAP_CLUSTER_MAX 32UL +#define SWAP_CLUSTER_MAX_SKIPPED (SWAP_CLUSTER_MAX << 10) #define COMPACT_CLUSTER_MAX SWAP_CLUSTER_MAX /* Bit flag in swap_map */ diff --git a/include/net/page_pool/types.h b/include/net/page_pool/types.h index ed4cd114180a..7f405672b089 100644 --- a/include/net/page_pool/types.h +++ b/include/net/page_pool/types.h @@ -237,7 +237,6 @@ struct page_pool { struct { struct hlist_node list; u64 detach_time; - u32 napi_id; u32 id; } user; }; diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 4b0677e48190..ed4b83696c77 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1268,9 +1268,19 @@ static inline int __xfrm_policy_check2(struct sock *sk, int dir, if (xo) { x = xfrm_input_state(skb); - if (x->xso.type == XFRM_DEV_OFFLOAD_PACKET) - return (xo->flags & CRYPTO_DONE) && - (xo->status & CRYPTO_SUCCESS); + if (x->xso.type == XFRM_DEV_OFFLOAD_PACKET) { + bool check = (xo->flags & CRYPTO_DONE) && + (xo->status & CRYPTO_SUCCESS); + + /* The packets here are plain ones and secpath was + * needed to indicate that hardware already handled + * them and there is no need to do nothing in addition. + * + * Consume secpath which was set by drivers. 
+ */ + secpath_reset(skb); + return check; + } } return __xfrm_check_nopolicy(net, skb, dir) || diff --git a/include/uapi/mtd/ubi-user.h b/include/uapi/mtd/ubi-user.h index e1571603175e..aa872a41ffb9 100644 --- a/include/uapi/mtd/ubi-user.h +++ b/include/uapi/mtd/ubi-user.h @@ -175,6 +175,8 @@ #define UBI_IOCRPEB _IOW(UBI_IOC_MAGIC, 4, __s32) /* Force scrubbing on the specified PEB */ #define UBI_IOCSPEB _IOW(UBI_IOC_MAGIC, 5, __s32) +/* Read detailed device erase counter information */ +#define UBI_IOCECNFO _IOWR(UBI_IOC_MAGIC, 6, struct ubi_ecinfo_req) /* ioctl commands of the UBI control character device */ @@ -413,6 +415,37 @@ struct ubi_rnvol_req { } __packed; /** + * struct ubi_ecinfo_req - a data structure used for requesting and receiving + * erase block counter information from a UBI device. + * + * @start: index of first physical erase block to read (in) + * @length: number of erase counters to read (in) + * @read_length: number of erase counters that was actually read (out) + * @padding: reserved for future, not used, has to be zeroed + * @erase_counters: array of erase counter values (out) + * + * This structure is used to retrieve erase counter information for a specified + * range of PEBs on a UBI device. + * Erase counters are read from @start and attempts to read @length number of + * erase counters. + * The retrieved values are stored in the @erase_counters array. It is the + * responsibility of the caller to allocate enough memory for storing @length + * elements in the @erase_counters array. + * If a block is bad or if the erase counter is unknown the corresponding value + * in the array will be set to -1. + * The @read_length field will indicate the number of erase counters actually + * read. Typically @read_length will be limited due to memory or the number of + * PEBs on the UBI device. + */ +struct ubi_ecinfo_req { + __s32 start; + __s32 length; + __s32 read_length; + __s8 padding[16]; + __s32 erase_counters[]; +} __packed; + +/** * struct ubi_leb_change_req - a data structure used in atomic LEB change * requests. 
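The new UBI_IOCECNFO ioctl and struct ubi_ecinfo_req above expose per-PEB erase counters to user space. A hedged user-space sketch of how a tool might call it (the device path and counter count are illustrative; assumes a uapi header that carries the new definitions):

#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <mtd/ubi-user.h>

int main(void)
{
	int n = 64, fd, i;
	struct ubi_ecinfo_req *req;

	/* calloc() also zeroes the reserved padding, as required. */
	req = calloc(1, sizeof(*req) + n * sizeof(req->erase_counters[0]));
	if (!req)
		return 1;
	req->start = 0;
	req->length = n;

	fd = open("/dev/ubi0", O_RDONLY);
	if (fd < 0 || ioctl(fd, UBI_IOCECNFO, req) < 0) {
		perror("UBI_IOCECNFO");
		return 1;
	}

	/* A value of -1 marks a bad PEB or an unknown erase counter. */
	for (i = 0; i < req->read_length; i++)
		printf("PEB %d: ec %d\n", req->start + i, req->erase_counters[i]);
	return 0;
}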
* @lnum: logical eraseblock number to change diff --git a/init/Kconfig b/init/Kconfig index 4dbc059d2de5..d0d021b3fa3b 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1969,7 +1969,8 @@ config RUST bool "Rust support" depends on HAVE_RUST depends on RUST_IS_AVAILABLE - depends on !MODVERSIONS + select EXTENDED_MODVERSIONS if MODVERSIONS + depends on !MODVERSIONS || GENDWARFKSYMS depends on !GCC_PLUGIN_RANDSTRUCT depends on !RANDSTRUCT depends on !DEBUG_INFO_BTF || PAHOLE_HAS_LANG_EXCLUDE diff --git a/io_uring/Makefile b/io_uring/Makefile index 53167bef37d7..d695b60dba4f 100644 --- a/io_uring/Makefile +++ b/io_uring/Makefile @@ -13,7 +13,7 @@ obj-$(CONFIG_IO_URING) += io_uring.o opdef.o kbuf.o rsrc.o notif.o \ sync.o msg_ring.o advise.o openclose.o \ epoll.o statx.o timeout.o fdinfo.o \ cancel.o waitid.o register.o \ - truncate.o memmap.o + truncate.o memmap.o alloc_cache.o obj-$(CONFIG_IO_WQ) += io-wq.o obj-$(CONFIG_FUTEX) += futex.o obj-$(CONFIG_NET_RX_BUSY_POLL) += napi.o diff --git a/io_uring/alloc_cache.c b/io_uring/alloc_cache.c new file mode 100644 index 000000000000..58423888b736 --- /dev/null +++ b/io_uring/alloc_cache.c @@ -0,0 +1,44 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "alloc_cache.h" + +void io_alloc_cache_free(struct io_alloc_cache *cache, + void (*free)(const void *)) +{ + void *entry; + + if (!cache->entries) + return; + + while ((entry = io_alloc_cache_get(cache)) != NULL) + free(entry); + + kvfree(cache->entries); + cache->entries = NULL; +} + +/* returns false if the cache was initialized properly */ +bool io_alloc_cache_init(struct io_alloc_cache *cache, + unsigned max_nr, unsigned int size, + unsigned int init_bytes) +{ + cache->entries = kvmalloc_array(max_nr, sizeof(void *), GFP_KERNEL); + if (!cache->entries) + return true; + + cache->nr_cached = 0; + cache->max_cached = max_nr; + cache->elem_size = size; + cache->init_clear = init_bytes; + return false; +} + +void *io_cache_alloc_new(struct io_alloc_cache *cache, gfp_t gfp) +{ + void *obj; + + obj = kmalloc(cache->elem_size, gfp); + if (obj && cache->init_clear) + memset(obj, 0, cache->init_clear); + return obj; +} diff --git a/io_uring/alloc_cache.h b/io_uring/alloc_cache.h index a3a8cfec32ce..0dd17d8ba93a 100644 --- a/io_uring/alloc_cache.h +++ b/io_uring/alloc_cache.h @@ -1,11 +1,30 @@ #ifndef IOU_ALLOC_CACHE_H #define IOU_ALLOC_CACHE_H +#include <linux/io_uring_types.h> + /* * Don't allow the cache to grow beyond this size. */ #define IO_ALLOC_CACHE_MAX 128 +void io_alloc_cache_free(struct io_alloc_cache *cache, + void (*free)(const void *)); +bool io_alloc_cache_init(struct io_alloc_cache *cache, + unsigned max_nr, unsigned int size, + unsigned int init_bytes); + +void *io_cache_alloc_new(struct io_alloc_cache *cache, gfp_t gfp); + +static inline void io_alloc_cache_kasan(struct iovec **iov, int *nr) +{ + if (IS_ENABLED(CONFIG_KASAN)) { + kfree(*iov); + *iov = NULL; + *nr = 0; + } +} + static inline bool io_alloc_cache_put(struct io_alloc_cache *cache, void *entry) { @@ -23,52 +42,30 @@ static inline void *io_alloc_cache_get(struct io_alloc_cache *cache) if (cache->nr_cached) { void *entry = cache->entries[--cache->nr_cached]; + /* + * If KASAN is enabled, always clear the initial bytes that + * must be zeroed post alloc, in case any of them overlap + * with KASAN storage. 
+ */ +#if defined(CONFIG_KASAN) kasan_mempool_unpoison_object(entry, cache->elem_size); + if (cache->init_clear) + memset(entry, 0, cache->init_clear); +#endif return entry; } return NULL; } -static inline void *io_cache_alloc(struct io_alloc_cache *cache, gfp_t gfp, - void (*init_once)(void *obj)) +static inline void *io_cache_alloc(struct io_alloc_cache *cache, gfp_t gfp) { - if (unlikely(!cache->nr_cached)) { - void *obj = kmalloc(cache->elem_size, gfp); + void *obj; - if (obj && init_once) - init_once(obj); + obj = io_alloc_cache_get(cache); + if (obj) return obj; - } - return io_alloc_cache_get(cache); + return io_cache_alloc_new(cache, gfp); } -/* returns false if the cache was initialized properly */ -static inline bool io_alloc_cache_init(struct io_alloc_cache *cache, - unsigned max_nr, size_t size) -{ - cache->entries = kvmalloc_array(max_nr, sizeof(void *), GFP_KERNEL); - if (cache->entries) { - cache->nr_cached = 0; - cache->max_cached = max_nr; - cache->elem_size = size; - return false; - } - return true; -} - -static inline void io_alloc_cache_free(struct io_alloc_cache *cache, - void (*free)(const void *)) -{ - void *entry; - - if (!cache->entries) - return; - - while ((entry = io_alloc_cache_get(cache)) != NULL) - free(entry); - - kvfree(cache->entries); - cache->entries = NULL; -} #endif diff --git a/io_uring/filetable.c b/io_uring/filetable.c index a21660e3145a..dd8eeec97acf 100644 --- a/io_uring/filetable.c +++ b/io_uring/filetable.c @@ -68,7 +68,7 @@ static int io_install_fixed_file(struct io_ring_ctx *ctx, struct file *file, if (slot_index >= ctx->file_table.data.nr) return -EINVAL; - node = io_rsrc_node_alloc(ctx, IORING_RSRC_FILE); + node = io_rsrc_node_alloc(IORING_RSRC_FILE); if (!node) return -ENOMEM; diff --git a/io_uring/futex.c b/io_uring/futex.c index 30139cc150f2..3159a2b7eeca 100644 --- a/io_uring/futex.c +++ b/io_uring/futex.c @@ -36,7 +36,7 @@ struct io_futex_data { bool io_futex_cache_init(struct io_ring_ctx *ctx) { return io_alloc_cache_init(&ctx->futex_cache, IO_FUTEX_ALLOC_CACHE_MAX, - sizeof(struct io_futex_data)); + sizeof(struct io_futex_data), 0); } void io_futex_cache_free(struct io_ring_ctx *ctx) @@ -320,7 +320,7 @@ int io_futex_wait(struct io_kiocb *req, unsigned int issue_flags) } io_ring_submit_lock(ctx, issue_flags); - ifd = io_cache_alloc(&ctx->futex_cache, GFP_NOWAIT, NULL); + ifd = io_cache_alloc(&ctx->futex_cache, GFP_NOWAIT); if (!ifd) { ret = -ENOMEM; goto done_unlock; diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 5a0f8a5041d6..ceacf6230e34 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -315,16 +315,18 @@ static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p) INIT_LIST_HEAD(&ctx->cq_overflow_list); INIT_LIST_HEAD(&ctx->io_buffers_cache); ret = io_alloc_cache_init(&ctx->apoll_cache, IO_POLL_ALLOC_CACHE_MAX, - sizeof(struct async_poll)); + sizeof(struct async_poll), 0); ret |= io_alloc_cache_init(&ctx->netmsg_cache, IO_ALLOC_CACHE_MAX, - sizeof(struct io_async_msghdr)); + sizeof(struct io_async_msghdr), + offsetof(struct io_async_msghdr, clear)); ret |= io_alloc_cache_init(&ctx->rw_cache, IO_ALLOC_CACHE_MAX, - sizeof(struct io_async_rw)); + sizeof(struct io_async_rw), + offsetof(struct io_async_rw, clear)); ret |= io_alloc_cache_init(&ctx->uring_cache, IO_ALLOC_CACHE_MAX, - sizeof(struct io_uring_cmd_data)); + sizeof(struct io_uring_cmd_data), 0); spin_lock_init(&ctx->msg_lock); ret |= io_alloc_cache_init(&ctx->msg_cache, IO_ALLOC_CACHE_MAX, - sizeof(struct io_kiocb)); + sizeof(struct 
io_kiocb), 0); ret |= io_futex_cache_init(ctx); if (ret) goto free_ref; diff --git a/io_uring/io_uring.h b/io_uring/io_uring.h index f65e3f3ede51..ab619e63ef39 100644 --- a/io_uring/io_uring.h +++ b/io_uring/io_uring.h @@ -226,21 +226,16 @@ static inline void io_req_set_res(struct io_kiocb *req, s32 res, u32 cflags) } static inline void *io_uring_alloc_async_data(struct io_alloc_cache *cache, - struct io_kiocb *req, - void (*init_once)(void *obj)) + struct io_kiocb *req) { - req->async_data = io_cache_alloc(cache, GFP_KERNEL, init_once); - if (req->async_data) - req->flags |= REQ_F_ASYNC_DATA; - return req->async_data; -} + if (cache) { + req->async_data = io_cache_alloc(cache, GFP_KERNEL); + } else { + const struct io_issue_def *def = &io_issue_defs[req->opcode]; -static inline void *io_uring_alloc_async_data_nocache(struct io_kiocb *req) -{ - const struct io_issue_def *def = &io_issue_defs[req->opcode]; - - WARN_ON_ONCE(!def->async_size); - req->async_data = kmalloc(def->async_size, GFP_KERNEL); + WARN_ON_ONCE(!def->async_size); + req->async_data = kmalloc(def->async_size, GFP_KERNEL); + } if (req->async_data) req->flags |= REQ_F_ASYNC_DATA; return req->async_data; diff --git a/io_uring/msg_ring.c b/io_uring/msg_ring.c index bd3cd78d2dba..7e6f68e911f1 100644 --- a/io_uring/msg_ring.c +++ b/io_uring/msg_ring.c @@ -89,8 +89,7 @@ static void io_msg_tw_complete(struct io_kiocb *req, struct io_tw_state *ts) static int io_msg_remote_post(struct io_ring_ctx *ctx, struct io_kiocb *req, int res, u32 cflags, u64 user_data) { - req->tctx = READ_ONCE(ctx->submitter_task->io_uring); - if (!req->tctx) { + if (!READ_ONCE(ctx->submitter_task)) { kmem_cache_free(req_cachep, req); return -EOWNERDEAD; } @@ -98,6 +97,7 @@ static int io_msg_remote_post(struct io_ring_ctx *ctx, struct io_kiocb *req, io_req_set_res(req, res, cflags); percpu_ref_get(&ctx->refs); req->ctx = ctx; + req->tctx = NULL; req->io_task_work.func = io_msg_tw_complete; io_req_task_work_add_remote(req, ctx, IOU_F_TWQ_LAZY_WAKE); return 0; diff --git a/io_uring/net.c b/io_uring/net.c index 85f55fbc25c9..17852a6616ff 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -137,7 +137,6 @@ static void io_netmsg_iovec_free(struct io_async_msghdr *kmsg) static void io_netmsg_recycle(struct io_kiocb *req, unsigned int issue_flags) { struct io_async_msghdr *hdr = req->async_data; - struct iovec *iov; /* can't recycle, ensure we free the iovec if we have one */ if (unlikely(issue_flags & IO_URING_F_UNLOCKED)) { @@ -146,44 +145,30 @@ static void io_netmsg_recycle(struct io_kiocb *req, unsigned int issue_flags) } /* Let normal cleanup path reap it if we fail adding to the cache */ - iov = hdr->free_iov; + io_alloc_cache_kasan(&hdr->free_iov, &hdr->free_iov_nr); if (io_alloc_cache_put(&req->ctx->netmsg_cache, hdr)) { - if (iov) - kasan_mempool_poison_object(iov); req->async_data = NULL; req->flags &= ~REQ_F_ASYNC_DATA; } } -static void io_msg_async_data_init(void *obj) -{ - struct io_async_msghdr *hdr = (struct io_async_msghdr *)obj; - - hdr->free_iov = NULL; - hdr->free_iov_nr = 0; -} - static struct io_async_msghdr *io_msg_alloc_async(struct io_kiocb *req) { struct io_ring_ctx *ctx = req->ctx; struct io_async_msghdr *hdr; - hdr = io_uring_alloc_async_data(&ctx->netmsg_cache, req, - io_msg_async_data_init); + hdr = io_uring_alloc_async_data(&ctx->netmsg_cache, req); if (!hdr) return NULL; /* If the async data was cached, we might have an iov cached inside. 
*/ - if (hdr->free_iov) { - kasan_mempool_unpoison_object(hdr->free_iov, - hdr->free_iov_nr * sizeof(struct iovec)); + if (hdr->free_iov) req->flags |= REQ_F_NEED_CLEANUP; - } return hdr; } /* assign new iovec to kmsg, if we need to */ -static int io_net_vec_assign(struct io_kiocb *req, struct io_async_msghdr *kmsg, +static void io_net_vec_assign(struct io_kiocb *req, struct io_async_msghdr *kmsg, struct iovec *iov) { if (iov) { @@ -193,7 +178,6 @@ static int io_net_vec_assign(struct io_kiocb *req, struct io_async_msghdr *kmsg, kfree(kmsg->free_iov); kmsg->free_iov = iov; } - return 0; } static inline void io_mshot_prep_retry(struct io_kiocb *req, @@ -255,7 +239,8 @@ static int io_compat_msg_copy_hdr(struct io_kiocb *req, if (unlikely(ret < 0)) return ret; - return io_net_vec_assign(req, iomsg, iov); + io_net_vec_assign(req, iomsg, iov); + return 0; } #endif @@ -295,11 +280,12 @@ static int io_msg_copy_hdr(struct io_kiocb *req, struct io_async_msghdr *iomsg, ret = -EINVAL; goto ua_end; } else { + struct iovec __user *uiov = msg->msg_iov; + /* we only need the length for provided buffers */ - if (!access_ok(&msg->msg_iov[0].iov_len, sizeof(__kernel_size_t))) + if (!access_ok(&uiov->iov_len, sizeof(uiov->iov_len))) goto ua_end; - unsafe_get_user(iov->iov_len, &msg->msg_iov[0].iov_len, - ua_end); + unsafe_get_user(iov->iov_len, &uiov->iov_len, ua_end); sr->len = iov->iov_len; } ret = 0; @@ -314,7 +300,8 @@ ua_end: if (unlikely(ret < 0)) return ret; - return io_net_vec_assign(req, iomsg, iov); + io_net_vec_assign(req, iomsg, iov); + return 0; } static int io_sendmsg_copy_hdr(struct io_kiocb *req, @@ -579,6 +566,54 @@ int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags) return IOU_OK; } +static int io_send_select_buffer(struct io_kiocb *req, unsigned int issue_flags, + struct io_async_msghdr *kmsg) +{ + struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); + + int ret; + struct buf_sel_arg arg = { + .iovs = &kmsg->fast_iov, + .max_len = min_not_zero(sr->len, INT_MAX), + .nr_iovs = 1, + }; + + if (kmsg->free_iov) { + arg.nr_iovs = kmsg->free_iov_nr; + arg.iovs = kmsg->free_iov; + arg.mode = KBUF_MODE_FREE; + } + + if (!(sr->flags & IORING_RECVSEND_BUNDLE)) + arg.nr_iovs = 1; + else + arg.mode |= KBUF_MODE_EXPAND; + + ret = io_buffers_select(req, &arg, issue_flags); + if (unlikely(ret < 0)) + return ret; + + if (arg.iovs != &kmsg->fast_iov && arg.iovs != kmsg->free_iov) { + kmsg->free_iov_nr = ret; + kmsg->free_iov = arg.iovs; + req->flags |= REQ_F_NEED_CLEANUP; + } + sr->len = arg.out_len; + + if (ret == 1) { + sr->buf = arg.iovs[0].iov_base; + ret = import_ubuf(ITER_SOURCE, sr->buf, sr->len, + &kmsg->msg.msg_iter); + if (unlikely(ret)) + return ret; + } else { + iov_iter_init(&kmsg->msg.msg_iter, ITER_SOURCE, + arg.iovs, ret, arg.out_len); + } + + return 0; +} + int io_send(struct io_kiocb *req, unsigned int issue_flags) { struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); @@ -602,44 +637,9 @@ int io_send(struct io_kiocb *req, unsigned int issue_flags) retry_bundle: if (io_do_buffer_select(req)) { - struct buf_sel_arg arg = { - .iovs = &kmsg->fast_iov, - .max_len = min_not_zero(sr->len, INT_MAX), - .nr_iovs = 1, - }; - - if (kmsg->free_iov) { - arg.nr_iovs = kmsg->free_iov_nr; - arg.iovs = kmsg->free_iov; - arg.mode = KBUF_MODE_FREE; - } - - if (!(sr->flags & IORING_RECVSEND_BUNDLE)) - arg.nr_iovs = 1; - else - arg.mode |= KBUF_MODE_EXPAND; - - ret = io_buffers_select(req, &arg, issue_flags); - if (unlikely(ret < 0)) + ret = io_send_select_buffer(req, issue_flags, 
kmsg); + if (ret) return ret; - - if (arg.iovs != &kmsg->fast_iov && arg.iovs != kmsg->free_iov) { - kmsg->free_iov_nr = ret; - kmsg->free_iov = arg.iovs; - req->flags |= REQ_F_NEED_CLEANUP; - } - sr->len = arg.out_len; - - if (ret == 1) { - sr->buf = arg.iovs[0].iov_base; - ret = import_ubuf(ITER_SOURCE, sr->buf, sr->len, - &kmsg->msg.msg_iter); - if (unlikely(ret)) - return ret; - } else { - iov_iter_init(&kmsg->msg.msg_iter, ITER_SOURCE, - arg.iovs, ret, arg.out_len); - } } /* @@ -1710,6 +1710,11 @@ int io_connect(struct io_kiocb *req, unsigned int issue_flags) int ret; bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK; + if (unlikely(req->flags & REQ_F_FAIL)) { + ret = -ECONNRESET; + goto out; + } + file_flags = force_nonblock ? O_NONBLOCK : 0; ret = __sys_connect_file(req->file, &io->addr, connect->addr_len, @@ -1813,11 +1818,8 @@ void io_netmsg_cache_free(const void *entry) { struct io_async_msghdr *kmsg = (struct io_async_msghdr *) entry; - if (kmsg->free_iov) { - kasan_mempool_unpoison_object(kmsg->free_iov, - kmsg->free_iov_nr * sizeof(struct iovec)); + if (kmsg->free_iov) io_netmsg_iovec_free(kmsg); - } kfree(kmsg); } #endif diff --git a/io_uring/net.h b/io_uring/net.h index 52bfee05f06a..b804c2b36e60 100644 --- a/io_uring/net.h +++ b/io_uring/net.h @@ -5,16 +5,20 @@ struct io_async_msghdr { #if defined(CONFIG_NET) - struct iovec fast_iov; - /* points to an allocated iov, if NULL we use fast_iov instead */ struct iovec *free_iov; + /* points to an allocated iov, if NULL we use fast_iov instead */ int free_iov_nr; - int namelen; - __kernel_size_t controllen; - __kernel_size_t payloadlen; - struct sockaddr __user *uaddr; - struct msghdr msg; - struct sockaddr_storage addr; + struct_group(clear, + int namelen; + struct iovec fast_iov; + __kernel_size_t controllen; + __kernel_size_t payloadlen; + struct sockaddr __user *uaddr; + struct msghdr msg; + struct sockaddr_storage addr; + ); +#else + struct_group(clear); #endif }; diff --git a/io_uring/poll.c b/io_uring/poll.c index cc01c40b43d3..bb1c0cd4f809 100644 --- a/io_uring/poll.c +++ b/io_uring/poll.c @@ -273,6 +273,8 @@ static int io_poll_check_events(struct io_kiocb *req, struct io_tw_state *ts) return IOU_POLL_REISSUE; } } + if (unlikely(req->cqe.res & EPOLLERR)) + req_set_fail(req); if (req->apoll_events & EPOLLONESHOT) return IOU_POLL_DONE; @@ -315,8 +317,10 @@ void io_poll_task_func(struct io_kiocb *req, struct io_tw_state *ts) ret = io_poll_check_events(req, ts); if (ret == IOU_POLL_NO_ACTION) { + io_kbuf_recycle(req, 0); return; } else if (ret == IOU_POLL_REQUEUE) { + io_kbuf_recycle(req, 0); __io_poll_execute(req, 0); return; } @@ -650,7 +654,7 @@ static struct async_poll *io_req_alloc_apoll(struct io_kiocb *req, kfree(apoll->double_poll); } else { if (!(issue_flags & IO_URING_F_UNLOCKED)) - apoll = io_cache_alloc(&ctx->apoll_cache, GFP_ATOMIC, NULL); + apoll = io_cache_alloc(&ctx->apoll_cache, GFP_ATOMIC); else apoll = kmalloc(sizeof(*apoll), GFP_ATOMIC); if (!apoll) diff --git a/io_uring/register.c b/io_uring/register.c index 05025047d1da..9a4d2fbce4ae 100644 --- a/io_uring/register.c +++ b/io_uring/register.c @@ -552,7 +552,7 @@ overflow: ctx->cqe_cached = ctx->cqe_sentinel = NULL; WRITE_ONCE(n.rings->sq_dropped, READ_ONCE(o.rings->sq_dropped)); - WRITE_ONCE(n.rings->sq_flags, READ_ONCE(o.rings->sq_flags)); + atomic_set(&n.rings->sq_flags, atomic_read(&o.rings->sq_flags)); WRITE_ONCE(n.rings->cq_flags, READ_ONCE(o.rings->cq_flags)); WRITE_ONCE(n.rings->cq_overflow, READ_ONCE(o.rings->cq_overflow)); @@ -853,6 +853,8 
@@ struct file *io_uring_register_get_file(unsigned int fd, bool registered) return ERR_PTR(-EINVAL); fd = array_index_nospec(fd, IO_RINGFD_REG_MAX); file = tctx->registered_rings[fd]; + if (file) + get_file(file); } else { file = fget(fd); } @@ -919,7 +921,7 @@ SYSCALL_DEFINE4(io_uring_register, unsigned int, fd, unsigned int, opcode, trace_io_uring_register(ctx, opcode, ctx->file_table.data.nr, ctx->buf_table.nr, ret); mutex_unlock(&ctx->uring_lock); - if (!use_registered_ring) - fput(file); + + fput(file); return ret; } diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c index e32ac5853391..af39b69eb4fd 100644 --- a/io_uring/rsrc.c +++ b/io_uring/rsrc.c @@ -118,7 +118,7 @@ static void io_buffer_unmap(struct io_ring_ctx *ctx, struct io_rsrc_node *node) } } -struct io_rsrc_node *io_rsrc_node_alloc(struct io_ring_ctx *ctx, int type) +struct io_rsrc_node *io_rsrc_node_alloc(int type) { struct io_rsrc_node *node; @@ -203,7 +203,7 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx, err = -EBADF; break; } - node = io_rsrc_node_alloc(ctx, IORING_RSRC_FILE); + node = io_rsrc_node_alloc(IORING_RSRC_FILE); if (!node) { err = -ENOMEM; fput(file); @@ -444,8 +444,6 @@ int io_files_update(struct io_kiocb *req, unsigned int issue_flags) void io_free_rsrc_node(struct io_ring_ctx *ctx, struct io_rsrc_node *node) { - lockdep_assert_held(&ctx->uring_lock); - if (node->tag) io_post_aux_cqe(ctx, node->tag, 0, 0); @@ -525,7 +523,7 @@ int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg, goto fail; } ret = -ENOMEM; - node = io_rsrc_node_alloc(ctx, IORING_RSRC_FILE); + node = io_rsrc_node_alloc(IORING_RSRC_FILE); if (!node) { fput(file); goto fail; @@ -730,7 +728,7 @@ static struct io_rsrc_node *io_sqe_buffer_register(struct io_ring_ctx *ctx, if (!iov->iov_base) return NULL; - node = io_rsrc_node_alloc(ctx, IORING_RSRC_BUFFER); + node = io_rsrc_node_alloc(IORING_RSRC_BUFFER); if (!node) return ERR_PTR(-ENOMEM); node->buf = NULL; @@ -921,6 +919,16 @@ int io_import_fixed(int ddir, struct iov_iter *iter, return 0; } +/* Lock two rings at once. The rings must be different! */ +static void lock_two_rings(struct io_ring_ctx *ctx1, struct io_ring_ctx *ctx2) +{ + if (ctx1 > ctx2) + swap(ctx1, ctx2); + mutex_lock(&ctx1->uring_lock); + mutex_lock_nested(&ctx2->uring_lock, SINGLE_DEPTH_NESTING); +} + +/* Both rings are locked by the caller. */ static int io_clone_buffers(struct io_ring_ctx *ctx, struct io_ring_ctx *src_ctx, struct io_uring_clone_buffers *arg) { @@ -928,6 +936,9 @@ static int io_clone_buffers(struct io_ring_ctx *ctx, struct io_ring_ctx *src_ctx int i, ret, off, nr; unsigned int nbufs; + lockdep_assert_held(&ctx->uring_lock); + lockdep_assert_held(&src_ctx->uring_lock); + /* * Accounting state is shared between the two rings; that only works if * both rings are accounted towards the same counters. @@ -942,7 +953,7 @@ static int io_clone_buffers(struct io_ring_ctx *ctx, struct io_ring_ctx *src_ctx if (ctx->buf_table.nr && !(arg->flags & IORING_REGISTER_DST_REPLACE)) return -EBUSY; - nbufs = READ_ONCE(src_ctx->buf_table.nr); + nbufs = src_ctx->buf_table.nr; if (!arg->nr) arg->nr = nbufs; else if (arg->nr > nbufs) @@ -966,27 +977,20 @@ static int io_clone_buffers(struct io_ring_ctx *ctx, struct io_ring_ctx *src_ctx } } - /* - * Drop our own lock here. We'll setup the data we need and reference - * the source buffers, then re-grab, check, and assign at the end. 
- */ - mutex_unlock(&ctx->uring_lock); - - mutex_lock(&src_ctx->uring_lock); ret = -ENXIO; nbufs = src_ctx->buf_table.nr; if (!nbufs) - goto out_unlock; + goto out_free; ret = -EINVAL; if (!arg->nr) arg->nr = nbufs; else if (arg->nr > nbufs) - goto out_unlock; + goto out_free; ret = -EOVERFLOW; if (check_add_overflow(arg->nr, arg->src_off, &off)) - goto out_unlock; + goto out_free; if (off > nbufs) - goto out_unlock; + goto out_free; off = arg->dst_off; i = arg->src_off; @@ -998,10 +1002,10 @@ static int io_clone_buffers(struct io_ring_ctx *ctx, struct io_ring_ctx *src_ctx if (!src_node) { dst_node = NULL; } else { - dst_node = io_rsrc_node_alloc(ctx, IORING_RSRC_BUFFER); + dst_node = io_rsrc_node_alloc(IORING_RSRC_BUFFER); if (!dst_node) { ret = -ENOMEM; - goto out_unlock; + goto out_free; } refcount_inc(&src_node->buf->refs); @@ -1011,10 +1015,6 @@ static int io_clone_buffers(struct io_ring_ctx *ctx, struct io_ring_ctx *src_ctx i++; } - /* Have a ref on the bufs now, drop src lock and re-grab our own lock */ - mutex_unlock(&src_ctx->uring_lock); - mutex_lock(&ctx->uring_lock); - /* * If asked for replace, put the old table. data->nodes[] holds both * old and new nodes at this point. @@ -1023,24 +1023,17 @@ static int io_clone_buffers(struct io_ring_ctx *ctx, struct io_ring_ctx *src_ctx io_rsrc_data_free(ctx, &ctx->buf_table); /* - * ctx->buf_table should be empty now - either the contents are being - * replaced and we just freed the table, or someone raced setting up - * a buffer table while the clone was happening. If not empty, fall - * through to failure handling. + * ctx->buf_table must be empty now - either the contents are being + * replaced and we just freed the table, or the contents are being + * copied to a ring that does not have buffers yet (checked at function + * entry). 
*/ - if (!ctx->buf_table.nr) { - ctx->buf_table = data; - return 0; - } + WARN_ON_ONCE(ctx->buf_table.nr); + ctx->buf_table = data; + return 0; - mutex_unlock(&ctx->uring_lock); - mutex_lock(&src_ctx->uring_lock); - /* someone raced setting up buffers, dump ours */ - ret = -EBUSY; -out_unlock: +out_free: io_rsrc_data_free(ctx, &data); - mutex_unlock(&src_ctx->uring_lock); - mutex_lock(&ctx->uring_lock); return ret; } @@ -1054,6 +1047,7 @@ out_unlock: int io_register_clone_buffers(struct io_ring_ctx *ctx, void __user *arg) { struct io_uring_clone_buffers buf; + struct io_ring_ctx *src_ctx; bool registered_src; struct file *file; int ret; @@ -1071,8 +1065,18 @@ int io_register_clone_buffers(struct io_ring_ctx *ctx, void __user *arg) file = io_uring_register_get_file(buf.src_fd, registered_src); if (IS_ERR(file)) return PTR_ERR(file); - ret = io_clone_buffers(ctx, file->private_data, &buf); - if (!registered_src) - fput(file); + + src_ctx = file->private_data; + if (src_ctx != ctx) { + mutex_unlock(&ctx->uring_lock); + lock_two_rings(ctx, src_ctx); + } + + ret = io_clone_buffers(ctx, src_ctx, &buf); + + if (src_ctx != ctx) + mutex_unlock(&src_ctx->uring_lock); + + fput(file); return ret; } diff --git a/io_uring/rsrc.h b/io_uring/rsrc.h index c8b093584461..190f7ee45de9 100644 --- a/io_uring/rsrc.h +++ b/io_uring/rsrc.h @@ -2,6 +2,8 @@ #ifndef IOU_RSRC_H #define IOU_RSRC_H +#include <linux/lockdep.h> + #define IO_NODE_ALLOC_CACHE_MAX 32 #define IO_RSRC_TAG_TABLE_SHIFT (PAGE_SHIFT - 3) @@ -43,7 +45,7 @@ struct io_imu_folio_data { unsigned int nr_folios; }; -struct io_rsrc_node *io_rsrc_node_alloc(struct io_ring_ctx *ctx, int type); +struct io_rsrc_node *io_rsrc_node_alloc(int type); void io_free_rsrc_node(struct io_ring_ctx *ctx, struct io_rsrc_node *node); void io_rsrc_data_free(struct io_ring_ctx *ctx, struct io_rsrc_data *data); int io_rsrc_data_alloc(struct io_rsrc_data *data, unsigned nr); @@ -80,6 +82,7 @@ static inline struct io_rsrc_node *io_rsrc_node_lookup(struct io_rsrc_data *data static inline void io_put_rsrc_node(struct io_ring_ctx *ctx, struct io_rsrc_node *node) { + lockdep_assert_held(&ctx->uring_lock); if (node && !--node->refs) io_free_rsrc_node(ctx, node); } diff --git a/io_uring/rw.c b/io_uring/rw.c index a9a2733be842..7aa1e4c9f64a 100644 --- a/io_uring/rw.c +++ b/io_uring/rw.c @@ -146,28 +146,15 @@ static inline int io_import_iovec(int rw, struct io_kiocb *req, return 0; } -static void io_rw_iovec_free(struct io_async_rw *rw) -{ - if (rw->free_iovec) { - kfree(rw->free_iovec); - rw->free_iov_nr = 0; - rw->free_iovec = NULL; - } -} - static void io_rw_recycle(struct io_kiocb *req, unsigned int issue_flags) { struct io_async_rw *rw = req->async_data; - struct iovec *iov; - if (unlikely(issue_flags & IO_URING_F_UNLOCKED)) { - io_rw_iovec_free(rw); + if (unlikely(issue_flags & IO_URING_F_UNLOCKED)) return; - } - iov = rw->free_iovec; + + io_alloc_cache_kasan(&rw->free_iovec, &rw->free_iov_nr); if (io_alloc_cache_put(&req->ctx->rw_cache, rw)) { - if (iov) - kasan_mempool_poison_object(iov); req->async_data = NULL; req->flags &= ~REQ_F_ASYNC_DATA; } @@ -208,27 +195,16 @@ static void io_req_rw_cleanup(struct io_kiocb *req, unsigned int issue_flags) } } -static void io_rw_async_data_init(void *obj) -{ - struct io_async_rw *rw = (struct io_async_rw *)obj; - - rw->free_iovec = NULL; - rw->bytes_done = 0; -} - static int io_rw_alloc_async(struct io_kiocb *req) { struct io_ring_ctx *ctx = req->ctx; struct io_async_rw *rw; - rw = io_uring_alloc_async_data(&ctx->rw_cache, req, 
io_rw_async_data_init); + rw = io_uring_alloc_async_data(&ctx->rw_cache, req); if (!rw) return -ENOMEM; - if (rw->free_iovec) { - kasan_mempool_unpoison_object(rw->free_iovec, - rw->free_iov_nr * sizeof(struct iovec)); + if (rw->free_iovec) req->flags |= REQ_F_NEED_CLEANUP; - } rw->bytes_done = 0; return 0; } @@ -1323,10 +1299,7 @@ void io_rw_cache_free(const void *entry) { struct io_async_rw *rw = (struct io_async_rw *) entry; - if (rw->free_iovec) { - kasan_mempool_unpoison_object(rw->free_iovec, - rw->free_iov_nr * sizeof(struct iovec)); - io_rw_iovec_free(rw); - } + if (rw->free_iovec) + kfree(rw->free_iovec); kfree(rw); } diff --git a/io_uring/rw.h b/io_uring/rw.h index 2d7656bd268d..eaa59bd64870 100644 --- a/io_uring/rw.h +++ b/io_uring/rw.h @@ -9,19 +9,24 @@ struct io_meta_state { struct io_async_rw { size_t bytes_done; - struct iov_iter iter; - struct iov_iter_state iter_state; - struct iovec fast_iov; struct iovec *free_iovec; - int free_iov_nr; - /* wpq is for buffered io, while meta fields are used with direct io */ - union { - struct wait_page_queue wpq; - struct { - struct uio_meta meta; - struct io_meta_state meta_state; + struct_group(clear, + struct iov_iter iter; + struct iov_iter_state iter_state; + struct iovec fast_iov; + int free_iov_nr; + /* + * wpq is for buffered io, while meta fields are used with + * direct io + */ + union { + struct wait_page_queue wpq; + struct { + struct uio_meta meta; + struct io_meta_state meta_state; + }; }; - }; + ); }; int io_prep_read_fixed(struct io_kiocb *req, const struct io_uring_sqe *sqe); diff --git a/io_uring/timeout.c b/io_uring/timeout.c index 2bd7e0a317bb..48fc8cf70784 100644 --- a/io_uring/timeout.c +++ b/io_uring/timeout.c @@ -544,7 +544,7 @@ static int __io_timeout_prep(struct io_kiocb *req, if (WARN_ON_ONCE(req_has_async_data(req))) return -EFAULT; - data = io_uring_alloc_async_data_nocache(req); + data = io_uring_alloc_async_data(NULL, req); if (!data) return -ENOMEM; data->req = req; diff --git a/io_uring/uring_cmd.c b/io_uring/uring_cmd.c index fc94c465a985..1f6a82128b47 100644 --- a/io_uring/uring_cmd.c +++ b/io_uring/uring_cmd.c @@ -168,23 +168,16 @@ void io_uring_cmd_done(struct io_uring_cmd *ioucmd, ssize_t ret, u64 res2, } EXPORT_SYMBOL_GPL(io_uring_cmd_done); -static void io_uring_cmd_init_once(void *obj) -{ - struct io_uring_cmd_data *data = obj; - - data->op_data = NULL; -} - static int io_uring_cmd_prep_setup(struct io_kiocb *req, const struct io_uring_sqe *sqe) { struct io_uring_cmd *ioucmd = io_kiocb_to_cmd(req, struct io_uring_cmd); struct io_uring_cmd_data *cache; - cache = io_uring_alloc_async_data(&req->ctx->uring_cache, req, - io_uring_cmd_init_once); + cache = io_uring_alloc_async_data(&req->ctx->uring_cache, req); if (!cache) return -ENOMEM; + cache->op_data = NULL; if (!(req->flags & REQ_F_FORCE_ASYNC)) { /* defer memcpy until we need it */ @@ -192,8 +185,8 @@ static int io_uring_cmd_prep_setup(struct io_kiocb *req, return 0; } - memcpy(req->async_data, sqe, uring_sqe_size(req->ctx)); - ioucmd->sqe = req->async_data; + memcpy(cache->sqes, sqe, uring_sqe_size(req->ctx)); + ioucmd->sqe = cache->sqes; return 0; } @@ -260,7 +253,7 @@ int io_uring_cmd(struct io_kiocb *req, unsigned int issue_flags) struct io_uring_cmd_data *cache = req->async_data; if (ioucmd->sqe != (void *) cache) - memcpy(cache, ioucmd->sqe, uring_sqe_size(req->ctx)); + memcpy(cache->sqes, ioucmd->sqe, uring_sqe_size(req->ctx)); return -EAGAIN; } else if (ret == -EIOCBQUEUED) { return -EIOCBQUEUED; @@ -350,7 +343,7 @@ int 
io_uring_cmd_sock(struct io_uring_cmd *cmd, unsigned int issue_flags) if (!prot || !prot->ioctl) return -EOPNOTSUPP; - switch (cmd->sqe->cmd_op) { + switch (cmd->cmd_op) { case SOCKET_URING_OP_SIOCINQ: ret = prot->ioctl(sk, SIOCINQ, &arg); if (ret) diff --git a/io_uring/waitid.c b/io_uring/waitid.c index 6778c0ee76c4..853e97a7b0ec 100644 --- a/io_uring/waitid.c +++ b/io_uring/waitid.c @@ -303,7 +303,7 @@ int io_waitid(struct io_kiocb *req, unsigned int issue_flags) struct io_waitid_async *iwa; int ret; - iwa = io_uring_alloc_async_data_nocache(req); + iwa = io_uring_alloc_async_data(NULL, req); if (!iwa) return -ENOMEM; diff --git a/kernel/audit.c b/kernel/audit.c index 13d0144efaa3..5f5bf85bcc90 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -1221,7 +1221,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh, struct audit_buffer *ab; u16 msg_type = nlh->nlmsg_type; struct audit_sig_info *sig_data; - struct lsm_context lsmctx; + struct lsm_context lsmctx = { NULL, 0, 0 }; err = audit_netlink_ok(skb, msg_type); if (err) diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index e421a5f2ec7d..2ca797cbe465 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -28,6 +28,7 @@ #include <linux/rcupdate_trace.h> #include <linux/workqueue.h> #include <linux/srcu.h> +#include <linux/oom.h> /* check_stable_address_space */ #include <linux/uprobes.h> @@ -1260,6 +1261,9 @@ register_for_each_vma(struct uprobe *uprobe, struct uprobe_consumer *new) * returns NULL in find_active_uprobe_rcu(). */ mmap_write_lock(mm); + if (check_stable_address_space(mm)) + goto unlock; + vma = find_vma(mm, info->vaddr); if (!vma || !valid_vma(vma, is_register) || file_inode(vma->vm_file) != uprobe->inode) diff --git a/kernel/fork.c b/kernel/fork.c index cba5ede2c639..735405a9c5f3 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -760,7 +760,8 @@ loop_out: mt_set_in_rcu(vmi.mas.tree); ksm_fork(mm, oldmm); khugepaged_fork(mm, oldmm); - } else if (mpnt) { + } else { + /* * The entire maple tree has already been duplicated. If the * mmap duplication fails, mark the failure point with @@ -768,8 +769,18 @@ loop_out: * stop releasing VMAs that have not been duplicated after this * point. */ - mas_set_range(&vmi.mas, mpnt->vm_start, mpnt->vm_end - 1); - mas_store(&vmi.mas, XA_ZERO_ENTRY); + if (mpnt) { + mas_set_range(&vmi.mas, mpnt->vm_start, mpnt->vm_end - 1); + mas_store(&vmi.mas, XA_ZERO_ENTRY); + /* Avoid OOM iterating a broken tree */ + set_bit(MMF_OOM_SKIP, &mm->flags); + } + /* + * The mm_struct is going to exit, but the locks will be dropped + * first. Set the mm_struct as unstable is advisable as it is + * not fully initialised. + */ + set_bit(MMF_UNSTABLE, &mm->flags); } out: mmap_write_unlock(mm); diff --git a/kernel/gen_kheaders.sh b/kernel/gen_kheaders.sh index 7e1340da5aca..00529c81cc40 100755 --- a/kernel/gen_kheaders.sh +++ b/kernel/gen_kheaders.sh @@ -7,20 +7,13 @@ set -e sfile="$(readlink -f "$0")" outdir="$(pwd)" tarfile=$1 -cpio_dir=$outdir/${tarfile%/*}/.tmp_cpio_dir +tmpdir=$outdir/${tarfile%/*}/.tmp_dir dir_list=" include/ arch/$SRCARCH/include/ " -if ! command -v cpio >/dev/null; then - echo >&2 "***" - echo >&2 "*** 'cpio' could not be found." - echo >&2 "***" - exit 1 -fi - # Support incremental builds by skipping archive generation # if timestamps of files being archived are not changed. @@ -48,9 +41,9 @@ all_dirs="$all_dirs $dir_list" # check include/generated/autoconf.h explicitly. # # Ignore them for md5 calculation to avoid pointless regeneration. 
-headers_md5="$(find $all_dirs -name "*.h" | - grep -v "include/generated/utsversion.h" | - grep -v "include/generated/autoconf.h" | +headers_md5="$(find $all_dirs -name "*.h" -a \ + ! -path include/generated/utsversion.h -a \ + ! -path include/generated/autoconf.h | xargs ls -l | md5sum | cut -d ' ' -f1)" # Any changes to this script will also cause a rebuild of the archive. @@ -65,36 +58,43 @@ fi echo " GEN $tarfile" -rm -rf $cpio_dir -mkdir $cpio_dir +rm -rf "${tmpdir}" +mkdir "${tmpdir}" if [ "$building_out_of_srctree" ]; then ( cd $srctree for f in $dir_list do find "$f" -name "*.h"; - done | cpio --quiet -pd $cpio_dir + done | tar -c -f - -T - | tar -xf - -C "${tmpdir}" ) fi -# The second CPIO can complain if files already exist which can happen with out -# of tree builds having stale headers in srctree. Just silence CPIO for now. for f in $dir_list; do find "$f" -name "*.h"; -done | cpio --quiet -pdu $cpio_dir >/dev/null 2>&1 +done | tar -c -f - -T - | tar -xf - -C "${tmpdir}" + +# Always exclude include/generated/utsversion.h +# Otherwise, the contents of the tarball may vary depending on the build steps. +rm -f "${tmpdir}/include/generated/utsversion.h" # Remove comments except SDPX lines -find $cpio_dir -type f -print0 | - xargs -0 -P8 -n1 perl -pi -e 'BEGIN {undef $/;}; s/\/\*((?!SPDX).)*?\*\///smg;' +# Use a temporary file to store directory contents to prevent find/xargs from +# seeing temporary files created by perl. +find "${tmpdir}" -type f -print0 > "${tmpdir}.contents.txt" +xargs -0 -P8 -n1 \ + perl -pi -e 'BEGIN {undef $/;}; s/\/\*((?!SPDX).)*?\*\///smg;' \ + < "${tmpdir}.contents.txt" +rm -f "${tmpdir}.contents.txt" # Create archive and try to normalize metadata for reproducibility. tar "${KBUILD_BUILD_TIMESTAMP:+--mtime=$KBUILD_BUILD_TIMESTAMP}" \ --exclude=".__afs*" --exclude=".nfs*" \ --owner=0 --group=0 --sort=name --numeric-owner --mode=u=rw,go=r,a+X \ - -I $XZ -cf $tarfile -C $cpio_dir/ . > /dev/null + -I $XZ -cf $tarfile -C "${tmpdir}/" . > /dev/null echo $headers_md5 > kernel/kheaders.md5 echo "$this_file_md5" >> kernel/kheaders.md5 echo "$(md5sum $tarfile | cut -d ' ' -f1)" >> kernel/kheaders.md5 -rm -rf $cpio_dir +rm -rf "${tmpdir}" diff --git a/kernel/module/Kconfig b/kernel/module/Kconfig index 74fe976e3b01..d7762ef5949a 100644 --- a/kernel/module/Kconfig +++ b/kernel/module/Kconfig @@ -169,6 +169,36 @@ config MODVERSIONS make them incompatible with the kernel you are running. If unsure, say N. +choice + prompt "Module versioning implementation" + depends on MODVERSIONS + help + Select the tool used to calculate symbol versions for modules. + + If unsure, select GENKSYMS. + +config GENKSYMS + bool "genksyms (from source code)" + help + Calculate symbol versions from pre-processed source code using + genksyms. + + If unsure, say Y. + +config GENDWARFKSYMS + bool "gendwarfksyms (from debugging information)" + depends on DEBUG_INFO + # Requires full debugging information, split DWARF not supported. + depends on !DEBUG_INFO_REDUCED && !DEBUG_INFO_SPLIT + # Requires ELF object files. + depends on !LTO + help + Calculate symbol versions from DWARF debugging information using + gendwarfksyms. Requires DEBUG_INFO to be enabled. + + If unsure, say N. +endchoice + config ASM_MODVERSIONS bool default HAVE_ASM_MODVERSIONS && MODVERSIONS @@ -177,6 +207,31 @@ config ASM_MODVERSIONS assembly. This can be enabled only when the target architecture supports it. 
+config EXTENDED_MODVERSIONS + bool "Extended Module Versioning Support" + depends on MODVERSIONS + help + This enables extended MODVERSIONs support, allowing long symbol + names to be versioned. + + The most likely reason you would enable this is to enable Rust + support. If unsure, say N. + +config BASIC_MODVERSIONS + bool "Basic Module Versioning Support" + depends on MODVERSIONS + default y + help + This enables basic MODVERSIONS support, allowing older tools or + kernels to potentially load modules. + + Disabling this may cause older `modprobe` or `kmod` to be unable + to read MODVERSIONS information from built modules. With this + disabled, older kernels may treat this module as unversioned. + + This is enabled by default when MODVERSIONS are enabled. + If unsure, say Y. + config MODULE_SRCVERSION_ALL bool "Source checksum for all modules" help diff --git a/kernel/module/internal.h b/kernel/module/internal.h index b35c0ec54a89..d09b46ef032f 100644 --- a/kernel/module/internal.h +++ b/kernel/module/internal.h @@ -55,8 +55,8 @@ extern const struct kernel_symbol __start___ksymtab[]; extern const struct kernel_symbol __stop___ksymtab[]; extern const struct kernel_symbol __start___ksymtab_gpl[]; extern const struct kernel_symbol __stop___ksymtab_gpl[]; -extern const s32 __start___kcrctab[]; -extern const s32 __start___kcrctab_gpl[]; +extern const u32 __start___kcrctab[]; +extern const u32 __start___kcrctab_gpl[]; struct load_info { const char *name; @@ -86,6 +86,8 @@ struct load_info { unsigned int vers; unsigned int info; unsigned int pcpu; + unsigned int vers_ext_crc; + unsigned int vers_ext_name; } index; }; @@ -102,7 +104,7 @@ struct find_symbol_arg { /* Output */ struct module *owner; - const s32 *crc; + const u32 *crc; const struct kernel_symbol *sym; enum mod_license license; }; @@ -385,16 +387,25 @@ static inline void init_param_lock(struct module *mod) { } #ifdef CONFIG_MODVERSIONS int check_version(const struct load_info *info, - const char *symname, struct module *mod, const s32 *crc); + const char *symname, struct module *mod, const u32 *crc); void module_layout(struct module *mod, struct modversion_info *ver, struct kernel_param *kp, struct kernel_symbol *ks, struct tracepoint * const *tp); int check_modstruct_version(const struct load_info *info, struct module *mod); int same_magic(const char *amagic, const char *bmagic, bool has_crcs); +struct modversion_info_ext { + size_t remaining; + const u32 *crc; + const char *name; +}; +void modversion_ext_start(const struct load_info *info, struct modversion_info_ext *ver); +void modversion_ext_advance(struct modversion_info_ext *ver); +#define for_each_modversion_info_ext(ver, info) \ + for (modversion_ext_start(info, &ver); ver.remaining > 0; modversion_ext_advance(&ver)) #else /* !CONFIG_MODVERSIONS */ static inline int check_version(const struct load_info *info, const char *symname, struct module *mod, - const s32 *crc) + const u32 *crc) { return 1; } diff --git a/kernel/module/main.c b/kernel/module/main.c index 8808b6906d5a..1fb9ad289a6f 100644 --- a/kernel/module/main.c +++ b/kernel/module/main.c @@ -86,7 +86,7 @@ struct mod_tree_root mod_tree __cacheline_aligned = { struct symsearch { const struct kernel_symbol *start, *stop; - const s32 *crcs; + const u32 *crcs; enum mod_license license; }; @@ -2074,6 +2074,82 @@ static int elf_validity_cache_index_str(struct load_info *info) } /** + * elf_validity_cache_index_versions() - Validate and cache version indices + * @info: Load info to cache version indices in. 
+ * Must have &load_info->sechdrs and &load_info->secstrings populated. + * @flags: Load flags, relevant to suppress version loading, see + * uapi/linux/module.h + * + * If we're ignoring modversions based on @flags, zero all version indices + * and return validity. Othewrise check: + * + * * If "__version_ext_crcs" is present, "__version_ext_names" is present + * * There is a name present for every crc + * + * Then populate: + * + * * &load_info->index.vers + * * &load_info->index.vers_ext_crc + * * &load_info->index.vers_ext_names + * + * if present. + * + * Return: %0 if valid, %-ENOEXEC on failure. + */ +static int elf_validity_cache_index_versions(struct load_info *info, int flags) +{ + unsigned int vers_ext_crc; + unsigned int vers_ext_name; + size_t crc_count; + size_t remaining_len; + size_t name_size; + char *name; + + /* If modversions were suppressed, pretend we didn't find any */ + if (flags & MODULE_INIT_IGNORE_MODVERSIONS) { + info->index.vers = 0; + info->index.vers_ext_crc = 0; + info->index.vers_ext_name = 0; + return 0; + } + + vers_ext_crc = find_sec(info, "__version_ext_crcs"); + vers_ext_name = find_sec(info, "__version_ext_names"); + + /* If we have one field, we must have the other */ + if (!!vers_ext_crc != !!vers_ext_name) { + pr_err("extended version crc+name presence does not match"); + return -ENOEXEC; + } + + /* + * If we have extended version information, we should have the same + * number of entries in every section. + */ + if (vers_ext_crc) { + crc_count = info->sechdrs[vers_ext_crc].sh_size / sizeof(u32); + name = (void *)info->hdr + + info->sechdrs[vers_ext_name].sh_offset; + remaining_len = info->sechdrs[vers_ext_name].sh_size; + + while (crc_count--) { + name_size = strnlen(name, remaining_len) + 1; + if (name_size > remaining_len) { + pr_err("more extended version crcs than names"); + return -ENOEXEC; + } + remaining_len -= name_size; + name += name_size; + } + } + + info->index.vers = find_sec(info, "__versions"); + info->index.vers_ext_crc = vers_ext_crc; + info->index.vers_ext_name = vers_ext_name; + return 0; +} + +/** * elf_validity_cache_index() - Resolve, validate, cache section indices * @info: Load info to read from and update. * &load_info->sechdrs and &load_info->secstrings must be populated. @@ -2087,9 +2163,7 @@ static int elf_validity_cache_index_str(struct load_info *info) * * elf_validity_cache_index_mod() * * elf_validity_cache_index_sym() * * elf_validity_cache_index_str() - * - * If versioning is not suppressed via flags, load the version index from - * a section called "__versions" with no validation. + * * elf_validity_cache_index_versions() * * If CONFIG_SMP is enabled, load the percpu section by name with no * validation. @@ -2112,11 +2186,9 @@ static int elf_validity_cache_index(struct load_info *info, int flags) err = elf_validity_cache_index_str(info); if (err < 0) return err; - - if (flags & MODULE_INIT_IGNORE_MODVERSIONS) - info->index.vers = 0; /* Pretend no __versions section! */ - else - info->index.vers = find_sec(info, "__versions"); + err = elf_validity_cache_index_versions(info, flags); + if (err < 0) + return err; info->index.pcpu = find_pcpusec(info); @@ -2327,6 +2399,10 @@ static int rewrite_section_headers(struct load_info *info, int flags) /* Track but don't keep modinfo and version sections. 
*/ info->sechdrs[info->index.vers].sh_flags &= ~(unsigned long)SHF_ALLOC; + info->sechdrs[info->index.vers_ext_crc].sh_flags &= + ~(unsigned long)SHF_ALLOC; + info->sechdrs[info->index.vers_ext_name].sh_flags &= + ~(unsigned long)SHF_ALLOC; info->sechdrs[info->index.info].sh_flags &= ~(unsigned long)SHF_ALLOC; return 0; diff --git a/kernel/module/version.c b/kernel/module/version.c index 53f43ac5a73e..3718a8868321 100644 --- a/kernel/module/version.c +++ b/kernel/module/version.c @@ -13,17 +13,34 @@ int check_version(const struct load_info *info, const char *symname, struct module *mod, - const s32 *crc) + const u32 *crc) { Elf_Shdr *sechdrs = info->sechdrs; unsigned int versindex = info->index.vers; unsigned int i, num_versions; struct modversion_info *versions; + struct modversion_info_ext version_ext; /* Exporting module didn't supply crcs? OK, we're already tainted. */ if (!crc) return 1; + /* If we have extended version info, rely on it */ + if (info->index.vers_ext_crc) { + for_each_modversion_info_ext(version_ext, info) { + if (strcmp(version_ext.name, symname) != 0) + continue; + if (*version_ext.crc == *crc) + return 1; + pr_debug("Found checksum %X vs module %X\n", + *crc, *version_ext.crc); + goto bad_version; + } + pr_warn_once("%s: no extended symbol version for %s\n", + info->name, symname); + return 1; + } + /* No versions at all? modprobe --force does this. */ if (versindex == 0) return try_to_force_load(mod, symname) == 0; @@ -87,6 +104,34 @@ int same_magic(const char *amagic, const char *bmagic, return strcmp(amagic, bmagic) == 0; } +void modversion_ext_start(const struct load_info *info, + struct modversion_info_ext *start) +{ + unsigned int crc_idx = info->index.vers_ext_crc; + unsigned int name_idx = info->index.vers_ext_name; + Elf_Shdr *sechdrs = info->sechdrs; + + /* + * Both of these fields are needed for this to be useful + * Any future fields should be initialized to NULL if absent. + */ + if (crc_idx == 0 || name_idx == 0) { + start->remaining = 0; + return; + } + + start->crc = (const u32 *)sechdrs[crc_idx].sh_addr; + start->name = (const char *)sechdrs[name_idx].sh_addr; + start->remaining = sechdrs[crc_idx].sh_size / sizeof(*start->crc); +} + +void modversion_ext_advance(struct modversion_info_ext *vers) +{ + vers->remaining--; + vers->crc++; + vers->name += strlen(vers->name) + 1; +} + /* * Generate the signature for all relevant module structures here. * If these change, we don't want to try to parse the module. 
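The extended modversions layout introduced above stores CRCs in "__version_ext_crcs" and NUL-terminated symbol names in the parallel "__version_ext_names" section, and for_each_modversion_info_ext() walks both in lockstep. A minimal sketch of that lookup pattern, matching the check_version() hunk above and assuming the struct modversion_info_ext and iterator definitions added to kernel/module/internal.h in this series (the lookup_ext_crc() helper name is illustrative, not part of the patch):

    /* Sketch only: mirrors the extended-CRC walk done in check_version(). */
    static const u32 *lookup_ext_crc(const struct load_info *info,
                                     const char *symname)
    {
            struct modversion_info_ext version_ext;

            /* Walk the parallel __version_ext_crcs/__version_ext_names sections. */
            for_each_modversion_info_ext(version_ext, info) {
                    if (strcmp(version_ext.name, symname) == 0)
                            return version_ext.crc;
            }
            return NULL;    /* no extended record for this symbol */
    }

As the check_version() change shows, a missing extended entry is not fatal: when the extended sections are present but carry no record for a symbol, the loader warns once and accepts the symbol rather than rejecting the module.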
diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index 1f87aa01ba44..10a01af63a80 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -608,7 +608,11 @@ int hibernation_platform_enter(void) local_irq_disable(); system_state = SYSTEM_SUSPEND; - syscore_suspend(); + + error = syscore_suspend(); + if (error) + goto Enable_irqs; + if (pm_wakeup_pending()) { error = -EAGAIN; goto Power_up; @@ -620,6 +624,7 @@ int hibernation_platform_enter(void) Power_up: syscore_resume(); + Enable_irqs: system_state = SYSTEM_RUNNING; local_irq_enable(); diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index a2a29e3fffca..1a19d69b91ed 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -666,7 +666,11 @@ static int sugov_kthread_create(struct sugov_policy *sg_policy) } sg_policy->thread = thread; - kthread_bind_mask(thread, policy->related_cpus); + if (policy->dvfs_possible_from_any_cpu) + set_cpus_allowed_ptr(thread, policy->related_cpus); + else + kthread_bind_mask(thread, policy->related_cpus); + init_irq_work(&sg_policy->irq_work, sugov_irq_work); mutex_init(&sg_policy->work_lock); diff --git a/kernel/sched/syscalls.c b/kernel/sched/syscalls.c index 149e2c8036d3..456d339be98f 100644 --- a/kernel/sched/syscalls.c +++ b/kernel/sched/syscalls.c @@ -1130,6 +1130,13 @@ int dl_task_check_affinity(struct task_struct *p, const struct cpumask *mask) return 0; /* + * The special/sugov task isn't part of regular bandwidth/admission + * control so let userspace change affinities. + */ + if (dl_entity_is_special(&p->dl)) + return 0; + + /* * Since bandwidth control happens on root_domain basis, * if admission test is enabled, we only admit -deadline * tasks allowed to run on all the CPUs in the task's diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 775966cf6114..1af972a92d06 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -2456,22 +2456,8 @@ config TEST_BITMAP config TEST_UUID tristate "Test functions located in the uuid module at runtime" -config XARRAY_KUNIT - tristate "KUnit test XArray code at runtime" if !KUNIT_ALL_TESTS - depends on KUNIT - default KUNIT_ALL_TESTS - help - Enable this option to test the Xarray code at boot. - - KUnit tests run during boot and output the results to the debug log - in TAP format (http://testanything.org/). Only useful for kernel devs - running the KUnit test harness, and not intended for inclusion into a - production build. - - For more information on KUnit and unit tests in general please refer - to the KUnit documentation in Documentation/dev-tools/kunit/. - - If unsure, say N. 
+config TEST_XARRAY + tristate "Test the XArray code at runtime" config TEST_MAPLE_TREE tristate "Test the Maple Tree code at runtime or module load" diff --git a/lib/Makefile b/lib/Makefile index f1c6e9d76a7c..d5cfc7afbbb8 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -94,6 +94,7 @@ GCOV_PROFILE_test_bitmap.o := n endif obj-$(CONFIG_TEST_UUID) += test_uuid.o +obj-$(CONFIG_TEST_XARRAY) += test_xarray.o obj-$(CONFIG_TEST_MAPLE_TREE) += test_maple_tree.o obj-$(CONFIG_TEST_PARMAN) += test_parman.o obj-$(CONFIG_TEST_KMOD) += test_kmod.o @@ -372,7 +373,6 @@ CFLAGS_bitfield_kunit.o := $(DISABLE_STRUCTLEAK_PLUGIN) obj-$(CONFIG_BITFIELD_KUNIT) += bitfield_kunit.o obj-$(CONFIG_CHECKSUM_KUNIT) += checksum_kunit.o obj-$(CONFIG_UTIL_MACROS_KUNIT) += util_macros_kunit.o -obj-$(CONFIG_XARRAY_KUNIT) += test_xarray.o obj-$(CONFIG_LIST_KUNIT_TEST) += list-test.o obj-$(CONFIG_HASHTABLE_KUNIT_TEST) += hashtable_test.o obj-$(CONFIG_LINEAR_RANGES_TEST) += test_linear_ranges.o diff --git a/lib/stackinit_kunit.c b/lib/stackinit_kunit.c index c40818ec9c18..fbe910c9c825 100644 --- a/lib/stackinit_kunit.c +++ b/lib/stackinit_kunit.c @@ -47,10 +47,12 @@ static bool stackinit_range_contains(char *haystack_start, size_t haystack_size, #define DO_NOTHING_TYPE_SCALAR(var_type) var_type #define DO_NOTHING_TYPE_STRING(var_type) void #define DO_NOTHING_TYPE_STRUCT(var_type) void +#define DO_NOTHING_TYPE_UNION(var_type) void #define DO_NOTHING_RETURN_SCALAR(ptr) *(ptr) #define DO_NOTHING_RETURN_STRING(ptr) /**/ #define DO_NOTHING_RETURN_STRUCT(ptr) /**/ +#define DO_NOTHING_RETURN_UNION(ptr) /**/ #define DO_NOTHING_CALL_SCALAR(var, name) \ (var) = do_nothing_ ## name(&(var)) @@ -58,10 +60,13 @@ static bool stackinit_range_contains(char *haystack_start, size_t haystack_size, do_nothing_ ## name(var) #define DO_NOTHING_CALL_STRUCT(var, name) \ do_nothing_ ## name(&(var)) +#define DO_NOTHING_CALL_UNION(var, name) \ + do_nothing_ ## name(&(var)) #define FETCH_ARG_SCALAR(var) &var #define FETCH_ARG_STRING(var) var #define FETCH_ARG_STRUCT(var) &var +#define FETCH_ARG_UNION(var) &var /* * On m68k, if the leaf function test variable is longer than 8 bytes, @@ -77,6 +82,7 @@ static bool stackinit_range_contains(char *haystack_start, size_t haystack_size, #define INIT_CLONE_SCALAR /**/ #define INIT_CLONE_STRING [FILL_SIZE_STRING] #define INIT_CLONE_STRUCT /**/ +#define INIT_CLONE_UNION /**/ #define ZERO_CLONE_SCALAR(zero) memset(&(zero), 0x00, sizeof(zero)) #define ZERO_CLONE_STRING(zero) memset(&(zero), 0x00, sizeof(zero)) @@ -92,6 +98,7 @@ static bool stackinit_range_contains(char *haystack_start, size_t haystack_size, zero.three = 0; \ zero.four = 0; \ } while (0) +#define ZERO_CLONE_UNION(zero) ZERO_CLONE_STRUCT(zero) #define INIT_SCALAR_none(var_type) /**/ #define INIT_SCALAR_zero(var_type) = 0 @@ -101,6 +108,7 @@ static bool stackinit_range_contains(char *haystack_start, size_t haystack_size, #define INIT_STRUCT_none(var_type) /**/ #define INIT_STRUCT_zero(var_type) = { } +#define INIT_STRUCT_old_zero(var_type) = { 0 } #define __static_partial { .two = 0, } @@ -146,6 +154,34 @@ static bool stackinit_range_contains(char *haystack_start, size_t haystack_size, #define INIT_STRUCT_assigned_copy(var_type) \ ; var = *(arg) +/* Union initialization is the same as structs. 
*/ +#define INIT_UNION_none(var_type) INIT_STRUCT_none(var_type) +#define INIT_UNION_zero(var_type) INIT_STRUCT_zero(var_type) +#define INIT_UNION_old_zero(var_type) INIT_STRUCT_old_zero(var_type) + +#define INIT_UNION_static_partial(var_type) \ + INIT_STRUCT_static_partial(var_type) +#define INIT_UNION_static_all(var_type) \ + INIT_STRUCT_static_all(var_type) +#define INIT_UNION_dynamic_partial(var_type) \ + INIT_STRUCT_dynamic_partial(var_type) +#define INIT_UNION_dynamic_all(var_type) \ + INIT_STRUCT_dynamic_all(var_type) +#define INIT_UNION_runtime_partial(var_type) \ + INIT_STRUCT_runtime_partial(var_type) +#define INIT_UNION_runtime_all(var_type) \ + INIT_STRUCT_runtime_all(var_type) +#define INIT_UNION_assigned_static_partial(var_type) \ + INIT_STRUCT_assigned_static_partial(var_type) +#define INIT_UNION_assigned_static_all(var_type) \ + INIT_STRUCT_assigned_static_all(var_type) +#define INIT_UNION_assigned_dynamic_partial(var_type) \ + INIT_STRUCT_assigned_dynamic_partial(var_type) +#define INIT_UNION_assigned_dynamic_all(var_type) \ + INIT_STRUCT_assigned_dynamic_all(var_type) +#define INIT_UNION_assigned_copy(var_type) \ + INIT_STRUCT_assigned_copy(var_type) + /* * @name: unique string name for the test * @var_type: type to be tested for zeroing initialization @@ -294,6 +330,33 @@ struct test_user { unsigned long four; }; +/* No padding: all members are the same size. */ +union test_same_sizes { + unsigned long one; + unsigned long two; + unsigned long three; + unsigned long four; +}; + +/* Mismatched sizes, with one and two being small */ +union test_small_start { + char one:1; + char two; + short three; + unsigned long four; + struct big_struct { + unsigned long array[8]; + } big; +}; + +/* Mismatched sizes, with one and two being small */ +union test_small_end { + short one; + unsigned long two; + char three:1; + char four; +}; + #define ALWAYS_PASS WANT_SUCCESS #define ALWAYS_FAIL XFAIL @@ -332,6 +395,11 @@ struct test_user { struct test_ ## name, STRUCT, init, \ xfail) +#define DEFINE_UNION_TEST(name, init, xfail) \ + DEFINE_TEST(name ## _ ## init, \ + union test_ ## name, STRUCT, init, \ + xfail) + #define DEFINE_STRUCT_TESTS(init, xfail) \ DEFINE_STRUCT_TEST(small_hole, init, xfail); \ DEFINE_STRUCT_TEST(big_hole, init, xfail); \ @@ -343,9 +411,22 @@ struct test_user { xfail); \ DEFINE_STRUCT_TESTS(base ## _ ## all, xfail) +#define DEFINE_UNION_INITIALIZER_TESTS(base, xfail) \ + DEFINE_UNION_TESTS(base ## _ ## partial, \ + xfail); \ + DEFINE_UNION_TESTS(base ## _ ## all, xfail) + +#define DEFINE_UNION_TESTS(init, xfail) \ + DEFINE_UNION_TEST(same_sizes, init, xfail); \ + DEFINE_UNION_TEST(small_start, init, xfail); \ + DEFINE_UNION_TEST(small_end, init, xfail); + /* These should be fully initialized all the time! */ DEFINE_SCALAR_TESTS(zero, ALWAYS_PASS); DEFINE_STRUCT_TESTS(zero, ALWAYS_PASS); +DEFINE_STRUCT_TESTS(old_zero, ALWAYS_PASS); +DEFINE_UNION_TESTS(zero, ALWAYS_PASS); +DEFINE_UNION_TESTS(old_zero, ALWAYS_PASS); /* Struct initializers: padding may be left uninitialized. 
*/ DEFINE_STRUCT_INITIALIZER_TESTS(static, STRONG_PASS); DEFINE_STRUCT_INITIALIZER_TESTS(dynamic, STRONG_PASS); @@ -353,6 +434,12 @@ DEFINE_STRUCT_INITIALIZER_TESTS(runtime, STRONG_PASS); DEFINE_STRUCT_INITIALIZER_TESTS(assigned_static, STRONG_PASS); DEFINE_STRUCT_INITIALIZER_TESTS(assigned_dynamic, STRONG_PASS); DEFINE_STRUCT_TESTS(assigned_copy, ALWAYS_FAIL); +DEFINE_UNION_INITIALIZER_TESTS(static, STRONG_PASS); +DEFINE_UNION_INITIALIZER_TESTS(dynamic, STRONG_PASS); +DEFINE_UNION_INITIALIZER_TESTS(runtime, STRONG_PASS); +DEFINE_UNION_INITIALIZER_TESTS(assigned_static, STRONG_PASS); +DEFINE_UNION_INITIALIZER_TESTS(assigned_dynamic, STRONG_PASS); +DEFINE_UNION_TESTS(assigned_copy, ALWAYS_FAIL); /* No initialization without compiler instrumentation. */ DEFINE_SCALAR_TESTS(none, STRONG_PASS); DEFINE_STRUCT_TESTS(none, BYREF_PASS); @@ -436,13 +523,23 @@ DEFINE_TEST_DRIVER(switch_2_none, uint64_t, SCALAR, ALWAYS_FAIL); KUNIT_CASE(test_trailing_hole_ ## init),\ KUNIT_CASE(test_packed_ ## init) \ +#define KUNIT_test_unions(init) \ + KUNIT_CASE(test_same_sizes_ ## init), \ + KUNIT_CASE(test_small_start_ ## init), \ + KUNIT_CASE(test_small_end_ ## init) \ + static struct kunit_case stackinit_test_cases[] = { /* These are explicitly initialized and should always pass. */ KUNIT_test_scalars(zero), KUNIT_test_structs(zero), + KUNIT_test_structs(old_zero), + KUNIT_test_unions(zero), + KUNIT_test_unions(old_zero), /* Padding here appears to be accidentally always initialized? */ KUNIT_test_structs(dynamic_partial), KUNIT_test_structs(assigned_dynamic_partial), + KUNIT_test_unions(dynamic_partial), + KUNIT_test_unions(assigned_dynamic_partial), /* Padding initialization depends on compiler behaviors. */ KUNIT_test_structs(static_partial), KUNIT_test_structs(static_all), @@ -452,8 +549,17 @@ static struct kunit_case stackinit_test_cases[] = { KUNIT_test_structs(assigned_static_partial), KUNIT_test_structs(assigned_static_all), KUNIT_test_structs(assigned_dynamic_all), + KUNIT_test_unions(static_partial), + KUNIT_test_unions(static_all), + KUNIT_test_unions(dynamic_all), + KUNIT_test_unions(runtime_partial), + KUNIT_test_unions(runtime_all), + KUNIT_test_unions(assigned_static_partial), + KUNIT_test_unions(assigned_static_all), + KUNIT_test_unions(assigned_dynamic_all), /* Everything fails this since it effectively performs a memcpy(). */ KUNIT_test_structs(assigned_copy), + KUNIT_test_unions(assigned_copy), /* STRUCTLEAK_BYREF_ALL should cover everything from here down. */ KUNIT_test_scalars(none), KUNIT_CASE(test_switch_1_none), diff --git a/lib/test_xarray.c b/lib/test_xarray.c index eab5971d0a48..6932a26f4927 100644 --- a/lib/test_xarray.c +++ b/lib/test_xarray.c @@ -6,10 +6,11 @@ * Author: Matthew Wilcox <willy@infradead.org> */ -#include <kunit/test.h> - -#include <linux/module.h> #include <linux/xarray.h> +#include <linux/module.h> + +static unsigned int tests_run; +static unsigned int tests_passed; static const unsigned int order_limit = IS_ENABLED(CONFIG_XARRAY_MULTI) ? 
BITS_PER_LONG : 1; @@ -19,12 +20,15 @@ static const unsigned int order_limit = void xa_dump(const struct xarray *xa) { } # endif #undef XA_BUG_ON -#define XA_BUG_ON(xa, x) do { \ - if (x) { \ - KUNIT_FAIL(test, #x); \ - xa_dump(xa); \ - dump_stack(); \ - } \ +#define XA_BUG_ON(xa, x) do { \ + tests_run++; \ + if (x) { \ + printk("BUG at %s:%d\n", __func__, __LINE__); \ + xa_dump(xa); \ + dump_stack(); \ + } else { \ + tests_passed++; \ + } \ } while (0) #endif @@ -38,13 +42,13 @@ static void *xa_store_index(struct xarray *xa, unsigned long index, gfp_t gfp) return xa_store(xa, index, xa_mk_index(index), gfp); } -static void xa_insert_index(struct kunit *test, struct xarray *xa, unsigned long index) +static void xa_insert_index(struct xarray *xa, unsigned long index) { XA_BUG_ON(xa, xa_insert(xa, index, xa_mk_index(index), GFP_KERNEL) != 0); } -static void xa_alloc_index(struct kunit *test, struct xarray *xa, unsigned long index, gfp_t gfp) +static void xa_alloc_index(struct xarray *xa, unsigned long index, gfp_t gfp) { u32 id; @@ -53,7 +57,7 @@ static void xa_alloc_index(struct kunit *test, struct xarray *xa, unsigned long XA_BUG_ON(xa, id != index); } -static void xa_erase_index(struct kunit *test, struct xarray *xa, unsigned long index) +static void xa_erase_index(struct xarray *xa, unsigned long index) { XA_BUG_ON(xa, xa_erase(xa, index) != xa_mk_index(index)); XA_BUG_ON(xa, xa_load(xa, index) != NULL); @@ -79,15 +83,8 @@ static void *xa_store_order(struct xarray *xa, unsigned long index, return curr; } -static inline struct xarray *xa_param(struct kunit *test) +static noinline void check_xa_err(struct xarray *xa) { - return *(struct xarray **)test->param_value; -} - -static noinline void check_xa_err(struct kunit *test) -{ - struct xarray *xa = xa_param(test); - XA_BUG_ON(xa, xa_err(xa_store_index(xa, 0, GFP_NOWAIT)) != 0); XA_BUG_ON(xa, xa_err(xa_erase(xa, 0)) != 0); #ifndef __KERNEL__ @@ -102,10 +99,8 @@ static noinline void check_xa_err(struct kunit *test) // XA_BUG_ON(xa, xa_err(xa_store(xa, 0, xa_mk_internal(0), 0)) != -EINVAL); } -static noinline void check_xas_retry(struct kunit *test) +static noinline void check_xas_retry(struct xarray *xa) { - struct xarray *xa = xa_param(test); - XA_STATE(xas, xa, 0); void *entry; @@ -114,7 +109,7 @@ static noinline void check_xas_retry(struct kunit *test) rcu_read_lock(); XA_BUG_ON(xa, xas_find(&xas, ULONG_MAX) != xa_mk_value(0)); - xa_erase_index(test, xa, 1); + xa_erase_index(xa, 1); XA_BUG_ON(xa, !xa_is_retry(xas_reload(&xas))); XA_BUG_ON(xa, xas_retry(&xas, NULL)); XA_BUG_ON(xa, xas_retry(&xas, xa_mk_value(0))); @@ -145,14 +140,12 @@ static noinline void check_xas_retry(struct kunit *test) } xas_unlock(&xas); - xa_erase_index(test, xa, 0); - xa_erase_index(test, xa, 1); + xa_erase_index(xa, 0); + xa_erase_index(xa, 1); } -static noinline void check_xa_load(struct kunit *test) +static noinline void check_xa_load(struct xarray *xa) { - struct xarray *xa = xa_param(test); - unsigned long i, j; for (i = 0; i < 1024; i++) { @@ -174,15 +167,13 @@ static noinline void check_xa_load(struct kunit *test) else XA_BUG_ON(xa, entry); } - xa_erase_index(test, xa, i); + xa_erase_index(xa, i); } XA_BUG_ON(xa, !xa_empty(xa)); } -static noinline void check_xa_mark_1(struct kunit *test, unsigned long index) +static noinline void check_xa_mark_1(struct xarray *xa, unsigned long index) { - struct xarray *xa = xa_param(test); - unsigned int order; unsigned int max_order = IS_ENABLED(CONFIG_XARRAY_MULTI) ? 
8 : 1; @@ -202,7 +193,7 @@ static noinline void check_xa_mark_1(struct kunit *test, unsigned long index) XA_BUG_ON(xa, xa_get_mark(xa, index, XA_MARK_1)); /* Storing NULL clears marks, and they can't be set again */ - xa_erase_index(test, xa, index); + xa_erase_index(xa, index); XA_BUG_ON(xa, !xa_empty(xa)); XA_BUG_ON(xa, xa_get_mark(xa, index, XA_MARK_0)); xa_set_mark(xa, index, XA_MARK_0); @@ -253,17 +244,15 @@ static noinline void check_xa_mark_1(struct kunit *test, unsigned long index) XA_BUG_ON(xa, xa_get_mark(xa, next, XA_MARK_0)); XA_BUG_ON(xa, xa_get_mark(xa, next, XA_MARK_1)); XA_BUG_ON(xa, xa_get_mark(xa, next, XA_MARK_2)); - xa_erase_index(test, xa, index); - xa_erase_index(test, xa, next); + xa_erase_index(xa, index); + xa_erase_index(xa, next); XA_BUG_ON(xa, !xa_empty(xa)); } XA_BUG_ON(xa, !xa_empty(xa)); } -static noinline void check_xa_mark_2(struct kunit *test) +static noinline void check_xa_mark_2(struct xarray *xa) { - struct xarray *xa = xa_param(test); - XA_STATE(xas, xa, 0); unsigned long index; unsigned int count = 0; @@ -300,11 +289,9 @@ static noinline void check_xa_mark_2(struct kunit *test) xa_destroy(xa); } -static noinline void check_xa_mark_3(struct kunit *test) +static noinline void check_xa_mark_3(struct xarray *xa) { #ifdef CONFIG_XARRAY_MULTI - struct xarray *xa = xa_param(test); - XA_STATE(xas, xa, 0x41); void *entry; int count = 0; @@ -323,21 +310,19 @@ static noinline void check_xa_mark_3(struct kunit *test) #endif } -static noinline void check_xa_mark(struct kunit *test) +static noinline void check_xa_mark(struct xarray *xa) { unsigned long index; for (index = 0; index < 16384; index += 4) - check_xa_mark_1(test, index); + check_xa_mark_1(xa, index); - check_xa_mark_2(test); - check_xa_mark_3(test); + check_xa_mark_2(xa); + check_xa_mark_3(xa); } -static noinline void check_xa_shrink(struct kunit *test) +static noinline void check_xa_shrink(struct xarray *xa) { - struct xarray *xa = xa_param(test); - XA_STATE(xas, xa, 1); struct xa_node *node; unsigned int order; @@ -362,7 +347,7 @@ static noinline void check_xa_shrink(struct kunit *test) XA_BUG_ON(xa, xas_load(&xas) != NULL); xas_unlock(&xas); XA_BUG_ON(xa, xa_load(xa, 0) != xa_mk_value(0)); - xa_erase_index(test, xa, 0); + xa_erase_index(xa, 0); XA_BUG_ON(xa, !xa_empty(xa)); for (order = 0; order < max_order; order++) { @@ -379,49 +364,45 @@ static noinline void check_xa_shrink(struct kunit *test) XA_BUG_ON(xa, xa_head(xa) == node); rcu_read_unlock(); XA_BUG_ON(xa, xa_load(xa, max + 1) != NULL); - xa_erase_index(test, xa, ULONG_MAX); + xa_erase_index(xa, ULONG_MAX); XA_BUG_ON(xa, xa->xa_head != node); - xa_erase_index(test, xa, 0); + xa_erase_index(xa, 0); } } -static noinline void check_insert(struct kunit *test) +static noinline void check_insert(struct xarray *xa) { - struct xarray *xa = xa_param(test); - unsigned long i; for (i = 0; i < 1024; i++) { - xa_insert_index(test, xa, i); + xa_insert_index(xa, i); XA_BUG_ON(xa, xa_load(xa, i - 1) != NULL); XA_BUG_ON(xa, xa_load(xa, i + 1) != NULL); - xa_erase_index(test, xa, i); + xa_erase_index(xa, i); } for (i = 10; i < BITS_PER_LONG; i++) { - xa_insert_index(test, xa, 1UL << i); + xa_insert_index(xa, 1UL << i); XA_BUG_ON(xa, xa_load(xa, (1UL << i) - 1) != NULL); XA_BUG_ON(xa, xa_load(xa, (1UL << i) + 1) != NULL); - xa_erase_index(test, xa, 1UL << i); + xa_erase_index(xa, 1UL << i); - xa_insert_index(test, xa, (1UL << i) - 1); + xa_insert_index(xa, (1UL << i) - 1); XA_BUG_ON(xa, xa_load(xa, (1UL << i) - 2) != NULL); XA_BUG_ON(xa, xa_load(xa, 1UL << i) 
!= NULL); - xa_erase_index(test, xa, (1UL << i) - 1); + xa_erase_index(xa, (1UL << i) - 1); } - xa_insert_index(test, xa, ~0UL); + xa_insert_index(xa, ~0UL); XA_BUG_ON(xa, xa_load(xa, 0UL) != NULL); XA_BUG_ON(xa, xa_load(xa, ~1UL) != NULL); - xa_erase_index(test, xa, ~0UL); + xa_erase_index(xa, ~0UL); XA_BUG_ON(xa, !xa_empty(xa)); } -static noinline void check_cmpxchg(struct kunit *test) +static noinline void check_cmpxchg(struct xarray *xa) { - struct xarray *xa = xa_param(test); - void *FIVE = xa_mk_value(5); void *SIX = xa_mk_value(6); void *LOTS = xa_mk_value(12345678); @@ -437,16 +418,14 @@ static noinline void check_cmpxchg(struct kunit *test) XA_BUG_ON(xa, xa_insert(xa, 5, FIVE, GFP_KERNEL) != -EBUSY); XA_BUG_ON(xa, xa_cmpxchg(xa, 5, FIVE, NULL, GFP_KERNEL) != FIVE); XA_BUG_ON(xa, xa_insert(xa, 5, FIVE, GFP_KERNEL) == -EBUSY); - xa_erase_index(test, xa, 12345678); - xa_erase_index(test, xa, 5); + xa_erase_index(xa, 12345678); + xa_erase_index(xa, 5); XA_BUG_ON(xa, !xa_empty(xa)); } -static noinline void check_cmpxchg_order(struct kunit *test) +static noinline void check_cmpxchg_order(struct xarray *xa) { #ifdef CONFIG_XARRAY_MULTI - struct xarray *xa = xa_param(test); - void *FIVE = xa_mk_value(5); unsigned int i, order = 3; @@ -497,10 +476,8 @@ static noinline void check_cmpxchg_order(struct kunit *test) #endif } -static noinline void check_reserve(struct kunit *test) +static noinline void check_reserve(struct xarray *xa) { - struct xarray *xa = xa_param(test); - void *entry; unsigned long index; int count; @@ -517,7 +494,7 @@ static noinline void check_reserve(struct kunit *test) XA_BUG_ON(xa, xa_reserve(xa, 12345678, GFP_KERNEL) != 0); XA_BUG_ON(xa, xa_store_index(xa, 12345678, GFP_NOWAIT) != NULL); xa_release(xa, 12345678); - xa_erase_index(test, xa, 12345678); + xa_erase_index(xa, 12345678); XA_BUG_ON(xa, !xa_empty(xa)); /* cmpxchg sees a reserved entry as ZERO */ @@ -525,7 +502,7 @@ static noinline void check_reserve(struct kunit *test) XA_BUG_ON(xa, xa_cmpxchg(xa, 12345678, XA_ZERO_ENTRY, xa_mk_value(12345678), GFP_NOWAIT) != NULL); xa_release(xa, 12345678); - xa_erase_index(test, xa, 12345678); + xa_erase_index(xa, 12345678); XA_BUG_ON(xa, !xa_empty(xa)); /* xa_insert treats it as busy */ @@ -565,10 +542,8 @@ static noinline void check_reserve(struct kunit *test) xa_destroy(xa); } -static noinline void check_xas_erase(struct kunit *test) +static noinline void check_xas_erase(struct xarray *xa) { - struct xarray *xa = xa_param(test); - XA_STATE(xas, xa, 0); void *entry; unsigned long i, j; @@ -606,11 +581,9 @@ static noinline void check_xas_erase(struct kunit *test) } #ifdef CONFIG_XARRAY_MULTI -static noinline void check_multi_store_1(struct kunit *test, unsigned long index, +static noinline void check_multi_store_1(struct xarray *xa, unsigned long index, unsigned int order) { - struct xarray *xa = xa_param(test); - XA_STATE(xas, xa, index); unsigned long min = index & ~((1UL << order) - 1); unsigned long max = min + (1UL << order); @@ -629,15 +602,13 @@ static noinline void check_multi_store_1(struct kunit *test, unsigned long index XA_BUG_ON(xa, xa_load(xa, max) != NULL); XA_BUG_ON(xa, xa_load(xa, min - 1) != NULL); - xa_erase_index(test, xa, min); + xa_erase_index(xa, min); XA_BUG_ON(xa, !xa_empty(xa)); } -static noinline void check_multi_store_2(struct kunit *test, unsigned long index, +static noinline void check_multi_store_2(struct xarray *xa, unsigned long index, unsigned int order) { - struct xarray *xa = xa_param(test); - XA_STATE(xas, xa, index); xa_store_order(xa, 
index, order, xa_mk_value(0), GFP_KERNEL); @@ -649,11 +620,9 @@ static noinline void check_multi_store_2(struct kunit *test, unsigned long index XA_BUG_ON(xa, !xa_empty(xa)); } -static noinline void check_multi_store_3(struct kunit *test, unsigned long index, +static noinline void check_multi_store_3(struct xarray *xa, unsigned long index, unsigned int order) { - struct xarray *xa = xa_param(test); - XA_STATE(xas, xa, 0); void *entry; int n = 0; @@ -678,11 +647,9 @@ static noinline void check_multi_store_3(struct kunit *test, unsigned long index } #endif -static noinline void check_multi_store(struct kunit *test) +static noinline void check_multi_store(struct xarray *xa) { #ifdef CONFIG_XARRAY_MULTI - struct xarray *xa = xa_param(test); - unsigned long i, j, k; unsigned int max_order = (sizeof(long) == 4) ? 30 : 60; @@ -747,28 +714,26 @@ static noinline void check_multi_store(struct kunit *test) } for (i = 0; i < 20; i++) { - check_multi_store_1(test, 200, i); - check_multi_store_1(test, 0, i); - check_multi_store_1(test, (1UL << i) + 1, i); + check_multi_store_1(xa, 200, i); + check_multi_store_1(xa, 0, i); + check_multi_store_1(xa, (1UL << i) + 1, i); } - check_multi_store_2(test, 4095, 9); + check_multi_store_2(xa, 4095, 9); for (i = 1; i < 20; i++) { - check_multi_store_3(test, 0, i); - check_multi_store_3(test, 1UL << i, i); + check_multi_store_3(xa, 0, i); + check_multi_store_3(xa, 1UL << i, i); } #endif } #ifdef CONFIG_XARRAY_MULTI /* mimics page cache __filemap_add_folio() */ -static noinline void check_xa_multi_store_adv_add(struct kunit *test, +static noinline void check_xa_multi_store_adv_add(struct xarray *xa, unsigned long index, unsigned int order, void *p) { - struct xarray *xa = xa_param(test); - XA_STATE(xas, xa, index); unsigned int nrpages = 1UL << order; @@ -796,12 +761,10 @@ static noinline void check_xa_multi_store_adv_add(struct kunit *test, } /* mimics page_cache_delete() */ -static noinline void check_xa_multi_store_adv_del_entry(struct kunit *test, +static noinline void check_xa_multi_store_adv_del_entry(struct xarray *xa, unsigned long index, unsigned int order) { - struct xarray *xa = xa_param(test); - XA_STATE(xas, xa, index); xas_set_order(&xas, index, order); @@ -809,14 +772,12 @@ static noinline void check_xa_multi_store_adv_del_entry(struct kunit *test, xas_init_marks(&xas); } -static noinline void check_xa_multi_store_adv_delete(struct kunit *test, +static noinline void check_xa_multi_store_adv_delete(struct xarray *xa, unsigned long index, unsigned int order) { - struct xarray *xa = xa_param(test); - xa_lock_irq(xa); - check_xa_multi_store_adv_del_entry(test, index, order); + check_xa_multi_store_adv_del_entry(xa, index, order); xa_unlock_irq(xa); } @@ -853,12 +814,10 @@ static unsigned long some_val = 0xdeadbeef; static unsigned long some_val_2 = 0xdeaddead; /* mimics the page cache usage */ -static noinline void check_xa_multi_store_adv(struct kunit *test, +static noinline void check_xa_multi_store_adv(struct xarray *xa, unsigned long pos, unsigned int order) { - struct xarray *xa = xa_param(test); - unsigned int nrpages = 1UL << order; unsigned long index, base, next_index, next_next_index; unsigned int i; @@ -868,7 +827,7 @@ static noinline void check_xa_multi_store_adv(struct kunit *test, next_index = round_down(base + nrpages, nrpages); next_next_index = round_down(next_index + nrpages, nrpages); - check_xa_multi_store_adv_add(test, base, order, &some_val); + check_xa_multi_store_adv_add(xa, base, order, &some_val); for (i = 0; i < nrpages; i++) 
XA_BUG_ON(xa, test_get_entry(xa, base + i) != &some_val); @@ -876,20 +835,20 @@ static noinline void check_xa_multi_store_adv(struct kunit *test, XA_BUG_ON(xa, test_get_entry(xa, next_index) != NULL); /* Use order 0 for the next item */ - check_xa_multi_store_adv_add(test, next_index, 0, &some_val_2); + check_xa_multi_store_adv_add(xa, next_index, 0, &some_val_2); XA_BUG_ON(xa, test_get_entry(xa, next_index) != &some_val_2); /* Remove the next item */ - check_xa_multi_store_adv_delete(test, next_index, 0); + check_xa_multi_store_adv_delete(xa, next_index, 0); /* Now use order for a new pointer */ - check_xa_multi_store_adv_add(test, next_index, order, &some_val_2); + check_xa_multi_store_adv_add(xa, next_index, order, &some_val_2); for (i = 0; i < nrpages; i++) XA_BUG_ON(xa, test_get_entry(xa, next_index + i) != &some_val_2); - check_xa_multi_store_adv_delete(test, next_index, order); - check_xa_multi_store_adv_delete(test, base, order); + check_xa_multi_store_adv_delete(xa, next_index, order); + check_xa_multi_store_adv_delete(xa, base, order); XA_BUG_ON(xa, !xa_empty(xa)); /* starting fresh again */ @@ -897,7 +856,7 @@ static noinline void check_xa_multi_store_adv(struct kunit *test, /* let's test some holes now */ /* hole at base and next_next */ - check_xa_multi_store_adv_add(test, next_index, order, &some_val_2); + check_xa_multi_store_adv_add(xa, next_index, order, &some_val_2); for (i = 0; i < nrpages; i++) XA_BUG_ON(xa, test_get_entry(xa, base + i) != NULL); @@ -908,12 +867,12 @@ static noinline void check_xa_multi_store_adv(struct kunit *test, for (i = 0; i < nrpages; i++) XA_BUG_ON(xa, test_get_entry(xa, next_next_index + i) != NULL); - check_xa_multi_store_adv_delete(test, next_index, order); + check_xa_multi_store_adv_delete(xa, next_index, order); XA_BUG_ON(xa, !xa_empty(xa)); /* hole at base and next */ - check_xa_multi_store_adv_add(test, next_next_index, order, &some_val_2); + check_xa_multi_store_adv_add(xa, next_next_index, order, &some_val_2); for (i = 0; i < nrpages; i++) XA_BUG_ON(xa, test_get_entry(xa, base + i) != NULL); @@ -924,12 +883,12 @@ static noinline void check_xa_multi_store_adv(struct kunit *test, for (i = 0; i < nrpages; i++) XA_BUG_ON(xa, test_get_entry(xa, next_next_index + i) != &some_val_2); - check_xa_multi_store_adv_delete(test, next_next_index, order); + check_xa_multi_store_adv_delete(xa, next_next_index, order); XA_BUG_ON(xa, !xa_empty(xa)); } #endif -static noinline void check_multi_store_advanced(struct kunit *test) +static noinline void check_multi_store_advanced(struct xarray *xa) { #ifdef CONFIG_XARRAY_MULTI unsigned int max_order = IS_ENABLED(CONFIG_XARRAY_MULTI) ? 
20 : 1; @@ -941,59 +900,59 @@ static noinline void check_multi_store_advanced(struct kunit *test) */ for (pos = 7; pos < end; pos = (pos * pos) + 564) { for (i = 0; i < max_order; i++) { - check_xa_multi_store_adv(test, pos, i); - check_xa_multi_store_adv(test, pos + 157, i); + check_xa_multi_store_adv(xa, pos, i); + check_xa_multi_store_adv(xa, pos + 157, i); } } #endif } -static noinline void check_xa_alloc_1(struct kunit *test, struct xarray *xa, unsigned int base) +static noinline void check_xa_alloc_1(struct xarray *xa, unsigned int base) { int i; u32 id; XA_BUG_ON(xa, !xa_empty(xa)); /* An empty array should assign %base to the first alloc */ - xa_alloc_index(test, xa, base, GFP_KERNEL); + xa_alloc_index(xa, base, GFP_KERNEL); /* Erasing it should make the array empty again */ - xa_erase_index(test, xa, base); + xa_erase_index(xa, base); XA_BUG_ON(xa, !xa_empty(xa)); /* And it should assign %base again */ - xa_alloc_index(test, xa, base, GFP_KERNEL); + xa_alloc_index(xa, base, GFP_KERNEL); /* Allocating and then erasing a lot should not lose base */ for (i = base + 1; i < 2 * XA_CHUNK_SIZE; i++) - xa_alloc_index(test, xa, i, GFP_KERNEL); + xa_alloc_index(xa, i, GFP_KERNEL); for (i = base; i < 2 * XA_CHUNK_SIZE; i++) - xa_erase_index(test, xa, i); - xa_alloc_index(test, xa, base, GFP_KERNEL); + xa_erase_index(xa, i); + xa_alloc_index(xa, base, GFP_KERNEL); /* Destroying the array should do the same as erasing */ xa_destroy(xa); /* And it should assign %base again */ - xa_alloc_index(test, xa, base, GFP_KERNEL); + xa_alloc_index(xa, base, GFP_KERNEL); /* The next assigned ID should be base+1 */ - xa_alloc_index(test, xa, base + 1, GFP_KERNEL); - xa_erase_index(test, xa, base + 1); + xa_alloc_index(xa, base + 1, GFP_KERNEL); + xa_erase_index(xa, base + 1); /* Storing a value should mark it used */ xa_store_index(xa, base + 1, GFP_KERNEL); - xa_alloc_index(test, xa, base + 2, GFP_KERNEL); + xa_alloc_index(xa, base + 2, GFP_KERNEL); /* If we then erase base, it should be free */ - xa_erase_index(test, xa, base); - xa_alloc_index(test, xa, base, GFP_KERNEL); + xa_erase_index(xa, base); + xa_alloc_index(xa, base, GFP_KERNEL); - xa_erase_index(test, xa, base + 1); - xa_erase_index(test, xa, base + 2); + xa_erase_index(xa, base + 1); + xa_erase_index(xa, base + 2); for (i = 1; i < 5000; i++) { - xa_alloc_index(test, xa, base + i, GFP_KERNEL); + xa_alloc_index(xa, base + i, GFP_KERNEL); } xa_destroy(xa); @@ -1016,14 +975,14 @@ static noinline void check_xa_alloc_1(struct kunit *test, struct xarray *xa, uns XA_BUG_ON(xa, xa_alloc(xa, &id, xa_mk_index(10), XA_LIMIT(10, 5), GFP_KERNEL) != -EBUSY); - XA_BUG_ON(xa, xa_store_index(xa, 3, GFP_KERNEL) != NULL); + XA_BUG_ON(xa, xa_store_index(xa, 3, GFP_KERNEL) != 0); XA_BUG_ON(xa, xa_alloc(xa, &id, xa_mk_index(10), XA_LIMIT(10, 5), GFP_KERNEL) != -EBUSY); - xa_erase_index(test, xa, 3); + xa_erase_index(xa, 3); XA_BUG_ON(xa, !xa_empty(xa)); } -static noinline void check_xa_alloc_2(struct kunit *test, struct xarray *xa, unsigned int base) +static noinline void check_xa_alloc_2(struct xarray *xa, unsigned int base) { unsigned int i, id; unsigned long index; @@ -1059,7 +1018,7 @@ static noinline void check_xa_alloc_2(struct kunit *test, struct xarray *xa, uns XA_BUG_ON(xa, id != 5); xa_for_each(xa, index, entry) { - xa_erase_index(test, xa, index); + xa_erase_index(xa, index); } for (i = base; i < base + 9; i++) { @@ -1074,7 +1033,7 @@ static noinline void check_xa_alloc_2(struct kunit *test, struct xarray *xa, uns xa_destroy(xa); } -static noinline void 
check_xa_alloc_3(struct kunit *test, struct xarray *xa, unsigned int base) +static noinline void check_xa_alloc_3(struct xarray *xa, unsigned int base) { struct xa_limit limit = XA_LIMIT(1, 0x3fff); u32 next = 0; @@ -1090,8 +1049,8 @@ static noinline void check_xa_alloc_3(struct kunit *test, struct xarray *xa, uns XA_BUG_ON(xa, xa_alloc_cyclic(xa, &id, xa_mk_index(0x3ffd), limit, &next, GFP_KERNEL) != 0); XA_BUG_ON(xa, id != 0x3ffd); - xa_erase_index(test, xa, 0x3ffd); - xa_erase_index(test, xa, 1); + xa_erase_index(xa, 0x3ffd); + xa_erase_index(xa, 1); XA_BUG_ON(xa, !xa_empty(xa)); for (i = 0x3ffe; i < 0x4003; i++) { @@ -1106,8 +1065,8 @@ static noinline void check_xa_alloc_3(struct kunit *test, struct xarray *xa, uns /* Check wrap-around is handled correctly */ if (base != 0) - xa_erase_index(test, xa, base); - xa_erase_index(test, xa, base + 1); + xa_erase_index(xa, base); + xa_erase_index(xa, base + 1); next = UINT_MAX; XA_BUG_ON(xa, xa_alloc_cyclic(xa, &id, xa_mk_index(UINT_MAX), xa_limit_32b, &next, GFP_KERNEL) != 0); @@ -1120,7 +1079,7 @@ static noinline void check_xa_alloc_3(struct kunit *test, struct xarray *xa, uns XA_BUG_ON(xa, id != base + 1); xa_for_each(xa, index, entry) - xa_erase_index(test, xa, index); + xa_erase_index(xa, index); XA_BUG_ON(xa, !xa_empty(xa)); } @@ -1128,21 +1087,19 @@ static noinline void check_xa_alloc_3(struct kunit *test, struct xarray *xa, uns static DEFINE_XARRAY_ALLOC(xa0); static DEFINE_XARRAY_ALLOC1(xa1); -static noinline void check_xa_alloc(struct kunit *test) +static noinline void check_xa_alloc(void) { - check_xa_alloc_1(test, &xa0, 0); - check_xa_alloc_1(test, &xa1, 1); - check_xa_alloc_2(test, &xa0, 0); - check_xa_alloc_2(test, &xa1, 1); - check_xa_alloc_3(test, &xa0, 0); - check_xa_alloc_3(test, &xa1, 1); + check_xa_alloc_1(&xa0, 0); + check_xa_alloc_1(&xa1, 1); + check_xa_alloc_2(&xa0, 0); + check_xa_alloc_2(&xa1, 1); + check_xa_alloc_3(&xa0, 0); + check_xa_alloc_3(&xa1, 1); } -static noinline void __check_store_iter(struct kunit *test, unsigned long start, +static noinline void __check_store_iter(struct xarray *xa, unsigned long start, unsigned int order, unsigned int present) { - struct xarray *xa = xa_param(test); - XA_STATE_ORDER(xas, xa, start, order); void *entry; unsigned int count = 0; @@ -1166,54 +1123,50 @@ retry: XA_BUG_ON(xa, xa_load(xa, start) != xa_mk_index(start)); XA_BUG_ON(xa, xa_load(xa, start + (1UL << order) - 1) != xa_mk_index(start)); - xa_erase_index(test, xa, start); + xa_erase_index(xa, start); } -static noinline void check_store_iter(struct kunit *test) +static noinline void check_store_iter(struct xarray *xa) { - struct xarray *xa = xa_param(test); - unsigned int i, j; unsigned int max_order = IS_ENABLED(CONFIG_XARRAY_MULTI) ? 
20 : 1; for (i = 0; i < max_order; i++) { unsigned int min = 1 << i; unsigned int max = (2 << i) - 1; - __check_store_iter(test, 0, i, 0); + __check_store_iter(xa, 0, i, 0); XA_BUG_ON(xa, !xa_empty(xa)); - __check_store_iter(test, min, i, 0); + __check_store_iter(xa, min, i, 0); XA_BUG_ON(xa, !xa_empty(xa)); xa_store_index(xa, min, GFP_KERNEL); - __check_store_iter(test, min, i, 1); + __check_store_iter(xa, min, i, 1); XA_BUG_ON(xa, !xa_empty(xa)); xa_store_index(xa, max, GFP_KERNEL); - __check_store_iter(test, min, i, 1); + __check_store_iter(xa, min, i, 1); XA_BUG_ON(xa, !xa_empty(xa)); for (j = 0; j < min; j++) xa_store_index(xa, j, GFP_KERNEL); - __check_store_iter(test, 0, i, min); + __check_store_iter(xa, 0, i, min); XA_BUG_ON(xa, !xa_empty(xa)); for (j = 0; j < min; j++) xa_store_index(xa, min + j, GFP_KERNEL); - __check_store_iter(test, min, i, min); + __check_store_iter(xa, min, i, min); XA_BUG_ON(xa, !xa_empty(xa)); } #ifdef CONFIG_XARRAY_MULTI xa_store_index(xa, 63, GFP_KERNEL); xa_store_index(xa, 65, GFP_KERNEL); - __check_store_iter(test, 64, 2, 1); - xa_erase_index(test, xa, 63); + __check_store_iter(xa, 64, 2, 1); + xa_erase_index(xa, 63); #endif XA_BUG_ON(xa, !xa_empty(xa)); } -static noinline void check_multi_find_1(struct kunit *test, unsigned int order) +static noinline void check_multi_find_1(struct xarray *xa, unsigned order) { #ifdef CONFIG_XARRAY_MULTI - struct xarray *xa = xa_param(test); - unsigned long multi = 3 << order; unsigned long next = 4 << order; unsigned long index; @@ -1236,17 +1189,15 @@ static noinline void check_multi_find_1(struct kunit *test, unsigned int order) XA_BUG_ON(xa, xa_find_after(xa, &index, next, XA_PRESENT) != NULL); XA_BUG_ON(xa, index != next); - xa_erase_index(test, xa, multi); - xa_erase_index(test, xa, next); - xa_erase_index(test, xa, next + 1); + xa_erase_index(xa, multi); + xa_erase_index(xa, next); + xa_erase_index(xa, next + 1); XA_BUG_ON(xa, !xa_empty(xa)); #endif } -static noinline void check_multi_find_2(struct kunit *test) +static noinline void check_multi_find_2(struct xarray *xa) { - struct xarray *xa = xa_param(test); - unsigned int max_order = IS_ENABLED(CONFIG_XARRAY_MULTI) ? 
10 : 1; unsigned int i, j; void *entry; @@ -1260,19 +1211,17 @@ static noinline void check_multi_find_2(struct kunit *test) GFP_KERNEL); rcu_read_lock(); xas_for_each(&xas, entry, ULONG_MAX) { - xa_erase_index(test, xa, index); + xa_erase_index(xa, index); } rcu_read_unlock(); - xa_erase_index(test, xa, index - 1); + xa_erase_index(xa, index - 1); XA_BUG_ON(xa, !xa_empty(xa)); } } } -static noinline void check_multi_find_3(struct kunit *test) +static noinline void check_multi_find_3(struct xarray *xa) { - struct xarray *xa = xa_param(test); - unsigned int order; for (order = 5; order < order_limit; order++) { @@ -1281,14 +1230,12 @@ static noinline void check_multi_find_3(struct kunit *test) XA_BUG_ON(xa, !xa_empty(xa)); xa_store_order(xa, 0, order - 4, xa_mk_index(0), GFP_KERNEL); XA_BUG_ON(xa, xa_find_after(xa, &index, ULONG_MAX, XA_PRESENT)); - xa_erase_index(test, xa, 0); + xa_erase_index(xa, 0); } } -static noinline void check_find_1(struct kunit *test) +static noinline void check_find_1(struct xarray *xa) { - struct xarray *xa = xa_param(test); - unsigned long i, j, k; XA_BUG_ON(xa, !xa_empty(xa)); @@ -1325,20 +1272,18 @@ static noinline void check_find_1(struct kunit *test) else XA_BUG_ON(xa, entry != NULL); } - xa_erase_index(test, xa, j); + xa_erase_index(xa, j); XA_BUG_ON(xa, xa_get_mark(xa, j, XA_MARK_0)); XA_BUG_ON(xa, !xa_get_mark(xa, i, XA_MARK_0)); } - xa_erase_index(test, xa, i); + xa_erase_index(xa, i); XA_BUG_ON(xa, xa_get_mark(xa, i, XA_MARK_0)); } XA_BUG_ON(xa, !xa_empty(xa)); } -static noinline void check_find_2(struct kunit *test) +static noinline void check_find_2(struct xarray *xa) { - struct xarray *xa = xa_param(test); - void *entry; unsigned long i, j, index; @@ -1358,10 +1303,8 @@ static noinline void check_find_2(struct kunit *test) xa_destroy(xa); } -static noinline void check_find_3(struct kunit *test) +static noinline void check_find_3(struct xarray *xa) { - struct xarray *xa = xa_param(test); - XA_STATE(xas, xa, 0); unsigned long i, j, k; void *entry; @@ -1385,10 +1328,8 @@ static noinline void check_find_3(struct kunit *test) xa_destroy(xa); } -static noinline void check_find_4(struct kunit *test) +static noinline void check_find_4(struct xarray *xa) { - struct xarray *xa = xa_param(test); - unsigned long index = 0; void *entry; @@ -1400,22 +1341,22 @@ static noinline void check_find_4(struct kunit *test) entry = xa_find_after(xa, &index, ULONG_MAX, XA_PRESENT); XA_BUG_ON(xa, entry); - xa_erase_index(test, xa, ULONG_MAX); + xa_erase_index(xa, ULONG_MAX); } -static noinline void check_find(struct kunit *test) +static noinline void check_find(struct xarray *xa) { unsigned i; - check_find_1(test); - check_find_2(test); - check_find_3(test); - check_find_4(test); + check_find_1(xa); + check_find_2(xa); + check_find_3(xa); + check_find_4(xa); for (i = 2; i < 10; i++) - check_multi_find_1(test, i); - check_multi_find_2(test); - check_multi_find_3(test); + check_multi_find_1(xa, i); + check_multi_find_2(xa); + check_multi_find_3(xa); } /* See find_swap_entry() in mm/shmem.c */ @@ -1441,10 +1382,8 @@ static noinline unsigned long xa_find_entry(struct xarray *xa, void *item) return entry ? 
xas.xa_index : -1; } -static noinline void check_find_entry(struct kunit *test) +static noinline void check_find_entry(struct xarray *xa) { - struct xarray *xa = xa_param(test); - #ifdef CONFIG_XARRAY_MULTI unsigned int order; unsigned long offset, index; @@ -1471,14 +1410,12 @@ static noinline void check_find_entry(struct kunit *test) xa_store_index(xa, ULONG_MAX, GFP_KERNEL); XA_BUG_ON(xa, xa_find_entry(xa, xa) != -1); XA_BUG_ON(xa, xa_find_entry(xa, xa_mk_index(ULONG_MAX)) != -1); - xa_erase_index(test, xa, ULONG_MAX); + xa_erase_index(xa, ULONG_MAX); XA_BUG_ON(xa, !xa_empty(xa)); } -static noinline void check_pause(struct kunit *test) +static noinline void check_pause(struct xarray *xa) { - struct xarray *xa = xa_param(test); - XA_STATE(xas, xa, 0); void *entry; unsigned int order; @@ -1548,10 +1485,8 @@ static noinline void check_pause(struct kunit *test) } -static noinline void check_move_tiny(struct kunit *test) +static noinline void check_move_tiny(struct xarray *xa) { - struct xarray *xa = xa_param(test); - XA_STATE(xas, xa, 0); XA_BUG_ON(xa, !xa_empty(xa)); @@ -1568,14 +1503,12 @@ static noinline void check_move_tiny(struct kunit *test) XA_BUG_ON(xa, xas_prev(&xas) != xa_mk_index(0)); XA_BUG_ON(xa, xas_prev(&xas) != NULL); rcu_read_unlock(); - xa_erase_index(test, xa, 0); + xa_erase_index(xa, 0); XA_BUG_ON(xa, !xa_empty(xa)); } -static noinline void check_move_max(struct kunit *test) +static noinline void check_move_max(struct xarray *xa) { - struct xarray *xa = xa_param(test); - XA_STATE(xas, xa, 0); xa_store_index(xa, ULONG_MAX, GFP_KERNEL); @@ -1591,14 +1524,12 @@ static noinline void check_move_max(struct kunit *test) XA_BUG_ON(xa, xas_find(&xas, ULONG_MAX) != NULL); rcu_read_unlock(); - xa_erase_index(test, xa, ULONG_MAX); + xa_erase_index(xa, ULONG_MAX); XA_BUG_ON(xa, !xa_empty(xa)); } -static noinline void check_move_small(struct kunit *test, unsigned long idx) +static noinline void check_move_small(struct xarray *xa, unsigned long idx) { - struct xarray *xa = xa_param(test); - XA_STATE(xas, xa, 0); unsigned long i; @@ -1640,15 +1571,13 @@ static noinline void check_move_small(struct kunit *test, unsigned long idx) XA_BUG_ON(xa, xas.xa_index != ULONG_MAX); rcu_read_unlock(); - xa_erase_index(test, xa, 0); - xa_erase_index(test, xa, idx); + xa_erase_index(xa, 0); + xa_erase_index(xa, idx); XA_BUG_ON(xa, !xa_empty(xa)); } -static noinline void check_move(struct kunit *test) +static noinline void check_move(struct xarray *xa) { - struct xarray *xa = xa_param(test); - XA_STATE(xas, xa, (1 << 16) - 1); unsigned long i; @@ -1675,7 +1604,7 @@ static noinline void check_move(struct kunit *test) rcu_read_unlock(); for (i = (1 << 8); i < (1 << 15); i++) - xa_erase_index(test, xa, i); + xa_erase_index(xa, i); i = xas.xa_index; @@ -1706,17 +1635,17 @@ static noinline void check_move(struct kunit *test) xa_destroy(xa); - check_move_tiny(test); - check_move_max(test); + check_move_tiny(xa); + check_move_max(xa); for (i = 0; i < 16; i++) - check_move_small(test, 1UL << i); + check_move_small(xa, 1UL << i); for (i = 2; i < 16; i++) - check_move_small(test, (1UL << i) - 1); + check_move_small(xa, (1UL << i) - 1); } -static noinline void xa_store_many_order(struct kunit *test, struct xarray *xa, +static noinline void xa_store_many_order(struct xarray *xa, unsigned long index, unsigned order) { XA_STATE_ORDER(xas, xa, index, order); @@ -1739,34 +1668,30 @@ unlock: XA_BUG_ON(xa, xas_error(&xas)); } -static noinline void check_create_range_1(struct kunit *test, +static noinline void 
check_create_range_1(struct xarray *xa, unsigned long index, unsigned order) { - struct xarray *xa = xa_param(test); - unsigned long i; - xa_store_many_order(test, xa, index, order); + xa_store_many_order(xa, index, order); for (i = index; i < index + (1UL << order); i++) - xa_erase_index(test, xa, i); + xa_erase_index(xa, i); XA_BUG_ON(xa, !xa_empty(xa)); } -static noinline void check_create_range_2(struct kunit *test, unsigned int order) +static noinline void check_create_range_2(struct xarray *xa, unsigned order) { - struct xarray *xa = xa_param(test); - unsigned long i; unsigned long nr = 1UL << order; for (i = 0; i < nr * nr; i += nr) - xa_store_many_order(test, xa, i, order); + xa_store_many_order(xa, i, order); for (i = 0; i < nr * nr; i++) - xa_erase_index(test, xa, i); + xa_erase_index(xa, i); XA_BUG_ON(xa, !xa_empty(xa)); } -static noinline void check_create_range_3(struct kunit *test) +static noinline void check_create_range_3(void) { XA_STATE(xas, NULL, 0); xas_set_err(&xas, -EEXIST); @@ -1774,11 +1699,9 @@ static noinline void check_create_range_3(struct kunit *test) XA_BUG_ON(NULL, xas_error(&xas) != -EEXIST); } -static noinline void check_create_range_4(struct kunit *test, +static noinline void check_create_range_4(struct xarray *xa, unsigned long index, unsigned order) { - struct xarray *xa = xa_param(test); - XA_STATE_ORDER(xas, xa, index, order); unsigned long base = xas.xa_index; unsigned long i = 0; @@ -1804,15 +1727,13 @@ unlock: XA_BUG_ON(xa, xas_error(&xas)); for (i = base; i < base + (1UL << order); i++) - xa_erase_index(test, xa, i); + xa_erase_index(xa, i); XA_BUG_ON(xa, !xa_empty(xa)); } -static noinline void check_create_range_5(struct kunit *test, +static noinline void check_create_range_5(struct xarray *xa, unsigned long index, unsigned int order) { - struct xarray *xa = xa_param(test); - XA_STATE_ORDER(xas, xa, index, order); unsigned int i; @@ -1829,46 +1750,44 @@ static noinline void check_create_range_5(struct kunit *test, xa_destroy(xa); } -static noinline void check_create_range(struct kunit *test) +static noinline void check_create_range(struct xarray *xa) { unsigned int order; unsigned int max_order = IS_ENABLED(CONFIG_XARRAY_MULTI) ? 
12 : 1; for (order = 0; order < max_order; order++) { - check_create_range_1(test, 0, order); - check_create_range_1(test, 1U << order, order); - check_create_range_1(test, 2U << order, order); - check_create_range_1(test, 3U << order, order); - check_create_range_1(test, 1U << 24, order); + check_create_range_1(xa, 0, order); + check_create_range_1(xa, 1U << order, order); + check_create_range_1(xa, 2U << order, order); + check_create_range_1(xa, 3U << order, order); + check_create_range_1(xa, 1U << 24, order); if (order < 10) - check_create_range_2(test, order); - - check_create_range_4(test, 0, order); - check_create_range_4(test, 1U << order, order); - check_create_range_4(test, 2U << order, order); - check_create_range_4(test, 3U << order, order); - check_create_range_4(test, 1U << 24, order); - - check_create_range_4(test, 1, order); - check_create_range_4(test, (1U << order) + 1, order); - check_create_range_4(test, (2U << order) + 1, order); - check_create_range_4(test, (2U << order) - 1, order); - check_create_range_4(test, (3U << order) + 1, order); - check_create_range_4(test, (3U << order) - 1, order); - check_create_range_4(test, (1U << 24) + 1, order); - - check_create_range_5(test, 0, order); - check_create_range_5(test, (1U << order), order); + check_create_range_2(xa, order); + + check_create_range_4(xa, 0, order); + check_create_range_4(xa, 1U << order, order); + check_create_range_4(xa, 2U << order, order); + check_create_range_4(xa, 3U << order, order); + check_create_range_4(xa, 1U << 24, order); + + check_create_range_4(xa, 1, order); + check_create_range_4(xa, (1U << order) + 1, order); + check_create_range_4(xa, (2U << order) + 1, order); + check_create_range_4(xa, (2U << order) - 1, order); + check_create_range_4(xa, (3U << order) + 1, order); + check_create_range_4(xa, (3U << order) - 1, order); + check_create_range_4(xa, (1U << 24) + 1, order); + + check_create_range_5(xa, 0, order); + check_create_range_5(xa, (1U << order), order); } - check_create_range_3(test); + check_create_range_3(); } -static noinline void __check_store_range(struct kunit *test, unsigned long first, +static noinline void __check_store_range(struct xarray *xa, unsigned long first, unsigned long last) { - struct xarray *xa = xa_param(test); - #ifdef CONFIG_XARRAY_MULTI xa_store_range(xa, first, last, xa_mk_index(first), GFP_KERNEL); @@ -1883,28 +1802,26 @@ static noinline void __check_store_range(struct kunit *test, unsigned long first XA_BUG_ON(xa, !xa_empty(xa)); } -static noinline void check_store_range(struct kunit *test) +static noinline void check_store_range(struct xarray *xa) { unsigned long i, j; for (i = 0; i < 128; i++) { for (j = i; j < 128; j++) { - __check_store_range(test, i, j); - __check_store_range(test, 128 + i, 128 + j); - __check_store_range(test, 4095 + i, 4095 + j); - __check_store_range(test, 4096 + i, 4096 + j); - __check_store_range(test, 123456 + i, 123456 + j); - __check_store_range(test, (1 << 24) + i, (1 << 24) + j); + __check_store_range(xa, i, j); + __check_store_range(xa, 128 + i, 128 + j); + __check_store_range(xa, 4095 + i, 4095 + j); + __check_store_range(xa, 4096 + i, 4096 + j); + __check_store_range(xa, 123456 + i, 123456 + j); + __check_store_range(xa, (1 << 24) + i, (1 << 24) + j); } } } #ifdef CONFIG_XARRAY_MULTI -static void check_split_1(struct kunit *test, unsigned long index, +static void check_split_1(struct xarray *xa, unsigned long index, unsigned int order, unsigned int new_order) { - struct xarray *xa = xa_param(test); - XA_STATE_ORDER(xas, 
xa, index, new_order); unsigned int i, found; void *entry; @@ -1940,30 +1857,26 @@ static void check_split_1(struct kunit *test, unsigned long index, xa_destroy(xa); } -static noinline void check_split(struct kunit *test) +static noinline void check_split(struct xarray *xa) { - struct xarray *xa = xa_param(test); - unsigned int order, new_order; XA_BUG_ON(xa, !xa_empty(xa)); for (order = 1; order < 2 * XA_CHUNK_SHIFT; order++) { for (new_order = 0; new_order < order; new_order++) { - check_split_1(test, 0, order, new_order); - check_split_1(test, 1UL << order, order, new_order); - check_split_1(test, 3UL << order, order, new_order); + check_split_1(xa, 0, order, new_order); + check_split_1(xa, 1UL << order, order, new_order); + check_split_1(xa, 3UL << order, order, new_order); } } } #else -static void check_split(struct kunit *test) { } +static void check_split(struct xarray *xa) { } #endif -static void check_align_1(struct kunit *test, char *name) +static void check_align_1(struct xarray *xa, char *name) { - struct xarray *xa = xa_param(test); - int i; unsigned int id; unsigned long index; @@ -1983,10 +1896,8 @@ static void check_align_1(struct kunit *test, char *name) * We should always be able to store without allocating memory after * reserving a slot. */ -static void check_align_2(struct kunit *test, char *name) +static void check_align_2(struct xarray *xa, char *name) { - struct xarray *xa = xa_param(test); - int i; XA_BUG_ON(xa, !xa_empty(xa)); @@ -2005,15 +1916,15 @@ static void check_align_2(struct kunit *test, char *name) XA_BUG_ON(xa, !xa_empty(xa)); } -static noinline void check_align(struct kunit *test) +static noinline void check_align(struct xarray *xa) { char name[] = "Motorola 68000"; - check_align_1(test, name); - check_align_1(test, name + 1); - check_align_1(test, name + 2); - check_align_1(test, name + 3); - check_align_2(test, name); + check_align_1(xa, name); + check_align_1(xa, name + 1); + check_align_1(xa, name + 2); + check_align_1(xa, name + 3); + check_align_2(xa, name); } static LIST_HEAD(shadow_nodes); @@ -2029,7 +1940,7 @@ static void test_update_node(struct xa_node *node) } } -static noinline void shadow_remove(struct kunit *test, struct xarray *xa) +static noinline void shadow_remove(struct xarray *xa) { struct xa_node *node; @@ -2043,17 +1954,8 @@ static noinline void shadow_remove(struct kunit *test, struct xarray *xa) xa_unlock(xa); } -struct workingset_testcase { - struct xarray *xa; - unsigned long index; -}; - -static noinline void check_workingset(struct kunit *test) +static noinline void check_workingset(struct xarray *xa, unsigned long index) { - struct workingset_testcase tc = *(struct workingset_testcase *)test->param_value; - struct xarray *xa = tc.xa; - unsigned long index = tc.index; - XA_STATE(xas, xa, index); xas_set_update(&xas, test_update_node); @@ -2076,7 +1978,7 @@ static noinline void check_workingset(struct kunit *test) xas_unlock(&xas); XA_BUG_ON(xa, list_empty(&shadow_nodes)); - shadow_remove(test, xa); + shadow_remove(xa); XA_BUG_ON(xa, !list_empty(&shadow_nodes)); XA_BUG_ON(xa, !xa_empty(xa)); } @@ -2085,11 +1987,9 @@ static noinline void check_workingset(struct kunit *test) * Check that the pointer / value / sibling entries are accounted the * way we expect them to be. 
*/ -static noinline void check_account(struct kunit *test) +static noinline void check_account(struct xarray *xa) { #ifdef CONFIG_XARRAY_MULTI - struct xarray *xa = xa_param(test); - unsigned int order; for (order = 1; order < 12; order++) { @@ -2116,10 +2016,8 @@ static noinline void check_account(struct kunit *test) #endif } -static noinline void check_get_order(struct kunit *test) +static noinline void check_get_order(struct xarray *xa) { - struct xarray *xa = xa_param(test); - unsigned int max_order = IS_ENABLED(CONFIG_XARRAY_MULTI) ? 20 : 1; unsigned int order; unsigned long i, j; @@ -2138,10 +2036,8 @@ static noinline void check_get_order(struct kunit *test) } } -static noinline void check_xas_get_order(struct kunit *test) +static noinline void check_xas_get_order(struct xarray *xa) { - struct xarray *xa = xa_param(test); - XA_STATE(xas, xa, 0); unsigned int max_order = IS_ENABLED(CONFIG_XARRAY_MULTI) ? 20 : 1; @@ -2173,10 +2069,8 @@ static noinline void check_xas_get_order(struct kunit *test) } } -static noinline void check_xas_conflict_get_order(struct kunit *test) +static noinline void check_xas_conflict_get_order(struct xarray *xa) { - struct xarray *xa = xa_param(test); - XA_STATE(xas, xa, 0); void *entry; @@ -2233,10 +2127,8 @@ static noinline void check_xas_conflict_get_order(struct kunit *test) } -static noinline void check_destroy(struct kunit *test) +static noinline void check_destroy(struct xarray *xa) { - struct xarray *xa = xa_param(test); - unsigned long index; XA_BUG_ON(xa, !xa_empty(xa)); @@ -2269,59 +2161,52 @@ static noinline void check_destroy(struct kunit *test) } static DEFINE_XARRAY(array); -static struct xarray *arrays[] = { &array }; -KUNIT_ARRAY_PARAM(array, arrays, NULL); - -static struct xarray *xa0s[] = { &xa0 }; -KUNIT_ARRAY_PARAM(xa0, xa0s, NULL); - -static struct workingset_testcase workingset_testcases[] = { - { &array, 0 }, - { &array, 64 }, - { &array, 4096 }, -}; -KUNIT_ARRAY_PARAM(workingset, workingset_testcases, NULL); - -static struct kunit_case xarray_cases[] = { - KUNIT_CASE_PARAM(check_xa_err, array_gen_params), - KUNIT_CASE_PARAM(check_xas_retry, array_gen_params), - KUNIT_CASE_PARAM(check_xa_load, array_gen_params), - KUNIT_CASE_PARAM(check_xa_mark, array_gen_params), - KUNIT_CASE_PARAM(check_xa_shrink, array_gen_params), - KUNIT_CASE_PARAM(check_xas_erase, array_gen_params), - KUNIT_CASE_PARAM(check_insert, array_gen_params), - KUNIT_CASE_PARAM(check_cmpxchg, array_gen_params), - KUNIT_CASE_PARAM(check_cmpxchg_order, array_gen_params), - KUNIT_CASE_PARAM(check_reserve, array_gen_params), - KUNIT_CASE_PARAM(check_reserve, xa0_gen_params), - KUNIT_CASE_PARAM(check_multi_store, array_gen_params), - KUNIT_CASE_PARAM(check_multi_store_advanced, array_gen_params), - KUNIT_CASE_PARAM(check_get_order, array_gen_params), - KUNIT_CASE_PARAM(check_xas_get_order, array_gen_params), - KUNIT_CASE_PARAM(check_xas_conflict_get_order, array_gen_params), - KUNIT_CASE(check_xa_alloc), - KUNIT_CASE_PARAM(check_find, array_gen_params), - KUNIT_CASE_PARAM(check_find_entry, array_gen_params), - KUNIT_CASE_PARAM(check_pause, array_gen_params), - KUNIT_CASE_PARAM(check_account, array_gen_params), - KUNIT_CASE_PARAM(check_destroy, array_gen_params), - KUNIT_CASE_PARAM(check_move, array_gen_params), - KUNIT_CASE_PARAM(check_create_range, array_gen_params), - KUNIT_CASE_PARAM(check_store_range, array_gen_params), - KUNIT_CASE_PARAM(check_store_iter, array_gen_params), - KUNIT_CASE_PARAM(check_align, xa0_gen_params), - KUNIT_CASE_PARAM(check_split, array_gen_params), 
- KUNIT_CASE_PARAM(check_workingset, workingset_gen_params), - {}, -}; - -static struct kunit_suite xarray_suite = { - .name = "xarray", - .test_cases = xarray_cases, -}; - -kunit_test_suite(xarray_suite); +static int xarray_checks(void) +{ + check_xa_err(&array); + check_xas_retry(&array); + check_xa_load(&array); + check_xa_mark(&array); + check_xa_shrink(&array); + check_xas_erase(&array); + check_insert(&array); + check_cmpxchg(&array); + check_cmpxchg_order(&array); + check_reserve(&array); + check_reserve(&xa0); + check_multi_store(&array); + check_multi_store_advanced(&array); + check_get_order(&array); + check_xas_get_order(&array); + check_xas_conflict_get_order(&array); + check_xa_alloc(); + check_find(&array); + check_find_entry(&array); + check_pause(&array); + check_account(&array); + check_destroy(&array); + check_move(&array); + check_create_range(&array); + check_store_range(&array); + check_store_iter(&array); + check_align(&xa0); + check_split(&array); + + check_workingset(&array, 0); + check_workingset(&array, 64); + check_workingset(&array, 4096); + + printk("XArray: %u of %u tests passed\n", tests_passed, tests_run); + return (tests_run == tests_passed) ? 0 : -EINVAL; +} + +static void xarray_exit(void) +{ +} + +module_init(xarray_checks); +module_exit(xarray_exit); MODULE_AUTHOR("Matthew Wilcox <willy@infradead.org>"); MODULE_DESCRIPTION("XArray API test module"); MODULE_LICENSE("GPL"); diff --git a/mm/compaction.c b/mm/compaction.c index bcc0df0066dc..12ed8425fa17 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -2491,7 +2491,8 @@ bool compaction_zonelist_suitable(struct alloc_context *ac, int order, */ static enum compact_result compaction_suit_allocation_order(struct zone *zone, unsigned int order, - int highest_zoneidx, unsigned int alloc_flags) + int highest_zoneidx, unsigned int alloc_flags, + bool async) { unsigned long watermark; @@ -2500,6 +2501,23 @@ compaction_suit_allocation_order(struct zone *zone, unsigned int order, alloc_flags)) return COMPACT_SUCCESS; + /* + * For unmovable allocations (without ALLOC_CMA), check if there is enough + * free memory in the non-CMA pageblocks. Otherwise compaction could form + * the high-order page in CMA pageblocks, which would not help the + * allocation to succeed. However, limit the check to costly order async + * compaction (such as opportunistic THP attempts) because there is the + * possibility that compaction would migrate pages from non-CMA to CMA + * pageblock. 
+ */ + if (order > PAGE_ALLOC_COSTLY_ORDER && async && + !(alloc_flags & ALLOC_CMA)) { + watermark = low_wmark_pages(zone) + compact_gap(order); + if (!__zone_watermark_ok(zone, 0, watermark, highest_zoneidx, + 0, zone_page_state(zone, NR_FREE_PAGES))) + return COMPACT_SKIPPED; + } + if (!compaction_suitable(zone, order, highest_zoneidx)) return COMPACT_SKIPPED; @@ -2535,7 +2553,8 @@ compact_zone(struct compact_control *cc, struct capture_control *capc) if (!is_via_compact_memory(cc->order)) { ret = compaction_suit_allocation_order(cc->zone, cc->order, cc->highest_zoneidx, - cc->alloc_flags); + cc->alloc_flags, + cc->mode == MIGRATE_ASYNC); if (ret != COMPACT_CONTINUE) return ret; } @@ -3038,7 +3057,8 @@ static bool kcompactd_node_suitable(pg_data_t *pgdat) ret = compaction_suit_allocation_order(zone, pgdat->kcompactd_max_order, - highest_zoneidx, ALLOC_WMARK_MIN); + highest_zoneidx, ALLOC_WMARK_MIN, + false); if (ret == COMPACT_CONTINUE) return true; } @@ -3079,7 +3099,8 @@ static void kcompactd_do_work(pg_data_t *pgdat) continue; ret = compaction_suit_allocation_order(zone, - cc.order, zoneid, ALLOC_WMARK_MIN); + cc.order, zoneid, ALLOC_WMARK_MIN, + false); if (ret != COMPACT_CONTINUE) continue; @@ -2320,13 +2320,13 @@ static void pofs_unpin(struct pages_or_folios *pofs) /* * Returns the number of collected folios. Return value is always >= 0. */ -static unsigned long collect_longterm_unpinnable_folios( +static void collect_longterm_unpinnable_folios( struct list_head *movable_folio_list, struct pages_or_folios *pofs) { - unsigned long i, collected = 0; struct folio *prev_folio = NULL; bool drain_allow = true; + unsigned long i; for (i = 0; i < pofs->nr_entries; i++) { struct folio *folio = pofs_get_folio(pofs, i); @@ -2338,8 +2338,6 @@ static unsigned long collect_longterm_unpinnable_folios( if (folio_is_longterm_pinnable(folio)) continue; - collected++; - if (folio_is_device_coherent(folio)) continue; @@ -2361,8 +2359,6 @@ static unsigned long collect_longterm_unpinnable_folios( NR_ISOLATED_ANON + folio_is_file_lru(folio), folio_nr_pages(folio)); } - - return collected; } /* @@ -2439,11 +2435,9 @@ static long check_and_migrate_movable_pages_or_folios(struct pages_or_folios *pofs) { LIST_HEAD(movable_folio_list); - unsigned long collected; - collected = collect_longterm_unpinnable_folios(&movable_folio_list, - pofs); - if (!collected) + collect_longterm_unpinnable_folios(&movable_folio_list, pofs); + if (list_empty(&movable_folio_list)) return 0; return migrate_longterm_unpinnable_folios(&movable_folio_list, pofs); diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 3b25b69aa94f..65068671e460 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -3309,7 +3309,7 @@ static void __init gather_bootmem_prealloc(void) .thread_fn = gather_bootmem_prealloc_parallel, .fn_arg = NULL, .start = 0, - .size = num_node_state(N_MEMORY), + .size = nr_node_ids, .align = 1, .min_chunk = 1, .max_threads = num_node_state(N_MEMORY), diff --git a/mm/kfence/core.c b/mm/kfence/core.c index 67fc321db79b..102048821c22 100644 --- a/mm/kfence/core.c +++ b/mm/kfence/core.c @@ -21,6 +21,7 @@ #include <linux/log2.h> #include <linux/memblock.h> #include <linux/moduleparam.h> +#include <linux/nodemask.h> #include <linux/notifier.h> #include <linux/panic_notifier.h> #include <linux/random.h> @@ -1084,6 +1085,7 @@ void *__kfence_alloc(struct kmem_cache *s, size_t size, gfp_t flags) * properties (e.g. reside in DMAable memory). 
*/ if ((flags & GFP_ZONEMASK) || + ((flags & __GFP_THISNODE) && num_online_nodes() > 1) || (s->flags & (SLAB_CACHE_DMA | SLAB_CACHE_DMA32))) { atomic_long_inc(&counters[KFENCE_COUNTER_SKIP_INCOMPAT]); return NULL; diff --git a/mm/kmemleak.c b/mm/kmemleak.c index 982bb5ef3233..c6ed68604136 100644 --- a/mm/kmemleak.c +++ b/mm/kmemleak.c @@ -1689,7 +1689,7 @@ static void kmemleak_scan(void) unsigned long phys = object->pointer; if (PHYS_PFN(phys) < min_low_pfn || - PHYS_PFN(phys + object->size) >= max_low_pfn) + PHYS_PFN(phys + object->size) > max_low_pfn) __paint_it(object, KMEMLEAK_BLACK); } diff --git a/mm/swapfile.c b/mm/swapfile.c index 6e867c16ea93..ba19430dd4ea 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -794,7 +794,7 @@ static unsigned int alloc_swap_scan_cluster(struct swap_info_struct *si, if (!cluster_scan_range(si, ci, offset, nr_pages, &need_reclaim)) continue; if (need_reclaim) { - ret = cluster_reclaim_range(si, ci, start, end); + ret = cluster_reclaim_range(si, ci, offset, offset + nr_pages); /* * Reclaim drops ci->lock and cluster could be used * by another order. Not checking flag as off-list diff --git a/mm/vmscan.c b/mm/vmscan.c index 683ec56d4f60..c767d71c43d7 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -1086,7 +1086,7 @@ static unsigned int shrink_folio_list(struct list_head *folio_list, struct folio_batch free_folios; LIST_HEAD(ret_folios); LIST_HEAD(demote_folios); - unsigned int nr_reclaimed = 0; + unsigned int nr_reclaimed = 0, nr_demoted = 0; unsigned int pgactivate = 0; bool do_demote_pass; struct swap_iocb *plug = NULL; @@ -1550,8 +1550,9 @@ keep: /* 'folio_list' is always empty here */ /* Migrate folios selected for demotion */ - stat->nr_demoted = demote_folio_list(&demote_folios, pgdat); - nr_reclaimed += stat->nr_demoted; + nr_demoted = demote_folio_list(&demote_folios, pgdat); + nr_reclaimed += nr_demoted; + stat->nr_demoted += nr_demoted; /* Folios that could not be demoted are still in @demote_folios */ if (!list_empty(&demote_folios)) { /* Folios which weren't demoted go back on @folio_list */ @@ -1692,6 +1693,7 @@ static unsigned long isolate_lru_folios(unsigned long nr_to_scan, unsigned long nr_skipped[MAX_NR_ZONES] = { 0, }; unsigned long skipped = 0; unsigned long scan, total_scan, nr_pages; + unsigned long max_nr_skipped = 0; LIST_HEAD(folios_skipped); total_scan = 0; @@ -1706,9 +1708,12 @@ static unsigned long isolate_lru_folios(unsigned long nr_to_scan, nr_pages = folio_nr_pages(folio); total_scan += nr_pages; - if (folio_zonenum(folio) > sc->reclaim_idx) { + /* Using max_nr_skipped to prevent hard LOCKUP*/ + if (max_nr_skipped < SWAP_CLUSTER_MAX_SKIPPED && + (folio_zonenum(folio) > sc->reclaim_idx)) { nr_skipped[folio_zonenum(folio)] += nr_pages; move_to = &folios_skipped; + max_nr_skipped++; goto move; } diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c index 817626a351f8..6d0e47f7ae33 100644 --- a/mm/zsmalloc.c +++ b/mm/zsmalloc.c @@ -452,7 +452,7 @@ static DEFINE_PER_CPU(struct mapping_area, zs_map_area) = { .lock = INIT_LOCAL_LOCK(lock), }; -static inline bool is_first_zpdesc(struct zpdesc *zpdesc) +static inline bool __maybe_unused is_first_zpdesc(struct zpdesc *zpdesc) { return PagePrivate(zpdesc_page(zpdesc)); } diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index 49f97d4138ea..46ea0bee2259 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -710,12 +710,12 @@ static bool l2cap_valid_mtu(struct l2cap_chan *chan, u16 mtu) { switch (chan->scid) { case L2CAP_CID_ATT: - if (mtu < L2CAP_LE_MIN_MTU) + 
if (mtu && mtu < L2CAP_LE_MIN_MTU) return false; break; default: - if (mtu < L2CAP_DEFAULT_MIN_MTU) + if (mtu && mtu < L2CAP_DEFAULT_MIN_MTU) return false; } diff --git a/net/core/dev.c b/net/core/dev.c index afa2282f2604..c0021cbd28fc 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6708,7 +6708,7 @@ void napi_resume_irqs(unsigned int napi_id) static void __napi_hash_add_with_id(struct napi_struct *napi, unsigned int napi_id) { - napi->napi_id = napi_id; + WRITE_ONCE(napi->napi_id, napi_id); hlist_add_head_rcu(&napi->napi_hash_node, &napi_hash[napi->napi_id % HASH_SIZE(napi_hash)]); } @@ -9924,6 +9924,10 @@ static int dev_xdp_attach(struct net_device *dev, struct netlink_ext_ack *extack NL_SET_ERR_MSG(extack, "Program bound to different device"); return -EINVAL; } + if (bpf_prog_is_dev_bound(new_prog->aux) && mode == XDP_MODE_SKB) { + NL_SET_ERR_MSG(extack, "Can't attach device-bound programs in generic mode"); + return -EINVAL; + } if (new_prog->expected_attach_type == BPF_XDP_DEVMAP) { NL_SET_ERR_MSG(extack, "BPF_XDP_DEVMAP programs can not be attached to a device"); return -EINVAL; @@ -10260,37 +10264,14 @@ static bool from_cleanup_net(void) #endif } -static void rtnl_drop_if_cleanup_net(void) -{ - if (from_cleanup_net()) - __rtnl_unlock(); -} - -static void rtnl_acquire_if_cleanup_net(void) -{ - if (from_cleanup_net()) - rtnl_lock(); -} - /* Delayed registration/unregisteration */ LIST_HEAD(net_todo_list); -static LIST_HEAD(net_todo_list_for_cleanup_net); - -/* TODO: net_todo_list/net_todo_list_for_cleanup_net should probably - * be provided by callers, instead of being static, rtnl protected. - */ -static struct list_head *todo_list(void) -{ - return from_cleanup_net() ? &net_todo_list_for_cleanup_net : - &net_todo_list; -} - DECLARE_WAIT_QUEUE_HEAD(netdev_unregistering_wq); atomic_t dev_unreg_count = ATOMIC_INIT(0); static void net_set_todo(struct net_device *dev) { - list_add_tail(&dev->todo_list, todo_list()); + list_add_tail(&dev->todo_list, &net_todo_list); } static netdev_features_t netdev_sync_upper_features(struct net_device *lower, @@ -11140,7 +11121,7 @@ void netdev_run_todo(void) #endif /* Snapshot list, allow later requests */ - list_replace_init(todo_list(), &list); + list_replace_init(&net_todo_list, &list); __rtnl_unlock(); @@ -11785,11 +11766,9 @@ void unregister_netdevice_many_notify(struct list_head *head, WRITE_ONCE(dev->reg_state, NETREG_UNREGISTERING); netdev_unlock(dev); } - - rtnl_drop_if_cleanup_net(); flush_all_backlogs(); + synchronize_net(); - rtnl_acquire_if_cleanup_net(); list_for_each_entry(dev, head, unreg_list) { struct sk_buff *skb = NULL; @@ -11849,9 +11828,7 @@ void unregister_netdevice_many_notify(struct list_head *head, #endif } - rtnl_drop_if_cleanup_net(); synchronize_net(); - rtnl_acquire_if_cleanup_net(); list_for_each_entry(dev, head, unreg_list) { netdev_put(dev, &dev->dev_registered_tracker); diff --git a/net/core/page_pool.c b/net/core/page_pool.c index 1906c62dee85..f5e908c9e7ad 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -1146,7 +1146,9 @@ void page_pool_disable_direct_recycling(struct page_pool *pool) WARN_ON(!test_bit(NAPI_STATE_SCHED, &pool->p.napi->state)); WARN_ON(READ_ONCE(pool->p.napi->list_owner) != -1); + mutex_lock(&page_pools_lock); WRITE_ONCE(pool->p.napi, NULL); + mutex_unlock(&page_pools_lock); } EXPORT_SYMBOL(page_pool_disable_direct_recycling); diff --git a/net/core/page_pool_priv.h b/net/core/page_pool_priv.h index 57439787b9c2..2fb06d5f6d55 100644 --- a/net/core/page_pool_priv.h +++ 
b/net/core/page_pool_priv.h @@ -7,6 +7,8 @@ #include "netmem_priv.h" +extern struct mutex page_pools_lock; + s32 page_pool_inflight(const struct page_pool *pool, bool strict); int page_pool_list(struct page_pool *pool); diff --git a/net/core/page_pool_user.c b/net/core/page_pool_user.c index 48335766c1bf..6677e0c2e256 100644 --- a/net/core/page_pool_user.c +++ b/net/core/page_pool_user.c @@ -3,6 +3,7 @@ #include <linux/mutex.h> #include <linux/netdevice.h> #include <linux/xarray.h> +#include <net/busy_poll.h> #include <net/net_debug.h> #include <net/netdev_rx_queue.h> #include <net/page_pool/helpers.h> @@ -14,10 +15,11 @@ #include "netdev-genl-gen.h" static DEFINE_XARRAY_FLAGS(page_pools, XA_FLAGS_ALLOC1); -/* Protects: page_pools, netdevice->page_pools, pool->slow.netdev, pool->user. +/* Protects: page_pools, netdevice->page_pools, pool->p.napi, pool->slow.netdev, + * pool->user. * Ordering: inside rtnl_lock */ -static DEFINE_MUTEX(page_pools_lock); +DEFINE_MUTEX(page_pools_lock); /* Page pools are only reachable from user space (via netlink) if they are * linked to a netdev at creation time. Following page pool "visibility" @@ -216,6 +218,7 @@ page_pool_nl_fill(struct sk_buff *rsp, const struct page_pool *pool, { struct net_devmem_dmabuf_binding *binding = pool->mp_priv; size_t inflight, refsz; + unsigned int napi_id; void *hdr; hdr = genlmsg_iput(rsp, info); @@ -229,8 +232,10 @@ page_pool_nl_fill(struct sk_buff *rsp, const struct page_pool *pool, nla_put_u32(rsp, NETDEV_A_PAGE_POOL_IFINDEX, pool->slow.netdev->ifindex)) goto err_cancel; - if (pool->user.napi_id && - nla_put_uint(rsp, NETDEV_A_PAGE_POOL_NAPI_ID, pool->user.napi_id)) + + napi_id = pool->p.napi ? READ_ONCE(pool->p.napi->napi_id) : 0; + if (napi_id >= MIN_NAPI_ID && + nla_put_uint(rsp, NETDEV_A_PAGE_POOL_NAPI_ID, napi_id)) goto err_cancel; inflight = page_pool_inflight(pool, false); @@ -319,8 +324,6 @@ int page_pool_list(struct page_pool *pool) if (pool->slow.netdev) { hlist_add_head(&pool->user.list, &pool->slow.netdev->page_pools); - pool->user.napi_id = pool->p.napi ? pool->p.napi->napi_id : 0; - netdev_nl_page_pool_event(pool, NETDEV_CMD_PAGE_POOL_ADD_NTF); } diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c index 7bb94875a7ec..34bee42e1247 100644 --- a/net/ethtool/ioctl.c +++ b/net/ethtool/ioctl.c @@ -998,7 +998,7 @@ static noinline_for_stack int ethtool_set_rxnfc(struct net_device *dev, ethtool_get_flow_spec_ring(info.fs.ring_cookie)) return -EINVAL; - if (ops->get_rxfh) { + if (cmd == ETHTOOL_SRXFH && ops->get_rxfh) { struct ethtool_rxfh_param rxfh = {}; rc = ops->get_rxfh(dev, &rxfh); diff --git a/net/hsr/hsr_forward.c b/net/hsr/hsr_forward.c index 87bb3a91598e..a4bacf198555 100644 --- a/net/hsr/hsr_forward.c +++ b/net/hsr/hsr_forward.c @@ -700,9 +700,12 @@ static int fill_frame_info(struct hsr_frame_info *frame, frame->is_vlan = true; if (frame->is_vlan) { - if (skb->mac_len < offsetofend(struct hsr_vlan_ethhdr, vlanhdr)) + /* Note: skb->mac_len might be wrong here. 
*/ + if (!pskb_may_pull(skb, + skb_mac_offset(skb) + + offsetofend(struct hsr_vlan_ethhdr, vlanhdr))) return -EINVAL; - vlan_hdr = (struct hsr_vlan_ethhdr *)ethhdr; + vlan_hdr = (struct hsr_vlan_ethhdr *)skb_mac_header(skb); proto = vlan_hdr->vlanhdr.h_vlan_encapsulated_proto; } diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index b0fbf804bbba..0e4076866c0a 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -279,7 +279,7 @@ static void esp_output_done(void *data, int err) x->encap && x->encap->encap_type == TCP_ENCAP_ESPINTCP) esp_output_tail_tcp(x, skb); else - xfrm_output_resume(skb->sk, skb, err); + xfrm_output_resume(skb_to_full_sk(skb), skb, err); } } diff --git a/net/ipv4/ipmr_base.c b/net/ipv4/ipmr_base.c index 03b6eee407a2..28d77d454d44 100644 --- a/net/ipv4/ipmr_base.c +++ b/net/ipv4/ipmr_base.c @@ -330,9 +330,6 @@ next_entry: list_for_each_entry(mfc, &mrt->mfc_unres_queue, list) { if (e < s_e) goto next_entry2; - if (filter->dev && - !mr_mfc_uses_dev(mrt, mfc, filter->dev)) - goto next_entry2; err = fill(mrt, skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, mfc, RTM_NEWROUTE, flags); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 0e5b9a654254..bc95d2a5924f 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -265,11 +265,14 @@ static u16 tcp_select_window(struct sock *sk) u32 cur_win, new_win; /* Make the window 0 if we failed to queue the data because we - * are out of memory. The window is temporary, so we don't store - * it on the socket. + * are out of memory. */ - if (unlikely(inet_csk(sk)->icsk_ack.pending & ICSK_ACK_NOMEM)) + if (unlikely(inet_csk(sk)->icsk_ack.pending & ICSK_ACK_NOMEM)) { + tp->pred_flags = 0; + tp->rcv_wnd = 0; + tp->rcv_wup = tp->rcv_nxt; return 0; + } cur_win = tcp_receive_window(tp); new_win = __tcp_select_window(sk); diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 5f3d0cc1555a..9e73944e3b53 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -315,7 +315,7 @@ static void esp_output_done(void *data, int err) x->encap && x->encap->encap_type == TCP_ENCAP_ESPINTCP) esp_output_tail_tcp(x, skb); else - xfrm_output_resume(skb->sk, skb, err); + xfrm_output_resume(skb_to_full_sk(skb), skb, err); } } diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c index 5f7b1fdbffe6..b3d5d1f266ee 100644 --- a/net/ipv6/xfrm6_output.c +++ b/net/ipv6/xfrm6_output.c @@ -82,14 +82,14 @@ static int __xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb) toobig = skb->len > mtu && !skb_is_gso(skb); - if (toobig && xfrm6_local_dontfrag(skb->sk)) { + if (toobig && xfrm6_local_dontfrag(sk)) { xfrm6_local_rxpmtu(skb, mtu); kfree_skb(skb); return -EMSGSIZE; } else if (toobig && xfrm6_noneed_fragment(skb)) { skb->ignore_df = 1; goto skip_frag; - } else if (!skb->ignore_df && toobig && skb->sk) { + } else if (!skb->ignore_df && toobig && sk) { xfrm_local_error(skb, mtu); kfree_skb(skb); return -EMSGSIZE; diff --git a/net/mptcp/ctrl.c b/net/mptcp/ctrl.c index 3999e0ba2c35..2dd81e6c26bd 100644 --- a/net/mptcp/ctrl.c +++ b/net/mptcp/ctrl.c @@ -418,9 +418,9 @@ void mptcp_active_detect_blackhole(struct sock *ssk, bool expired) MPTCP_INC_STATS(net, MPTCP_MIB_MPCAPABLEACTIVEDROP); subflow->mpc_drop = 1; mptcp_subflow_early_fallback(mptcp_sk(subflow->conn), subflow); - } else { - subflow->mpc_drop = 0; } + } else if (ssk->sk_state == TCP_SYN_SENT) { + subflow->mpc_drop = 0; } } diff --git a/net/mptcp/options.c b/net/mptcp/options.c index 123f3f297284..fd2de185bc93 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ 
-108,7 +108,6 @@ static void mptcp_parse_option(const struct sk_buff *skb, mp_opt->suboptions |= OPTION_MPTCP_DSS; mp_opt->use_map = 1; mp_opt->mpc_map = 1; - mp_opt->use_ack = 0; mp_opt->data_len = get_unaligned_be16(ptr); ptr += 2; } @@ -157,11 +156,6 @@ static void mptcp_parse_option(const struct sk_buff *skb, pr_debug("DSS\n"); ptr++; - /* we must clear 'mpc_map' be able to detect MP_CAPABLE - * map vs DSS map in mptcp_incoming_options(), and reconstruct - * map info accordingly - */ - mp_opt->mpc_map = 0; flags = (*ptr++) & MPTCP_DSS_FLAG_MASK; mp_opt->data_fin = (flags & MPTCP_DSS_DATA_FIN) != 0; mp_opt->dsn64 = (flags & MPTCP_DSS_DSN64) != 0; @@ -369,8 +363,11 @@ void mptcp_get_options(const struct sk_buff *skb, const unsigned char *ptr; int length; - /* initialize option status */ - mp_opt->suboptions = 0; + /* Ensure that casting the whole status to u32 is efficient and safe */ + BUILD_BUG_ON(sizeof_field(struct mptcp_options_received, status) != sizeof(u32)); + BUILD_BUG_ON(!IS_ALIGNED(offsetof(struct mptcp_options_received, status), + sizeof(u32))); + *(u32 *)&mp_opt->status = 0; length = (th->doff * 4) - sizeof(struct tcphdr); ptr = (const unsigned char *)(th + 1); diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 98ac73938bd8..572d160edca3 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -2020,7 +2020,8 @@ int mptcp_pm_nl_set_flags(struct sk_buff *skb, struct genl_info *info) return -EINVAL; } if ((addr.flags & MPTCP_PM_ADDR_FLAG_FULLMESH) && - (entry->flags & MPTCP_PM_ADDR_FLAG_SIGNAL)) { + (entry->flags & (MPTCP_PM_ADDR_FLAG_SIGNAL | + MPTCP_PM_ADDR_FLAG_IMPLICIT))) { spin_unlock_bh(&pernet->lock); GENL_SET_ERR_MSG(info, "invalid addr flags"); return -EINVAL; diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index c44c89ecaca6..6bd819047470 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -1767,8 +1767,10 @@ static int mptcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, * see mptcp_disconnect(). * Attempt it again outside the problematic scope. 
*/ - if (!mptcp_disconnect(sk, 0)) + if (!mptcp_disconnect(sk, 0)) { + sk->sk_disconnects++; sk->sk_socket->state = SS_UNCONNECTED; + } } inet_clear_bit(DEFER_CONNECT, sk); diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 0174a5aad279..f6a207958459 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -149,22 +149,24 @@ struct mptcp_options_received { u32 subflow_seq; u16 data_len; __sum16 csum; - u16 suboptions; + struct_group(status, + u16 suboptions; + u16 use_map:1, + dsn64:1, + data_fin:1, + use_ack:1, + ack64:1, + mpc_map:1, + reset_reason:4, + reset_transient:1, + echo:1, + backup:1, + deny_join_id0:1, + __unused:2; + ); + u8 join_id; u32 token; u32 nonce; - u16 use_map:1, - dsn64:1, - data_fin:1, - use_ack:1, - ack64:1, - mpc_map:1, - reset_reason:4, - reset_transient:1, - echo:1, - backup:1, - deny_join_id0:1, - __unused:2; - u8 join_id; u64 thmac; u8 hmac[MPTCPOPT_HMAC_LEN]; struct mptcp_addr_info addr; diff --git a/net/ncsi/ncsi-manage.c b/net/ncsi/ncsi-manage.c index bf276eaf9330..7891a537bddd 100644 --- a/net/ncsi/ncsi-manage.c +++ b/net/ncsi/ncsi-manage.c @@ -1385,6 +1385,12 @@ static void ncsi_probe_channel(struct ncsi_dev_priv *ndp) nd->state = ncsi_dev_state_probe_package; break; case ncsi_dev_state_probe_package: + if (ndp->package_probe_id >= 8) { + /* Last package probed, finishing */ + ndp->flags |= NCSI_DEV_PROBED; + break; + } + ndp->pending_req_num = 1; nca.type = NCSI_PKT_CMD_SP; @@ -1501,13 +1507,8 @@ static void ncsi_probe_channel(struct ncsi_dev_priv *ndp) if (ret) goto error; - /* Probe next package */ + /* Probe next package after receiving response */ ndp->package_probe_id++; - if (ndp->package_probe_id >= 8) { - /* Probe finished */ - ndp->flags |= NCSI_DEV_PROBED; - break; - } nd->state = ncsi_dev_state_probe_package; ndp->active_package = NULL; break; diff --git a/net/ncsi/ncsi-rsp.c b/net/ncsi/ncsi-rsp.c index 14bd66909ca4..4a8ce2949fae 100644 --- a/net/ncsi/ncsi-rsp.c +++ b/net/ncsi/ncsi-rsp.c @@ -1089,14 +1089,12 @@ static int ncsi_rsp_handler_netlink(struct ncsi_request *nr) static int ncsi_rsp_handler_gmcma(struct ncsi_request *nr) { struct ncsi_dev_priv *ndp = nr->ndp; + struct sockaddr *saddr = &ndp->pending_mac; struct net_device *ndev = ndp->ndev.dev; struct ncsi_rsp_gmcma_pkt *rsp; - struct sockaddr saddr; - int ret = -1; int i; rsp = (struct ncsi_rsp_gmcma_pkt *)skb_network_header(nr->rsp); - saddr.sa_family = ndev->type; ndev->priv_flags |= IFF_LIVE_ADDR_CHANGE; netdev_info(ndev, "NCSI: Received %d provisioned MAC addresses\n", @@ -1108,20 +1106,20 @@ static int ncsi_rsp_handler_gmcma(struct ncsi_request *nr) rsp->addresses[i][4], rsp->addresses[i][5]); } + saddr->sa_family = ndev->type; for (i = 0; i < rsp->address_count; i++) { - memcpy(saddr.sa_data, &rsp->addresses[i], ETH_ALEN); - ret = ndev->netdev_ops->ndo_set_mac_address(ndev, &saddr); - if (ret < 0) { + if (!is_valid_ether_addr(rsp->addresses[i])) { netdev_warn(ndev, "NCSI: Unable to assign %pM to device\n", - saddr.sa_data); + rsp->addresses[i]); continue; } - netdev_warn(ndev, "NCSI: Set MAC address to %pM\n", saddr.sa_data); + memcpy(saddr->sa_data, rsp->addresses[i], ETH_ALEN); + netdev_warn(ndev, "NCSI: Will set MAC address to %pM\n", saddr->sa_data); break; } - ndp->gma_flag = ret == 0; - return ret; + ndp->gma_flag = 1; + return 0; } static struct ncsi_rsp_handler { diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 667459256e4c..a34de9c17cf1 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ 
-5078,7 +5078,7 @@ static int nft_set_desc_concat_parse(const struct nlattr *attr, static int nft_set_desc_concat(struct nft_set_desc *desc, const struct nlattr *nla) { - u32 num_regs = 0, key_num_regs = 0; + u32 len = 0, num_regs; struct nlattr *attr; int rem, err, i; @@ -5092,12 +5092,12 @@ static int nft_set_desc_concat(struct nft_set_desc *desc, } for (i = 0; i < desc->field_count; i++) - num_regs += DIV_ROUND_UP(desc->field_len[i], sizeof(u32)); + len += round_up(desc->field_len[i], sizeof(u32)); - key_num_regs = DIV_ROUND_UP(desc->klen, sizeof(u32)); - if (key_num_regs != num_regs) + if (len != desc->klen) return -EINVAL; + num_regs = DIV_ROUND_UP(desc->klen, sizeof(u32)); if (num_regs > NFT_REG32_COUNT) return -E2BIG; diff --git a/net/nfc/nci/hci.c b/net/nfc/nci/hci.c index de175318a3a0..082ab66f120b 100644 --- a/net/nfc/nci/hci.c +++ b/net/nfc/nci/hci.c @@ -542,6 +542,8 @@ static u8 nci_hci_create_pipe(struct nci_dev *ndev, u8 dest_host, pr_debug("pipe created=%d\n", pipe); + if (pipe >= NCI_HCI_MAX_PIPES) + pipe = NCI_HCI_INVALID_PIPE; return pipe; } diff --git a/net/rose/rose_timer.c b/net/rose/rose_timer.c index f06ddbed3fed..1525773e94aa 100644 --- a/net/rose/rose_timer.c +++ b/net/rose/rose_timer.c @@ -122,6 +122,10 @@ static void rose_heartbeat_expiry(struct timer_list *t) struct rose_sock *rose = rose_sk(sk); bh_lock_sock(sk); + if (sock_owned_by_user(sk)) { + sk_reset_timer(sk, &sk->sk_timer, jiffies + HZ/20); + goto out; + } switch (rose->state) { case ROSE_STATE_0: /* Magic here: If we listen() and a new link dies before it @@ -152,6 +156,7 @@ static void rose_heartbeat_expiry(struct timer_list *t) } rose_start_heartbeat(sk); +out: bh_unlock_sock(sk); sock_put(sk); } @@ -162,6 +167,10 @@ static void rose_timer_expiry(struct timer_list *t) struct sock *sk = &rose->sock; bh_lock_sock(sk); + if (sock_owned_by_user(sk)) { + sk_reset_timer(sk, &rose->timer, jiffies + HZ/20); + goto out; + } switch (rose->state) { case ROSE_STATE_1: /* T1 */ case ROSE_STATE_4: /* T2 */ @@ -182,6 +191,7 @@ static void rose_timer_expiry(struct timer_list *t) } break; } +out: bh_unlock_sock(sk); sock_put(sk); } @@ -192,6 +202,10 @@ static void rose_idletimer_expiry(struct timer_list *t) struct sock *sk = &rose->sock; bh_lock_sock(sk); + if (sock_owned_by_user(sk)) { + sk_reset_timer(sk, &rose->idletimer, jiffies + HZ/20); + goto out; + } rose_clear_queues(sk); rose_write_internal(sk, ROSE_CLEAR_REQUEST); @@ -207,6 +221,7 @@ static void rose_idletimer_expiry(struct timer_list *t) sk->sk_state_change(sk); sock_set_flag(sk, SOCK_DEAD); } +out: bh_unlock_sock(sk); sock_put(sk); } diff --git a/net/rxrpc/peer_event.c b/net/rxrpc/peer_event.c index d82e44a3901b..e874c31fa901 100644 --- a/net/rxrpc/peer_event.c +++ b/net/rxrpc/peer_event.c @@ -246,7 +246,7 @@ static void rxrpc_peer_keepalive_dispatch(struct rxrpc_net *rxnet, bool use; int slot; - spin_lock(&rxnet->peer_hash_lock); + spin_lock_bh(&rxnet->peer_hash_lock); while (!list_empty(collector)) { peer = list_entry(collector->next, @@ -257,7 +257,7 @@ static void rxrpc_peer_keepalive_dispatch(struct rxrpc_net *rxnet, continue; use = __rxrpc_use_local(peer->local, rxrpc_local_use_peer_keepalive); - spin_unlock(&rxnet->peer_hash_lock); + spin_unlock_bh(&rxnet->peer_hash_lock); if (use) { keepalive_at = peer->last_tx_at + RXRPC_KEEPALIVE_TIME; @@ -277,17 +277,17 @@ static void rxrpc_peer_keepalive_dispatch(struct rxrpc_net *rxnet, */ slot += cursor; slot &= mask; - spin_lock(&rxnet->peer_hash_lock); + spin_lock_bh(&rxnet->peer_hash_lock); 
list_add_tail(&peer->keepalive_link, &rxnet->peer_keepalive[slot & mask]); - spin_unlock(&rxnet->peer_hash_lock); + spin_unlock_bh(&rxnet->peer_hash_lock); rxrpc_unuse_local(peer->local, rxrpc_local_unuse_peer_keepalive); } rxrpc_put_peer(peer, rxrpc_peer_put_keepalive); - spin_lock(&rxnet->peer_hash_lock); + spin_lock_bh(&rxnet->peer_hash_lock); } - spin_unlock(&rxnet->peer_hash_lock); + spin_unlock_bh(&rxnet->peer_hash_lock); } /* @@ -317,7 +317,7 @@ void rxrpc_peer_keepalive_worker(struct work_struct *work) * second; the bucket at cursor + 1 goes at now + 1s and so * on... */ - spin_lock(&rxnet->peer_hash_lock); + spin_lock_bh(&rxnet->peer_hash_lock); list_splice_init(&rxnet->peer_keepalive_new, &collector); stop = cursor + ARRAY_SIZE(rxnet->peer_keepalive); @@ -329,7 +329,7 @@ void rxrpc_peer_keepalive_worker(struct work_struct *work) } base = now; - spin_unlock(&rxnet->peer_hash_lock); + spin_unlock_bh(&rxnet->peer_hash_lock); rxnet->peer_keepalive_base = base; rxnet->peer_keepalive_cursor = cursor; diff --git a/net/rxrpc/peer_object.c b/net/rxrpc/peer_object.c index e1c63129586b..0fcc87f0409f 100644 --- a/net/rxrpc/peer_object.c +++ b/net/rxrpc/peer_object.c @@ -325,10 +325,10 @@ void rxrpc_new_incoming_peer(struct rxrpc_local *local, struct rxrpc_peer *peer) hash_key = rxrpc_peer_hash_key(local, &peer->srx); rxrpc_init_peer(local, peer, hash_key); - spin_lock(&rxnet->peer_hash_lock); + spin_lock_bh(&rxnet->peer_hash_lock); hash_add_rcu(rxnet->peer_hash, &peer->hash_link, hash_key); list_add_tail(&peer->keepalive_link, &rxnet->peer_keepalive_new); - spin_unlock(&rxnet->peer_hash_lock); + spin_unlock_bh(&rxnet->peer_hash_lock); } /* @@ -360,7 +360,7 @@ struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_local *local, return NULL; } - spin_lock(&rxnet->peer_hash_lock); + spin_lock_bh(&rxnet->peer_hash_lock); /* Need to check that we aren't racing with someone else */ peer = __rxrpc_lookup_peer_rcu(local, srx, hash_key); @@ -373,7 +373,7 @@ struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_local *local, &rxnet->peer_keepalive_new); } - spin_unlock(&rxnet->peer_hash_lock); + spin_unlock_bh(&rxnet->peer_hash_lock); if (peer) rxrpc_free_peer(candidate); @@ -423,10 +423,10 @@ static void __rxrpc_put_peer(struct rxrpc_peer *peer) ASSERT(hlist_empty(&peer->error_targets)); - spin_lock(&rxnet->peer_hash_lock); + spin_lock_bh(&rxnet->peer_hash_lock); hash_del_rcu(&peer->hash_link); list_del_init(&peer->keepalive_link); - spin_unlock(&rxnet->peer_hash_lock); + spin_unlock_bh(&rxnet->peer_hash_lock); rxrpc_free_peer(peer); } diff --git a/net/sched/sch_ets.c b/net/sched/sch_ets.c index f80bc05d4c5a..516038a44163 100644 --- a/net/sched/sch_ets.c +++ b/net/sched/sch_ets.c @@ -91,6 +91,8 @@ ets_class_from_arg(struct Qdisc *sch, unsigned long arg) { struct ets_sched *q = qdisc_priv(sch); + if (arg == 0 || arg > q->nbands) + return NULL; return &q->classes[arg - 1]; } diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index fa9d1b49599b..075695173648 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -337,7 +337,10 @@ EXPORT_SYMBOL_GPL(vsock_find_connected_socket); void vsock_remove_sock(struct vsock_sock *vsk) { - vsock_remove_bound(vsk); + /* Transport reassignment must not remove the binding. 
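Stepping back to the sch_ets hunk above: ets_class_from_arg() receives a 1-based class index, so both 0 and anything beyond q->nbands would index outside q->classes[]. A self-contained sketch of the same guard (the structs below are stand-ins, not the kernel's struct ets_sched):

#include <stdio.h>

struct band { int quantum; };

struct sched {
	unsigned int nbands;
	struct band classes[16];
};

/* arg is 1-based; 0 and anything past nbands would index out of bounds */
static struct band *class_from_arg(struct sched *q, unsigned long arg)
{
	if (arg == 0 || arg > q->nbands)
		return NULL;
	return &q->classes[arg - 1];
}

int main(void)
{
	struct sched q = { .nbands = 3 };

	printf("arg 3 -> %s, arg 4 -> %s\n",
	       class_from_arg(&q, 3) ? "valid" : "NULL",
	       class_from_arg(&q, 4) ? "valid" : "NULL");
	return 0;
}

Callers of ets_class_from_arg() therefore have to be prepared for a NULL return instead of a pointer just past the band array.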
*/ + if (sock_flag(sk_vsock(vsk), SOCK_DEAD)) + vsock_remove_bound(vsk); + vsock_remove_connected(vsk); } EXPORT_SYMBOL_GPL(vsock_remove_sock); @@ -821,12 +824,13 @@ static void __vsock_release(struct sock *sk, int level) */ lock_sock_nested(sk, level); + sock_orphan(sk); + if (vsk->transport) vsk->transport->release(vsk); else if (sock_type_connectible(sk->sk_type)) vsock_remove_sock(vsk); - sock_orphan(sk); sk->sk_shutdown = SHUTDOWN_MASK; skb_queue_purge(&sk->sk_receive_queue); @@ -1519,6 +1523,11 @@ static int vsock_connect(struct socket *sock, struct sockaddr *addr, if (err < 0) goto out; + /* sk_err might have been set as a result of an earlier + * (failed) connect attempt. + */ + sk->sk_err = 0; + /* Mark sock as connecting and set the error code to in * progress in case this is a non-blocking connect. */ diff --git a/net/xfrm/xfrm_interface_core.c b/net/xfrm/xfrm_interface_core.c index 98f1e2b67c76..c397eb99d867 100644 --- a/net/xfrm/xfrm_interface_core.c +++ b/net/xfrm/xfrm_interface_core.c @@ -506,7 +506,7 @@ xmit: skb_dst_set(skb, dst); skb->dev = tdev; - err = dst_output(xi->net, skb->sk, skb); + err = dst_output(xi->net, skb_to_full_sk(skb), skb); if (net_xmit_eval(err) == 0) { dev_sw_netstats_tx_add(dev, 1, length); } else { diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index b5025cf6136e..f7abd42c077d 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -802,7 +802,7 @@ static int xfrm4_tunnel_check_size(struct sk_buff *skb) !skb_gso_validate_network_len(skb, ip_skb_dst_mtu(skb->sk, skb)))) { skb->protocol = htons(ETH_P_IP); - if (skb->sk) + if (skb->sk && sk_fullsock(skb->sk)) xfrm_local_error(skb, mtu); else icmp_send(skb, ICMP_DEST_UNREACH, @@ -838,6 +838,7 @@ static int xfrm6_tunnel_check_size(struct sk_buff *skb) { int mtu, ret = 0; struct dst_entry *dst = skb_dst(skb); + struct sock *sk = skb_to_full_sk(skb); if (skb->ignore_df) goto out; @@ -852,9 +853,9 @@ static int xfrm6_tunnel_check_size(struct sk_buff *skb) skb->dev = dst->dev; skb->protocol = htons(ETH_P_IPV6); - if (xfrm6_local_dontfrag(skb->sk)) + if (xfrm6_local_dontfrag(sk)) ipv6_stub->xfrm6_local_rxpmtu(skb, mtu); - else if (skb->sk) + else if (sk) xfrm_local_error(skb, mtu); else icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 9e510021ee91..6551e588fe52 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -2964,7 +2964,7 @@ static void xfrm_policy_queue_process(struct timer_list *t) skb_dst_drop(skb); skb_dst_set(skb, dst); - dst_output(net, skb->sk, skb); + dst_output(net, skb_to_full_sk(skb), skb); } out: diff --git a/net/xfrm/xfrm_replay.c b/net/xfrm/xfrm_replay.c index e500aebbad22..dbdf8a39dffe 100644 --- a/net/xfrm/xfrm_replay.c +++ b/net/xfrm/xfrm_replay.c @@ -714,10 +714,12 @@ static int xfrm_replay_overflow_offload_esn(struct xfrm_state *x, struct sk_buff oseq += skb_shinfo(skb)->gso_segs; } - if (unlikely(xo->seq.low < replay_esn->oseq)) { - XFRM_SKB_CB(skb)->seq.output.hi = ++oseq_hi; - xo->seq.hi = oseq_hi; - replay_esn->oseq_hi = oseq_hi; + if (unlikely(oseq < replay_esn->oseq)) { + replay_esn->oseq_hi = ++oseq_hi; + if (xo->seq.low < replay_esn->oseq) { + XFRM_SKB_CB(skb)->seq.output.hi = oseq_hi; + xo->seq.hi = oseq_hi; + } if (replay_esn->oseq_hi == 0) { replay_esn->oseq--; replay_esn->oseq_hi--; diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 34067cb8a479..ad2202fa82f3 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -34,6 +34,8 @@ #define 
xfrm_state_deref_prot(table, net) \ rcu_dereference_protected((table), lockdep_is_held(&(net)->xfrm.xfrm_state_lock)) +#define xfrm_state_deref_check(table, net) \ + rcu_dereference_check((table), lockdep_is_held(&(net)->xfrm.xfrm_state_lock)) static void xfrm_state_gc_task(struct work_struct *work); @@ -62,6 +64,8 @@ static inline unsigned int xfrm_dst_hash(struct net *net, u32 reqid, unsigned short family) { + lockdep_assert_held(&net->xfrm.xfrm_state_lock); + return __xfrm_dst_hash(daddr, saddr, reqid, family, net->xfrm.state_hmask); } @@ -70,6 +74,8 @@ static inline unsigned int xfrm_src_hash(struct net *net, const xfrm_address_t *saddr, unsigned short family) { + lockdep_assert_held(&net->xfrm.xfrm_state_lock); + return __xfrm_src_hash(daddr, saddr, family, net->xfrm.state_hmask); } @@ -77,11 +83,15 @@ static inline unsigned int xfrm_spi_hash(struct net *net, const xfrm_address_t *daddr, __be32 spi, u8 proto, unsigned short family) { + lockdep_assert_held(&net->xfrm.xfrm_state_lock); + return __xfrm_spi_hash(daddr, spi, proto, family, net->xfrm.state_hmask); } static unsigned int xfrm_seq_hash(struct net *net, u32 seq) { + lockdep_assert_held(&net->xfrm.xfrm_state_lock); + return __xfrm_seq_hash(seq, net->xfrm.state_hmask); } @@ -1108,16 +1118,38 @@ xfrm_init_tempstate(struct xfrm_state *x, const struct flowi *fl, x->props.family = tmpl->encap_family; } -static struct xfrm_state *__xfrm_state_lookup_all(struct net *net, u32 mark, +struct xfrm_hash_state_ptrs { + const struct hlist_head *bydst; + const struct hlist_head *bysrc; + const struct hlist_head *byspi; + unsigned int hmask; +}; + +static void xfrm_hash_ptrs_get(const struct net *net, struct xfrm_hash_state_ptrs *ptrs) +{ + unsigned int sequence; + + do { + sequence = read_seqcount_begin(&net->xfrm.xfrm_state_hash_generation); + + ptrs->bydst = xfrm_state_deref_check(net->xfrm.state_bydst, net); + ptrs->bysrc = xfrm_state_deref_check(net->xfrm.state_bysrc, net); + ptrs->byspi = xfrm_state_deref_check(net->xfrm.state_byspi, net); + ptrs->hmask = net->xfrm.state_hmask; + } while (read_seqcount_retry(&net->xfrm.xfrm_state_hash_generation, sequence)); +} + +static struct xfrm_state *__xfrm_state_lookup_all(const struct xfrm_hash_state_ptrs *state_ptrs, + u32 mark, const xfrm_address_t *daddr, __be32 spi, u8 proto, unsigned short family, struct xfrm_dev_offload *xdo) { - unsigned int h = xfrm_spi_hash(net, daddr, spi, proto, family); + unsigned int h = __xfrm_spi_hash(daddr, spi, proto, family, state_ptrs->hmask); struct xfrm_state *x; - hlist_for_each_entry_rcu(x, net->xfrm.state_byspi + h, byspi) { + hlist_for_each_entry_rcu(x, state_ptrs->byspi + h, byspi) { #ifdef CONFIG_XFRM_OFFLOAD if (xdo->type == XFRM_DEV_OFFLOAD_PACKET) { if (x->xso.type != XFRM_DEV_OFFLOAD_PACKET) @@ -1151,15 +1183,16 @@ static struct xfrm_state *__xfrm_state_lookup_all(struct net *net, u32 mark, return NULL; } -static struct xfrm_state *__xfrm_state_lookup(struct net *net, u32 mark, +static struct xfrm_state *__xfrm_state_lookup(const struct xfrm_hash_state_ptrs *state_ptrs, + u32 mark, const xfrm_address_t *daddr, __be32 spi, u8 proto, unsigned short family) { - unsigned int h = xfrm_spi_hash(net, daddr, spi, proto, family); + unsigned int h = __xfrm_spi_hash(daddr, spi, proto, family, state_ptrs->hmask); struct xfrm_state *x; - hlist_for_each_entry_rcu(x, net->xfrm.state_byspi + h, byspi) { + hlist_for_each_entry_rcu(x, state_ptrs->byspi + h, byspi) { if (x->props.family != family || x->id.spi != spi || x->id.proto != proto || @@ -1181,11 +1214,11 @@ 
struct xfrm_state *xfrm_input_state_lookup(struct net *net, u32 mark, __be32 spi, u8 proto, unsigned short family) { + struct xfrm_hash_state_ptrs state_ptrs; struct hlist_head *state_cache_input; struct xfrm_state *x = NULL; - int cpu = get_cpu(); - state_cache_input = per_cpu_ptr(net->xfrm.state_cache_input, cpu); + state_cache_input = raw_cpu_ptr(net->xfrm.state_cache_input); rcu_read_lock(); hlist_for_each_entry_rcu(x, state_cache_input, state_cache_input) { @@ -1202,7 +1235,9 @@ struct xfrm_state *xfrm_input_state_lookup(struct net *net, u32 mark, goto out; } - x = __xfrm_state_lookup(net, mark, daddr, spi, proto, family); + xfrm_hash_ptrs_get(net, &state_ptrs); + + x = __xfrm_state_lookup(&state_ptrs, mark, daddr, spi, proto, family); if (x && x->km.state == XFRM_STATE_VALID) { spin_lock_bh(&net->xfrm.xfrm_state_lock); @@ -1217,20 +1252,20 @@ struct xfrm_state *xfrm_input_state_lookup(struct net *net, u32 mark, out: rcu_read_unlock(); - put_cpu(); return x; } EXPORT_SYMBOL(xfrm_input_state_lookup); -static struct xfrm_state *__xfrm_state_lookup_byaddr(struct net *net, u32 mark, +static struct xfrm_state *__xfrm_state_lookup_byaddr(const struct xfrm_hash_state_ptrs *state_ptrs, + u32 mark, const xfrm_address_t *daddr, const xfrm_address_t *saddr, u8 proto, unsigned short family) { - unsigned int h = xfrm_src_hash(net, daddr, saddr, family); + unsigned int h = __xfrm_src_hash(daddr, saddr, family, state_ptrs->hmask); struct xfrm_state *x; - hlist_for_each_entry_rcu(x, net->xfrm.state_bysrc + h, bysrc) { + hlist_for_each_entry_rcu(x, state_ptrs->bysrc + h, bysrc) { if (x->props.family != family || x->id.proto != proto || !xfrm_addr_equal(&x->id.daddr, daddr, family) || @@ -1250,14 +1285,17 @@ static struct xfrm_state *__xfrm_state_lookup_byaddr(struct net *net, u32 mark, static inline struct xfrm_state * __xfrm_state_locate(struct xfrm_state *x, int use_spi, int family) { + struct xfrm_hash_state_ptrs state_ptrs; struct net *net = xs_net(x); u32 mark = x->mark.v & x->mark.m; + xfrm_hash_ptrs_get(net, &state_ptrs); + if (use_spi) - return __xfrm_state_lookup(net, mark, &x->id.daddr, + return __xfrm_state_lookup(&state_ptrs, mark, &x->id.daddr, x->id.spi, x->id.proto, family); else - return __xfrm_state_lookup_byaddr(net, mark, + return __xfrm_state_lookup_byaddr(&state_ptrs, mark, &x->id.daddr, &x->props.saddr, x->id.proto, family); @@ -1331,6 +1369,7 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr, unsigned short family, u32 if_id) { static xfrm_address_t saddr_wildcard = { }; + struct xfrm_hash_state_ptrs state_ptrs; struct net *net = xp_net(pol); unsigned int h, h_wildcard; struct xfrm_state *x, *x0, *to_put; @@ -1395,8 +1434,10 @@ cached: else if (acquire_in_progress) /* XXX: acquire_in_progress should not happen */ WARN_ON(1); - h = xfrm_dst_hash(net, daddr, saddr, tmpl->reqid, encap_family); - hlist_for_each_entry_rcu(x, net->xfrm.state_bydst + h, bydst) { + xfrm_hash_ptrs_get(net, &state_ptrs); + + h = __xfrm_dst_hash(daddr, saddr, tmpl->reqid, encap_family, state_ptrs.hmask); + hlist_for_each_entry_rcu(x, state_ptrs.bydst + h, bydst) { #ifdef CONFIG_XFRM_OFFLOAD if (pol->xdo.type == XFRM_DEV_OFFLOAD_PACKET) { if (x->xso.type != XFRM_DEV_OFFLOAD_PACKET) @@ -1429,8 +1470,9 @@ cached: if (best || acquire_in_progress) goto found; - h_wildcard = xfrm_dst_hash(net, daddr, &saddr_wildcard, tmpl->reqid, encap_family); - hlist_for_each_entry_rcu(x, net->xfrm.state_bydst + h_wildcard, bydst) { + h_wildcard = __xfrm_dst_hash(daddr, &saddr_wildcard, tmpl->reqid, + 
encap_family, state_ptrs.hmask); + hlist_for_each_entry_rcu(x, state_ptrs.bydst + h_wildcard, bydst) { #ifdef CONFIG_XFRM_OFFLOAD if (pol->xdo.type == XFRM_DEV_OFFLOAD_PACKET) { if (x->xso.type != XFRM_DEV_OFFLOAD_PACKET) @@ -1468,7 +1510,7 @@ found: if (!x && !error && !acquire_in_progress) { if (tmpl->id.spi && - (x0 = __xfrm_state_lookup_all(net, mark, daddr, + (x0 = __xfrm_state_lookup_all(&state_ptrs, mark, daddr, tmpl->id.spi, tmpl->id.proto, encap_family, &pol->xdo)) != NULL) { @@ -2253,10 +2295,13 @@ struct xfrm_state * xfrm_state_lookup(struct net *net, u32 mark, const xfrm_address_t *daddr, __be32 spi, u8 proto, unsigned short family) { + struct xfrm_hash_state_ptrs state_ptrs; struct xfrm_state *x; rcu_read_lock(); - x = __xfrm_state_lookup(net, mark, daddr, spi, proto, family); + xfrm_hash_ptrs_get(net, &state_ptrs); + + x = __xfrm_state_lookup(&state_ptrs, mark, daddr, spi, proto, family); rcu_read_unlock(); return x; } @@ -2267,10 +2312,14 @@ xfrm_state_lookup_byaddr(struct net *net, u32 mark, const xfrm_address_t *daddr, const xfrm_address_t *saddr, u8 proto, unsigned short family) { + struct xfrm_hash_state_ptrs state_ptrs; struct xfrm_state *x; spin_lock_bh(&net->xfrm.xfrm_state_lock); - x = __xfrm_state_lookup_byaddr(net, mark, daddr, saddr, proto, family); + + xfrm_hash_ptrs_get(net, &state_ptrs); + + x = __xfrm_state_lookup_byaddr(&state_ptrs, mark, daddr, saddr, proto, family); spin_unlock_bh(&net->xfrm.xfrm_state_lock); return x; } diff --git a/rust/Makefile b/rust/Makefile index 71a05a3c895a..8fcfd60447bc 100644 --- a/rust/Makefile +++ b/rust/Makefile @@ -331,10 +331,11 @@ $(obj)/bindings/bindings_helpers_generated.rs: private bindgen_target_extra = ; $(obj)/bindings/bindings_helpers_generated.rs: $(src)/helpers/helpers.c FORCE $(call if_changed_dep,bindgen) +rust_exports = $(NM) -p --defined-only $(1) | awk '$$2~/(T|R|D|B)/ && $$3!~/__cfi/ { printf $(2),$$3 }' + quiet_cmd_exports = EXPORTS $@ cmd_exports = \ - $(NM) -p --defined-only $< \ - | awk '$$2~/(T|R|D|B)/ && $$3!~/__cfi/ {printf "EXPORT_SYMBOL_RUST_GPL(%s);\n",$$3}' > $@ + $(call rust_exports,$<,"EXPORT_SYMBOL_RUST_GPL(%s);\n") > $@ $(obj)/exports_core_generated.h: $(obj)/core.o FORCE $(call if_changed,exports) @@ -403,11 +404,36 @@ ifneq ($(or $(CONFIG_ARM64),$(and $(CONFIG_RISCV),$(CONFIG_64BIT))),) __ashlti3 __lshrti3 endif +ifdef CONFIG_MODVERSIONS +cmd_gendwarfksyms = $(if $(skip_gendwarfksyms),, \ + $(call rust_exports,$@,"%s\n") | \ + scripts/gendwarfksyms/gendwarfksyms \ + $(if $(KBUILD_GENDWARFKSYMS_STABLE), --stable) \ + $(if $(KBUILD_SYMTYPES), --symtypes $(@:.o=.symtypes),) \ + $@ >> $(dot-target).cmd) +endif + define rule_rustc_library $(call cmd_and_fixdep,rustc_library) $(call cmd,gen_objtooldep) + $(call cmd,gendwarfksyms) endef +define rule_rust_cc_library + $(call if_changed_rule,cc_o_c) + $(call cmd,force_checksrc) + $(call cmd,gendwarfksyms) +endef + +# helpers.o uses the same export mechanism as Rust libraries, so ensure symbol +# versions are calculated for the helpers too. +$(obj)/helpers/helpers.o: $(src)/helpers/helpers.c $(recordmcount_source) FORCE + +$(call if_changed_rule,rust_cc_library) + +# Disable symbol versioning for exports.o to avoid conflicts with the actual +# symbol versions generated from Rust objects. 
+$(obj)/exports.o: private skip_gendwarfksyms = 1 + $(obj)/core.o: private skip_clippy = 1 $(obj)/core.o: private skip_flags = -Wunreachable_pub $(obj)/core.o: private rustc_objcopy = $(foreach sym,$(redirect-intrinsics),--redefine-sym $(sym)=__rust$(sym)) @@ -419,13 +445,16 @@ ifneq ($(or $(CONFIG_X86_64),$(CONFIG_X86_32)),) $(obj)/core.o: scripts/target.json endif +$(obj)/compiler_builtins.o: private skip_gendwarfksyms = 1 $(obj)/compiler_builtins.o: private rustc_objcopy = -w -W '__*' $(obj)/compiler_builtins.o: $(src)/compiler_builtins.rs $(obj)/core.o FORCE +$(call if_changed_rule,rustc_library) +$(obj)/build_error.o: private skip_gendwarfksyms = 1 $(obj)/build_error.o: $(src)/build_error.rs $(obj)/compiler_builtins.o FORCE +$(call if_changed_rule,rustc_library) +$(obj)/ffi.o: private skip_gendwarfksyms = 1 $(obj)/ffi.o: $(src)/ffi.rs $(obj)/compiler_builtins.o FORCE +$(call if_changed_rule,rustc_library) @@ -437,6 +466,7 @@ $(obj)/bindings.o: $(src)/bindings/lib.rs \ +$(call if_changed_rule,rustc_library) $(obj)/uapi.o: private rustc_target_flags = --extern ffi +$(obj)/uapi.o: private skip_gendwarfksyms = 1 $(obj)/uapi.o: $(src)/uapi/lib.rs \ $(obj)/ffi.o \ $(obj)/uapi/uapi_generated.rs FORCE diff --git a/samples/check-exec/inc.c b/samples/check-exec/inc.c index 94b87569d2a2..7f6ef06a2f06 100644 --- a/samples/check-exec/inc.c +++ b/samples/check-exec/inc.c @@ -21,8 +21,15 @@ #include <stdlib.h> #include <string.h> #include <sys/prctl.h> +#include <sys/syscall.h> #include <unistd.h> +static int sys_execveat(int dirfd, const char *pathname, char *const argv[], + char *const envp[], int flags) +{ + return syscall(__NR_execveat, dirfd, pathname, argv, envp, flags); +} + /* Returns 1 on error, 0 otherwise. */ static int interpret_buffer(char *buffer, size_t buffer_size) { @@ -78,8 +85,8 @@ static int interpret_stream(FILE *script, char *const script_name, * script execution. We must use the script file descriptor instead of * the script path name to avoid race conditions. */ - err = execveat(fileno(script), "", script_argv, envp, - AT_EMPTY_PATH | AT_EXECVE_CHECK); + err = sys_execveat(fileno(script), "", script_argv, envp, + AT_EMPTY_PATH | AT_EXECVE_CHECK); if (err && restrict_stream) { perror("ERROR: Script execution check"); return 1; diff --git a/scripts/Makefile b/scripts/Makefile index 546e8175e1c4..46f860529df5 100644 --- a/scripts/Makefile +++ b/scripts/Makefile @@ -53,7 +53,8 @@ hostprogs += unifdef gen_packed_field_checks targets += module.lds subdir-$(CONFIG_GCC_PLUGINS) += gcc-plugins -subdir-$(CONFIG_MODVERSIONS) += genksyms +subdir-$(CONFIG_GENKSYMS) += genksyms +subdir-$(CONFIG_GENDWARFKSYMS) += gendwarfksyms subdir-$(CONFIG_SECURITY_SELINUX) += selinux subdir-$(CONFIG_SECURITY_IPE) += ipe diff --git a/scripts/Makefile.build b/scripts/Makefile.build index c16e4cf54d77..993708d11874 100644 --- a/scripts/Makefile.build +++ b/scripts/Makefile.build @@ -107,13 +107,24 @@ cmd_cpp_i_c = $(CPP) $(c_flags) -o $@ $< $(obj)/%.i: $(obj)/%.c FORCE $(call if_changed_dep,cpp_i_c) +getexportsymbols = $(NM) $@ | sed -n 's/.* __export_symbol_\(.*\)/$(1)/p' + +gendwarfksyms = $(objtree)/scripts/gendwarfksyms/gendwarfksyms \ + $(if $(KBUILD_SYMTYPES), --symtypes $(@:.o=.symtypes)) \ + $(if $(KBUILD_GENDWARFKSYMS_STABLE), --stable) + genksyms = $(objtree)/scripts/genksyms/genksyms \ $(if $(KBUILD_SYMTYPES), -T $(@:.o=.symtypes)) \ $(if $(KBUILD_PRESERVE), -p) \ $(addprefix -r , $(wildcard $(@:.o=.symref))) # These mirror gensymtypes_S and co below, keep them in synch. 
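A note on the samples/check-exec change above: it switches to a raw execveat(2) syscall because not every libc ships a wrapper, and it relies on the new AT_EXECVE_CHECK flag, which asks the kernel whether execution would be permitted without executing anything. A stand-alone sketch of the same probe outside the sample (illustrative only; the fallback flag value is taken from this series' UAPI header and is an assumption if your system headers already define it differently):

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <sys/syscall.h>
#include <unistd.h>

#ifndef AT_EXECVE_CHECK
#define AT_EXECVE_CHECK 0x10000	/* value used by this series' uapi fcntl.h */
#endif

/* Returns 0 if execution of path would be permitted, non-zero otherwise.
 * Nothing is executed either way. */
static int exec_check(const char *path)
{
	char *const argv[] = { (char *)path, NULL };
	char *const envp[] = { NULL };
	int fd = open(path, O_RDONLY | O_CLOEXEC);
	int err;

	if (fd < 0)
		return -1;
	err = syscall(__NR_execveat, fd, "", argv, envp,
		      AT_EMPTY_PATH | AT_EXECVE_CHECK);
	close(fd);
	return err;
}

int main(int argc, char **argv)
{
	if (argc < 2)
		return 1;
	printf("%s: %s\n", argv[1], exec_check(argv[1]) ? "denied" : "allowed");
	return 0;
}

As in the sample, AT_EMPTY_PATH makes the check operate on the already-open descriptor rather than on a path name, which avoids time-of-check/time-of-use races.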
+ifdef CONFIG_GENDWARFKSYMS +cmd_gensymtypes_c = $(if $(skip_gendwarfksyms),, \ + $(call getexportsymbols,\1) | $(gendwarfksyms) $@) +else cmd_gensymtypes_c = $(CPP) -D__GENKSYMS__ $(c_flags) $< | $(genksyms) +endif # CONFIG_GENDWARFKSYMS # LLVM assembly # Generate .ll files from .c @@ -183,7 +194,9 @@ endif # CONFIG_FTRACE_MCOUNT_USE_RECORDMCOUNT is-standard-object = $(if $(filter-out y%, $(OBJECT_FILES_NON_STANDARD_$(target-stem).o)$(OBJECT_FILES_NON_STANDARD)n),$(is-kernel-object)) +ifdef CONFIG_OBJTOOL $(obj)/%.o: private objtool-enabled = $(if $(is-standard-object),$(if $(delay-objtool),$(is-single-obj-m),y)) +endif ifneq ($(findstring 1, $(KBUILD_EXTRA_WARN)),) cmd_warn_shared_object = $(if $(word 2, $(modname-multi)),$(warning $(kbuild-file): $*.o is added to multiple modules: $(modname-multi))) @@ -286,14 +299,26 @@ $(obj)/%.rs: $(obj)/%.rs.S FORCE # This is convoluted. The .S file must first be preprocessed to run guards and # expand names, then the resulting exports must be constructed into plain # EXPORT_SYMBOL(symbol); to build our dummy C file, and that gets preprocessed -# to make the genksyms input. +# to make the genksyms input or compiled into an object for gendwarfksyms. # # These mirror gensymtypes_c and co above, keep them in synch. -cmd_gensymtypes_S = \ - { echo "\#include <linux/kernel.h>" ; \ - echo "\#include <asm/asm-prototypes.h>" ; \ - $(NM) $@ | sed -n 's/.* __export_symbol_\(.*\)/EXPORT_SYMBOL(\1);/p' ; } | \ - $(CPP) -D__GENKSYMS__ $(c_flags) -xc - | $(genksyms) +getasmexports = \ + { echo "\#include <linux/kernel.h>" ; \ + echo "\#include <linux/string.h>" ; \ + echo "\#include <asm/asm-prototypes.h>" ; \ + $(call getexportsymbols,EXPORT_SYMBOL(\1);) ; } + +ifdef CONFIG_GENDWARFKSYMS +cmd_gensymtypes_S = \ + $(getasmexports) | \ + $(CC) $(c_flags) -c -o $(@:.o=.gendwarfksyms.o) -xc -; \ + $(call getexportsymbols,\1) | \ + $(gendwarfksyms) $(@:.o=.gendwarfksyms.o) +else +cmd_gensymtypes_S = \ + $(getasmexports) | \ + $(CPP) -D__GENKSYMS__ $(c_flags) -xc - | $(genksyms) +endif # CONFIG_GENDWARFKSYMS quiet_cmd_cpp_s_S = CPP $(quiet_modtag) $@ cmd_cpp_s_S = $(CPP) $(a_flags) -o $@ $< diff --git a/scripts/Makefile.defconf b/scripts/Makefile.defconf index 226ea3df3b4b..a44307f08e9d 100644 --- a/scripts/Makefile.defconf +++ b/scripts/Makefile.defconf @@ -1,6 +1,11 @@ # SPDX-License-Identifier: GPL-2.0 # Configuration heplers +cmd_merge_fragments = \ + $(srctree)/scripts/kconfig/merge_config.sh \ + $4 -m -O $(objtree) $(srctree)/arch/$(SRCARCH)/configs/$2 \ + $(foreach config,$3,$(srctree)/arch/$(SRCARCH)/configs/$(config).config) + # Creates 'merged defconfigs' # --------------------------------------------------------------------------- # Usage: @@ -8,9 +13,7 @@ # # Input config fragments without '.config' suffix define merge_into_defconfig - $(Q)$(CONFIG_SHELL) $(srctree)/scripts/kconfig/merge_config.sh \ - -m -O $(objtree) $(srctree)/arch/$(SRCARCH)/configs/$(1) \ - $(foreach config,$(2),$(srctree)/arch/$(SRCARCH)/configs/$(config).config) + $(call cmd,merge_fragments,$1,$2) +$(Q)$(MAKE) -f $(srctree)/Makefile olddefconfig endef @@ -22,8 +25,6 @@ endef # # Input config fragments without '.config' suffix define merge_into_defconfig_override - $(Q)$(CONFIG_SHELL) $(srctree)/scripts/kconfig/merge_config.sh \ - -Q -m -O $(objtree) $(srctree)/arch/$(SRCARCH)/configs/$(1) \ - $(foreach config,$(2),$(srctree)/arch/$(SRCARCH)/configs/$(config).config) + $(call cmd,merge_fragments,$1,$2,-Q) +$(Q)$(MAKE) -f $(srctree)/Makefile olddefconfig endef diff --git 
a/scripts/Makefile.extrawarn b/scripts/Makefile.extrawarn index 1d13cecc7cc7..eb719f6d8d53 100644 --- a/scripts/Makefile.extrawarn +++ b/scripts/Makefile.extrawarn @@ -77,6 +77,9 @@ KBUILD_CFLAGS += $(call cc-option,-Werror=designated-init) # Warn if there is an enum types mismatch KBUILD_CFLAGS += $(call cc-option,-Wenum-conversion) +# Explicitly clear padding bits during variable initialization +KBUILD_CFLAGS += $(call cc-option,-fzero-init-padding-bits=all) + KBUILD_CFLAGS += -Wextra KBUILD_CFLAGS += -Wunused diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib index 7395200538da..ad55ef201aac 100644 --- a/scripts/Makefile.lib +++ b/scripts/Makefile.lib @@ -287,6 +287,8 @@ delay-objtool := $(or $(CONFIG_LTO_CLANG),$(CONFIG_X86_KERNEL_IBT)) cmd_objtool = $(if $(objtool-enabled), ; $(objtool) $(objtool-args) $@) cmd_gen_objtooldep = $(if $(objtool-enabled), { echo ; echo '$@: $$(wildcard $(objtool))' ; } >> $(dot-target).cmd) +objtool-enabled := y + endif # CONFIG_OBJTOOL # Useful for describing the dependency of composite objects @@ -302,11 +304,11 @@ endef # =========================================================================== # These are shared by some Makefile.* files. -objtool-enabled := y - ifdef CONFIG_LTO_CLANG -# objtool cannot process LLVM IR. Make $(LD) covert LLVM IR to ELF here. -cmd_ld_single = $(if $(objtool-enabled), ; $(LD) $(ld_flags) -r -o $(tmp-target) $@; mv $(tmp-target) $@) +# Run $(LD) here to covert LLVM IR to ELF in the following cases: +# - when this object needs objtool processing, as objtool cannot process LLVM IR +# - when this is a single-object module, as modpost cannot process LLVM IR +cmd_ld_single = $(if $(objtool-enabled)$(is-single-obj-m), ; $(LD) $(ld_flags) -r -o $(tmp-target) $@; mv $(tmp-target) $@) endif quiet_cmd_cc_o_c = CC $(quiet_modtag) $@ @@ -374,6 +376,9 @@ quiet_cmd_ar = AR $@ quiet_cmd_objcopy = OBJCOPY $@ cmd_objcopy = $(OBJCOPY) $(OBJCOPYFLAGS) $(OBJCOPYFLAGS_$(@F)) $< $@ +quiet_cmd_strip_relocs = RSTRIP $@ +cmd_strip_relocs = $(OBJCOPY) --remove-section='.rel*' $@ + # Gzip # --------------------------------------------------------------------------- diff --git a/scripts/Makefile.modinst b/scripts/Makefile.modinst index f97c9926ed31..1628198f3e83 100644 --- a/scripts/Makefile.modinst +++ b/scripts/Makefile.modinst @@ -105,7 +105,7 @@ else sig-key := $(CONFIG_MODULE_SIG_KEY) endif quiet_cmd_sign = SIGN $@ - cmd_sign = scripts/sign-file $(CONFIG_MODULE_SIG_HASH) "$(sig-key)" certs/signing_key.x509 $@ \ + cmd_sign = $(objtree)/scripts/sign-file $(CONFIG_MODULE_SIG_HASH) "$(sig-key)" $(objtree)/certs/signing_key.x509 $@ \ $(if $(KBUILD_EXTMOD),|| true) ifeq ($(sign-only),) diff --git a/scripts/Makefile.modpost b/scripts/Makefile.modpost index ab0e94ea6249..d7d45067d08b 100644 --- a/scripts/Makefile.modpost +++ b/scripts/Makefile.modpost @@ -43,6 +43,8 @@ MODPOST = $(objtree)/scripts/mod/modpost modpost-args = \ $(if $(CONFIG_MODULES),-M) \ $(if $(CONFIG_MODVERSIONS),-m) \ + $(if $(CONFIG_BASIC_MODVERSIONS),-b) \ + $(if $(CONFIG_EXTENDED_MODVERSIONS),-x) \ $(if $(CONFIG_MODULE_SRCVERSION_ALL),-a) \ $(if $(CONFIG_SECTION_MISMATCH_WARN_ONLY),,-E) \ $(if $(KBUILD_MODPOST_WARN),-w) \ diff --git a/scripts/gdb/linux/cpus.py b/scripts/gdb/linux/cpus.py index 2f11c4f9c345..13eb8b3901b8 100644 --- a/scripts/gdb/linux/cpus.py +++ b/scripts/gdb/linux/cpus.py @@ -167,7 +167,7 @@ def get_current_task(cpu): var_ptr = gdb.parse_and_eval("&pcpu_hot.current_task") return per_cpu(var_ptr, cpu).dereference() elif utils.is_target_arch("aarch64"): - 
current_task_addr = gdb.parse_and_eval("$SP_EL0") + current_task_addr = gdb.parse_and_eval("(unsigned long)$SP_EL0") if (current_task_addr >> 63) != 0: current_task = current_task_addr.cast(task_ptr_type) return current_task.dereference() diff --git a/scripts/gendwarfksyms/.gitignore b/scripts/gendwarfksyms/.gitignore new file mode 100644 index 000000000000..0927f8d3cd96 --- /dev/null +++ b/scripts/gendwarfksyms/.gitignore @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0 +/gendwarfksyms diff --git a/scripts/gendwarfksyms/Makefile b/scripts/gendwarfksyms/Makefile new file mode 100644 index 000000000000..6334c7d3c4d5 --- /dev/null +++ b/scripts/gendwarfksyms/Makefile @@ -0,0 +1,12 @@ +# SPDX-License-Identifier: GPL-2.0 +hostprogs-always-y += gendwarfksyms + +gendwarfksyms-objs += gendwarfksyms.o +gendwarfksyms-objs += cache.o +gendwarfksyms-objs += die.o +gendwarfksyms-objs += dwarf.o +gendwarfksyms-objs += kabi.o +gendwarfksyms-objs += symbols.o +gendwarfksyms-objs += types.o + +HOSTLDLIBS_gendwarfksyms := -ldw -lelf -lz diff --git a/scripts/gendwarfksyms/cache.c b/scripts/gendwarfksyms/cache.c new file mode 100644 index 000000000000..c9c19b86a686 --- /dev/null +++ b/scripts/gendwarfksyms/cache.c @@ -0,0 +1,51 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2024 Google LLC + */ + +#include "gendwarfksyms.h" + +struct cache_item { + unsigned long key; + int value; + struct hlist_node hash; +}; + +void cache_set(struct cache *cache, unsigned long key, int value) +{ + struct cache_item *ci; + + ci = xmalloc(sizeof(struct cache_item)); + ci->key = key; + ci->value = value; + hash_add(cache->cache, &ci->hash, hash_32(key)); +} + +int cache_get(struct cache *cache, unsigned long key) +{ + struct cache_item *ci; + + hash_for_each_possible(cache->cache, ci, hash, hash_32(key)) { + if (ci->key == key) + return ci->value; + } + + return -1; +} + +void cache_init(struct cache *cache) +{ + hash_init(cache->cache); +} + +void cache_free(struct cache *cache) +{ + struct hlist_node *tmp; + struct cache_item *ci; + + hash_for_each_safe(cache->cache, ci, tmp, hash) { + free(ci); + } + + hash_init(cache->cache); +} diff --git a/scripts/gendwarfksyms/die.c b/scripts/gendwarfksyms/die.c new file mode 100644 index 000000000000..66bd4c9bc952 --- /dev/null +++ b/scripts/gendwarfksyms/die.c @@ -0,0 +1,166 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2024 Google LLC + */ + +#include <string.h> +#include "gendwarfksyms.h" + +#define DIE_HASH_BITS 15 + +/* {die->addr, state} -> struct die * */ +static HASHTABLE_DEFINE(die_map, 1 << DIE_HASH_BITS); + +static unsigned int map_hits; +static unsigned int map_misses; + +static inline unsigned int die_hash(uintptr_t addr, enum die_state state) +{ + return hash_32(addr_hash(addr) ^ (unsigned int)state); +} + +static void init_die(struct die *cd) +{ + cd->state = DIE_INCOMPLETE; + cd->mapped = false; + cd->fqn = NULL; + cd->tag = -1; + cd->addr = 0; + INIT_LIST_HEAD(&cd->fragments); +} + +static struct die *create_die(Dwarf_Die *die, enum die_state state) +{ + struct die *cd; + + cd = xmalloc(sizeof(struct die)); + init_die(cd); + cd->addr = (uintptr_t)die->addr; + + hash_add(die_map, &cd->hash, die_hash(cd->addr, state)); + return cd; +} + +int __die_map_get(uintptr_t addr, enum die_state state, struct die **res) +{ + struct die *cd; + + hash_for_each_possible(die_map, cd, hash, die_hash(addr, state)) { + if (cd->addr == addr && cd->state == state) { + *res = cd; + return 0; + } + } + + return -1; +} + +struct die *die_map_get(Dwarf_Die 
*die, enum die_state state) +{ + struct die *cd; + + if (__die_map_get((uintptr_t)die->addr, state, &cd) == 0) { + map_hits++; + return cd; + } + + map_misses++; + return create_die(die, state); +} + +static void reset_die(struct die *cd) +{ + struct die_fragment *tmp; + struct die_fragment *df; + + list_for_each_entry_safe(df, tmp, &cd->fragments, list) { + if (df->type == FRAGMENT_STRING) + free(df->data.str); + free(df); + } + + if (cd->fqn && *cd->fqn) + free(cd->fqn); + init_die(cd); +} + +void die_map_for_each(die_map_callback_t func, void *arg) +{ + struct hlist_node *tmp; + struct die *cd; + + hash_for_each_safe(die_map, cd, tmp, hash) { + func(cd, arg); + } +} + +void die_map_free(void) +{ + struct hlist_node *tmp; + unsigned int stats[DIE_LAST + 1]; + struct die *cd; + int i; + + memset(stats, 0, sizeof(stats)); + + hash_for_each_safe(die_map, cd, tmp, hash) { + stats[cd->state]++; + reset_die(cd); + free(cd); + } + hash_init(die_map); + + if (map_hits + map_misses > 0) + debug("hits %u, misses %u (hit rate %.02f%%)", map_hits, + map_misses, + (100.0f * map_hits) / (map_hits + map_misses)); + + for (i = 0; i <= DIE_LAST; i++) + debug("%s: %u entries", die_state_name(i), stats[i]); +} + +static struct die_fragment *append_item(struct die *cd) +{ + struct die_fragment *df; + + df = xmalloc(sizeof(struct die_fragment)); + df->type = FRAGMENT_EMPTY; + list_add_tail(&df->list, &cd->fragments); + return df; +} + +void die_map_add_string(struct die *cd, const char *str) +{ + struct die_fragment *df; + + if (!cd) + return; + + df = append_item(cd); + df->data.str = xstrdup(str); + df->type = FRAGMENT_STRING; +} + +void die_map_add_linebreak(struct die *cd, int linebreak) +{ + struct die_fragment *df; + + if (!cd) + return; + + df = append_item(cd); + df->data.linebreak = linebreak; + df->type = FRAGMENT_LINEBREAK; +} + +void die_map_add_die(struct die *cd, struct die *child) +{ + struct die_fragment *df; + + if (!cd) + return; + + df = append_item(cd); + df->data.addr = child->addr; + df->type = FRAGMENT_DIE; +} diff --git a/scripts/gendwarfksyms/dwarf.c b/scripts/gendwarfksyms/dwarf.c new file mode 100644 index 000000000000..534d9aa7c114 --- /dev/null +++ b/scripts/gendwarfksyms/dwarf.c @@ -0,0 +1,1159 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2024 Google LLC + */ + +#include <assert.h> +#include <inttypes.h> +#include <stdarg.h> +#include "gendwarfksyms.h" + +/* See get_union_kabi_status */ +#define KABI_PREFIX "__kabi_" +#define KABI_PREFIX_LEN (sizeof(KABI_PREFIX) - 1) +#define KABI_RESERVED_PREFIX "reserved" +#define KABI_RESERVED_PREFIX_LEN (sizeof(KABI_RESERVED_PREFIX) - 1) +#define KABI_RENAMED_PREFIX "renamed" +#define KABI_RENAMED_PREFIX_LEN (sizeof(KABI_RENAMED_PREFIX) - 1) +#define KABI_IGNORED_PREFIX "ignored" +#define KABI_IGNORED_PREFIX_LEN (sizeof(KABI_IGNORED_PREFIX) - 1) + +static inline bool is_kabi_prefix(const char *name) +{ + return name && !strncmp(name, KABI_PREFIX, KABI_PREFIX_LEN); +} + +enum kabi_status { + /* >0 to stop DIE processing */ + KABI_NORMAL = 1, + KABI_RESERVED, + KABI_IGNORED, +}; + +static bool do_linebreak; +static int indentation_level; + +/* Line breaks and indentation for pretty-printing */ +static void process_linebreak(struct die *cache, int n) +{ + indentation_level += n; + do_linebreak = true; + die_map_add_linebreak(cache, n); +} + +#define DEFINE_GET_ATTR(attr, type) \ + static bool get_##attr##_attr(Dwarf_Die *die, unsigned int id, \ + type *value) \ + { \ + Dwarf_Attribute da; \ + return dwarf_attr(die, id, &da) && 
\ + !dwarf_form##attr(&da, value); \ + } + +DEFINE_GET_ATTR(flag, bool) +DEFINE_GET_ATTR(udata, Dwarf_Word) + +static bool get_ref_die_attr(Dwarf_Die *die, unsigned int id, Dwarf_Die *value) +{ + Dwarf_Attribute da; + + /* dwarf_formref_die returns a pointer instead of an error value. */ + return dwarf_attr(die, id, &da) && dwarf_formref_die(&da, value); +} + +#define DEFINE_GET_STRING_ATTR(attr) \ + static const char *get_##attr##_attr(Dwarf_Die *die) \ + { \ + Dwarf_Attribute da; \ + if (dwarf_attr(die, DW_AT_##attr, &da)) \ + return dwarf_formstring(&da); \ + return NULL; \ + } + +DEFINE_GET_STRING_ATTR(name) +DEFINE_GET_STRING_ATTR(linkage_name) + +static const char *get_symbol_name(Dwarf_Die *die) +{ + const char *name; + + /* rustc uses DW_AT_linkage_name for exported symbols */ + name = get_linkage_name_attr(die); + if (!name) + name = get_name_attr(die); + + return name; +} + +static bool match_export_symbol(struct state *state, Dwarf_Die *die) +{ + Dwarf_Die *source = die; + Dwarf_Die origin; + + /* If the DIE has an abstract origin, use it for type information. */ + if (get_ref_die_attr(die, DW_AT_abstract_origin, &origin)) + source = &origin; + + state->sym = symbol_get(get_symbol_name(die)); + + /* Look up using the origin name if there are no matches. */ + if (!state->sym && source != die) + state->sym = symbol_get(get_symbol_name(source)); + + state->die = *source; + return !!state->sym; +} + +/* DW_AT_decl_file -> struct srcfile */ +static struct cache srcfile_cache; + +static bool is_definition_private(Dwarf_Die *die) +{ + Dwarf_Word filenum; + Dwarf_Files *files; + Dwarf_Die cudie; + const char *s; + int res; + + /* + * Definitions in .c files cannot change the public ABI, + * so consider them private. + */ + if (!get_udata_attr(die, DW_AT_decl_file, &filenum)) + return false; + + res = cache_get(&srcfile_cache, filenum); + if (res >= 0) + return !!res; + + if (!dwarf_cu_die(die->cu, &cudie, NULL, NULL, NULL, NULL, NULL, NULL)) + error("dwarf_cu_die failed: '%s'", dwarf_errmsg(-1)); + + if (dwarf_getsrcfiles(&cudie, &files, NULL)) + error("dwarf_getsrcfiles failed: '%s'", dwarf_errmsg(-1)); + + s = dwarf_filesrc(files, filenum, NULL, NULL); + if (!s) + error("dwarf_filesrc failed: '%s'", dwarf_errmsg(-1)); + + s = strrchr(s, '.'); + res = s && !strcmp(s, ".c"); + cache_set(&srcfile_cache, filenum, res); + + return !!res; +} + +static bool is_kabi_definition(struct die *cache, Dwarf_Die *die) +{ + bool value; + + if (get_flag_attr(die, DW_AT_declaration, &value) && value) + return false; + + if (kabi_is_declonly(cache->fqn)) + return false; + + return !is_definition_private(die); +} + +/* + * Type string processing + */ +static void process(struct die *cache, const char *s) +{ + s = s ?: "<null>"; + + if (dump_dies && do_linebreak) { + fputs("\n", stderr); + for (int i = 0; i < indentation_level; i++) + fputs(" ", stderr); + do_linebreak = false; + } + if (dump_dies) + fputs(s, stderr); + + if (cache) + die_debug_r("cache %p string '%s'", cache, s); + die_map_add_string(cache, s); +} + +#define MAX_FMT_BUFFER_SIZE 128 + +static void process_fmt(struct die *cache, const char *fmt, ...) 
+{ + char buf[MAX_FMT_BUFFER_SIZE]; + va_list args; + + va_start(args, fmt); + + if (checkp(vsnprintf(buf, sizeof(buf), fmt, args)) >= sizeof(buf)) + error("vsnprintf overflow: increase MAX_FMT_BUFFER_SIZE"); + + process(cache, buf); + va_end(args); +} + +#define MAX_FQN_SIZE 64 + +/* Get a fully qualified name from DWARF scopes */ +static char *get_fqn(Dwarf_Die *die) +{ + const char *list[MAX_FQN_SIZE]; + Dwarf_Die *scopes = NULL; + bool has_name = false; + char *fqn = NULL; + char *p; + int count = 0; + int len = 0; + int res; + int i; + + res = checkp(dwarf_getscopes_die(die, &scopes)); + if (!res) { + list[count] = get_name_attr(die); + + if (!list[count]) + return NULL; + + len += strlen(list[count]); + count++; + + goto done; + } + + for (i = res - 1; i >= 0 && count < MAX_FQN_SIZE; i--) { + if (dwarf_tag(&scopes[i]) == DW_TAG_compile_unit) + continue; + + list[count] = get_name_attr(&scopes[i]); + + if (list[count]) { + has_name = true; + } else { + list[count] = "<anonymous>"; + has_name = false; + } + + len += strlen(list[count]); + count++; + + if (i > 0) { + list[count++] = "::"; + len += 2; + } + } + + free(scopes); + + if (count == MAX_FQN_SIZE) + warn("increase MAX_FQN_SIZE: reached the maximum"); + + /* Consider the DIE unnamed if the last scope doesn't have a name */ + if (!has_name) + return NULL; +done: + fqn = xmalloc(len + 1); + *fqn = '\0'; + + p = fqn; + for (i = 0; i < count; i++) + p = stpcpy(p, list[i]); + + return fqn; +} + +static void update_fqn(struct die *cache, Dwarf_Die *die) +{ + if (!cache->fqn) + cache->fqn = get_fqn(die) ?: ""; +} + +static void process_fqn(struct die *cache, Dwarf_Die *die) +{ + update_fqn(cache, die); + if (*cache->fqn) + process(cache, " "); + process(cache, cache->fqn); +} + +#define DEFINE_PROCESS_UDATA_ATTRIBUTE(attribute) \ + static void process_##attribute##_attr(struct die *cache, \ + Dwarf_Die *die) \ + { \ + Dwarf_Word value; \ + if (get_udata_attr(die, DW_AT_##attribute, &value)) \ + process_fmt(cache, " " #attribute "(%" PRIu64 ")", \ + value); \ + } + +DEFINE_PROCESS_UDATA_ATTRIBUTE(accessibility) +DEFINE_PROCESS_UDATA_ATTRIBUTE(alignment) +DEFINE_PROCESS_UDATA_ATTRIBUTE(bit_size) +DEFINE_PROCESS_UDATA_ATTRIBUTE(byte_size) +DEFINE_PROCESS_UDATA_ATTRIBUTE(encoding) +DEFINE_PROCESS_UDATA_ATTRIBUTE(data_bit_offset) +DEFINE_PROCESS_UDATA_ATTRIBUTE(data_member_location) +DEFINE_PROCESS_UDATA_ATTRIBUTE(discr_value) + +/* Match functions -- die_match_callback_t */ +#define DEFINE_MATCH(type) \ + static bool match_##type##_type(Dwarf_Die *die) \ + { \ + return dwarf_tag(die) == DW_TAG_##type##_type; \ + } + +DEFINE_MATCH(enumerator) +DEFINE_MATCH(formal_parameter) +DEFINE_MATCH(member) +DEFINE_MATCH(subrange) + +bool match_all(Dwarf_Die *die) +{ + return true; +} + +int process_die_container(struct state *state, struct die *cache, + Dwarf_Die *die, die_callback_t func, + die_match_callback_t match) +{ + Dwarf_Die current; + int res; + + /* Track the first item in lists. 
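A side note on get_fqn() above: it walks the dwarf_getscopes_die() result outermost-first, records each scope name (or "<anonymous>"), totals the lengths, and only then concatenates everything with stpcpy() into a single allocation. The join step in isolation, with a hard-coded scope list instead of libdw (illustrative only):

#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int main(void)
{
	const char *list[] = { "outer_ns", "::", "<anonymous>", "::", "widget" };
	size_t count = sizeof(list) / sizeof(list[0]);
	size_t len = 0;
	char *fqn, *p;

	for (size_t i = 0; i < count; i++)
		len += strlen(list[i]);

	fqn = malloc(len + 1);
	if (!fqn)
		return 1;

	p = fqn;
	for (size_t i = 0; i < count; i++)
		p = stpcpy(p, list[i]);	/* returns a pointer past the copied text */

	printf("%s\n", fqn);		/* outer_ns::<anonymous>::widget */
	free(fqn);
	return 0;
}

stpcpy() returns a pointer to the terminating NUL it wrote, which is what keeps the chained copy linear instead of repeatedly rescanning the buffer as strcat() would.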
*/ + if (state) + state->first_list_item = true; + + res = checkp(dwarf_child(die, &current)); + while (!res) { + if (match(&current)) { + /* <0 = error, 0 = continue, >0 = stop */ + res = checkp(func(state, cache, &current)); + if (res) + goto out; + } + + res = checkp(dwarf_siblingof(&current, &current)); + } + + res = 0; +out: + if (state) + state->first_list_item = false; + + return res; +} + +static int process_type(struct state *state, struct die *parent, + Dwarf_Die *die); + +static void process_type_attr(struct state *state, struct die *cache, + Dwarf_Die *die) +{ + Dwarf_Die type; + + if (get_ref_die_attr(die, DW_AT_type, &type)) { + check(process_type(state, cache, &type)); + return; + } + + /* Compilers can omit DW_AT_type -- print out 'void' to clarify */ + process(cache, "base_type void"); +} + +static void process_list_comma(struct state *state, struct die *cache) +{ + if (state->first_list_item) { + state->first_list_item = false; + } else { + process(cache, " ,"); + process_linebreak(cache, 0); + } +} + +/* Comma-separated with DW_AT_type */ +static void __process_list_type(struct state *state, struct die *cache, + Dwarf_Die *die, const char *type) +{ + const char *name = get_name_attr(die); + + if (stable) { + if (is_kabi_prefix(name)) + name = NULL; + state->kabi.orig_name = NULL; + } + + process_list_comma(state, cache); + process(cache, type); + process_type_attr(state, cache, die); + + if (stable && state->kabi.orig_name) + name = state->kabi.orig_name; + if (name) { + process(cache, " "); + process(cache, name); + } + + process_accessibility_attr(cache, die); + process_bit_size_attr(cache, die); + process_data_bit_offset_attr(cache, die); + process_data_member_location_attr(cache, die); +} + +#define DEFINE_PROCESS_LIST_TYPE(type) \ + static void process_##type##_type(struct state *state, \ + struct die *cache, Dwarf_Die *die) \ + { \ + __process_list_type(state, cache, die, #type " "); \ + } + +DEFINE_PROCESS_LIST_TYPE(formal_parameter) +DEFINE_PROCESS_LIST_TYPE(member) + +/* Container types with DW_AT_type */ +static void __process_type(struct state *state, struct die *cache, + Dwarf_Die *die, const char *type) +{ + process(cache, type); + process_fqn(cache, die); + process(cache, " {"); + process_linebreak(cache, 1); + process_type_attr(state, cache, die); + process_linebreak(cache, -1); + process(cache, "}"); + process_byte_size_attr(cache, die); + process_alignment_attr(cache, die); +} + +#define DEFINE_PROCESS_TYPE(type) \ + static void process_##type##_type(struct state *state, \ + struct die *cache, Dwarf_Die *die) \ + { \ + __process_type(state, cache, die, #type "_type"); \ + } + +DEFINE_PROCESS_TYPE(atomic) +DEFINE_PROCESS_TYPE(const) +DEFINE_PROCESS_TYPE(immutable) +DEFINE_PROCESS_TYPE(packed) +DEFINE_PROCESS_TYPE(pointer) +DEFINE_PROCESS_TYPE(reference) +DEFINE_PROCESS_TYPE(restrict) +DEFINE_PROCESS_TYPE(rvalue_reference) +DEFINE_PROCESS_TYPE(shared) +DEFINE_PROCESS_TYPE(template_type_parameter) +DEFINE_PROCESS_TYPE(volatile) +DEFINE_PROCESS_TYPE(typedef) + +static void process_subrange_type(struct state *state, struct die *cache, + Dwarf_Die *die) +{ + Dwarf_Word count = 0; + + if (get_udata_attr(die, DW_AT_count, &count)) + process_fmt(cache, "[%" PRIu64 "]", count); + else if (get_udata_attr(die, DW_AT_upper_bound, &count)) + process_fmt(cache, "[%" PRIu64 "]", count + 1); + else + process(cache, "[]"); +} + +static void process_array_type(struct state *state, struct die *cache, + Dwarf_Die *die) +{ + process(cache, "array_type"); + /* Array size */ + 
check(process_die_container(state, cache, die, process_type, + match_subrange_type)); + process(cache, " {"); + process_linebreak(cache, 1); + process_type_attr(state, cache, die); + process_linebreak(cache, -1); + process(cache, "}"); +} + +static void __process_subroutine_type(struct state *state, struct die *cache, + Dwarf_Die *die, const char *type) +{ + process(cache, type); + process(cache, " ("); + process_linebreak(cache, 1); + /* Parameters */ + check(process_die_container(state, cache, die, process_type, + match_formal_parameter_type)); + process_linebreak(cache, -1); + process(cache, ")"); + process_linebreak(cache, 0); + /* Return type */ + process(cache, "-> "); + process_type_attr(state, cache, die); +} + +static void process_subroutine_type(struct state *state, struct die *cache, + Dwarf_Die *die) +{ + __process_subroutine_type(state, cache, die, "subroutine_type"); +} + +static void process_variant_type(struct state *state, struct die *cache, + Dwarf_Die *die) +{ + process_list_comma(state, cache); + process(cache, "variant {"); + process_linebreak(cache, 1); + check(process_die_container(state, cache, die, process_type, + match_member_type)); + process_linebreak(cache, -1); + process(cache, "}"); + process_discr_value_attr(cache, die); +} + +static void process_variant_part_type(struct state *state, struct die *cache, + Dwarf_Die *die) +{ + process_list_comma(state, cache); + process(cache, "variant_part {"); + process_linebreak(cache, 1); + check(process_die_container(state, cache, die, process_type, + match_all)); + process_linebreak(cache, -1); + process(cache, "}"); +} + +static int get_kabi_status(Dwarf_Die *die, const char **suffix) +{ + const char *name = get_name_attr(die); + + if (suffix) + *suffix = NULL; + + if (is_kabi_prefix(name)) { + name += KABI_PREFIX_LEN; + + if (!strncmp(name, KABI_RESERVED_PREFIX, + KABI_RESERVED_PREFIX_LEN)) + return KABI_RESERVED; + if (!strncmp(name, KABI_IGNORED_PREFIX, + KABI_IGNORED_PREFIX_LEN)) + return KABI_IGNORED; + + if (!strncmp(name, KABI_RENAMED_PREFIX, + KABI_RENAMED_PREFIX_LEN)) { + if (suffix) { + name += KABI_RENAMED_PREFIX_LEN; + *suffix = name; + } + return KABI_RESERVED; + } + } + + return KABI_NORMAL; +} + +static int check_struct_member_kabi_status(struct state *state, + struct die *__unused, Dwarf_Die *die) +{ + int res; + + assert(dwarf_tag(die) == DW_TAG_member_type); + + /* + * If the union member is a struct, expect the __kabi field to + * be the first member of the structure, i.e..: + * + * union { + * type new_member; + * struct { + * type __kabi_field; + * } + * }; + */ + res = get_kabi_status(die, &state->kabi.orig_name); + + if (res == KABI_RESERVED && + !get_ref_die_attr(die, DW_AT_type, &state->kabi.placeholder)) + error("structure member missing a type?"); + + return res; +} + +static int check_union_member_kabi_status(struct state *state, + struct die *__unused, Dwarf_Die *die) +{ + Dwarf_Die type; + int res; + + assert(dwarf_tag(die) == DW_TAG_member_type); + + if (!get_ref_die_attr(die, DW_AT_type, &type)) + error("union member missing a type?"); + + /* + * We expect a union with two members. Check if either of them + * has a __kabi name prefix, i.e.: + * + * union { + * ... + * type memberN; // <- type, N = {0,1} + * ... + * }; + * + * The member can also be a structure type, in which case we'll + * check the first structure member. + * + * In any case, stop processing after we've seen two members. 
+ */ + res = get_kabi_status(die, &state->kabi.orig_name); + + if (res == KABI_RESERVED) + state->kabi.placeholder = type; + if (res != KABI_NORMAL) + return res; + + if (dwarf_tag(&type) == DW_TAG_structure_type) + res = checkp(process_die_container( + state, NULL, &type, check_struct_member_kabi_status, + match_member_type)); + + if (res <= KABI_NORMAL && ++state->kabi.members < 2) + return 0; /* Continue */ + + return res; +} + +static int get_union_kabi_status(Dwarf_Die *die, Dwarf_Die *placeholder, + const char **orig_name) +{ + struct state state; + int res; + + if (!stable) + return KABI_NORMAL; + + /* + * To maintain a stable kABI, distributions may choose to reserve + * space in structs for later use by adding placeholder members, + * for example: + * + * struct s { + * u32 a; + * // an 8-byte placeholder for future use + * u64 __kabi_reserved_0; + * }; + * + * When the reserved member is taken into use, the type change + * would normally cause the symbol version to change as well, but + * if the replacement uses the following convention, gendwarfksyms + * continues to use the placeholder type for versioning instead, + * thus maintaining the same symbol version: + * + * struct s { + * u32 a; + * union { + * // placeholder replaced with a new member `b` + * struct t b; + * struct { + * // the placeholder type that is still + * // used for versioning + * u64 __kabi_reserved_0; + * }; + * }; + * }; + * + * I.e., as long as the replaced member is in a union, and the + * placeholder has a __kabi_reserved name prefix, we'll continue + * to use the placeholder type (here u64) for version calculation + * instead of the union type. + * + * It's also possible to ignore new members from versioning if + * they've been added to alignment holes, for example, by + * including them in a union with another member that uses the + * __kabi_ignored name prefix: + * + * struct s { + * u32 a; + * // an alignment hole is used to add `n` + * union { + * u32 n; + * // hide the entire union member from versioning + * u8 __kabi_ignored_0; + * }; + * u64 b; + * }; + * + * Note that the user of this feature is responsible for ensuring + * that the structure actually remains ABI compatible. 
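The raw union layout spelled out in the comment above is exactly what the helper macros in scripts/gendwarfksyms/examples/kabi.h, added later in this patch, expand to. As a quick illustration of how a distribution might take a reserved slot into use with those macros (the struct and member names here are invented):

#include "kabi.h"

struct example {
	unsigned int a;
	/* Originally: KABI_RESERVE(1); an unused unsigned long kept for backports. */
	KABI_USE(1, unsigned long new_feature_flags);
};

KABI_USE(1, ...) wraps the new member and the original __kabi_reserved1 placeholder in a union, and __KABI_NORMAL_SIZE_ALIGN adds _Static_asserts so the replacement cannot grow or over-align the slot; gendwarfksyms keeps using the placeholder type for version calculation, so the symbol version stays unchanged.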
+ */ + memset(&state.kabi, 0, sizeof(struct kabi_state)); + + res = checkp(process_die_container(&state, NULL, die, + check_union_member_kabi_status, + match_member_type)); + + if (res == KABI_RESERVED) { + if (placeholder) + *placeholder = state.kabi.placeholder; + if (orig_name) + *orig_name = state.kabi.orig_name; + } + + return res; +} + +static bool is_kabi_ignored(Dwarf_Die *die) +{ + Dwarf_Die type; + + if (!stable) + return false; + + if (!get_ref_die_attr(die, DW_AT_type, &type)) + error("member missing a type?"); + + return dwarf_tag(&type) == DW_TAG_union_type && + checkp(get_union_kabi_status(&type, NULL, NULL)) == KABI_IGNORED; +} + +static int ___process_structure_type(struct state *state, struct die *cache, + Dwarf_Die *die) +{ + switch (dwarf_tag(die)) { + case DW_TAG_member: + if (is_kabi_ignored(die)) + return 0; + return check(process_type(state, cache, die)); + case DW_TAG_variant_part: + return check(process_type(state, cache, die)); + case DW_TAG_class_type: + case DW_TAG_enumeration_type: + case DW_TAG_structure_type: + case DW_TAG_template_type_parameter: + case DW_TAG_union_type: + case DW_TAG_subprogram: + /* Skip non-member types, including member functions */ + return 0; + default: + error("unexpected structure_type child: %x", dwarf_tag(die)); + } +} + +static void __process_structure_type(struct state *state, struct die *cache, + Dwarf_Die *die, const char *type, + die_callback_t process_func, + die_match_callback_t match_func) +{ + bool expand; + + process(cache, type); + process_fqn(cache, die); + process(cache, " {"); + process_linebreak(cache, 1); + + expand = state->expand.expand && is_kabi_definition(cache, die); + + if (expand) { + state->expand.current_fqn = cache->fqn; + check(process_die_container(state, cache, die, process_func, + match_func)); + } + + process_linebreak(cache, -1); + process(cache, "}"); + + if (expand) { + process_byte_size_attr(cache, die); + process_alignment_attr(cache, die); + } +} + +#define DEFINE_PROCESS_STRUCTURE_TYPE(structure) \ + static void process_##structure##_type( \ + struct state *state, struct die *cache, Dwarf_Die *die) \ + { \ + __process_structure_type(state, cache, die, \ + #structure "_type", \ + ___process_structure_type, \ + match_all); \ + } + +DEFINE_PROCESS_STRUCTURE_TYPE(class) +DEFINE_PROCESS_STRUCTURE_TYPE(structure) + +static void process_union_type(struct state *state, struct die *cache, + Dwarf_Die *die) +{ + Dwarf_Die placeholder; + + int res = checkp(get_union_kabi_status(die, &placeholder, + &state->kabi.orig_name)); + + if (res == KABI_RESERVED) + check(process_type(state, cache, &placeholder)); + if (res > KABI_NORMAL) + return; + + __process_structure_type(state, cache, die, "union_type", + ___process_structure_type, match_all); +} + +static void process_enumerator_type(struct state *state, struct die *cache, + Dwarf_Die *die) +{ + bool overridden = false; + Dwarf_Word value; + + if (stable) { + /* Get the fqn before we process anything */ + update_fqn(cache, die); + + if (kabi_is_enumerator_ignored(state->expand.current_fqn, + cache->fqn)) + return; + + overridden = kabi_get_enumerator_value( + state->expand.current_fqn, cache->fqn, &value); + } + + process_list_comma(state, cache); + process(cache, "enumerator"); + process_fqn(cache, die); + + if (overridden || get_udata_attr(die, DW_AT_const_value, &value)) { + process(cache, " = "); + process_fmt(cache, "%" PRIu64, value); + } +} + +static void process_enumeration_type(struct state *state, struct die *cache, + Dwarf_Die *die) +{ + 
__process_structure_type(state, cache, die, "enumeration_type", + process_type, match_enumerator_type); +} + +static void process_base_type(struct state *state, struct die *cache, + Dwarf_Die *die) +{ + process(cache, "base_type"); + process_fqn(cache, die); + process_byte_size_attr(cache, die); + process_encoding_attr(cache, die); + process_alignment_attr(cache, die); +} + +static void process_unspecified_type(struct state *state, struct die *cache, + Dwarf_Die *die) +{ + /* + * These can be emitted for stand-alone assembly code, which means we + * might run into them in vmlinux.o. + */ + process(cache, "unspecified_type"); +} + +static void process_cached(struct state *state, struct die *cache, + Dwarf_Die *die) +{ + struct die_fragment *df; + Dwarf_Die child; + + list_for_each_entry(df, &cache->fragments, list) { + switch (df->type) { + case FRAGMENT_STRING: + die_debug_b("cache %p STRING '%s'", cache, + df->data.str); + process(NULL, df->data.str); + break; + case FRAGMENT_LINEBREAK: + process_linebreak(NULL, df->data.linebreak); + break; + case FRAGMENT_DIE: + if (!dwarf_die_addr_die(dwarf_cu_getdwarf(die->cu), + (void *)df->data.addr, &child)) + error("dwarf_die_addr_die failed"); + die_debug_b("cache %p DIE addr %" PRIxPTR " tag %x", + cache, df->data.addr, dwarf_tag(&child)); + check(process_type(state, NULL, &child)); + break; + default: + error("empty die_fragment"); + } + } +} + +static void state_init(struct state *state) +{ + state->expand.expand = true; + state->expand.current_fqn = NULL; + cache_init(&state->expansion_cache); +} + +static void expansion_state_restore(struct expansion_state *state, + struct expansion_state *saved) +{ + state->expand = saved->expand; + state->current_fqn = saved->current_fqn; +} + +static void expansion_state_save(struct expansion_state *state, + struct expansion_state *saved) +{ + expansion_state_restore(saved, state); +} + +static bool is_expanded_type(int tag) +{ + return tag == DW_TAG_class_type || tag == DW_TAG_structure_type || + tag == DW_TAG_union_type || tag == DW_TAG_enumeration_type; +} + +#define PROCESS_TYPE(type) \ + case DW_TAG_##type##_type: \ + process_##type##_type(state, cache, die); \ + break; + +static int process_type(struct state *state, struct die *parent, Dwarf_Die *die) +{ + enum die_state want_state = DIE_COMPLETE; + struct die *cache; + struct expansion_state saved; + int tag = dwarf_tag(die); + + expansion_state_save(&state->expand, &saved); + + /* + * Structures and enumeration types are expanded only once per + * exported symbol. This is sufficient for detecting ABI changes + * within the structure. + */ + if (is_expanded_type(tag)) { + if (cache_was_expanded(&state->expansion_cache, die->addr)) + state->expand.expand = false; + + if (state->expand.expand) + cache_mark_expanded(&state->expansion_cache, die->addr); + else + want_state = DIE_UNEXPANDED; + } + + /* + * If we have want_state already cached, use it instead of walking + * through DWARF. 
+ */ + cache = die_map_get(die, want_state); + + if (cache->state == want_state) { + die_debug_g("cached addr %p tag %x -- %s", die->addr, tag, + die_state_name(cache->state)); + + process_cached(state, cache, die); + die_map_add_die(parent, cache); + + expansion_state_restore(&state->expand, &saved); + return 0; + } + + die_debug_g("addr %p tag %x -- %s -> %s", die->addr, tag, + die_state_name(cache->state), die_state_name(want_state)); + + switch (tag) { + /* Type modifiers */ + PROCESS_TYPE(atomic) + PROCESS_TYPE(const) + PROCESS_TYPE(immutable) + PROCESS_TYPE(packed) + PROCESS_TYPE(pointer) + PROCESS_TYPE(reference) + PROCESS_TYPE(restrict) + PROCESS_TYPE(rvalue_reference) + PROCESS_TYPE(shared) + PROCESS_TYPE(volatile) + /* Container types */ + PROCESS_TYPE(class) + PROCESS_TYPE(structure) + PROCESS_TYPE(union) + PROCESS_TYPE(enumeration) + /* Subtypes */ + PROCESS_TYPE(enumerator) + PROCESS_TYPE(formal_parameter) + PROCESS_TYPE(member) + PROCESS_TYPE(subrange) + PROCESS_TYPE(template_type_parameter) + PROCESS_TYPE(variant) + PROCESS_TYPE(variant_part) + /* Other types */ + PROCESS_TYPE(array) + PROCESS_TYPE(base) + PROCESS_TYPE(subroutine) + PROCESS_TYPE(typedef) + PROCESS_TYPE(unspecified) + default: + error("unexpected type: %x", tag); + } + + die_debug_r("parent %p cache %p die addr %p tag %x", parent, cache, + die->addr, tag); + + /* Update cache state and append to the parent (if any) */ + cache->tag = tag; + cache->state = want_state; + die_map_add_die(parent, cache); + + expansion_state_restore(&state->expand, &saved); + return 0; +} + +/* + * Exported symbol processing + */ +static struct die *get_symbol_cache(struct state *state, Dwarf_Die *die) +{ + struct die *cache; + + cache = die_map_get(die, DIE_SYMBOL); + + if (cache->state != DIE_INCOMPLETE) + return NULL; /* We already processed a symbol for this DIE */ + + cache->tag = dwarf_tag(die); + return cache; +} + +static void process_symbol(struct state *state, Dwarf_Die *die, + die_callback_t process_func) +{ + struct die *cache; + + symbol_set_die(state->sym, die); + + cache = get_symbol_cache(state, die); + if (!cache) + return; + + debug("%s", state->sym->name); + check(process_func(state, cache, die)); + cache->state = DIE_SYMBOL; + if (dump_dies) + fputs("\n", stderr); +} + +static int __process_subprogram(struct state *state, struct die *cache, + Dwarf_Die *die) +{ + __process_subroutine_type(state, cache, die, "subprogram"); + return 0; +} + +static void process_subprogram(struct state *state, Dwarf_Die *die) +{ + process_symbol(state, die, __process_subprogram); +} + +static int __process_variable(struct state *state, struct die *cache, + Dwarf_Die *die) +{ + process(cache, "variable "); + process_type_attr(state, cache, die); + return 0; +} + +static void process_variable(struct state *state, Dwarf_Die *die) +{ + process_symbol(state, die, __process_variable); +} + +static void save_symbol_ptr(struct state *state) +{ + Dwarf_Die ptr_type; + Dwarf_Die type; + + if (!get_ref_die_attr(&state->die, DW_AT_type, &ptr_type) || + dwarf_tag(&ptr_type) != DW_TAG_pointer_type) + error("%s must be a pointer type!", + get_symbol_name(&state->die)); + + if (!get_ref_die_attr(&ptr_type, DW_AT_type, &type)) + error("%s pointer missing a type attribute?", + get_symbol_name(&state->die)); + + /* + * Save the symbol pointer DIE in case the actual symbol is + * missing from the DWARF. Clang, for example, intentionally + * omits external symbols from the debugging information. 
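The save_symbol_ptr() comment above relies on the kernel build emitting a helper pointer next to each exported symbol so that a usable DWARF type survives even when the compiler (Clang in particular) drops the symbol's own debug info entry. The macro that does this lives in the export machinery rather than in this hunk; roughly, and with names assumed rather than quoted from the patch, the emitted object looks like:

/* Illustrative shape only: a pointer whose DW_AT_type refers back to the
 * exported function, letting gendwarfksyms recover the prototype from the
 * pointer even if my_exported_func itself ends up with no DWARF entry. */
int my_exported_func(int arg)
{
	return arg;
}

static typeof(my_exported_func) *__gendwarfksyms_ptr_my_exported_func
	__attribute__((used)) = &my_exported_func;

This matches the checks above: the helper's DIE must be a DW_TAG_pointer_type, and save_symbol_ptr() records either the pointed-to subroutine type or the pointer itself for later handling by process_symbol_ptr().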
+ */ + if (dwarf_tag(&type) == DW_TAG_subroutine_type) + symbol_set_ptr(state->sym, &type); + else + symbol_set_ptr(state->sym, &ptr_type); +} + +static int process_exported_symbols(struct state *unused, struct die *cache, + Dwarf_Die *die) +{ + int tag = dwarf_tag(die); + + switch (tag) { + /* Possible containers of exported symbols */ + case DW_TAG_namespace: + case DW_TAG_class_type: + case DW_TAG_structure_type: + return check(process_die_container( + NULL, cache, die, process_exported_symbols, match_all)); + + /* Possible exported symbols */ + case DW_TAG_subprogram: + case DW_TAG_variable: { + struct state state; + + if (!match_export_symbol(&state, die)) + return 0; + + state_init(&state); + + if (is_symbol_ptr(get_symbol_name(&state.die))) + save_symbol_ptr(&state); + else if (tag == DW_TAG_subprogram) + process_subprogram(&state, &state.die); + else + process_variable(&state, &state.die); + + cache_free(&state.expansion_cache); + return 0; + } + default: + return 0; + } +} + +static void process_symbol_ptr(struct symbol *sym, void *arg) +{ + struct state state; + Dwarf *dwarf = arg; + + if (sym->state != SYMBOL_UNPROCESSED || !sym->ptr_die_addr) + return; + + debug("%s", sym->name); + state_init(&state); + state.sym = sym; + + if (!dwarf_die_addr_die(dwarf, (void *)sym->ptr_die_addr, &state.die)) + error("dwarf_die_addr_die failed for symbol ptr: '%s'", + sym->name); + + if (dwarf_tag(&state.die) == DW_TAG_subroutine_type) + process_subprogram(&state, &state.die); + else + process_variable(&state, &state.die); + + cache_free(&state.expansion_cache); +} + +void process_cu(Dwarf_Die *cudie) +{ + check(process_die_container(NULL, NULL, cudie, process_exported_symbols, + match_all)); + + symbol_for_each(process_symbol_ptr, dwarf_cu_getdwarf(cudie->cu)); + + cache_free(&srcfile_cache); +} diff --git a/scripts/gendwarfksyms/examples/kabi.h b/scripts/gendwarfksyms/examples/kabi.h new file mode 100644 index 000000000000..97a5669b083d --- /dev/null +++ b/scripts/gendwarfksyms/examples/kabi.h @@ -0,0 +1,157 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2024 Google LLC + * + * Example macros for maintaining kABI stability. + * + * This file is based on android_kabi.h, which has the following notice: + * + * Heavily influenced by rh_kabi.h which came from the RHEL/CENTOS kernel + * and was: + * Copyright (c) 2014 Don Zickus + * Copyright (c) 2015-2018 Jiri Benc + * Copyright (c) 2015 Sabrina Dubroca, Hannes Frederic Sowa + * Copyright (c) 2016-2018 Prarit Bhargava + * Copyright (c) 2017 Paolo Abeni, Larry Woodman + */ + +#ifndef __KABI_H__ +#define __KABI_H__ + +/* Kernel macros for userspace testing. */ +#ifndef __aligned +#define __aligned(x) __attribute__((__aligned__(x))) +#endif +#ifndef __used +#define __used __attribute__((__used__)) +#endif +#ifndef __section +#define __section(section) __attribute__((__section__(section))) +#endif +#ifndef __PASTE +#define ___PASTE(a, b) a##b +#define __PASTE(a, b) ___PASTE(a, b) +#endif +#ifndef __stringify +#define __stringify_1(x...) #x +#define __stringify(x...) 
__stringify_1(x) +#endif + +#define __KABI_RULE(hint, target, value) \ + static const char __PASTE(__gendwarfksyms_rule_, \ + __COUNTER__)[] __used __aligned(1) \ + __section(".discard.gendwarfksyms.kabi_rules") = \ + "1\0" #hint "\0" #target "\0" #value + +#define __KABI_NORMAL_SIZE_ALIGN(_orig, _new) \ + union { \ + _Static_assert( \ + sizeof(struct { _new; }) <= sizeof(struct { _orig; }), \ + __FILE__ ":" __stringify(__LINE__) ": " __stringify( \ + _new) " is larger than " __stringify(_orig)); \ + _Static_assert( \ + __alignof__(struct { _new; }) <= \ + __alignof__(struct { _orig; }), \ + __FILE__ ":" __stringify(__LINE__) ": " __stringify( \ + _orig) " is not aligned the same as " __stringify(_new)); \ + } + +#define __KABI_REPLACE(_orig, _new) \ + union { \ + _new; \ + struct { \ + _orig; \ + }; \ + __KABI_NORMAL_SIZE_ALIGN(_orig, _new); \ + } + +/* + * KABI_DECLONLY(fqn) + * Treat the struct/union/enum fqn as a declaration, i.e. even if + * a definition is available, don't expand the contents. + */ +#define KABI_DECLONLY(fqn) __KABI_RULE(declonly, fqn, ) + +/* + * KABI_ENUMERATOR_IGNORE(fqn, field) + * When expanding enum fqn, skip the provided field. This makes it + * possible to hide added enum fields from versioning. + */ +#define KABI_ENUMERATOR_IGNORE(fqn, field) \ + __KABI_RULE(enumerator_ignore, fqn field, ) + +/* + * KABI_ENUMERATOR_VALUE(fqn, field, value) + * When expanding enum fqn, use the provided value for the + * specified field. This makes it possible to override enumerator + * values when calculating versions. + */ +#define KABI_ENUMERATOR_VALUE(fqn, field, value) \ + __KABI_RULE(enumerator_value, fqn field, value) + +/* + * KABI_RESERVE + * Reserve some "padding" in a structure for use by LTS backports. + * This is normally placed at the end of a structure. + * number: the "number" of the padding variable in the structure. Start with + * 1 and go up. + */ +#define KABI_RESERVE(n) unsigned long __kabi_reserved##n + +/* + * KABI_RESERVE_ARRAY + * Same as _BACKPORT_RESERVE but allocates an array with the specified + * size in bytes. + */ +#define KABI_RESERVE_ARRAY(n, s) \ + unsigned char __aligned(8) __kabi_reserved##n[s] + +/* + * KABI_IGNORE + * Add a new field that's ignored in versioning. + */ +#define KABI_IGNORE(n, _new) \ + union { \ + _new; \ + unsigned char __kabi_ignored##n; \ + } + +/* + * KABI_REPLACE + * Replace a field with a compatible new field. + */ +#define KABI_REPLACE(_oldtype, _oldname, _new) \ + __KABI_REPLACE(_oldtype __kabi_renamed##_oldname, struct { _new; }) + +/* + * KABI_USE(number, _new) + * Use a previous padding entry that was defined with KABI_RESERVE + * number: the previous "number" of the padding variable + * _new: the variable to use now instead of the padding variable + */ +#define KABI_USE(number, _new) __KABI_REPLACE(KABI_RESERVE(number), _new) + +/* + * KABI_USE2(number, _new1, _new2) + * Use a previous padding entry that was defined with KABI_RESERVE for + * two new variables that fit into 64 bits. This is good for when you do not + * want to "burn" a 64bit padding variable for a smaller variable size if not + * needed. 
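+ *
+ * A minimal usage sketch (field names are illustrative; struct ex0b in
+ * examples/kabi_ex.h is a checked example of the same pattern):
+ *
+ *	struct example {
+ *		int a;
+ *		KABI_RESERVE(0);
+ *		KABI_USE2(1, int b, int c);
+ *	};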
+ */ +#define KABI_USE2(number, _new1, _new2) \ + __KABI_REPLACE( \ + KABI_RESERVE(number), struct { \ + _new1; \ + _new2; \ + }) +/* + * KABI_USE_ARRAY(number, bytes, _new) + * Use a previous padding entry that was defined with KABI_RESERVE_ARRAY + * number: the previous "number" of the padding variable + * bytes: the size in bytes reserved for the array + * _new: the variable to use now instead of the padding variable + */ +#define KABI_USE_ARRAY(number, bytes, _new) \ + __KABI_REPLACE(KABI_RESERVE_ARRAY(number, bytes), _new) + +#endif /* __KABI_H__ */ diff --git a/scripts/gendwarfksyms/examples/kabi_ex.c b/scripts/gendwarfksyms/examples/kabi_ex.c new file mode 100644 index 000000000000..0b7ffd830541 --- /dev/null +++ b/scripts/gendwarfksyms/examples/kabi_ex.c @@ -0,0 +1,30 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * kabi_ex.c + * + * Copyright (C) 2024 Google LLC + * + * Examples for kABI stability features with --stable. See kabi_ex.h + * for details. + */ + +#include "kabi_ex.h" + +struct s e0; +enum e e1; + +struct ex0a ex0a; +struct ex0b ex0b; +struct ex0c ex0c; + +struct ex1a ex1a; +struct ex1b ex1b; +struct ex1c ex1c; + +struct ex2a ex2a; +struct ex2b ex2b; +struct ex2c ex2c; + +struct ex3a ex3a; +struct ex3b ex3b; +struct ex3c ex3c; diff --git a/scripts/gendwarfksyms/examples/kabi_ex.h b/scripts/gendwarfksyms/examples/kabi_ex.h new file mode 100644 index 000000000000..1736e0f65208 --- /dev/null +++ b/scripts/gendwarfksyms/examples/kabi_ex.h @@ -0,0 +1,263 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * kabi_ex.h + * + * Copyright (C) 2024 Google LLC + * + * Examples for kABI stability features with --stable. + */ + +/* + * The comments below each example contain the expected gendwarfksyms + * output, which can be verified using LLVM's FileCheck tool: + * + * https://llvm.org/docs/CommandGuide/FileCheck.html + * + * Usage: + * + * $ gcc -g -c examples/kabi_ex.c -o examples/kabi_ex.o + * + * $ nm examples/kabi_ex.o | awk '{ print $NF }' | \ + * ./gendwarfksyms --stable --dump-dies \ + * examples/kabi_ex.o 2>&1 >/dev/null | \ + * FileCheck examples/kabi_ex.h --check-prefix=STABLE + */ + +#ifndef __KABI_EX_H__ +#define __KABI_EX_H__ + +#include "kabi.h" + +/* + * Example: kABI rules + */ + +struct s { + int a; +}; + +KABI_DECLONLY(s); + +/* + * STABLE: variable structure_type s { + * STABLE-NEXT: } + */ + +enum e { + A, + B, + C, + D, +}; + +KABI_ENUMERATOR_IGNORE(e, B); +KABI_ENUMERATOR_IGNORE(e, C); +KABI_ENUMERATOR_VALUE(e, D, 123456789); + +/* + * STABLE: variable enumeration_type e { + * STABLE-NEXT: enumerator A = 0 , + * STABLE-NEXT: enumerator D = 123456789 + * STABLE-NEXT: } byte_size(4) +*/ + +/* + * Example: Reserved fields + */ +struct ex0a { + int a; + KABI_RESERVE(0); + KABI_RESERVE(1); +}; + +/* + * STABLE: variable structure_type ex0a { + * STABLE-NEXT: member base_type int byte_size(4) encoding(5) a data_member_location(0) , + * STABLE-NEXT: member base_type [[ULONG:long unsigned int|unsigned long]] byte_size(8) encoding(7) data_member_location(8) , + * STABLE-NEXT: member base_type [[ULONG]] byte_size(8) encoding(7) data_member_location(16) + * STABLE-NEXT: } byte_size(24) + */ + +struct ex0b { + int a; + KABI_RESERVE(0); + KABI_USE2(1, int b, int c); +}; + +/* + * STABLE: variable structure_type ex0b { + * STABLE-NEXT: member base_type int byte_size(4) encoding(5) a data_member_location(0) , + * STABLE-NEXT: member base_type [[ULONG]] byte_size(8) encoding(7) data_member_location(8) , + * STABLE-NEXT: member base_type [[ULONG]] byte_size(8) encoding(7) 
data_member_location(16) + * STABLE-NEXT: } byte_size(24) + */ + +struct ex0c { + int a; + KABI_USE(0, void *p); + KABI_USE2(1, int b, int c); +}; + +/* + * STABLE: variable structure_type ex0c { + * STABLE-NEXT: member base_type int byte_size(4) encoding(5) a data_member_location(0) , + * STABLE-NEXT: member base_type [[ULONG]] byte_size(8) encoding(7) data_member_location(8) , + * STABLE-NEXT: member base_type [[ULONG]] byte_size(8) encoding(7) data_member_location(16) + * STABLE-NEXT: } byte_size(24) + */ + +/* + * Example: A reserved array + */ + +struct ex1a { + unsigned int a; + KABI_RESERVE_ARRAY(0, 64); +}; + +/* + * STABLE: variable structure_type ex1a { + * STABLE-NEXT: member base_type unsigned int byte_size(4) encoding(7) a data_member_location(0) , + * STABLE-NEXT: member array_type[64] { + * STABLE-NEXT: base_type unsigned char byte_size(1) encoding(8) + * STABLE-NEXT: } data_member_location(8) + * STABLE-NEXT: } byte_size(72) + */ + +struct ex1b { + unsigned int a; + KABI_USE_ARRAY( + 0, 64, struct { + void *p; + KABI_RESERVE_ARRAY(1, 56); + }); +}; + +/* + * STABLE: variable structure_type ex1b { + * STABLE-NEXT: member base_type unsigned int byte_size(4) encoding(7) a data_member_location(0) , + * STABLE-NEXT: member array_type[64] { + * STABLE-NEXT: base_type unsigned char byte_size(1) encoding(8) + * STABLE-NEXT: } data_member_location(8) + * STABLE-NEXT: } byte_size(72) + */ + +struct ex1c { + unsigned int a; + KABI_USE_ARRAY(0, 64, void *p[8]); +}; + +/* + * STABLE: variable structure_type ex1c { + * STABLE-NEXT: member base_type unsigned int byte_size(4) encoding(7) a data_member_location(0) , + * STABLE-NEXT: member array_type[64] { + * STABLE-NEXT: base_type unsigned char byte_size(1) encoding(8) + * STABLE-NEXT: } data_member_location(8) + * STABLE-NEXT: } byte_size(72) + */ + +/* + * Example: An ignored field added to an alignment hole + */ + +struct ex2a { + int a; + unsigned long b; + int c; + unsigned long d; +}; + +/* + * STABLE: variable structure_type ex2a { + * STABLE-NEXT: member base_type int byte_size(4) encoding(5) a data_member_location(0) , + * STABLE-NEXT: member base_type [[ULONG:long unsigned int|unsigned long]] byte_size(8) encoding(7) b data_member_location(8) + * STABLE-NEXT: member base_type int byte_size(4) encoding(5) c data_member_location(16) , + * STABLE-NEXT: member base_type [[ULONG]] byte_size(8) encoding(7) d data_member_location(24) + * STABLE-NEXT: } byte_size(32) + */ + +struct ex2b { + int a; + KABI_IGNORE(0, unsigned int n); + unsigned long b; + int c; + unsigned long d; +}; + +_Static_assert(sizeof(struct ex2a) == sizeof(struct ex2b), "ex2a size doesn't match ex2b"); + +/* + * STABLE: variable structure_type ex2b { + * STABLE-NEXT: member base_type int byte_size(4) encoding(5) a data_member_location(0) , + * STABLE-NEXT: member base_type [[ULONG]] byte_size(8) encoding(7) b data_member_location(8) + * STABLE-NEXT: member base_type int byte_size(4) encoding(5) c data_member_location(16) , + * STABLE-NEXT: member base_type [[ULONG]] byte_size(8) encoding(7) d data_member_location(24) + * STABLE-NEXT: } byte_size(32) + */ + +struct ex2c { + int a; + KABI_IGNORE(0, unsigned int n); + unsigned long b; + int c; + KABI_IGNORE(1, unsigned int m); + unsigned long d; +}; + +_Static_assert(sizeof(struct ex2a) == sizeof(struct ex2c), "ex2a size doesn't match ex2c"); + +/* + * STABLE: variable structure_type ex2c { + * STABLE-NEXT: member base_type int byte_size(4) encoding(5) a data_member_location(0) , + * STABLE-NEXT: member base_type 
[[ULONG]] byte_size(8) encoding(7) b data_member_location(8) + * STABLE-NEXT: member base_type int byte_size(4) encoding(5) c data_member_location(16) , + * STABLE-NEXT: member base_type [[ULONG]] byte_size(8) encoding(7) d data_member_location(24) + * STABLE-NEXT: } byte_size(32) + */ + + +/* + * Example: A replaced field + */ + +struct ex3a { + unsigned long a; + unsigned long unused; +}; + +/* + * STABLE: variable structure_type ex3a { + * STABLE-NEXT: member base_type [[ULONG:long unsigned int|unsigned long]] byte_size(8) encoding(7) a data_member_location(0) + * STABLE-NEXT: member base_type [[ULONG]] byte_size(8) encoding(7) unused data_member_location(8) + * STABLE-NEXT: } byte_size(16) + */ + +struct ex3b { + unsigned long a; + KABI_REPLACE(unsigned long, unused, unsigned long renamed); +}; + +_Static_assert(sizeof(struct ex3a) == sizeof(struct ex3b), "ex3a size doesn't match ex3b"); + +/* + * STABLE: variable structure_type ex3b { + * STABLE-NEXT: member base_type [[ULONG]] byte_size(8) encoding(7) a data_member_location(0) + * STABLE-NEXT: member base_type [[ULONG]] byte_size(8) encoding(7) unused data_member_location(8) + * STABLE-NEXT: } byte_size(16) + */ + +struct ex3c { + unsigned long a; + KABI_REPLACE(unsigned long, unused, long replaced); +}; + +_Static_assert(sizeof(struct ex3a) == sizeof(struct ex3c), "ex3a size doesn't match ex3c"); + +/* + * STABLE: variable structure_type ex3c { + * STABLE-NEXT: member base_type [[ULONG]] byte_size(8) encoding(7) a data_member_location(0) + * STABLE-NEXT: member base_type [[ULONG]] byte_size(8) encoding(7) unused data_member_location(8) + * STABLE-NEXT: } byte_size(16) + */ + +#endif /* __KABI_EX_H__ */ diff --git a/scripts/gendwarfksyms/examples/symbolptr.c b/scripts/gendwarfksyms/examples/symbolptr.c new file mode 100644 index 000000000000..88bc1bd60da8 --- /dev/null +++ b/scripts/gendwarfksyms/examples/symbolptr.c @@ -0,0 +1,33 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2024 Google LLC + * + * Example for symbol pointers. When compiled with Clang, gendwarfkyms + * uses a symbol pointer for `f`. + * + * $ clang -g -c examples/symbolptr.c -o examples/symbolptr.o + * $ echo -e "f\ng\np" | ./gendwarfksyms -d examples/symbolptr.o + */ + +/* Kernel macros for userspace testing. */ +#ifndef __used +#define __used __attribute__((__used__)) +#endif +#ifndef __section +#define __section(section) __attribute__((__section__(section))) +#endif + +#define __GENDWARFKSYMS_EXPORT(sym) \ + static typeof(sym) *__gendwarfksyms_ptr_##sym __used \ + __section(".discard.gendwarfksyms") = &sym; + +extern void f(unsigned int arg); +void g(int *arg); +void g(int *arg) {} + +struct s; +extern struct s *p; + +__GENDWARFKSYMS_EXPORT(f); +__GENDWARFKSYMS_EXPORT(g); +__GENDWARFKSYMS_EXPORT(p); diff --git a/scripts/gendwarfksyms/gendwarfksyms.c b/scripts/gendwarfksyms/gendwarfksyms.c new file mode 100644 index 000000000000..08ae61eb327e --- /dev/null +++ b/scripts/gendwarfksyms/gendwarfksyms.c @@ -0,0 +1,187 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2024 Google LLC + */ + +#include <fcntl.h> +#include <getopt.h> +#include <errno.h> +#include <stdarg.h> +#include <string.h> +#include <unistd.h> +#include "gendwarfksyms.h" + +/* + * Options + */ + +/* Print debugging information to stderr */ +int debug; +/* Dump DIE contents */ +int dump_dies; +/* Print debugging information about die_map changes */ +int dump_die_map; +/* Print out type strings (i.e. 
type_map) */ +int dump_types; +/* Print out expanded type strings used for symbol versions */ +int dump_versions; +/* Support kABI stability features */ +int stable; +/* Write a symtypes file */ +int symtypes; +static const char *symtypes_file; + +static void usage(void) +{ + fputs("Usage: gendwarfksyms [options] elf-object-file ... < symbol-list\n\n" + "Options:\n" + " -d, --debug Print debugging information\n" + " --dump-dies Dump DWARF DIE contents\n" + " --dump-die-map Print debugging information about die_map changes\n" + " --dump-types Dump type strings\n" + " --dump-versions Dump expanded type strings used for symbol versions\n" + " -s, --stable Support kABI stability features\n" + " -T, --symtypes file Write a symtypes file\n" + " -h, --help Print this message\n" + "\n", + stderr); +} + +static int process_module(Dwfl_Module *mod, void **userdata, const char *name, + Dwarf_Addr base, void *arg) +{ + Dwarf_Addr dwbias; + Dwarf_Die cudie; + Dwarf_CU *cu = NULL; + Dwarf *dbg; + FILE *symfile = arg; + int res; + + debug("%s", name); + dbg = dwfl_module_getdwarf(mod, &dwbias); + + /* + * Look for exported symbols in each CU, follow the DIE tree, and add + * the entries to die_map. + */ + do { + res = dwarf_get_units(dbg, cu, &cu, NULL, NULL, &cudie, NULL); + if (res < 0) + error("dwarf_get_units failed: no debugging information?"); + if (res == 1) + break; /* No more units */ + + process_cu(&cudie); + } while (cu); + + /* + * Use die_map to expand type strings, write them to `symfile`, and + * calculate symbol versions. + */ + generate_symtypes_and_versions(symfile); + die_map_free(); + + return DWARF_CB_OK; +} + +static const Dwfl_Callbacks callbacks = { + .section_address = dwfl_offline_section_address, + .find_debuginfo = dwfl_standard_find_debuginfo, +}; + +int main(int argc, char **argv) +{ + FILE *symfile = NULL; + unsigned int n; + int opt; + + static const struct option opts[] = { + { "debug", 0, NULL, 'd' }, + { "dump-dies", 0, &dump_dies, 1 }, + { "dump-die-map", 0, &dump_die_map, 1 }, + { "dump-types", 0, &dump_types, 1 }, + { "dump-versions", 0, &dump_versions, 1 }, + { "stable", 0, NULL, 's' }, + { "symtypes", 1, NULL, 'T' }, + { "help", 0, NULL, 'h' }, + { 0, 0, NULL, 0 } + }; + + while ((opt = getopt_long(argc, argv, "dsT:h", opts, NULL)) != EOF) { + switch (opt) { + case 0: + break; + case 'd': + debug = 1; + break; + case 's': + stable = 1; + break; + case 'T': + symtypes = 1; + symtypes_file = optarg; + break; + case 'h': + usage(); + return 0; + default: + usage(); + return 1; + } + } + + if (dump_die_map) + dump_dies = 1; + + if (optind >= argc) { + usage(); + error("no input files?"); + } + + symbol_read_exports(stdin); + + if (symtypes_file) { + symfile = fopen(symtypes_file, "w"); + if (!symfile) + error("fopen failed for '%s': %s", symtypes_file, + strerror(errno)); + } + + for (n = optind; n < argc; n++) { + Dwfl *dwfl; + int fd; + + fd = open(argv[n], O_RDONLY); + if (fd == -1) + error("open failed for '%s': %s", argv[n], + strerror(errno)); + + symbol_read_symtab(fd); + kabi_read_rules(fd); + + dwfl = dwfl_begin(&callbacks); + if (!dwfl) + error("dwfl_begin failed for '%s': %s", argv[n], + dwarf_errmsg(-1)); + + if (!dwfl_report_offline(dwfl, argv[n], argv[n], fd)) + error("dwfl_report_offline failed for '%s': %s", + argv[n], dwarf_errmsg(-1)); + + dwfl_report_end(dwfl, NULL, NULL); + + if (dwfl_getmodules(dwfl, &process_module, symfile, 0)) + error("dwfl_getmodules failed for '%s'", argv[n]); + + dwfl_end(dwfl); + kabi_free(); + } + + if (symfile) + 
check(fclose(symfile)); + + symbol_print_versions(); + symbol_free(); + + return 0; +} diff --git a/scripts/gendwarfksyms/gendwarfksyms.h b/scripts/gendwarfksyms/gendwarfksyms.h new file mode 100644 index 000000000000..197a1a8123c6 --- /dev/null +++ b/scripts/gendwarfksyms/gendwarfksyms.h @@ -0,0 +1,296 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2024 Google LLC + */ + +#include <dwarf.h> +#include <elfutils/libdw.h> +#include <elfutils/libdwfl.h> +#include <stdlib.h> +#include <stdio.h> + +#include <hash.h> +#include <hashtable.h> +#include <xalloc.h> + +#ifndef __GENDWARFKSYMS_H +#define __GENDWARFKSYMS_H + +/* + * Options -- in gendwarfksyms.c + */ +extern int debug; +extern int dump_dies; +extern int dump_die_map; +extern int dump_types; +extern int dump_versions; +extern int stable; +extern int symtypes; + +/* + * Output helpers + */ +#define __PREFIX "gendwarfksyms: " +#define __println(prefix, format, ...) \ + fprintf(stderr, prefix __PREFIX "%s: " format "\n", __func__, \ + ##__VA_ARGS__) + +#define debug(format, ...) \ + do { \ + if (debug) \ + __println("", format, ##__VA_ARGS__); \ + } while (0) + +#define warn(format, ...) __println("warning: ", format, ##__VA_ARGS__) +#define error(format, ...) \ + do { \ + __println("error: ", format, ##__VA_ARGS__); \ + exit(1); \ + } while (0) + +#define __die_debug(color, format, ...) \ + do { \ + if (dump_dies && dump_die_map) \ + fprintf(stderr, \ + "\033[" #color "m<" format ">\033[39m", \ + __VA_ARGS__); \ + } while (0) + +#define die_debug_r(format, ...) __die_debug(91, format, __VA_ARGS__) +#define die_debug_g(format, ...) __die_debug(92, format, __VA_ARGS__) +#define die_debug_b(format, ...) __die_debug(94, format, __VA_ARGS__) + +/* + * Error handling helpers + */ +#define __check(expr, test) \ + ({ \ + int __res = expr; \ + if (test) \ + error("`%s` failed: %d", #expr, __res); \ + __res; \ + }) + +/* Error == non-zero values */ +#define check(expr) __check(expr, __res) +/* Error == negative values */ +#define checkp(expr) __check(expr, __res < 0) + +/* Consistent aliases (DW_TAG_<type>_type) for DWARF tags */ +#define DW_TAG_enumerator_type DW_TAG_enumerator +#define DW_TAG_formal_parameter_type DW_TAG_formal_parameter +#define DW_TAG_member_type DW_TAG_member +#define DW_TAG_template_type_parameter_type DW_TAG_template_type_parameter +#define DW_TAG_typedef_type DW_TAG_typedef +#define DW_TAG_variant_part_type DW_TAG_variant_part +#define DW_TAG_variant_type DW_TAG_variant + +/* + * symbols.c + */ + +/* See symbols.c:is_symbol_ptr */ +#define SYMBOL_PTR_PREFIX "__gendwarfksyms_ptr_" +#define SYMBOL_PTR_PREFIX_LEN (sizeof(SYMBOL_PTR_PREFIX) - 1) + +static inline unsigned int addr_hash(uintptr_t addr) +{ + return hash_ptr((const void *)addr); +} + +enum symbol_state { + SYMBOL_UNPROCESSED, + SYMBOL_MAPPED, + SYMBOL_PROCESSED +}; + +struct symbol_addr { + uint32_t section; + Elf64_Addr address; +}; + +struct symbol { + const char *name; + struct symbol_addr addr; + struct hlist_node addr_hash; + struct hlist_node name_hash; + enum symbol_state state; + uintptr_t die_addr; + uintptr_t ptr_die_addr; + unsigned long crc; +}; + +typedef void (*symbol_callback_t)(struct symbol *, void *arg); + +bool is_symbol_ptr(const char *name); +void symbol_read_exports(FILE *file); +void symbol_read_symtab(int fd); +struct symbol *symbol_get(const char *name); +void symbol_set_ptr(struct symbol *sym, Dwarf_Die *ptr); +void symbol_set_die(struct symbol *sym, Dwarf_Die *die); +void symbol_set_crc(struct symbol *sym, unsigned long 
crc); +void symbol_for_each(symbol_callback_t func, void *arg); +void symbol_print_versions(void); +void symbol_free(void); + +/* + * die.c + */ + +enum die_state { + DIE_INCOMPLETE, + DIE_UNEXPANDED, + DIE_COMPLETE, + DIE_SYMBOL, + DIE_LAST = DIE_SYMBOL +}; + +enum die_fragment_type { + FRAGMENT_EMPTY, + FRAGMENT_STRING, + FRAGMENT_LINEBREAK, + FRAGMENT_DIE +}; + +struct die_fragment { + enum die_fragment_type type; + union { + char *str; + int linebreak; + uintptr_t addr; + } data; + struct list_head list; +}; + +#define CASE_CONST_TO_STR(name) \ + case name: \ + return #name; + +static inline const char *die_state_name(enum die_state state) +{ + switch (state) { + CASE_CONST_TO_STR(DIE_INCOMPLETE) + CASE_CONST_TO_STR(DIE_UNEXPANDED) + CASE_CONST_TO_STR(DIE_COMPLETE) + CASE_CONST_TO_STR(DIE_SYMBOL) + } + + error("unexpected die_state: %d", state); +} + +struct die { + enum die_state state; + bool mapped; + char *fqn; + int tag; + uintptr_t addr; + struct list_head fragments; + struct hlist_node hash; +}; + +typedef void (*die_map_callback_t)(struct die *, void *arg); + +int __die_map_get(uintptr_t addr, enum die_state state, struct die **res); +struct die *die_map_get(Dwarf_Die *die, enum die_state state); +void die_map_add_string(struct die *pd, const char *str); +void die_map_add_linebreak(struct die *pd, int linebreak); +void die_map_for_each(die_map_callback_t func, void *arg); +void die_map_add_die(struct die *pd, struct die *child); +void die_map_free(void); + +/* + * cache.c + */ + +#define CACHE_HASH_BITS 10 + +/* A cache for addresses we've already seen. */ +struct cache { + HASHTABLE_DECLARE(cache, 1 << CACHE_HASH_BITS); +}; + +void cache_set(struct cache *cache, unsigned long key, int value); +int cache_get(struct cache *cache, unsigned long key); +void cache_init(struct cache *cache); +void cache_free(struct cache *cache); + +static inline void __cache_mark_expanded(struct cache *cache, uintptr_t addr) +{ + cache_set(cache, addr, 1); +} + +static inline bool __cache_was_expanded(struct cache *cache, uintptr_t addr) +{ + return cache_get(cache, addr) == 1; +} + +static inline void cache_mark_expanded(struct cache *cache, void *addr) +{ + __cache_mark_expanded(cache, (uintptr_t)addr); +} + +static inline bool cache_was_expanded(struct cache *cache, void *addr) +{ + return __cache_was_expanded(cache, (uintptr_t)addr); +} + +/* + * dwarf.c + */ + +struct expansion_state { + bool expand; + const char *current_fqn; +}; + +struct kabi_state { + int members; + Dwarf_Die placeholder; + const char *orig_name; +}; + +struct state { + struct symbol *sym; + Dwarf_Die die; + + /* List expansion */ + bool first_list_item; + + /* Structure expansion */ + struct expansion_state expand; + struct cache expansion_cache; + + /* Reserved or ignored members */ + struct kabi_state kabi; +}; + +typedef int (*die_callback_t)(struct state *state, struct die *cache, + Dwarf_Die *die); +typedef bool (*die_match_callback_t)(Dwarf_Die *die); +bool match_all(Dwarf_Die *die); + +int process_die_container(struct state *state, struct die *cache, + Dwarf_Die *die, die_callback_t func, + die_match_callback_t match); + +void process_cu(Dwarf_Die *cudie); + +/* + * types.c + */ + +void generate_symtypes_and_versions(FILE *file); + +/* + * kabi.c + */ + +bool kabi_is_enumerator_ignored(const char *fqn, const char *field); +bool kabi_get_enumerator_value(const char *fqn, const char *field, + unsigned long *value); +bool kabi_is_declonly(const char *fqn); + +void kabi_read_rules(int fd); +void kabi_free(void); + 
+#endif /* __GENDWARFKSYMS_H */ diff --git a/scripts/gendwarfksyms/kabi.c b/scripts/gendwarfksyms/kabi.c new file mode 100644 index 000000000000..66f01fcd1607 --- /dev/null +++ b/scripts/gendwarfksyms/kabi.c @@ -0,0 +1,336 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2024 Google LLC + */ + +#define _GNU_SOURCE +#include <errno.h> +#include <stdio.h> + +#include "gendwarfksyms.h" + +#define KABI_RULE_SECTION ".discard.gendwarfksyms.kabi_rules" +#define KABI_RULE_VERSION "1" + +/* + * The rule section consists of four null-terminated strings per + * entry: + * + * 1. version + * Entry format version. Must match KABI_RULE_VERSION. + * + * 2. type + * Type of the kABI rule. Must be one of the tags defined below. + * + * 3. target + * Rule-dependent target, typically the fully qualified name of + * the target DIE. + * + * 4. value + * Rule-dependent value. + */ +#define KABI_RULE_MIN_ENTRY_SIZE \ + (/* version\0 */ 2 + /* type\0 */ 2 + /* target\0" */ 1 + \ + /* value\0 */ 1) +#define KABI_RULE_EMPTY_VALUE "" + +/* + * Rule: declonly + * - For the struct/enum/union in the target field, treat it as a + * declaration only even if a definition is available. + */ +#define KABI_RULE_TAG_DECLONLY "declonly" + +/* + * Rule: enumerator_ignore + * - For the enum_field in the target field, ignore the enumerator. + */ +#define KABI_RULE_TAG_ENUMERATOR_IGNORE "enumerator_ignore" + +/* + * Rule: enumerator_value + * - For the fqn_field in the target field, set the value to the + * unsigned integer in the value field. + */ +#define KABI_RULE_TAG_ENUMERATOR_VALUE "enumerator_value" + +enum kabi_rule_type { + KABI_RULE_TYPE_UNKNOWN, + KABI_RULE_TYPE_DECLONLY, + KABI_RULE_TYPE_ENUMERATOR_IGNORE, + KABI_RULE_TYPE_ENUMERATOR_VALUE, +}; + +#define RULE_HASH_BITS 7 + +struct rule { + enum kabi_rule_type type; + const char *target; + const char *value; + struct hlist_node hash; +}; + +/* { type, target } -> struct rule */ +static HASHTABLE_DEFINE(rules, 1 << RULE_HASH_BITS); + +static inline unsigned int rule_values_hash(enum kabi_rule_type type, + const char *target) +{ + return hash_32(type) ^ hash_str(target); +} + +static inline unsigned int rule_hash(const struct rule *rule) +{ + return rule_values_hash(rule->type, rule->target); +} + +static inline const char *get_rule_field(const char **pos, ssize_t *left) +{ + const char *start = *pos; + size_t len; + + if (*left <= 0) + error("unexpected end of kABI rules"); + + len = strnlen(start, *left) + 1; + *pos += len; + *left -= len; + + return start; +} + +void kabi_read_rules(int fd) +{ + GElf_Shdr shdr_mem; + GElf_Shdr *shdr; + Elf_Data *rule_data = NULL; + Elf_Scn *scn; + Elf *elf; + size_t shstrndx; + const char *rule_str; + ssize_t left; + int i; + + const struct { + enum kabi_rule_type type; + const char *tag; + } rule_types[] = { + { + .type = KABI_RULE_TYPE_DECLONLY, + .tag = KABI_RULE_TAG_DECLONLY, + }, + { + .type = KABI_RULE_TYPE_ENUMERATOR_IGNORE, + .tag = KABI_RULE_TAG_ENUMERATOR_IGNORE, + }, + { + .type = KABI_RULE_TYPE_ENUMERATOR_VALUE, + .tag = KABI_RULE_TAG_ENUMERATOR_VALUE, + }, + }; + + if (!stable) + return; + + if (elf_version(EV_CURRENT) != EV_CURRENT) + error("elf_version failed: %s", elf_errmsg(-1)); + + elf = elf_begin(fd, ELF_C_READ_MMAP, NULL); + if (!elf) + error("elf_begin failed: %s", elf_errmsg(-1)); + + if (elf_getshdrstrndx(elf, &shstrndx) < 0) + error("elf_getshdrstrndx failed: %s", elf_errmsg(-1)); + + scn = elf_nextscn(elf, NULL); + + while (scn) { + const char *sname; + + shdr = gelf_getshdr(scn, &shdr_mem); + 
if (!shdr) + error("gelf_getshdr failed: %s", elf_errmsg(-1)); + + sname = elf_strptr(elf, shstrndx, shdr->sh_name); + if (!sname) + error("elf_strptr failed: %s", elf_errmsg(-1)); + + if (!strcmp(sname, KABI_RULE_SECTION)) { + rule_data = elf_getdata(scn, NULL); + if (!rule_data) + error("elf_getdata failed: %s", elf_errmsg(-1)); + break; + } + + scn = elf_nextscn(elf, scn); + } + + if (!rule_data) { + debug("kABI rules not found"); + check(elf_end(elf)); + return; + } + + rule_str = rule_data->d_buf; + left = shdr->sh_size; + + if (left < KABI_RULE_MIN_ENTRY_SIZE) + error("kABI rule section too small: %zd bytes", left); + + if (rule_str[left - 1] != '\0') + error("kABI rules are not null-terminated"); + + while (left > KABI_RULE_MIN_ENTRY_SIZE) { + enum kabi_rule_type type = KABI_RULE_TYPE_UNKNOWN; + const char *field; + struct rule *rule; + + /* version */ + field = get_rule_field(&rule_str, &left); + + if (strcmp(field, KABI_RULE_VERSION)) + error("unsupported kABI rule version: '%s'", field); + + /* type */ + field = get_rule_field(&rule_str, &left); + + for (i = 0; i < ARRAY_SIZE(rule_types); i++) { + if (!strcmp(field, rule_types[i].tag)) { + type = rule_types[i].type; + break; + } + } + + if (type == KABI_RULE_TYPE_UNKNOWN) + error("unsupported kABI rule type: '%s'", field); + + rule = xmalloc(sizeof(struct rule)); + + rule->type = type; + rule->target = xstrdup(get_rule_field(&rule_str, &left)); + rule->value = xstrdup(get_rule_field(&rule_str, &left)); + + hash_add(rules, &rule->hash, rule_hash(rule)); + + debug("kABI rule: type: '%s', target: '%s', value: '%s'", field, + rule->target, rule->value); + } + + if (left > 0) + warn("unexpected data at the end of the kABI rules section"); + + check(elf_end(elf)); +} + +bool kabi_is_declonly(const char *fqn) +{ + struct rule *rule; + + if (!stable) + return false; + if (!fqn || !*fqn) + return false; + + hash_for_each_possible(rules, rule, hash, + rule_values_hash(KABI_RULE_TYPE_DECLONLY, fqn)) { + if (rule->type == KABI_RULE_TYPE_DECLONLY && + !strcmp(fqn, rule->target)) + return true; + } + + return false; +} + +static char *get_enumerator_target(const char *fqn, const char *field) +{ + char *target = NULL; + + if (asprintf(&target, "%s %s", fqn, field) < 0) + error("asprintf failed for '%s %s'", fqn, field); + + return target; +} + +static unsigned long get_ulong_value(const char *value) +{ + unsigned long result = 0; + char *endptr = NULL; + + errno = 0; + result = strtoul(value, &endptr, 10); + + if (errno || *endptr) + error("invalid unsigned value '%s'", value); + + return result; +} + +bool kabi_is_enumerator_ignored(const char *fqn, const char *field) +{ + bool match = false; + struct rule *rule; + char *target; + + if (!stable) + return false; + if (!fqn || !*fqn || !field || !*field) + return false; + + target = get_enumerator_target(fqn, field); + + hash_for_each_possible( + rules, rule, hash, + rule_values_hash(KABI_RULE_TYPE_ENUMERATOR_IGNORE, target)) { + if (rule->type == KABI_RULE_TYPE_ENUMERATOR_IGNORE && + !strcmp(target, rule->target)) { + match = true; + break; + } + } + + free(target); + return match; +} + +bool kabi_get_enumerator_value(const char *fqn, const char *field, + unsigned long *value) +{ + bool match = false; + struct rule *rule; + char *target; + + if (!stable) + return false; + if (!fqn || !*fqn || !field || !*field) + return false; + + target = get_enumerator_target(fqn, field); + + hash_for_each_possible(rules, rule, hash, + rule_values_hash(KABI_RULE_TYPE_ENUMERATOR_VALUE, + target)) { + if 
(rule->type == KABI_RULE_TYPE_ENUMERATOR_VALUE && + !strcmp(target, rule->target)) { + *value = get_ulong_value(rule->value); + match = true; + break; + } + } + + free(target); + return match; +} + +void kabi_free(void) +{ + struct hlist_node *tmp; + struct rule *rule; + + hash_for_each_safe(rules, rule, tmp, hash) { + free((void *)rule->target); + free((void *)rule->value); + free(rule); + } + + hash_init(rules); +} diff --git a/scripts/gendwarfksyms/symbols.c b/scripts/gendwarfksyms/symbols.c new file mode 100644 index 000000000000..327f87389c34 --- /dev/null +++ b/scripts/gendwarfksyms/symbols.c @@ -0,0 +1,341 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2024 Google LLC + */ + +#include "gendwarfksyms.h" + +#define SYMBOL_HASH_BITS 12 + +/* struct symbol_addr -> struct symbol */ +static HASHTABLE_DEFINE(symbol_addrs, 1 << SYMBOL_HASH_BITS); +/* name -> struct symbol */ +static HASHTABLE_DEFINE(symbol_names, 1 << SYMBOL_HASH_BITS); + +static inline unsigned int symbol_addr_hash(const struct symbol_addr *addr) +{ + return hash_32(addr->section ^ addr_hash(addr->address)); +} + +static unsigned int __for_each_addr(struct symbol *sym, symbol_callback_t func, + void *data) +{ + struct hlist_node *tmp; + struct symbol *match = NULL; + unsigned int processed = 0; + + hash_for_each_possible_safe(symbol_addrs, match, tmp, addr_hash, + symbol_addr_hash(&sym->addr)) { + if (match == sym) + continue; /* Already processed */ + + if (match->addr.section == sym->addr.section && + match->addr.address == sym->addr.address) { + func(match, data); + ++processed; + } + } + + return processed; +} + +/* + * For symbols without debugging information (e.g. symbols defined in other + * TUs), we also match __gendwarfksyms_ptr_<symbol_name> symbols, which the + * kernel uses to ensure type information is present in the TU that exports + * the symbol. A __gendwarfksyms_ptr pointer must have the same type as the + * exported symbol, e.g.: + * + * typeof(symname) *__gendwarf_ptr_symname = &symname; + */ +bool is_symbol_ptr(const char *name) +{ + return name && !strncmp(name, SYMBOL_PTR_PREFIX, SYMBOL_PTR_PREFIX_LEN); +} + +static unsigned int for_each(const char *name, symbol_callback_t func, + void *data) +{ + struct hlist_node *tmp; + struct symbol *match; + + if (!name || !*name) + return 0; + if (is_symbol_ptr(name)) + name += SYMBOL_PTR_PREFIX_LEN; + + hash_for_each_possible_safe(symbol_names, match, tmp, name_hash, + hash_str(name)) { + if (strcmp(match->name, name)) + continue; + + /* Call func for the match, and all address matches */ + if (func) + func(match, data); + + if (match->addr.section != SHN_UNDEF) + return __for_each_addr(match, func, data) + 1; + + return 1; + } + + return 0; +} + +static void set_crc(struct symbol *sym, void *data) +{ + unsigned long *crc = data; + + if (sym->state == SYMBOL_PROCESSED && sym->crc != *crc) + warn("overriding version for symbol %s (crc %lx vs. 
%lx)", + sym->name, sym->crc, *crc); + + sym->state = SYMBOL_PROCESSED; + sym->crc = *crc; +} + +void symbol_set_crc(struct symbol *sym, unsigned long crc) +{ + if (for_each(sym->name, set_crc, &crc) == 0) + error("no matching symbols: '%s'", sym->name); +} + +static void set_ptr(struct symbol *sym, void *data) +{ + sym->ptr_die_addr = (uintptr_t)((Dwarf_Die *)data)->addr; +} + +void symbol_set_ptr(struct symbol *sym, Dwarf_Die *ptr) +{ + if (for_each(sym->name, set_ptr, ptr) == 0) + error("no matching symbols: '%s'", sym->name); +} + +static void set_die(struct symbol *sym, void *data) +{ + sym->die_addr = (uintptr_t)((Dwarf_Die *)data)->addr; + sym->state = SYMBOL_MAPPED; +} + +void symbol_set_die(struct symbol *sym, Dwarf_Die *die) +{ + if (for_each(sym->name, set_die, die) == 0) + error("no matching symbols: '%s'", sym->name); +} + +static bool is_exported(const char *name) +{ + return for_each(name, NULL, NULL) > 0; +} + +void symbol_read_exports(FILE *file) +{ + struct symbol *sym; + char *line = NULL; + char *name = NULL; + size_t size = 0; + int nsym = 0; + + while (getline(&line, &size, file) > 0) { + if (sscanf(line, "%ms\n", &name) != 1) + error("malformed input line: %s", line); + + if (is_exported(name)) { + /* Ignore duplicates */ + free(name); + continue; + } + + sym = xcalloc(1, sizeof(struct symbol)); + sym->name = name; + sym->addr.section = SHN_UNDEF; + sym->state = SYMBOL_UNPROCESSED; + + hash_add(symbol_names, &sym->name_hash, hash_str(sym->name)); + ++nsym; + + debug("%s", sym->name); + } + + free(line); + debug("%d exported symbols", nsym); +} + +static void get_symbol(struct symbol *sym, void *arg) +{ + struct symbol **res = arg; + + if (sym->state == SYMBOL_UNPROCESSED) + *res = sym; +} + +struct symbol *symbol_get(const char *name) +{ + struct symbol *sym = NULL; + + for_each(name, get_symbol, &sym); + return sym; +} + +void symbol_for_each(symbol_callback_t func, void *arg) +{ + struct hlist_node *tmp; + struct symbol *sym; + + hash_for_each_safe(symbol_names, sym, tmp, name_hash) { + func(sym, arg); + } +} + +typedef void (*elf_symbol_callback_t)(const char *name, GElf_Sym *sym, + Elf32_Word xndx, void *arg); + +static void elf_for_each_global(int fd, elf_symbol_callback_t func, void *arg) +{ + size_t sym_size; + GElf_Shdr shdr_mem; + GElf_Shdr *shdr; + Elf_Data *xndx_data = NULL; + Elf_Scn *scn; + Elf *elf; + + if (elf_version(EV_CURRENT) != EV_CURRENT) + error("elf_version failed: %s", elf_errmsg(-1)); + + elf = elf_begin(fd, ELF_C_READ_MMAP, NULL); + if (!elf) + error("elf_begin failed: %s", elf_errmsg(-1)); + + scn = elf_nextscn(elf, NULL); + + while (scn) { + shdr = gelf_getshdr(scn, &shdr_mem); + if (!shdr) + error("gelf_getshdr failed: %s", elf_errmsg(-1)); + + if (shdr->sh_type == SHT_SYMTAB_SHNDX) { + xndx_data = elf_getdata(scn, NULL); + if (!xndx_data) + error("elf_getdata failed: %s", elf_errmsg(-1)); + break; + } + + scn = elf_nextscn(elf, scn); + } + + sym_size = gelf_fsize(elf, ELF_T_SYM, 1, EV_CURRENT); + scn = elf_nextscn(elf, NULL); + + while (scn) { + shdr = gelf_getshdr(scn, &shdr_mem); + if (!shdr) + error("gelf_getshdr failed: %s", elf_errmsg(-1)); + + if (shdr->sh_type == SHT_SYMTAB) { + unsigned int nsyms; + unsigned int n; + Elf_Data *data = elf_getdata(scn, NULL); + + if (!data) + error("elf_getdata failed: %s", elf_errmsg(-1)); + + if (shdr->sh_entsize != sym_size) + error("expected sh_entsize (%lu) to be %zu", + shdr->sh_entsize, sym_size); + + nsyms = shdr->sh_size / shdr->sh_entsize; + + for (n = 1; n < nsyms; ++n) { + const char 
*name = NULL; + Elf32_Word xndx = 0; + GElf_Sym sym_mem; + GElf_Sym *sym; + + sym = gelf_getsymshndx(data, xndx_data, n, + &sym_mem, &xndx); + if (!sym) + error("gelf_getsymshndx failed: %s", + elf_errmsg(-1)); + + if (GELF_ST_BIND(sym->st_info) == STB_LOCAL) + continue; + + if (sym->st_shndx != SHN_XINDEX) + xndx = sym->st_shndx; + + name = elf_strptr(elf, shdr->sh_link, + sym->st_name); + if (!name) + error("elf_strptr failed: %s", + elf_errmsg(-1)); + + /* Skip empty symbol names */ + if (*name) + func(name, sym, xndx, arg); + } + } + + scn = elf_nextscn(elf, scn); + } + + check(elf_end(elf)); +} + +static void set_symbol_addr(struct symbol *sym, void *arg) +{ + struct symbol_addr *addr = arg; + + if (sym->addr.section == SHN_UNDEF) { + sym->addr = *addr; + hash_add(symbol_addrs, &sym->addr_hash, + symbol_addr_hash(&sym->addr)); + + debug("%s -> { %u, %lx }", sym->name, sym->addr.section, + sym->addr.address); + } else if (sym->addr.section != addr->section || + sym->addr.address != addr->address) { + warn("multiple addresses for symbol %s?", sym->name); + } +} + +static void elf_set_symbol_addr(const char *name, GElf_Sym *sym, + Elf32_Word xndx, void *arg) +{ + struct symbol_addr addr = { .section = xndx, .address = sym->st_value }; + + /* Set addresses for exported symbols */ + if (addr.section != SHN_UNDEF) + for_each(name, set_symbol_addr, &addr); +} + +void symbol_read_symtab(int fd) +{ + elf_for_each_global(fd, elf_set_symbol_addr, NULL); +} + +void symbol_print_versions(void) +{ + struct hlist_node *tmp; + struct symbol *sym; + + hash_for_each_safe(symbol_names, sym, tmp, name_hash) { + if (sym->state != SYMBOL_PROCESSED) + warn("no information for symbol %s", sym->name); + + printf("#SYMVER %s 0x%08lx\n", sym->name, sym->crc); + } +} + +void symbol_free(void) +{ + struct hlist_node *tmp; + struct symbol *sym; + + hash_for_each_safe(symbol_names, sym, tmp, name_hash) { + free((void *)sym->name); + free(sym); + } + + hash_init(symbol_addrs); + hash_init(symbol_names); +} diff --git a/scripts/gendwarfksyms/types.c b/scripts/gendwarfksyms/types.c new file mode 100644 index 000000000000..6c03265f4d10 --- /dev/null +++ b/scripts/gendwarfksyms/types.c @@ -0,0 +1,481 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2024 Google LLC + */ + +#define _GNU_SOURCE +#include <inttypes.h> +#include <stdio.h> +#include <zlib.h> + +#include "gendwarfksyms.h" + +static struct cache expansion_cache; + +/* + * A simple linked list of shared or owned strings to avoid copying strings + * around when not necessary. + */ +struct type_list_entry { + const char *str; + void *owned; + struct list_head list; +}; + +static void type_list_free(struct list_head *list) +{ + struct type_list_entry *entry; + struct type_list_entry *tmp; + + list_for_each_entry_safe(entry, tmp, list, list) { + if (entry->owned) + free(entry->owned); + free(entry); + } + + INIT_LIST_HEAD(list); +} + +static int type_list_append(struct list_head *list, const char *s, void *owned) +{ + struct type_list_entry *entry; + + if (!s) + return 0; + + entry = xmalloc(sizeof(struct type_list_entry)); + entry->str = s; + entry->owned = owned; + list_add_tail(&entry->list, list); + + return strlen(entry->str); +} + +static void type_list_write(struct list_head *list, FILE *file) +{ + struct type_list_entry *entry; + + list_for_each_entry(entry, list, list) { + if (entry->str) + checkp(fputs(entry->str, file)); + } +} + +/* + * An expanded type string in symtypes format. 
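+ *
+ * A symtypes entry pairs a type reference with its expansion; for a
+ * hypothetical `struct foo { int a; }`, the entry would read roughly:
+ *
+ *	s#foo structure_type foo { member base_type int byte_size(4) encoding(5) a data_member_location(0) } byte_size(4)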
+ */ +struct type_expansion { + char *name; + size_t len; + struct list_head expanded; + struct hlist_node hash; +}; + +static void type_expansion_init(struct type_expansion *type) +{ + type->name = NULL; + type->len = 0; + INIT_LIST_HEAD(&type->expanded); +} + +static inline void type_expansion_free(struct type_expansion *type) +{ + free(type->name); + type->name = NULL; + type->len = 0; + type_list_free(&type->expanded); +} + +static void type_expansion_append(struct type_expansion *type, const char *s, + void *owned) +{ + type->len += type_list_append(&type->expanded, s, owned); +} + +/* + * type_map -- the longest expansions for each type. + * + * const char *name -> struct type_expansion * + */ +#define TYPE_HASH_BITS 12 +static HASHTABLE_DEFINE(type_map, 1 << TYPE_HASH_BITS); + +static int type_map_get(const char *name, struct type_expansion **res) +{ + struct type_expansion *e; + + hash_for_each_possible(type_map, e, hash, hash_str(name)) { + if (!strcmp(name, e->name)) { + *res = e; + return 0; + } + } + + return -1; +} + +static void type_map_add(const char *name, struct type_expansion *type) +{ + struct type_expansion *e; + + if (type_map_get(name, &e)) { + e = xmalloc(sizeof(struct type_expansion)); + type_expansion_init(e); + e->name = xstrdup(name); + + hash_add(type_map, &e->hash, hash_str(e->name)); + + if (dump_types) + debug("adding %s", e->name); + } else { + /* Use the longest available expansion */ + if (type->len <= e->len) + return; + + type_list_free(&e->expanded); + + if (dump_types) + debug("replacing %s", e->name); + } + + /* Take ownership of type->expanded */ + list_replace_init(&type->expanded, &e->expanded); + e->len = type->len; + + if (dump_types) { + checkp(fputs(e->name, stderr)); + checkp(fputs(" ", stderr)); + type_list_write(&e->expanded, stderr); + checkp(fputs("\n", stderr)); + } +} + +static void type_map_write(FILE *file) +{ + struct type_expansion *e; + struct hlist_node *tmp; + + if (!file) + return; + + hash_for_each_safe(type_map, e, tmp, hash) { + checkp(fputs(e->name, file)); + checkp(fputs(" ", file)); + type_list_write(&e->expanded, file); + checkp(fputs("\n", file)); + } +} + +static void type_map_free(void) +{ + struct type_expansion *e; + struct hlist_node *tmp; + + hash_for_each_safe(type_map, e, tmp, hash) { + type_expansion_free(e); + free(e); + } + + hash_init(type_map); +} + +/* + * CRC for a type, with an optional fully expanded type string for + * debugging. + */ +struct version { + struct type_expansion type; + unsigned long crc; +}; + +static void version_init(struct version *version) +{ + version->crc = crc32(0, NULL, 0); + type_expansion_init(&version->type); +} + +static void version_free(struct version *version) +{ + type_expansion_free(&version->type); +} + +static void version_add(struct version *version, const char *s) +{ + version->crc = crc32(version->crc, (void *)s, strlen(s)); + if (dump_versions) + type_expansion_append(&version->type, s, NULL); +} + +/* + * Type reference format: <prefix>#<name>, where prefix: + * s -> structure + * u -> union + * e -> enum + * t -> typedef + * + * Names with spaces are additionally wrapped in single quotes. 
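+ *
+ * For example, a reference to `struct task_struct` is emitted as
+ * s#task_struct, and a hypothetical type whose fully qualified name
+ * contains spaces would be emitted as s#'name with spaces'.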
+ */ +static inline bool is_type_prefix(const char *s) +{ + return (s[0] == 's' || s[0] == 'u' || s[0] == 'e' || s[0] == 't') && + s[1] == '#'; +} + +static char get_type_prefix(int tag) +{ + switch (tag) { + case DW_TAG_class_type: + case DW_TAG_structure_type: + return 's'; + case DW_TAG_union_type: + return 'u'; + case DW_TAG_enumeration_type: + return 'e'; + case DW_TAG_typedef_type: + return 't'; + default: + return 0; + } +} + +static char *get_type_name(struct die *cache) +{ + const char *quote; + char prefix; + char *name; + + if (cache->state == DIE_INCOMPLETE) { + warn("found incomplete cache entry: %p", cache); + return NULL; + } + if (cache->state == DIE_SYMBOL) + return NULL; + if (!cache->fqn || !*cache->fqn) + return NULL; + + prefix = get_type_prefix(cache->tag); + if (!prefix) + return NULL; + + /* Wrap names with spaces in single quotes */ + quote = strstr(cache->fqn, " ") ? "'" : ""; + + /* <prefix>#<type_name>\0 */ + if (asprintf(&name, "%c#%s%s%s", prefix, quote, cache->fqn, quote) < 0) + error("asprintf failed for '%s'", cache->fqn); + + return name; +} + +static void __calculate_version(struct version *version, struct list_head *list) +{ + struct type_list_entry *entry; + struct type_expansion *e; + + /* Calculate a CRC over an expanded type string */ + list_for_each_entry(entry, list, list) { + if (is_type_prefix(entry->str)) { + check(type_map_get(entry->str, &e)); + + /* + * It's sufficient to expand each type reference just + * once to detect changes. + */ + if (cache_was_expanded(&expansion_cache, e)) { + version_add(version, entry->str); + } else { + cache_mark_expanded(&expansion_cache, e); + __calculate_version(version, &e->expanded); + } + } else { + version_add(version, entry->str); + } + } +} + +static void calculate_version(struct version *version, struct list_head *list) +{ + version_init(version); + __calculate_version(version, list); + cache_free(&expansion_cache); +} + +static void __type_expand(struct die *cache, struct type_expansion *type, + bool recursive); + +static void type_expand_child(struct die *cache, struct type_expansion *type, + bool recursive) +{ + struct type_expansion child; + char *name; + + name = get_type_name(cache); + if (!name) { + __type_expand(cache, type, recursive); + return; + } + + if (recursive && !__cache_was_expanded(&expansion_cache, cache->addr)) { + __cache_mark_expanded(&expansion_cache, cache->addr); + type_expansion_init(&child); + __type_expand(cache, &child, true); + type_map_add(name, &child); + type_expansion_free(&child); + } + + type_expansion_append(type, name, name); +} + +static void __type_expand(struct die *cache, struct type_expansion *type, + bool recursive) +{ + struct die_fragment *df; + struct die *child; + + list_for_each_entry(df, &cache->fragments, list) { + switch (df->type) { + case FRAGMENT_STRING: + type_expansion_append(type, df->data.str, NULL); + break; + case FRAGMENT_DIE: + /* Use a complete die_map expansion if available */ + if (__die_map_get(df->data.addr, DIE_COMPLETE, + &child) && + __die_map_get(df->data.addr, DIE_UNEXPANDED, + &child)) + error("unknown child: %" PRIxPTR, + df->data.addr); + + type_expand_child(child, type, recursive); + break; + case FRAGMENT_LINEBREAK: + /* + * Keep whitespace in the symtypes format, but avoid + * repeated spaces. 
+ */ + if (list_is_last(&df->list, &cache->fragments) || + list_next_entry(df, list)->type != + FRAGMENT_LINEBREAK) + type_expansion_append(type, " ", NULL); + break; + default: + error("empty die_fragment in %p", cache); + } + } +} + +static void type_expand(struct die *cache, struct type_expansion *type, + bool recursive) +{ + type_expansion_init(type); + __type_expand(cache, type, recursive); + cache_free(&expansion_cache); +} + +static void expand_type(struct die *cache, void *arg) +{ + struct type_expansion type; + char *name; + + if (cache->mapped) + return; + + cache->mapped = true; + + /* + * Skip unexpanded die_map entries if there's a complete + * expansion available for this DIE. + */ + if (cache->state == DIE_UNEXPANDED && + !__die_map_get(cache->addr, DIE_COMPLETE, &cache)) { + if (cache->mapped) + return; + + cache->mapped = true; + } + + name = get_type_name(cache); + if (!name) + return; + + debug("%s", name); + type_expand(cache, &type, true); + type_map_add(name, &type); + + type_expansion_free(&type); + free(name); +} + +static void expand_symbol(struct symbol *sym, void *arg) +{ + struct type_expansion type; + struct version version; + struct die *cache; + + /* + * No need to expand again unless we want a symtypes file entry + * for the symbol. Note that this means `sym` has the same address + * as another symbol that was already processed. + */ + if (!symtypes && sym->state == SYMBOL_PROCESSED) + return; + + if (__die_map_get(sym->die_addr, DIE_SYMBOL, &cache)) + return; /* We'll warn about missing CRCs later. */ + + type_expand(cache, &type, false); + + /* If the symbol already has a version, don't calculate it again. */ + if (sym->state != SYMBOL_PROCESSED) { + calculate_version(&version, &type.expanded); + symbol_set_crc(sym, version.crc); + debug("%s = %lx", sym->name, version.crc); + + if (dump_versions) { + checkp(fputs(sym->name, stderr)); + checkp(fputs(" ", stderr)); + type_list_write(&version.type.expanded, stderr); + checkp(fputs("\n", stderr)); + } + + version_free(&version); + } + + /* These aren't needed in type_map unless we want a symtypes file. */ + if (symtypes) + type_map_add(sym->name, &type); + + type_expansion_free(&type); +} + +void generate_symtypes_and_versions(FILE *file) +{ + cache_init(&expansion_cache); + + /* + * die_map processing: + * + * 1. die_map contains all types referenced in exported symbol + * signatures, but can contain duplicates just like the original + * DWARF, and some references may not be fully expanded depending + * on how far we processed the DIE tree for that specific symbol. + * + * For each die_map entry, find the longest available expansion, + * and add it to type_map. + */ + die_map_for_each(expand_type, NULL); + + /* + * 2. For each exported symbol, expand the die_map type, and use + * type_map expansions to calculate a symbol version from the + * fully expanded type string. + */ + symbol_for_each(expand_symbol, NULL); + + /* + * 3. If a symtypes file is requested, write type_map contents to + * the file. + */ + type_map_write(file); + type_map_free(); +} diff --git a/scripts/genksyms/Makefile b/scripts/genksyms/Makefile index 312edccda736..4350311fb7b3 100644 --- a/scripts/genksyms/Makefile +++ b/scripts/genksyms/Makefile @@ -4,24 +4,6 @@ hostprogs-always-y += genksyms genksyms-objs := genksyms.o parse.tab.o lex.lex.o -# FIXME: fix the ambiguous grammar in parse.y and delete this hack -# -# Suppress shift/reduce, reduce/reduce conflicts warnings -# unless W=1 is specified. 
-# -# Just in case, run "$(YACC) --version" without suppressing stderr -# so that 'bison: not found' will be displayed if it is missing. -ifeq ($(findstring 1,$(KBUILD_EXTRA_WARN)),) - -quiet_cmd_bison_no_warn = $(quiet_cmd_bison) - cmd_bison_no_warn = $(YACC) --version >/dev/null; \ - $(cmd_bison) 2>/dev/null - -$(obj)/pars%.tab.c $(obj)/pars%.tab.h: $(src)/pars%.y FORCE - $(call if_changed,bison_no_warn) - -endif - # -I needed for generated C source to include headers in source tree HOSTCFLAGS_parse.tab.o := -I $(src) HOSTCFLAGS_lex.lex.o := -I $(src) diff --git a/scripts/genksyms/genksyms.c b/scripts/genksyms/genksyms.c index 07f9b8cfb233..8b0d7ac73dbb 100644 --- a/scripts/genksyms/genksyms.c +++ b/scripts/genksyms/genksyms.c @@ -12,18 +12,19 @@ #include <stdio.h> #include <string.h> +#include <stdint.h> #include <stdlib.h> #include <unistd.h> #include <assert.h> #include <stdarg.h> #include <getopt.h> +#include <hashtable.h> + #include "genksyms.h" /*----------------------------------------------------------------------*/ -#define HASH_BUCKETS 4096 - -static struct symbol *symtab[HASH_BUCKETS]; +static HASHTABLE_DEFINE(symbol_hashtable, 1U << 12); static FILE *debugfile; int cur_line = 1; @@ -60,7 +61,7 @@ static void print_type_name(enum symbol_type type, const char *name); /*----------------------------------------------------------------------*/ -static const unsigned int crctab32[] = { +static const uint32_t crctab32[] = { 0x00000000U, 0x77073096U, 0xee0e612cU, 0x990951baU, 0x076dc419U, 0x706af48fU, 0xe963a535U, 0x9e6495a3U, 0x0edb8832U, 0x79dcb8a4U, 0xe0d5e91eU, 0x97d2d988U, 0x09b64c2bU, 0x7eb17cbdU, 0xe7b82d07U, @@ -115,19 +116,19 @@ static const unsigned int crctab32[] = { 0x2d02ef8dU }; -static unsigned long partial_crc32_one(unsigned char c, unsigned long crc) +static uint32_t partial_crc32_one(uint8_t c, uint32_t crc) { return crctab32[(crc ^ c) & 0xff] ^ (crc >> 8); } -static unsigned long partial_crc32(const char *s, unsigned long crc) +static uint32_t partial_crc32(const char *s, uint32_t crc) { while (*s) crc = partial_crc32_one(*s++, crc); return crc; } -static unsigned long crc32(const char *s) +static uint32_t crc32(const char *s) { return partial_crc32(s, 0xffffffff) ^ 0xffffffff; } @@ -151,14 +152,14 @@ static enum symbol_type map_to_ns(enum symbol_type t) struct symbol *find_symbol(const char *name, enum symbol_type ns, int exact) { - unsigned long h = crc32(name) % HASH_BUCKETS; struct symbol *sym; - for (sym = symtab[h]; sym; sym = sym->hash_next) + hash_for_each_possible(symbol_hashtable, sym, hnode, crc32(name)) { if (map_to_ns(sym->type) == map_to_ns(ns) && strcmp(name, sym->name) == 0 && sym->is_declared) break; + } if (exact && sym && sym->type != ns) return NULL; @@ -224,64 +225,56 @@ static struct symbol *__add_symbol(const char *name, enum symbol_type type, return NULL; } - h = crc32(name) % HASH_BUCKETS; - for (sym = symtab[h]; sym; sym = sym->hash_next) { - if (map_to_ns(sym->type) == map_to_ns(type) && - strcmp(name, sym->name) == 0) { - if (is_reference) - /* fall through */ ; - else if (sym->type == type && - equal_list(sym->defn, defn)) { - if (!sym->is_declared && sym->is_override) { - print_location(); - print_type_name(type, name); - fprintf(stderr, " modversion is " - "unchanged\n"); - } - sym->is_declared = 1; - return sym; - } else if (!sym->is_declared) { - if (sym->is_override && flag_preserve) { - print_location(); - fprintf(stderr, "ignoring "); - print_type_name(type, name); - fprintf(stderr, " modversion change\n"); - sym->is_declared = 
1; - return sym; - } else { - status = is_unknown_symbol(sym) ? - STATUS_DEFINED : STATUS_MODIFIED; - } - } else { - error_with_pos("redefinition of %s", name); - return sym; + h = crc32(name); + hash_for_each_possible(symbol_hashtable, sym, hnode, h) { + if (map_to_ns(sym->type) != map_to_ns(type) || + strcmp(name, sym->name)) + continue; + + if (is_reference) { + break; + } else if (sym->type == type && equal_list(sym->defn, defn)) { + if (!sym->is_declared && sym->is_override) { + print_location(); + print_type_name(type, name); + fprintf(stderr, " modversion is unchanged\n"); } + sym->is_declared = 1; + } else if (sym->is_declared) { + error_with_pos("redefinition of %s", name); + } else if (sym->is_override && flag_preserve) { + print_location(); + fprintf(stderr, "ignoring "); + print_type_name(type, name); + fprintf(stderr, " modversion change\n"); + sym->is_declared = 1; + } else { + status = is_unknown_symbol(sym) ? + STATUS_DEFINED : STATUS_MODIFIED; break; } + free_list(defn, NULL); + return sym; } if (sym) { - struct symbol **psym; + hash_del(&sym->hnode); - for (psym = &symtab[h]; *psym; psym = &(*psym)->hash_next) { - if (*psym == sym) { - *psym = sym->hash_next; - break; - } - } + free_list(sym->defn, NULL); + free(sym->name); + free(sym); --nsyms; } sym = xmalloc(sizeof(*sym)); - sym->name = name; + sym->name = xstrdup(name); sym->type = type; sym->defn = defn; sym->expansion_trail = NULL; sym->visited = NULL; sym->is_extern = is_extern; - sym->hash_next = symtab[h]; - symtab[h] = sym; + hash_add(symbol_hashtable, &sym->hnode, h); sym->is_declared = !is_reference; sym->status = status; @@ -480,7 +473,7 @@ static void read_reference(FILE *f) defn = def; def = read_node(f); } - subsym = add_reference_symbol(xstrdup(sym->string), sym->tag, + subsym = add_reference_symbol(sym->string, sym->tag, defn, is_extern); subsym->is_override = is_override; free_node(sym); @@ -525,7 +518,7 @@ static void print_list(FILE * f, struct string_list *list) } } -static unsigned long expand_and_crc_sym(struct symbol *sym, unsigned long crc) +static uint32_t expand_and_crc_sym(struct symbol *sym, uint32_t crc) { struct string_list *list = sym->defn; struct string_list **e, **b; @@ -632,7 +625,7 @@ static unsigned long expand_and_crc_sym(struct symbol *sym, unsigned long crc) void export_symbol(const char *name) { struct symbol *sym; - unsigned long crc; + uint32_t crc; int has_changed = 0; sym = find_symbol(name, SYM_NORMAL, 0); @@ -680,7 +673,7 @@ void export_symbol(const char *name) if (flag_dump_defs) fputs(">\n", debugfile); - printf("#SYMVER %s 0x%08lx\n", name, crc); + printf("#SYMVER %s 0x%08lx\n", name, (unsigned long)crc); } /*----------------------------------------------------------------------*/ @@ -832,9 +825,9 @@ int main(int argc, char **argv) } if (flag_debug) { - fprintf(debugfile, "Hash table occupancy %d/%d = %g\n", - nsyms, HASH_BUCKETS, - (double)nsyms / (double)HASH_BUCKETS); + fprintf(debugfile, "Hash table occupancy %d/%zd = %g\n", + nsyms, HASH_SIZE(symbol_hashtable), + (double)nsyms / HASH_SIZE(symbol_hashtable)); } if (dumpfile) diff --git a/scripts/genksyms/genksyms.h b/scripts/genksyms/genksyms.h index 21ed2ec2d98c..0c355075f0e6 100644 --- a/scripts/genksyms/genksyms.h +++ b/scripts/genksyms/genksyms.h @@ -12,8 +12,11 @@ #ifndef MODUTILS_GENKSYMS_H #define MODUTILS_GENKSYMS_H 1 +#include <stdbool.h> #include <stdio.h> +#include <list_types.h> + enum symbol_type { SYM_NORMAL, SYM_TYPEDEF, SYM_ENUM, SYM_STRUCT, SYM_UNION, SYM_ENUM_CONST @@ -31,8 +34,8 @@ struct 
string_list { }; struct symbol { - struct symbol *hash_next; - const char *name; + struct hlist_node hnode; + char *name; enum symbol_type type; struct string_list *defn; struct symbol *expansion_trail; @@ -64,6 +67,8 @@ struct string_list *copy_list_range(struct string_list *start, int yylex(void); int yyparse(void); +extern bool dont_want_type_specifier; + void error_with_pos(const char *, ...) __attribute__ ((format(printf, 1, 2))); /*----------------------------------------------------------------------*/ diff --git a/scripts/genksyms/lex.l b/scripts/genksyms/lex.l index a4d7495eaf75..22aeb57649d9 100644 --- a/scripts/genksyms/lex.l +++ b/scripts/genksyms/lex.l @@ -12,6 +12,7 @@ %{ #include <limits.h> +#include <stdbool.h> #include <stdlib.h> #include <string.h> #include <ctype.h> @@ -50,6 +51,7 @@ MC_TOKEN ([~%^&*+=|<>/-]=)|(&&)|("||")|(->)|(<<)|(>>) %% +u?int(8|16|32|64)x(1|2|4|8|16)_t return BUILTIN_INT_KEYW; /* Keep track of our location in the original source files. */ ^#[ \t]+{INT}[ \t]+\"[^\"\n]+\".*\n return FILENAME; @@ -113,6 +115,12 @@ MC_TOKEN ([~%^&*+=|<>/-]=)|(&&)|("||")|(->)|(<<)|(>>) /* The second stage lexer. Here we incorporate knowledge of the state of the parser to tailor the tokens that are returned. */ +/* + * The lexer cannot distinguish whether a typedef'ed string is a TYPE or an + * IDENT. We need a hint from the parser to handle this accurately. + */ +bool dont_want_type_specifier; + int yylex(void) { @@ -207,7 +215,7 @@ repeat: goto repeat; } } - if (!suppress_type_lookup) + if (!suppress_type_lookup && !dont_want_type_specifier) { if (find_symbol(yytext, SYM_TYPEDEF, 1)) token = TYPE; @@ -431,7 +439,12 @@ fini: if (suppress_type_lookup > 0) --suppress_type_lookup; - if (dont_want_brace_phrase > 0) + + /* + * __attribute__() can be placed immediately after the 'struct' keyword. + * e.g.) struct __attribute__((__packed__)) foo { ... }; + */ + if (token != ATTRIBUTE_PHRASE && dont_want_brace_phrase > 0) --dont_want_brace_phrase; yylval = &next_node->next; diff --git a/scripts/genksyms/parse.y b/scripts/genksyms/parse.y index 8e9b5e69e8f0..ee600a804fa1 100644 --- a/scripts/genksyms/parse.y +++ b/scripts/genksyms/parse.y @@ -12,6 +12,7 @@ %{ #include <assert.h> +#include <stdbool.h> #include <stdlib.h> #include <string.h> #include "genksyms.h" @@ -148,32 +149,45 @@ simple_declaration: current_name = NULL; } $$ = $3; + dont_want_type_specifier = false; } ; init_declarator_list_opt: - /* empty */ { $$ = NULL; } - | init_declarator_list + /* empty */ { $$ = NULL; } + | init_declarator_list { free_list(decl_spec, NULL); $$ = $1; } ; init_declarator_list: init_declarator { struct string_list *decl = *$1; *$1 = NULL; + + /* avoid sharing among multiple init_declarators */ + if (decl_spec) + decl_spec = copy_list_range(decl_spec, NULL); + add_symbol(current_name, is_typedef ? SYM_TYPEDEF : SYM_NORMAL, decl, is_extern); current_name = NULL; $$ = $1; + dont_want_type_specifier = true; } - | init_declarator_list ',' init_declarator - { struct string_list *decl = *$3; - *$3 = NULL; + | init_declarator_list ',' attribute_opt init_declarator + { struct string_list *decl = *$4; + *$4 = NULL; free_list(*$2, NULL); *$2 = decl_spec; + + /* avoid sharing among multiple init_declarators */ + if (decl_spec) + decl_spec = copy_list_range(decl_spec, NULL); + add_symbol(current_name, is_typedef ? 
SYM_TYPEDEF : SYM_NORMAL, decl, is_extern); current_name = NULL; - $$ = $3; + $$ = $4; + dont_want_type_specifier = true; } ; @@ -189,8 +203,9 @@ decl_specifier_seq_opt: ; decl_specifier_seq: - decl_specifier { decl_spec = *$1; } + attribute_opt decl_specifier { decl_spec = *$2; } | decl_specifier_seq decl_specifier { decl_spec = *$2; } + | decl_specifier_seq ATTRIBUTE_PHRASE { decl_spec = *$2; } ; decl_specifier: @@ -200,7 +215,8 @@ decl_specifier: remove_node($1); $$ = $1; } - | type_specifier + | type_specifier { dont_want_type_specifier = true; $$ = $1; } + | type_qualifier ; storage_class_specifier: @@ -213,24 +229,23 @@ storage_class_specifier: type_specifier: simple_type_specifier - | cvar_qualifier | TYPEOF_KEYW '(' parameter_declaration ')' | TYPEOF_PHRASE /* References to s/u/e's defined elsewhere. Rearrange things so that it is easier to expand the definition fully later. */ - | STRUCT_KEYW IDENT - { remove_node($1); (*$2)->tag = SYM_STRUCT; $$ = $2; } - | UNION_KEYW IDENT - { remove_node($1); (*$2)->tag = SYM_UNION; $$ = $2; } + | STRUCT_KEYW attribute_opt IDENT + { remove_node($1); (*$3)->tag = SYM_STRUCT; $$ = $3; } + | UNION_KEYW attribute_opt IDENT + { remove_node($1); (*$3)->tag = SYM_UNION; $$ = $3; } | ENUM_KEYW IDENT { remove_node($1); (*$2)->tag = SYM_ENUM; $$ = $2; } /* Full definitions of an s/u/e. Record it. */ - | STRUCT_KEYW IDENT class_body - { record_compound($1, $2, $3, SYM_STRUCT); $$ = $3; } - | UNION_KEYW IDENT class_body - { record_compound($1, $2, $3, SYM_UNION); $$ = $3; } + | STRUCT_KEYW attribute_opt IDENT class_body + { record_compound($1, $3, $4, SYM_STRUCT); $$ = $4; } + | UNION_KEYW attribute_opt IDENT class_body + { record_compound($1, $3, $4, SYM_UNION); $$ = $4; } | ENUM_KEYW IDENT enum_body { record_compound($1, $2, $3, SYM_ENUM); $$ = $3; } /* @@ -239,8 +254,8 @@ type_specifier: | ENUM_KEYW enum_body { add_symbol(NULL, SYM_ENUM, NULL, 0); $$ = $2; } /* Anonymous s/u definitions. Nothing needs doing. */ - | STRUCT_KEYW class_body { $$ = $2; } - | UNION_KEYW class_body { $$ = $2; } + | STRUCT_KEYW attribute_opt class_body { $$ = $3; } + | UNION_KEYW attribute_opt class_body { $$ = $3; } ; simple_type_specifier: @@ -260,22 +275,24 @@ simple_type_specifier: ; ptr_operator: - '*' cvar_qualifier_seq_opt + '*' type_qualifier_seq_opt { $$ = $2 ? 
$2 : $1; } ; -cvar_qualifier_seq_opt: +type_qualifier_seq_opt: /* empty */ { $$ = NULL; } - | cvar_qualifier_seq + | type_qualifier_seq ; -cvar_qualifier_seq: - cvar_qualifier - | cvar_qualifier_seq cvar_qualifier { $$ = $2; } +type_qualifier_seq: + type_qualifier + | ATTRIBUTE_PHRASE + | type_qualifier_seq type_qualifier { $$ = $2; } + | type_qualifier_seq ATTRIBUTE_PHRASE { $$ = $2; } ; -cvar_qualifier: - CONST_KEYW | VOLATILE_KEYW | ATTRIBUTE_PHRASE +type_qualifier: + CONST_KEYW | VOLATILE_KEYW | RESTRICT_KEYW { /* restrict has no effect in prototypes so ignore it */ remove_node($1); @@ -297,15 +314,7 @@ direct_declarator: current_name = (*$1)->string; $$ = $1; } - } - | TYPE - { if (current_name != NULL) { - error_with_pos("unexpected second declaration name"); - YYERROR; - } else { - current_name = (*$1)->string; - $$ = $1; - } + dont_want_type_specifier = false; } | direct_declarator '(' parameter_declaration_clause ')' { $$ = $4; } @@ -325,16 +334,19 @@ nested_declarator: ; direct_nested_declarator: - IDENT - | TYPE - | direct_nested_declarator '(' parameter_declaration_clause ')' + direct_nested_declarator1 + | direct_nested_declarator1 '(' parameter_declaration_clause ')' { $$ = $4; } - | direct_nested_declarator '(' error ')' + ; + +direct_nested_declarator1: + IDENT { $$ = $1; dont_want_type_specifier = false; } + | direct_nested_declarator1 '(' error ')' { $$ = $4; } - | direct_nested_declarator BRACKET_PHRASE + | direct_nested_declarator1 BRACKET_PHRASE { $$ = $2; } - | '(' nested_declarator ')' - { $$ = $3; } + | '(' attribute_opt nested_declarator ')' + { $$ = $4; } | '(' error ')' { $$ = $3; } ; @@ -352,45 +364,57 @@ parameter_declaration_list_opt: parameter_declaration_list: parameter_declaration + { $$ = $1; dont_want_type_specifier = false; } | parameter_declaration_list ',' parameter_declaration - { $$ = $3; } + { $$ = $3; dont_want_type_specifier = false; } ; parameter_declaration: - decl_specifier_seq m_abstract_declarator + decl_specifier_seq abstract_declarator_opt { $$ = $2 ? $2 : $1; } ; -m_abstract_declarator: - ptr_operator m_abstract_declarator +abstract_declarator_opt: + /* empty */ { $$ = NULL; } + | abstract_declarator + ; + +abstract_declarator: + ptr_operator + | ptr_operator abstract_declarator { $$ = $2 ? $2 : $1; } - | direct_m_abstract_declarator + | direct_abstract_declarator attribute_opt + { $$ = $2; dont_want_type_specifier = false; } ; -direct_m_abstract_declarator: - /* empty */ { $$ = NULL; } - | IDENT +direct_abstract_declarator: + direct_abstract_declarator1 + | direct_abstract_declarator1 open_paren parameter_declaration_clause ')' + { $$ = $4; } + | open_paren parameter_declaration_clause ')' + { $$ = $3; } + ; + +direct_abstract_declarator1: + IDENT { /* For version 2 checksums, we don't want to remember private parameter names. */ remove_node($1); $$ = $1; } - /* This wasn't really a typedef name but an identifier that - shadows one. 
*/ - | TYPE - { remove_node($1); - $$ = $1; - } - | direct_m_abstract_declarator '(' parameter_declaration_clause ')' - { $$ = $4; } - | direct_m_abstract_declarator '(' error ')' + | direct_abstract_declarator1 open_paren error ')' { $$ = $4; } - | direct_m_abstract_declarator BRACKET_PHRASE + | direct_abstract_declarator1 BRACKET_PHRASE { $$ = $2; } - | '(' m_abstract_declarator ')' - { $$ = $3; } - | '(' error ')' + | open_paren attribute_opt abstract_declarator ')' + { $$ = $4; } + | open_paren error ')' { $$ = $3; } + | BRACKET_PHRASE + ; + +open_paren: + '(' { $$ = $1; dont_want_type_specifier = false; } ; function_definition: @@ -430,9 +454,9 @@ member_specification: member_declaration: decl_specifier_seq_opt member_declarator_list_opt ';' - { $$ = $3; } + { $$ = $3; dont_want_type_specifier = false; } | error ';' - { $$ = $2; } + { $$ = $2; dont_want_type_specifier = false; } ; member_declarator_list_opt: @@ -442,7 +466,9 @@ member_declarator_list_opt: member_declarator_list: member_declarator - | member_declarator_list ',' member_declarator { $$ = $3; } + { $$ = $1; dont_want_type_specifier = true; } + | member_declarator_list ',' member_declarator + { $$ = $3; dont_want_type_specifier = true; } ; member_declarator: @@ -457,7 +483,7 @@ member_bitfield_declarator: attribute_opt: /* empty */ { $$ = NULL; } - | attribute_opt ATTRIBUTE_PHRASE + | attribute_opt ATTRIBUTE_PHRASE { $$ = $2; } ; enum_body: @@ -472,12 +498,12 @@ enumerator_list: enumerator: IDENT { - const char *name = strdup((*$1)->string); + const char *name = (*$1)->string; add_symbol(name, SYM_ENUM_CONST, NULL, 0); } | IDENT '=' EXPRESSION_PHRASE { - const char *name = strdup((*$1)->string); + const char *name = (*$1)->string; struct string_list *expr = copy_list_range(*$3, *$2); add_symbol(name, SYM_ENUM_CONST, expr, 0); } diff --git a/scripts/kconfig/Makefile b/scripts/kconfig/Makefile index a0a0be38cbdc..fb50bd4f4103 100644 --- a/scripts/kconfig/Makefile +++ b/scripts/kconfig/Makefile @@ -105,9 +105,11 @@ configfiles = $(wildcard $(srctree)/kernel/configs/$(1) $(srctree)/arch/$(SRCARC all-config-fragments = $(call configfiles,*.config) config-fragments = $(call configfiles,$@) +cmd_merge_fragments = $(srctree)/scripts/kconfig/merge_config.sh -m $(KCONFIG_CONFIG) $(config-fragments) + %.config: $(obj)/conf $(if $(config-fragments),, $(error $@ fragment does not exists on this architecture)) - $(Q)$(CONFIG_SHELL) $(srctree)/scripts/kconfig/merge_config.sh -m $(KCONFIG_CONFIG) $(config-fragments) + $(call cmd,merge_fragments) $(Q)$(MAKE) -f $(srctree)/Makefile olddefconfig PHONY += tinyconfig diff --git a/scripts/kconfig/confdata.c b/scripts/kconfig/confdata.c index 4286d5e7f95d..3b55e7a4131d 100644 --- a/scripts/kconfig/confdata.c +++ b/scripts/kconfig/confdata.c @@ -360,10 +360,12 @@ int conf_read_simple(const char *name, int def) *p = '\0'; - in = zconf_fopen(env); + name = env; + + in = zconf_fopen(name); if (in) { conf_message("using defaults found in %s", - env); + name); goto load; } diff --git a/scripts/kconfig/qconf.cc b/scripts/kconfig/qconf.cc index 6c92ef1e16ef..eaa465b0ccf9 100644 --- a/scripts/kconfig/qconf.cc +++ b/scripts/kconfig/qconf.cc @@ -1464,8 +1464,8 @@ void ConfigMainWindow::loadConfig(void) { QString str; - str = QFileDialog::getOpenFileName(this, "", configname); - if (str.isNull()) + str = QFileDialog::getOpenFileName(this, QString(), configname); + if (str.isEmpty()) return; if (conf_read(str.toLocal8Bit().constData())) @@ -1491,8 +1491,8 @@ void ConfigMainWindow::saveConfigAs(void) { 
QString str; - str = QFileDialog::getSaveFileName(this, "", configname); - if (str.isNull()) + str = QFileDialog::getSaveFileName(this, QString(), configname); + if (str.isEmpty()) return; if (conf_write(str.toLocal8Bit().constData())) { diff --git a/scripts/kconfig/symbol.c b/scripts/kconfig/symbol.c index 89b84bf8e21f..7beb59dec5a0 100644 --- a/scripts/kconfig/symbol.c +++ b/scripts/kconfig/symbol.c @@ -388,6 +388,7 @@ static void sym_warn_unmet_dep(const struct symbol *sym) " Selected by [m]:\n"); fputs(str_get(&gs), stderr); + str_free(&gs); sym_warnings++; } diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index 7ea59dc4926b..e18ae7dc8140 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -33,6 +33,10 @@ static bool module_enabled; static bool modversions; /* Is CONFIG_MODULE_SRCVERSION_ALL set? */ static bool all_versions; +/* Is CONFIG_BASIC_MODVERSIONS set? */ +static bool basic_modversions; +/* Is CONFIG_EXTENDED_MODVERSIONS set? */ +static bool extended_modversions; /* If we are modposting external module set to 1 */ static bool external_module; /* Only warn about unresolved symbols */ @@ -1806,13 +1810,56 @@ static void add_exported_symbols(struct buffer *buf, struct module *mod) } /** + * Record CRCs for unresolved symbols, supporting long names + */ +static void add_extended_versions(struct buffer *b, struct module *mod) +{ + struct symbol *s; + + if (!extended_modversions) + return; + + buf_printf(b, "\n"); + buf_printf(b, "static const u32 ____version_ext_crcs[]\n"); + buf_printf(b, "__used __section(\"__version_ext_crcs\") = {\n"); + list_for_each_entry(s, &mod->unresolved_symbols, list) { + if (!s->module) + continue; + if (!s->crc_valid) { + warn("\"%s\" [%s.ko] has no CRC!\n", + s->name, mod->name); + continue; + } + buf_printf(b, "\t0x%08x,\n", s->crc); + } + buf_printf(b, "};\n"); + + buf_printf(b, "static const char ____version_ext_names[]\n"); + buf_printf(b, "__used __section(\"__version_ext_names\") =\n"); + list_for_each_entry(s, &mod->unresolved_symbols, list) { + if (!s->module) + continue; + if (!s->crc_valid) + /* + * We already warned on this when producing the crc + * table. + * We need to skip its name too, as the indexes in + * both tables need to align. 
+ */ + continue; + buf_printf(b, "\t\"%s\\0\"\n", s->name); + } + buf_printf(b, ";\n"); +} + +/** * Record CRCs for unresolved symbols **/ static void add_versions(struct buffer *b, struct module *mod) { struct symbol *s; - if (!modversions) + if (!basic_modversions) return; buf_printf(b, "\n"); @@ -1828,11 +1875,16 @@ static void add_versions(struct buffer *b, struct module *mod) continue; } if (strlen(s->name) >= MODULE_NAME_LEN) { - error("too long symbol \"%s\" [%s.ko]\n", - s->name, mod->name); - break; + if (extended_modversions) { + /* this symbol will only be in the extended info */ + continue; + } else { + error("too long symbol \"%s\" [%s.ko]\n", + s->name, mod->name); + break; + } } - buf_printf(b, "\t{ %#8x, \"%s\" },\n", + buf_printf(b, "\t{ 0x%08x, \"%s\" },\n", s->crc, s->name); } @@ -1961,6 +2013,7 @@ static void write_mod_c_file(struct module *mod) add_header(&buf, mod); add_exported_symbols(&buf, mod); add_versions(&buf, mod); + add_extended_versions(&buf, mod); add_depends(&buf, mod); buf_printf(&buf, "\n"); @@ -2126,7 +2179,7 @@ int main(int argc, char **argv) LIST_HEAD(dump_lists); struct dump_list *dl, *dl2; - while ((opt = getopt(argc, argv, "ei:MmnT:to:au:WwENd:")) != -1) { + while ((opt = getopt(argc, argv, "ei:MmnT:to:au:WwENd:xb")) != -1) { switch (opt) { case 'e': external_module = true; @@ -2175,6 +2228,12 @@ int main(int argc, char **argv) case 'd': missing_namespace_deps = optarg; break; + case 'b': + basic_modversions = true; + break; + case 'x': + extended_modversions = true; + break; default: exit(1); } diff --git a/scripts/package/PKGBUILD b/scripts/package/PKGBUILD index dca706617adc..0cf3a55b05e1 100644 --- a/scripts/package/PKGBUILD +++ b/scripts/package/PKGBUILD @@ -22,7 +22,6 @@ license=(GPL-2.0-only) makedepends=( bc bison - cpio flex gettext kmod diff --git a/scripts/package/builddeb b/scripts/package/builddeb index ad7aba0f268e..3627ca227e5a 100755 --- a/scripts/package/builddeb +++ b/scripts/package/builddeb @@ -5,10 +5,12 @@ # # Simple script to generate a deb package for a Linux kernel. All the # complexity of what to do with a kernel after it is installed or removed -# is left to other scripts and packages: they can install scripts in the -# /etc/kernel/{pre,post}{inst,rm}.d/ directories (or an alternative location -# specified in KDEB_HOOKDIR) that will be called on package install and -# removal. +# is left to other scripts and packages. Scripts can be placed into the +# preinst, postinst, prerm and postrm directories in /etc/kernel or +# /usr/share/kernel. A different list of search directories can be given +# via KDEB_HOOKDIR. Scripts in directories earlier in the list will +# override scripts of the same name in later directories. The script will +# be called on package installation and removal. set -eu @@ -74,10 +76,8 @@ install_maint_scripts () { # kernel packages, as well as kernel packages built using make-kpkg. # make-kpkg sets $INITRD to indicate whether an initramfs is wanted, and # so do we; recent versions of dracut and initramfs-tools will obey this. 
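Looping back to the modpost hunk above: add_extended_versions() emits two parallel sections, a u32 CRC array (__version_ext_crcs) and a NUL-separated name blob (__version_ext_names), and the comment is explicit that a symbol skipped in one table must be skipped in the other so the indexes stay aligned. As a rough consumer-side sketch of why that alignment matters (hypothetical code, not the kernel's actual module loader; struct ext_versions and the sample data are invented for illustration):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct ext_versions {
	const uint32_t *crcs;
	size_t num_crcs;
	const char *names;	/* concatenated "name\0name\0..." */
	size_t names_len;
};

/*
 * Return 0 and fill *crc if sym is found. Entry i of crcs pairs with the
 * i-th NUL-terminated string in names, which only holds if both tables
 * skipped exactly the same symbols.
 */
static int ext_versions_find(const struct ext_versions *v,
			     const char *sym, uint32_t *crc)
{
	const char *p = v->names;
	const char *end = v->names + v->names_len;

	for (size_t i = 0; i < v->num_crcs && p < end; i++, p += strlen(p) + 1) {
		if (strcmp(p, sym) == 0) {
			*crc = v->crcs[i];
			return 0;
		}
	}
	return -1;
}

int main(void)
{
	/* Made-up example data in the same layout as the two sections. */
	static const uint32_t crcs[] = { 0x12345678u, 0x9abcdef0u };
	static const char names[] = "symbol_with_a_rather_long_name\0other_symbol";
	struct ext_versions v = {
		.crcs = crcs, .num_crcs = 2,
		.names = names, .names_len = sizeof(names),
	};
	uint32_t crc;

	if (ext_versions_find(&v, "other_symbol", &crc) == 0)
		printf("other_symbol crc 0x%08x\n", crc);
	return 0;
}

This layout is also why add_versions() can silently skip over-long names when extended_modversions is set: the extended tables have no MODULE_NAME_LEN limit, so the symbol is still covered there.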
- debhookdir=${KDEB_HOOKDIR:-/etc/kernel} + debhookdir=${KDEB_HOOKDIR:-/etc/kernel /usr/share/kernel} for script in postinst postrm preinst prerm; do - mkdir -p "${pdir}${debhookdir}/${script}.d" - mkdir -p "${pdir}/DEBIAN" cat <<-EOF > "${pdir}/DEBIAN/${script}" #!/bin/sh @@ -90,7 +90,15 @@ install_maint_scripts () { # Tell initramfs builder whether it's wanted export INITRD=$(if_enabled_echo CONFIG_BLK_DEV_INITRD Yes No) - test -d ${debhookdir}/${script}.d && run-parts --arg="${KERNELRELEASE}" --arg="/${installed_image_path}" ${debhookdir}/${script}.d + # run-parts will error out if one of its directory arguments does not + # exist, so filter the list of hook directories accordingly. + hookdirs= + for dir in ${debhookdir}; do + test -d "\$dir/${script}.d" || continue + hookdirs="\$hookdirs \$dir/${script}.d" + done + hookdirs="\${hookdirs# }" + test -n "\$hookdirs" && run-parts --arg="${KERNELRELEASE}" --arg="/${installed_image_path}" \$hookdirs exit 0 EOF chmod 755 "${pdir}/DEBIAN/${script}" diff --git a/scripts/package/install-extmod-build b/scripts/package/install-extmod-build index d3c5b104c063..bb6e23c1174e 100755 --- a/scripts/package/install-extmod-build +++ b/scripts/package/install-extmod-build @@ -49,17 +49,10 @@ mkdir -p "${destdir}" # This caters to host programs that participate in Kbuild. objtool and # resolve_btfids are out of scope. if [ "${CC}" != "${HOSTCC}" ]; then - echo "Rebuilding host programs with ${CC}..." - - # This leverages external module building. - # - Clear sub_make_done to allow the top-level Makefile to redo sub-make. - # - Filter out --no-print-directory to print "Entering directory" logs - # when Make changes the working directory. - unset sub_make_done - MAKEFLAGS=$(echo "${MAKEFLAGS}" | sed s/--no-print-directory//) - - cat <<-'EOF' > "${destdir}/Kbuild" - subdir-y := scripts + cat "${destdir}/scripts/Makefile" - <<-'EOF' > "${destdir}/scripts/Kbuild" + subdir-y += basic + hostprogs-always-y += mod/modpost + mod/modpost-objs := $(addprefix mod/, modpost.o file2alias.o sumversion.o symsearch.o) EOF # HOSTCXX is not overridden. The C++ compiler is used to build: @@ -67,20 +60,12 @@ if [ "${CC}" != "${HOSTCC}" ]; then # - GCC plugins, which will not work on the installed system even after # being rebuilt. # - # Use the single-target build to avoid the modpost invocation, which - # would overwrite Module.symvers. - "${MAKE}" HOSTCC="${CC}" KBUILD_OUTPUT=. KBUILD_EXTMOD="${destdir}" scripts/ - - cat <<-'EOF' > "${destdir}/scripts/Kbuild" - subdir-y := basic - hostprogs-always-y := mod/modpost - mod/modpost-objs := $(addprefix mod/, modpost.o file2alias.o sumversion.o symsearch.o) - EOF - - # Run once again to rebuild scripts/basic/ and scripts/mod/modpost. - "${MAKE}" HOSTCC="${CC}" KBUILD_OUTPUT=. KBUILD_EXTMOD="${destdir}" scripts/ + # Clear VPATH and srcroot because the source files reside in the output + # directory. + # shellcheck disable=SC2016 # $(MAKE), $(CC), and $(build) will be expanded by Make + "${MAKE}" run-command KBUILD_RUN_COMMAND='+$(MAKE) HOSTCC=$(CC) VPATH= srcroot=. 
$(build)='"${destdir}"/scripts - rm -f "${destdir}/Kbuild" "${destdir}/scripts/Kbuild" + rm -f "${destdir}/scripts/Kbuild" fi find "${destdir}" \( -name '.*.cmd' -o -name '*.o' \) -delete diff --git a/scripts/package/mkdebian b/scripts/package/mkdebian index b038a1380b8a..b6dd98ca860b 100755 --- a/scripts/package/mkdebian +++ b/scripts/package/mkdebian @@ -205,7 +205,7 @@ Priority: optional Maintainer: $maintainer Rules-Requires-Root: no Build-Depends: debhelper-compat (= 12) -Build-Depends-Arch: bc, bison, cpio, flex, +Build-Depends-Arch: bc, bison, flex, gcc-${host_gnu} <!pkg.${sourcename}.nokernelheaders>, kmod, libelf-dev:native, libssl-dev:native, libssl-dev <!pkg.${sourcename}.nokernelheaders>, diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c index 8a3384342e8d..6c2b6a62d9d2 100644 --- a/sound/core/pcm_native.c +++ b/sound/core/pcm_native.c @@ -3245,7 +3245,7 @@ static int snd_pcm_xfern_frames_ioctl(struct snd_pcm_substream *substream, if (copy_from_user(&xfern, _xfern, sizeof(xfern))) return -EFAULT; - bufs = memdup_user(xfern.bufs, sizeof(void *) * runtime->channels); + bufs = memdup_array_user(xfern.bufs, runtime->channels, sizeof(void *)); if (IS_ERR(bufs)) return PTR_ERR(bufs); if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) diff --git a/sound/pci/hda/hda_auto_parser.c b/sound/pci/hda/hda_auto_parser.c index 84393f4f429d..8923813ce424 100644 --- a/sound/pci/hda/hda_auto_parser.c +++ b/sound/pci/hda/hda_auto_parser.c @@ -80,7 +80,11 @@ static int compare_input_type(const void *ap, const void *bp) /* In case one has boost and the other one has not, pick the one with boost first. */ - return (int)(b->has_boost_on_pin - a->has_boost_on_pin); + if (a->has_boost_on_pin != b->has_boost_on_pin) + return (int)(b->has_boost_on_pin - a->has_boost_on_pin); + + /* Keep the original order */ + return a->order - b->order; } /* Reorder the surround channels @@ -400,6 +404,8 @@ int snd_hda_parse_pin_defcfg(struct hda_codec *codec, reorder_outputs(cfg->speaker_outs, cfg->speaker_pins); /* sort inputs in the order of AUTO_PIN_* type */ + for (i = 0; i < cfg->num_inputs; i++) + cfg->inputs[i].order = i; sort(cfg->inputs, cfg->num_inputs, sizeof(cfg->inputs[0]), compare_input_type, NULL); diff --git a/sound/pci/hda/hda_auto_parser.h b/sound/pci/hda/hda_auto_parser.h index 579b11beac71..87af3d8c02f7 100644 --- a/sound/pci/hda/hda_auto_parser.h +++ b/sound/pci/hda/hda_auto_parser.h @@ -37,6 +37,7 @@ struct auto_pin_cfg_item { unsigned int is_headset_mic:1; unsigned int is_headphone_mic:1; /* Mic-only in headphone jack */ unsigned int has_boost_on_pin:1; + int order; }; struct auto_pin_cfg; diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index d3c9ed963588..8192be394d0d 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -7497,6 +7497,16 @@ static void alc287_fixup_lenovo_thinkpad_with_alc1318(struct hda_codec *codec, spec->gen.pcm_playback_hook = alc287_alc1318_playback_pcm_hook; } +/* + * Clear COEF 0x0d (PCBEEP passthrough) bit 0x40 where BIOS sets it wrongly + * at PM resume + */ +static void alc283_fixup_dell_hp_resume(struct hda_codec *codec, + const struct hda_fixup *fix, int action) +{ + if (action == HDA_FIXUP_ACT_INIT) + alc_write_coef_idx(codec, 0xd, 0x2800); +} enum { ALC269_FIXUP_GPIO2, @@ -7799,6 +7809,7 @@ enum { ALC269_FIXUP_VAIO_VJFH52_MIC_NO_PRESENCE, ALC233_FIXUP_MEDION_MTL_SPK, ALC294_FIXUP_BASS_SPEAKER_15, + ALC283_FIXUP_DELL_HP_RESUME, }; /* A special fixup for Lenovo C940 and Yoga Duet 7; @@ -10143,6 
+10154,10 @@ static const struct hda_fixup alc269_fixups[] = { .type = HDA_FIXUP_FUNC, .v.func = alc294_fixup_bass_speaker_15, }, + [ALC283_FIXUP_DELL_HP_RESUME] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc283_fixup_dell_hp_resume, + }, }; static const struct hda_quirk alc269_fixup_tbl[] = { @@ -10203,6 +10218,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1028, 0x05f4, "Dell", ALC269_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x05f5, "Dell", ALC269_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x05f6, "Dell", ALC269_FIXUP_DELL1_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1028, 0x0604, "Dell Venue 11 Pro 7130", ALC283_FIXUP_DELL_HP_RESUME), SND_PCI_QUIRK(0x1028, 0x0615, "Dell Vostro 5470", ALC290_FIXUP_SUBWOOFER_HSJACK), SND_PCI_QUIRK(0x1028, 0x0616, "Dell Vostro 5470", ALC290_FIXUP_SUBWOOFER_HSJACK), SND_PCI_QUIRK(0x1028, 0x062c, "Dell Latitude E5550", ALC292_FIXUP_DELL_E7X), @@ -10918,7 +10934,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x17aa, 0x3869, "Lenovo Yoga7 14IAL7", ALC287_FIXUP_YOGA9_14IAP7_BASS_SPK_PIN), HDA_CODEC_QUIRK(0x17aa, 0x386e, "Legion Y9000X 2022 IAH7", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x17aa, 0x386e, "Yoga Pro 7 14ARP8", ALC285_FIXUP_SPEAKER2_TO_DAC1), - HDA_CODEC_QUIRK(0x17aa, 0x386f, "Legion Pro 7 16ARX8H", ALC287_FIXUP_TAS2781_I2C), + HDA_CODEC_QUIRK(0x17aa, 0x38a8, "Legion Pro 7 16ARX8H", ALC287_FIXUP_TAS2781_I2C), /* this must match before PCI SSID 17aa:386f below */ SND_PCI_QUIRK(0x17aa, 0x386f, "Legion Pro 7i 16IAX7", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x17aa, 0x3870, "Lenovo Yoga 7 14ARB7", ALC287_FIXUP_YOGA7_14ARB7_I2C), SND_PCI_QUIRK(0x17aa, 0x3877, "Lenovo Legion 7 Slim 16ARHA7", ALC287_FIXUP_CS35L41_I2C_2), diff --git a/sound/soc/amd/acp/acp-i2s.c b/sound/soc/amd/acp/acp-i2s.c index 1f59ee248771..89e99ed4275a 100644 --- a/sound/soc/amd/acp/acp-i2s.c +++ b/sound/soc/amd/acp/acp-i2s.c @@ -181,6 +181,7 @@ static int acp_i2s_set_tdm_slot(struct snd_soc_dai *dai, u32 tx_mask, u32 rx_mas break; default: dev_err(dev, "Unknown chip revision %d\n", chip->acp_rev); + spin_unlock_irq(&adata->acp_lock); return -EINVAL; } } diff --git a/sound/soc/amd/yc/acp6x-mach.c b/sound/soc/amd/yc/acp6x-mach.c index ecf57a6cb7c3..b16587d8f97a 100644 --- a/sound/soc/amd/yc/acp6x-mach.c +++ b/sound/soc/amd/yc/acp6x-mach.c @@ -307,6 +307,34 @@ static const struct dmi_system_id yc_acp_quirk_table[] = { { .driver_data = &acp6x_card, .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "83L3"), + } + }, + { + .driver_data = &acp6x_card, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "83N6"), + } + }, + { + .driver_data = &acp6x_card, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "83Q2"), + } + }, + { + .driver_data = &acp6x_card, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "83Q3"), + } + }, + { + .driver_data = &acp6x_card, + .matches = { DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK COMPUTER INC."), DMI_MATCH(DMI_PRODUCT_NAME, "UM5302TA"), } diff --git a/sound/soc/codecs/da7213.c b/sound/soc/codecs/da7213.c index ca4cc954efa8..eb97ac73ec06 100644 --- a/sound/soc/codecs/da7213.c +++ b/sound/soc/codecs/da7213.c @@ -2203,6 +2203,8 @@ static int da7213_i2c_probe(struct i2c_client *i2c) return ret; } + mutex_init(&da7213->ctrl_lock); + pm_runtime_set_autosuspend_delay(&i2c->dev, 100); pm_runtime_use_autosuspend(&i2c->dev); pm_runtime_set_active(&i2c->dev); diff --git 
a/sound/soc/codecs/es8316.c b/sound/soc/codecs/es8316.c index f508df01145b..e7bd561a8f40 100644 --- a/sound/soc/codecs/es8316.c +++ b/sound/soc/codecs/es8316.c @@ -101,7 +101,7 @@ static const struct snd_kcontrol_new es8316_snd_controls[] = { SOC_DOUBLE_R_TLV("DAC Playback Volume", ES8316_DAC_VOLL, ES8316_DAC_VOLR, 0, 0xc0, 1, dac_vol_tlv), SOC_SINGLE("DAC Soft Ramp Switch", ES8316_DAC_SET1, 4, 1, 1), - SOC_SINGLE("DAC Soft Ramp Rate", ES8316_DAC_SET1, 2, 4, 0), + SOC_SINGLE("DAC Soft Ramp Rate", ES8316_DAC_SET1, 2, 3, 0), SOC_SINGLE("DAC Notch Filter Switch", ES8316_DAC_SET2, 6, 1, 0), SOC_SINGLE("DAC Double Fs Switch", ES8316_DAC_SET2, 7, 1, 0), SOC_SINGLE("DAC Stereo Enhancement", ES8316_DAC_SET3, 0, 7, 0), diff --git a/sound/soc/codecs/es8326.c b/sound/soc/codecs/es8326.c index b06eead7e0f6..066d92b54312 100644 --- a/sound/soc/codecs/es8326.c +++ b/sound/soc/codecs/es8326.c @@ -911,7 +911,7 @@ static void es8326_jack_detect_handler(struct work_struct *work) regmap_write(es8326->regmap, ES8326_INT_SOURCE, (ES8326_INT_SRC_PIN9 | ES8326_INT_SRC_BUTTON)); regmap_write(es8326->regmap, ES8326_SYS_BIAS, 0x1f); - regmap_update_bits(es8326->regmap, ES8326_HP_DRIVER_REF, 0x0f, 0x08); + regmap_update_bits(es8326->regmap, ES8326_HP_DRIVER_REF, 0x0f, 0x0d); queue_delayed_work(system_wq, &es8326->jack_detect_work, msecs_to_jiffies(400)); es8326->hp = 1; @@ -1023,7 +1023,7 @@ static void es8326_init(struct snd_soc_component *component) struct es8326_priv *es8326 = snd_soc_component_get_drvdata(component); regmap_write(es8326->regmap, ES8326_RESET, 0x1f); - regmap_write(es8326->regmap, ES8326_VMIDSEL, 0x0E); + regmap_write(es8326->regmap, ES8326_VMIDSEL, 0x3E); regmap_write(es8326->regmap, ES8326_ANA_LP, 0xf0); usleep_range(10000, 15000); regmap_write(es8326->regmap, ES8326_HPJACK_TIMER, 0xd9); diff --git a/sound/soc/codecs/rt5514.c b/sound/soc/codecs/rt5514.c index 2b3c0f9e178c..9cb74962161a 100644 --- a/sound/soc/codecs/rt5514.c +++ b/sound/soc/codecs/rt5514.c @@ -1091,8 +1091,7 @@ static int rt5514_set_bias_level(struct snd_soc_component *component, static int rt5514_probe(struct snd_soc_component *component) { struct rt5514_priv *rt5514 = snd_soc_component_get_drvdata(component); - struct platform_device *pdev = container_of(component->dev, - struct platform_device, dev); + struct platform_device *pdev = to_platform_device(component->dev); rt5514->mclk = devm_clk_get_optional(component->dev, "mclk"); if (IS_ERR(rt5514->mclk)) diff --git a/sound/soc/fsl/Kconfig b/sound/soc/fsl/Kconfig index e5fbf5305ea2..c4cf3cff58de 100644 --- a/sound/soc/fsl/Kconfig +++ b/sound/soc/fsl/Kconfig @@ -6,6 +6,7 @@ comment "Common SoC Audio options for Freescale CPUs:" config SND_SOC_FSL_ASRC tristate "Asynchronous Sample Rate Converter (ASRC) module support" depends on HAS_DMA + select DMA_SHARED_BUFFER select REGMAP_MMIO select SND_SOC_GENERIC_DMAENGINE_PCM select SND_COMPRESS_ACCEL diff --git a/sound/soc/fsl/fsl_asrc_m2m.c b/sound/soc/fsl/fsl_asrc_m2m.c index 4906843e2a8f..f46881f71e43 100644 --- a/sound/soc/fsl/fsl_asrc_m2m.c +++ b/sound/soc/fsl/fsl_asrc_m2m.c @@ -183,7 +183,7 @@ static int asrc_dmaconfig(struct fsl_asrc_pair *pair, } /* main function of converter */ -static void asrc_m2m_device_run(struct fsl_asrc_pair *pair, struct snd_compr_task_runtime *task) +static int asrc_m2m_device_run(struct fsl_asrc_pair *pair, struct snd_compr_task_runtime *task) { struct fsl_asrc *asrc = pair->asrc; struct device *dev = &asrc->pdev->dev; @@ -193,7 +193,7 @@ static void asrc_m2m_device_run(struct fsl_asrc_pair *pair, 
struct snd_compr_tas unsigned int out_dma_len; unsigned int width; u32 fifo_addr; - int ret; + int ret = 0; /* set ratio mod */ if (asrc->m2m_set_ratio_mod) { @@ -215,6 +215,7 @@ static void asrc_m2m_device_run(struct fsl_asrc_pair *pair, struct snd_compr_tas in_buf_len > ASRC_M2M_BUFFER_SIZE || in_buf_len % (width * pair->channels / 8)) { dev_err(dev, "out buffer size is error: [%d]\n", in_buf_len); + ret = -EINVAL; goto end; } @@ -245,6 +246,7 @@ static void asrc_m2m_device_run(struct fsl_asrc_pair *pair, struct snd_compr_tas } } else if (out_dma_len > ASRC_M2M_BUFFER_SIZE) { dev_err(dev, "cap buffer size error\n"); + ret = -EINVAL; goto end; } @@ -263,12 +265,14 @@ static void asrc_m2m_device_run(struct fsl_asrc_pair *pair, struct snd_compr_tas if (!wait_for_completion_interruptible_timeout(&pair->complete[IN], 10 * HZ)) { dev_err(dev, "out DMA task timeout\n"); + ret = -ETIMEDOUT; goto end; } if (out_dma_len > 0) { if (!wait_for_completion_interruptible_timeout(&pair->complete[OUT], 10 * HZ)) { dev_err(dev, "cap DMA task timeout\n"); + ret = -ETIMEDOUT; goto end; } } @@ -278,7 +282,7 @@ static void asrc_m2m_device_run(struct fsl_asrc_pair *pair, struct snd_compr_tas /* update payload length for capture */ task->output_size = out_dma_len; end: - return; + return ret; } static int fsl_asrc_m2m_comp_open(struct snd_compr_stream *stream) @@ -525,9 +529,7 @@ static int fsl_asrc_m2m_comp_task_start(struct snd_compr_stream *stream, struct snd_compr_runtime *runtime = stream->runtime; struct fsl_asrc_pair *pair = runtime->private_data; - asrc_m2m_device_run(pair, task); - - return 0; + return asrc_m2m_device_run(pair, task); } static int fsl_asrc_m2m_comp_task_stop(struct snd_compr_stream *stream, @@ -633,7 +635,7 @@ int fsl_asrc_m2m_suspend(struct fsl_asrc *asrc) for (i = 0; i < PAIR_CTX_NUM; i++) { pair = asrc->pair[i]; - if (!pair) + if (!pair || !pair->dma_buffer[IN].area || !pair->dma_buffer[OUT].area) continue; if (!completion_done(&pair->complete[IN])) { if (pair->dma_chan[IN]) diff --git a/sound/soc/generic/audio-graph-card2.c b/sound/soc/generic/audio-graph-card2.c index c36b1a2ac949..ee94b256b770 100644 --- a/sound/soc/generic/audio-graph-card2.c +++ b/sound/soc/generic/audio-graph-card2.c @@ -648,23 +648,23 @@ multi_err: static int graph_parse_node_single(struct simple_util_priv *priv, enum graph_type gtype, - struct device_node *port, + struct device_node *ep, struct link_info *li, int is_cpu) { - struct device_node *ep __free(device_node) = of_graph_get_next_port_endpoint(port, NULL); - return __graph_parse_node(priv, gtype, ep, li, is_cpu, 0); } static int graph_parse_node(struct simple_util_priv *priv, enum graph_type gtype, - struct device_node *port, + struct device_node *ep, struct link_info *li, int is_cpu) { + struct device_node *port __free(device_node) = ep_to_port(ep); + if (graph_lnk_is_multi(port)) return graph_parse_node_multi(priv, gtype, port, li, is_cpu); else - return graph_parse_node_single(priv, gtype, port, li, is_cpu); + return graph_parse_node_single(priv, gtype, ep, li, is_cpu); } static void graph_parse_daifmt(struct device_node *node, unsigned int *daifmt) @@ -722,14 +722,15 @@ static unsigned int graph_parse_bitframe(struct device_node *ep) static void graph_link_init(struct simple_util_priv *priv, struct device_node *lnk, - struct device_node *port_cpu, - struct device_node *port_codec, + struct device_node *ep_cpu, + struct device_node *ep_codec, struct link_info *li, int is_cpu_node) { struct snd_soc_dai_link *dai_link = simple_priv_to_link(priv, 
li->link); struct simple_dai_props *dai_props = simple_priv_to_props(priv, li->link); - struct device_node *ep_cpu, *ep_codec; + struct device_node *port_cpu = ep_to_port(ep_cpu); + struct device_node *port_codec = ep_to_port(ep_codec); struct device_node *multi_cpu_port = NULL, *multi_codec_port = NULL; struct snd_soc_dai_link_component *dlc; unsigned int daifmt = 0; @@ -739,25 +740,23 @@ static void graph_link_init(struct simple_util_priv *priv, int multi_cpu_port_idx = 1, multi_codec_port_idx = 1; int i; - of_node_get(port_cpu); if (graph_lnk_is_multi(port_cpu)) { multi_cpu_port = port_cpu; ep_cpu = graph_get_next_multi_ep(&multi_cpu_port, multi_cpu_port_idx++); of_node_put(port_cpu); port_cpu = ep_to_port(ep_cpu); } else { - ep_cpu = of_graph_get_next_port_endpoint(port_cpu, NULL); + of_node_get(ep_cpu); } struct device_node *ports_cpu __free(device_node) = port_to_ports(port_cpu); - of_node_get(port_codec); if (graph_lnk_is_multi(port_codec)) { multi_codec_port = port_codec; ep_codec = graph_get_next_multi_ep(&multi_codec_port, multi_codec_port_idx++); of_node_put(port_codec); port_codec = ep_to_port(ep_codec); } else { - ep_codec = of_graph_get_next_port_endpoint(port_codec, NULL); + of_node_get(ep_codec); } struct device_node *ports_codec __free(device_node) = port_to_ports(port_codec); @@ -833,7 +832,7 @@ int audio_graph2_link_normal(struct simple_util_priv *priv, { struct device_node *cpu_port = lnk; struct device_node *cpu_ep __free(device_node) = of_graph_get_next_port_endpoint(cpu_port, NULL); - struct device_node *codec_port __free(device_node) = of_graph_get_remote_port(cpu_ep); + struct device_node *codec_ep __free(device_node) = of_graph_get_remote_endpoint(cpu_ep); int ret; /* @@ -841,18 +840,18 @@ int audio_graph2_link_normal(struct simple_util_priv *priv, * see * __graph_parse_node() :: DAI Naming */ - ret = graph_parse_node(priv, GRAPH_NORMAL, codec_port, li, 0); + ret = graph_parse_node(priv, GRAPH_NORMAL, codec_ep, li, 0); if (ret < 0) return ret; /* * call CPU, and set DAI Name */ - ret = graph_parse_node(priv, GRAPH_NORMAL, cpu_port, li, 1); + ret = graph_parse_node(priv, GRAPH_NORMAL, cpu_ep, li, 1); if (ret < 0) return ret; - graph_link_init(priv, lnk, cpu_port, codec_port, li, 1); + graph_link_init(priv, lnk, cpu_ep, codec_ep, li, 1); return ret; } @@ -864,15 +863,15 @@ int audio_graph2_link_dpcm(struct simple_util_priv *priv, { struct device_node *ep __free(device_node) = of_graph_get_next_port_endpoint(lnk, NULL); struct device_node *rep __free(device_node) = of_graph_get_remote_endpoint(ep); - struct device_node *cpu_port = NULL; - struct device_node *codec_port = NULL; + struct device_node *cpu_ep = NULL; + struct device_node *codec_ep = NULL; struct snd_soc_dai_link *dai_link = simple_priv_to_link(priv, li->link); struct simple_dai_props *dai_props = simple_priv_to_props(priv, li->link); int is_cpu = graph_util_is_ports0(lnk); int ret; if (is_cpu) { - cpu_port = of_graph_get_remote_port(ep); /* rport */ + cpu_ep = rep; /* * dpcm { @@ -901,12 +900,12 @@ int audio_graph2_link_dpcm(struct simple_util_priv *priv, dai_link->dynamic = 1; dai_link->dpcm_merged_format = 1; - ret = graph_parse_node(priv, GRAPH_DPCM, cpu_port, li, 1); + ret = graph_parse_node(priv, GRAPH_DPCM, cpu_ep, li, 1); if (ret) - goto err; + return ret; } else { - codec_port = of_graph_get_remote_port(ep); /* rport */ + codec_ep = rep; /* * dpcm { @@ -937,18 +936,15 @@ int audio_graph2_link_dpcm(struct simple_util_priv *priv, dai_link->no_pcm = 1; dai_link->be_hw_params_fixup = 
simple_util_be_hw_params_fixup; - ret = graph_parse_node(priv, GRAPH_DPCM, codec_port, li, 0); + ret = graph_parse_node(priv, GRAPH_DPCM, codec_ep, li, 0); if (ret < 0) - goto err; + return ret; } graph_parse_convert(ep, dai_props); /* at node of <dpcm> */ graph_parse_convert(rep, dai_props); /* at node of <CPU/Codec> */ - graph_link_init(priv, lnk, cpu_port, codec_port, li, is_cpu); -err: - of_node_put(cpu_port); - of_node_put(codec_port); + graph_link_init(priv, lnk, cpu_ep, codec_ep, li, is_cpu); return ret; } @@ -1013,26 +1009,26 @@ int audio_graph2_link_c2c(struct simple_util_priv *priv, struct device_node *ep0 __free(device_node) = of_graph_get_next_port_endpoint(port0, NULL); struct device_node *ep1 __free(device_node) = of_graph_get_next_port_endpoint(port1, NULL); - struct device_node *codec0_port __free(device_node) = of_graph_get_remote_port(ep0); - struct device_node *codec1_port __free(device_node) = of_graph_get_remote_port(ep1); + struct device_node *codec0_ep __free(device_node) = of_graph_get_remote_endpoint(ep0); + struct device_node *codec1_ep __free(device_node) = of_graph_get_remote_endpoint(ep1); /* * call Codec first. * see * __graph_parse_node() :: DAI Naming */ - ret = graph_parse_node(priv, GRAPH_C2C, codec1_port, li, 0); + ret = graph_parse_node(priv, GRAPH_C2C, codec1_ep, li, 0); if (ret < 0) return ret; /* * call CPU, and set DAI Name */ - ret = graph_parse_node(priv, GRAPH_C2C, codec0_port, li, 1); + ret = graph_parse_node(priv, GRAPH_C2C, codec0_ep, li, 1); if (ret < 0) return ret; - graph_link_init(priv, lnk, codec0_port, codec1_port, li, 1); + graph_link_init(priv, lnk, codec0_ep, codec1_ep, li, 1); return ret; } diff --git a/sound/soc/intel/boards/bytcr_rt5640.c b/sound/soc/intel/boards/bytcr_rt5640.c index 9caa4407c1ca..6446cda0f857 100644 --- a/sound/soc/intel/boards/bytcr_rt5640.c +++ b/sound/soc/intel/boards/bytcr_rt5640.c @@ -1132,7 +1132,22 @@ static const struct dmi_system_id byt_rt5640_quirk_table[] = { BYT_RT5640_SSP0_AIF2 | BYT_RT5640_MCLK_EN), }, - { /* Vexia Edu Atla 10 tablet */ + { + /* Vexia Edu Atla 10 tablet 5V version */ + .matches = { + /* Having all 3 of these not set is somewhat unique */ + DMI_MATCH(DMI_SYS_VENDOR, "To be filled by O.E.M."), + DMI_MATCH(DMI_PRODUCT_NAME, "To be filled by O.E.M."), + DMI_MATCH(DMI_BOARD_NAME, "To be filled by O.E.M."), + /* Above strings are too generic, also match on BIOS date */ + DMI_MATCH(DMI_BIOS_DATE, "05/14/2015"), + }, + .driver_data = (void *)(BYTCR_INPUT_DEFAULTS | + BYT_RT5640_JD_NOT_INV | + BYT_RT5640_SSP0_AIF1 | + BYT_RT5640_MCLK_EN), + }, + { /* Vexia Edu Atla 10 tablet 9V version */ .matches = { DMI_MATCH(DMI_BOARD_VENDOR, "AMI Corporation"), DMI_MATCH(DMI_BOARD_NAME, "Aptio CRB"), diff --git a/sound/soc/renesas/Kconfig b/sound/soc/renesas/Kconfig index 426632996a0a..cb01fb36355f 100644 --- a/sound/soc/renesas/Kconfig +++ b/sound/soc/renesas/Kconfig @@ -67,7 +67,7 @@ config SND_SH7760_AC97 config SND_SIU_MIGOR tristate "SIU sound support on Migo-R" - depends on SH_MIGOR && I2C + depends on SH_MIGOR && I2C && DMADEVICES select SND_SOC_SH4_SIU select SND_SOC_WM8978 help diff --git a/sound/soc/rockchip/rockchip_i2s_tdm.c b/sound/soc/rockchip/rockchip_i2s_tdm.c index bd0dc586e24a..7f5fcaecee4b 100644 --- a/sound/soc/rockchip/rockchip_i2s_tdm.c +++ b/sound/soc/rockchip/rockchip_i2s_tdm.c @@ -22,7 +22,6 @@ #define DRV_NAME "rockchip-i2s-tdm" -#define DEFAULT_MCLK_FS 256 #define CH_GRP_MAX 4 /* The max channel 8 / 2 */ #define MULTIPLEX_CH_MAX 10 @@ -70,6 +69,8 @@ struct rk_i2s_tdm_dev { bool 
has_playback; bool has_capture; struct snd_soc_dai_driver *dai; + unsigned int mclk_rx_freq; + unsigned int mclk_tx_freq; }; static int to_ch_num(unsigned int val) @@ -617,6 +618,27 @@ static int rockchip_i2s_trcm_mode(struct snd_pcm_substream *substream, return 0; } +static int rockchip_i2s_tdm_set_sysclk(struct snd_soc_dai *cpu_dai, int stream, + unsigned int freq, int dir) +{ + struct rk_i2s_tdm_dev *i2s_tdm = to_info(cpu_dai); + + if (i2s_tdm->clk_trcm) { + i2s_tdm->mclk_tx_freq = freq; + i2s_tdm->mclk_rx_freq = freq; + } else { + if (stream == SNDRV_PCM_STREAM_PLAYBACK) + i2s_tdm->mclk_tx_freq = freq; + else + i2s_tdm->mclk_rx_freq = freq; + } + + dev_dbg(i2s_tdm->dev, "The target mclk_%s freq is: %d\n", + stream ? "rx" : "tx", freq); + + return 0; +} + static int rockchip_i2s_tdm_hw_params(struct snd_pcm_substream *substream, struct snd_pcm_hw_params *params, struct snd_soc_dai *dai) @@ -631,15 +653,19 @@ static int rockchip_i2s_tdm_hw_params(struct snd_pcm_substream *substream, if (i2s_tdm->clk_trcm == TRCM_TX) { mclk = i2s_tdm->mclk_tx; + mclk_rate = i2s_tdm->mclk_tx_freq; } else if (i2s_tdm->clk_trcm == TRCM_RX) { mclk = i2s_tdm->mclk_rx; + mclk_rate = i2s_tdm->mclk_rx_freq; } else if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) { mclk = i2s_tdm->mclk_tx; + mclk_rate = i2s_tdm->mclk_tx_freq; } else { mclk = i2s_tdm->mclk_rx; + mclk_rate = i2s_tdm->mclk_rx_freq; } - err = clk_set_rate(mclk, DEFAULT_MCLK_FS * params_rate(params)); + err = clk_set_rate(mclk, mclk_rate); if (err) return err; @@ -799,6 +825,7 @@ static const struct snd_soc_dai_ops rockchip_i2s_tdm_dai_ops = { .hw_params = rockchip_i2s_tdm_hw_params, .set_bclk_ratio = rockchip_i2s_tdm_set_bclk_ratio, .set_fmt = rockchip_i2s_tdm_set_fmt, + .set_sysclk = rockchip_i2s_tdm_set_sysclk, .set_tdm_slot = rockchip_dai_tdm_slot, .trigger = rockchip_i2s_tdm_trigger, }; diff --git a/sound/soc/sof/imx/imx8.c b/sound/soc/sof/imx/imx8.c index 0b85b29d1067..1e7bf00d7c46 100644 --- a/sound/soc/sof/imx/imx8.c +++ b/sound/soc/sof/imx/imx8.c @@ -175,8 +175,7 @@ static int imx8_run(struct snd_sof_dev *sdev) static int imx8_probe(struct snd_sof_dev *sdev) { - struct platform_device *pdev = - container_of(sdev->dev, struct platform_device, dev); + struct platform_device *pdev = to_platform_device(sdev->dev); struct device_node *np = pdev->dev.of_node; struct device_node *res_node; struct resource *mmio; @@ -607,10 +606,31 @@ static struct snd_sof_of_mach sof_imx8_machs[] = { .drv_name = "asoc-audio-graph-card2", }, { + .compatible = "fsl,imx8qxp-mek-wcpu", + .sof_tplg_filename = "sof-imx8-wm8962.tplg", + .drv_name = "asoc-audio-graph-card2", + }, + { .compatible = "fsl,imx8qm-mek", .sof_tplg_filename = "sof-imx8-wm8960.tplg", .drv_name = "asoc-audio-graph-card2", }, + { + .compatible = "fsl,imx8qm-mek-revd", + .sof_tplg_filename = "sof-imx8-wm8962.tplg", + .drv_name = "asoc-audio-graph-card2", + }, + { + .compatible = "fsl,imx8qxp-mek-bb", + .sof_tplg_filename = "sof-imx8-cs42888.tplg", + .drv_name = "asoc-audio-graph-card2", + }, + { + .compatible = "fsl,imx8qm-mek-bb", + .sof_tplg_filename = "sof-imx8-cs42888.tplg", + .drv_name = "asoc-audio-graph-card2", + }, + {} }; diff --git a/sound/soc/sof/imx/imx8m.c b/sound/soc/sof/imx/imx8m.c index ff42743efa79..3cabdebac558 100644 --- a/sound/soc/sof/imx/imx8m.c +++ b/sound/soc/sof/imx/imx8m.c @@ -144,8 +144,7 @@ static int imx8m_reset(struct snd_sof_dev *sdev) static int imx8m_probe(struct snd_sof_dev *sdev) { - struct platform_device *pdev = - container_of(sdev->dev, struct platform_device, 
dev); + struct platform_device *pdev = to_platform_device(sdev->dev); struct device_node *np = pdev->dev.of_node; struct device_node *res_node; struct resource *mmio; @@ -295,6 +294,17 @@ static struct snd_soc_dai_driver imx8m_dai[] = { }, }, { + .name = "sai2", + .playback = { + .channels_min = 1, + .channels_max = 32, + }, + .capture = { + .channels_min = 1, + .channels_max = 32, + }, +}, +{ .name = "sai3", .playback = { .channels_min = 1, @@ -306,6 +316,39 @@ static struct snd_soc_dai_driver imx8m_dai[] = { }, }, { + .name = "sai5", + .playback = { + .channels_min = 1, + .channels_max = 32, + }, + .capture = { + .channels_min = 1, + .channels_max = 32, + }, +}, +{ + .name = "sai6", + .playback = { + .channels_min = 1, + .channels_max = 32, + }, + .capture = { + .channels_min = 1, + .channels_max = 32, + }, +}, +{ + .name = "sai7", + .playback = { + .channels_min = 1, + .channels_max = 32, + }, + .capture = { + .channels_min = 1, + .channels_max = 32, + }, +}, +{ .name = "micfil", .capture = { .channels_min = 1, @@ -472,6 +515,11 @@ static const struct snd_sof_dsp_ops sof_imx8m_ops = { static struct snd_sof_of_mach sof_imx8mp_machs[] = { { + .compatible = "fsl,imx8mp-evk-revb4", + .sof_tplg_filename = "sof-imx8mp-wm8962.tplg", + .drv_name = "asoc-audio-graph-card2", + }, + { .compatible = "fsl,imx8mp-evk", .sof_tplg_filename = "sof-imx8mp-wm8960.tplg", .drv_name = "asoc-audio-graph-card2", diff --git a/sound/soc/sof/imx/imx8ulp.c b/sound/soc/sof/imx/imx8ulp.c index 6965791ab6ef..0704da27e69d 100644 --- a/sound/soc/sof/imx/imx8ulp.c +++ b/sound/soc/sof/imx/imx8ulp.c @@ -155,8 +155,7 @@ static int imx8ulp_reset(struct snd_sof_dev *sdev) static int imx8ulp_probe(struct snd_sof_dev *sdev) { - struct platform_device *pdev = - container_of(sdev->dev, struct platform_device, dev); + struct platform_device *pdev = to_platform_device(sdev->dev); struct device_node *np = pdev->dev.of_node; struct device_node *res_node; struct resource *mmio; diff --git a/sound/soc/sof/intel/bdw.c b/sound/soc/sof/intel/bdw.c index 5282c0071534..e1f0e38c2407 100644 --- a/sound/soc/sof/intel/bdw.c +++ b/sound/soc/sof/intel/bdw.c @@ -410,8 +410,7 @@ static int bdw_probe(struct snd_sof_dev *sdev) { struct snd_sof_pdata *pdata = sdev->pdata; const struct sof_dev_desc *desc = pdata->desc; - struct platform_device *pdev = - container_of(sdev->dev, struct platform_device, dev); + struct platform_device *pdev = to_platform_device(sdev->dev); const struct sof_intel_dsp_desc *chip; struct resource *mmio; u32 base, size; diff --git a/sound/soc/sof/intel/byt.c b/sound/soc/sof/intel/byt.c index 536d4c89d2f0..cae7dc0036c6 100644 --- a/sound/soc/sof/intel/byt.c +++ b/sound/soc/sof/intel/byt.c @@ -109,8 +109,7 @@ static int byt_acpi_probe(struct snd_sof_dev *sdev) { struct snd_sof_pdata *pdata = sdev->pdata; const struct sof_dev_desc *desc = pdata->desc; - struct platform_device *pdev = - container_of(sdev->dev, struct platform_device, dev); + struct platform_device *pdev = to_platform_device(sdev->dev); const struct sof_intel_dsp_desc *chip; struct resource *mmio; u32 base, size; diff --git a/sound/soc/sof/mediatek/mt8186/mt8186.c b/sound/soc/sof/mediatek/mt8186/mt8186.c index 9955dfa520ae..31437fdd4e92 100644 --- a/sound/soc/sof/mediatek/mt8186/mt8186.c +++ b/sound/soc/sof/mediatek/mt8186/mt8186.c @@ -238,7 +238,7 @@ static int mt8186_run(struct snd_sof_dev *sdev) static int mt8186_dsp_probe(struct snd_sof_dev *sdev) { - struct platform_device *pdev = container_of(sdev->dev, struct platform_device, dev); + struct platform_device 
*pdev = to_platform_device(sdev->dev); struct adsp_priv *priv; int ret; diff --git a/sound/soc/sof/mediatek/mt8195/mt8195.c b/sound/soc/sof/mediatek/mt8195/mt8195.c index 6032b566c679..371563d7ce79 100644 --- a/sound/soc/sof/mediatek/mt8195/mt8195.c +++ b/sound/soc/sof/mediatek/mt8195/mt8195.c @@ -228,7 +228,7 @@ static int mt8195_run(struct snd_sof_dev *sdev) static int mt8195_dsp_probe(struct snd_sof_dev *sdev) { - struct platform_device *pdev = container_of(sdev->dev, struct platform_device, dev); + struct platform_device *pdev = to_platform_device(sdev->dev); struct adsp_priv *priv; int ret; @@ -341,7 +341,7 @@ static int mt8195_dsp_shutdown(struct snd_sof_dev *sdev) static void mt8195_dsp_remove(struct snd_sof_dev *sdev) { - struct platform_device *pdev = container_of(sdev->dev, struct platform_device, dev); + struct platform_device *pdev = to_platform_device(sdev->dev); struct adsp_priv *priv = sdev->pdata->hw_pdata; platform_device_unregister(priv->ipc_dev); @@ -351,7 +351,7 @@ static void mt8195_dsp_remove(struct snd_sof_dev *sdev) static int mt8195_dsp_suspend(struct snd_sof_dev *sdev, u32 target_state) { - struct platform_device *pdev = container_of(sdev->dev, struct platform_device, dev); + struct platform_device *pdev = to_platform_device(sdev->dev); int ret; u32 reset_sw, dbg_pc; diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index 7968d6a2f592..a97efb7b131e 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -2343,6 +2343,8 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = { QUIRK_FLAG_CTL_MSG_DELAY_1M), DEVICE_FLG(0x2d95, 0x8021, /* VIVO USB-C-XE710 HEADSET */ QUIRK_FLAG_CTL_MSG_DELAY_1M), + DEVICE_FLG(0x2fc6, 0xf0b7, /* iBasso DC07 Pro */ + QUIRK_FLAG_CTL_MSG_DELAY_1M), DEVICE_FLG(0x30be, 0x0101, /* Schiit Hel */ QUIRK_FLAG_IGNORE_CTL_ERROR), DEVICE_FLG(0x413c, 0xa506, /* Dell AE515 sound bar */ diff --git a/tools/net/ynl/lib/ynl.c b/tools/net/ynl/lib/ynl.c index e16cef160bc2..ce32cb35007d 100644 --- a/tools/net/ynl/lib/ynl.c +++ b/tools/net/ynl/lib/ynl.c @@ -95,7 +95,7 @@ ynl_err_walk(struct ynl_sock *ys, void *start, void *end, unsigned int off, ynl_attr_for_each_payload(start, data_len, attr) { astart_off = (char *)attr - (char *)start; - aend_off = astart_off + ynl_attr_data_len(attr); + aend_off = (char *)ynl_attr_data_end(attr) - (char *)start; if (aend_off <= off) continue; diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index d7c7d29291fb..d466447ae928 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -2107,8 +2107,12 @@ static int trace__read_syscall_info(struct trace *trace, int id) return PTR_ERR(sc->tp_format); } + /* + * The tracepoint format contains __syscall_nr field, so it's one more + * than the actual number of syscall arguments. + */ if (syscall__alloc_arg_fmts(sc, IS_ERR(sc->tp_format) ? - RAW_SYSCALL_ARGS_NUM : sc->tp_format->format.nr_fields)) + RAW_SYSCALL_ARGS_NUM : sc->tp_format->format.nr_fields - 1)) return -ENOMEM; sc->args = sc->tp_format->format.fields; diff --git a/tools/perf/tests/shell/trace_btf_enum.sh b/tools/perf/tests/shell/trace_btf_enum.sh index 5a3b8a5a9b5c..8d1e6bbeac90 100755 --- a/tools/perf/tests/shell/trace_btf_enum.sh +++ b/tools/perf/tests/shell/trace_btf_enum.sh @@ -26,8 +26,12 @@ check_vmlinux() { trace_landlock() { echo "Tracing syscall ${syscall}" - # test flight just to see if landlock_add_rule and libbpf are available - $TESTPROG + # test flight just to see if landlock_add_rule is available + if ! 
perf trace $TESTPROG 2>&1 | grep -q landlock + then + echo "No landlock system call found, skipping to non-syscall tracing." + return + fi if perf trace -e $syscall $TESTPROG 2>&1 | \ grep -q -E ".*landlock_add_rule\(ruleset_fd: 11, rule_type: (LANDLOCK_RULE_PATH_BENEATH|LANDLOCK_RULE_NET_PORT), rule_attr: 0x[a-f0-9]+, flags: 45\) = -1.*" diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 0d2ea22bd9e4..31bb326b07a6 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -2100,6 +2100,57 @@ int symbol__annotate2(struct map_symbol *ms, struct evsel *evsel, return 0; } +const char * const perf_disassembler__strs[] = { + [PERF_DISASM_UNKNOWN] = "unknown", + [PERF_DISASM_LLVM] = "llvm", + [PERF_DISASM_CAPSTONE] = "capstone", + [PERF_DISASM_OBJDUMP] = "objdump", +}; + + +static void annotation_options__add_disassembler(struct annotation_options *options, + enum perf_disassembler dis) +{ + for (u8 i = 0; i < ARRAY_SIZE(options->disassemblers); i++) { + if (options->disassemblers[i] == dis) { + /* Disassembler is already present then don't add again. */ + return; + } + if (options->disassemblers[i] == PERF_DISASM_UNKNOWN) { + /* Found a free slot. */ + options->disassemblers[i] = dis; + return; + } + } + pr_err("Failed to add disassembler %d\n", dis); +} + +static int annotation_options__add_disassemblers_str(struct annotation_options *options, + const char *str) +{ + while (str && *str != '\0') { + const char *comma = strchr(str, ','); + int len = comma ? comma - str : (int)strlen(str); + bool match = false; + + for (u8 i = 0; i < ARRAY_SIZE(perf_disassembler__strs); i++) { + const char *dis_str = perf_disassembler__strs[i]; + + if (len == (int)strlen(dis_str) && !strncmp(str, dis_str, len)) { + annotation_options__add_disassembler(options, i); + match = true; + break; + } + } + if (!match) { + pr_err("Invalid disassembler '%.*s'\n", len, str); + return -1; + } + str = comma ? comma + 1 : NULL; + } + return 0; +} + static int annotation__config(const char *var, const char *value, void *data) { struct annotation_options *opt = data; @@ -2115,11 +2166,10 @@ static int annotation__config(const char *var, const char *value, void *data) else if (opt->offset_level < ANNOTATION__MIN_OFFSET_LEVEL) opt->offset_level = ANNOTATION__MIN_OFFSET_LEVEL; } else if (!strcmp(var, "annotate.disassemblers")) { - opt->disassemblers_str = strdup(value); - if (!opt->disassemblers_str) { - pr_err("Not enough memory for annotate.disassemblers\n"); - return -1; - } + int err = annotation_options__add_disassemblers_str(opt, value); + + if (err) + return err; } else if (!strcmp(var, "annotate.hide_src_code")) { opt->hide_src_code = perf_config_bool("hide_src_code", value); } else if (!strcmp(var, "annotate.jump_arrows")) { @@ -2185,9 +2235,25 @@ void annotation_options__exit(void) zfree(&annotate_opts.objdump_path); } +static void annotation_options__default_init_disassemblers(struct annotation_options *options) +{ + if (options->disassemblers[0] != PERF_DISASM_UNKNOWN) { + /* Already initialized. 
*/ + return; + } +#ifdef HAVE_LIBLLVM_SUPPORT + annotation_options__add_disassembler(options, PERF_DISASM_LLVM); +#endif +#ifdef HAVE_LIBCAPSTONE_SUPPORT + annotation_options__add_disassembler(options, PERF_DISASM_CAPSTONE); +#endif + annotation_options__add_disassembler(options, PERF_DISASM_OBJDUMP); +} + void annotation_config__init(void) { perf_config(annotation__config, &annotate_opts); + annotation_options__default_init_disassemblers(&annotate_opts); } static unsigned int parse_percent_type(char *str1, char *str2) diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index 0ba5846dad4d..98db1b88daf4 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -34,8 +34,13 @@ struct annotated_data_type; #define ANNOTATION__BR_CNTR_WIDTH 30 #define ANNOTATION_DUMMY_LEN 256 -// llvm, capstone, objdump -#define MAX_DISASSEMBLERS 3 +enum perf_disassembler { + PERF_DISASM_UNKNOWN = 0, + PERF_DISASM_LLVM, + PERF_DISASM_CAPSTONE, + PERF_DISASM_OBJDUMP, +}; +#define MAX_DISASSEMBLERS (PERF_DISASM_OBJDUMP + 1) struct annotation_options { bool hide_src_code, @@ -52,14 +57,12 @@ struct annotation_options { annotate_src, full_addr; u8 offset_level; - u8 nr_disassemblers; + u8 disassemblers[MAX_DISASSEMBLERS]; int min_pcnt; int max_lines; int context; char *objdump_path; char *disassembler_style; - const char *disassemblers_str; - const char *disassemblers[MAX_DISASSEMBLERS]; const char *prefix; const char *prefix_strip; unsigned int percent_type; @@ -134,6 +137,8 @@ struct disasm_line { struct annotation_line al; }; +extern const char * const perf_disassembler__strs[]; + void annotation_line__add(struct annotation_line *al, struct list_head *head); static inline double annotation_data__percent(struct annotation_data *data, diff --git a/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c b/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c index 4a62ed593e84..e4352881e3fa 100644 --- a/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c +++ b/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c @@ -431,9 +431,9 @@ static bool pid_filter__has(struct pids_filtered *pids, pid_t pid) static int augment_sys_enter(void *ctx, struct syscall_enter_args *args) { bool augmented, do_output = false; - int zero = 0, size, aug_size, index, - value_size = sizeof(struct augmented_arg) - offsetof(struct augmented_arg, value); + int zero = 0, index, value_size = sizeof(struct augmented_arg) - offsetof(struct augmented_arg, value); u64 output = 0; /* has to be u64, otherwise it won't pass the verifier */ + s64 aug_size, size; unsigned int nr, *beauty_map; struct beauty_payload_enter *payload; void *arg, *payload_offset; @@ -484,14 +484,11 @@ static int augment_sys_enter(void *ctx, struct syscall_enter_args *args) } else if (size > 0 && size <= value_size) { /* struct */ if (!bpf_probe_read_user(((struct augmented_arg *)payload_offset)->value, size, arg)) augmented = true; - } else if (size < 0 && size >= -6) { /* buffer */ + } else if ((int)size < 0 && size >= -6) { /* buffer */ index = -(size + 1); barrier_var(index); // Prevent clang (noticed with v18) from removing the &= 7 trick. index &= 7; // Satisfy the bounds checking with the verifier in some kernels. - aug_size = args->args[index]; - - if (aug_size > TRACE_AUG_MAX_BUF) - aug_size = TRACE_AUG_MAX_BUF; + aug_size = args->args[index] > TRACE_AUG_MAX_BUF ? 
TRACE_AUG_MAX_BUF : args->args[index]; if (aug_size > 0) { if (!bpf_probe_read_user(((struct augmented_arg *)payload_offset)->value, aug_size, arg)) diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c index 27094211edd8..5c329ad614e9 100644 --- a/tools/perf/util/cpumap.c +++ b/tools/perf/util/cpumap.c @@ -293,7 +293,7 @@ struct aggr_cpu_id aggr_cpu_id__die(struct perf_cpu cpu, void *data) die = cpu__get_die_id(cpu); /* There is no die_id on legacy system. */ - if (die == -1) + if (die < 0) die = 0; /* @@ -322,7 +322,7 @@ struct aggr_cpu_id aggr_cpu_id__cluster(struct perf_cpu cpu, void *data) struct aggr_cpu_id id; /* There is no cluster_id on legacy system. */ - if (cluster == -1) + if (cluster < 0) cluster = 0; id = aggr_cpu_id__die(cpu, data); diff --git a/tools/perf/util/disasm.c b/tools/perf/util/disasm.c index b7de4d9fd004..50c5c206b70e 100644 --- a/tools/perf/util/disasm.c +++ b/tools/perf/util/disasm.c @@ -2216,56 +2216,6 @@ out_free_command: return err; } -static int annotation_options__init_disassemblers(struct annotation_options *options) -{ - char *disassembler; - - if (options->disassemblers_str == NULL) { - const char *default_disassemblers_str = -#ifdef HAVE_LIBLLVM_SUPPORT - "llvm," -#endif -#ifdef HAVE_LIBCAPSTONE_SUPPORT - "capstone," -#endif - "objdump"; - - options->disassemblers_str = strdup(default_disassemblers_str); - if (!options->disassemblers_str) - goto out_enomem; - } - - disassembler = strdup(options->disassemblers_str); - if (disassembler == NULL) - goto out_enomem; - - while (1) { - char *comma = strchr(disassembler, ','); - - if (comma != NULL) - *comma = '\0'; - - options->disassemblers[options->nr_disassemblers++] = strim(disassembler); - - if (comma == NULL) - break; - - disassembler = comma + 1; - - if (options->nr_disassemblers >= MAX_DISASSEMBLERS) { - pr_debug("annotate.disassemblers can have at most %d entries, ignoring \"%s\"\n", - MAX_DISASSEMBLERS, disassembler); - break; - } - } - - return 0; - -out_enomem: - pr_err("Not enough memory for annotate.disassemblers\n"); - return -1; -} - int symbol__disassemble(struct symbol *sym, struct annotate_args *args) { struct annotation_options *options = args->options; @@ -2274,7 +2224,6 @@ int symbol__disassemble(struct symbol *sym, struct annotate_args *args) char symfs_filename[PATH_MAX]; bool delete_extract = false; struct kcore_extract kce; - const char *disassembler; bool decomp = false; int err = dso__disassemble_filename(dso, symfs_filename, sizeof(symfs_filename)); @@ -2334,28 +2283,26 @@ int symbol__disassemble(struct symbol *sym, struct annotate_args *args) } } - err = annotation_options__init_disassemblers(options); - if (err) - goto out_remove_tmp; - err = -1; + for (u8 i = 0; i < ARRAY_SIZE(options->disassemblers) && err != 0; i++) { + enum perf_disassembler dis = options->disassemblers[i]; - for (int i = 0; i < options->nr_disassemblers && err != 0; ++i) { - disassembler = options->disassemblers[i]; - - if (!strcmp(disassembler, "llvm")) + switch (dis) { + case PERF_DISASM_LLVM: err = symbol__disassemble_llvm(symfs_filename, sym, args); - else if (!strcmp(disassembler, "capstone")) + break; + case PERF_DISASM_CAPSTONE: err = symbol__disassemble_capstone(symfs_filename, sym, args); - else if (!strcmp(disassembler, "objdump")) + break; + case PERF_DISASM_OBJDUMP: err = symbol__disassemble_objdump(symfs_filename, sym, args); - else - pr_debug("Unknown disassembler %s, skipping...\n", disassembler); - } - - if (err == 0) { - pr_debug("Disassembled with 
%s\nannotate.disassemblers=%s\n", - disassembler, options->disassemblers_str); + break; + case PERF_DISASM_UNKNOWN: /* End of disassemblers. */ + default: + goto out_remove_tmp; + } + if (err == 0) + pr_debug("Disassembled with %s\n", perf_disassembler__strs[dis]); } out_remove_tmp: if (decomp) diff --git a/tools/testing/selftests/bpf/progs/find_vma.c b/tools/testing/selftests/bpf/progs/find_vma.c index 38034fb82530..02b82774469c 100644 --- a/tools/testing/selftests/bpf/progs/find_vma.c +++ b/tools/testing/selftests/bpf/progs/find_vma.c @@ -25,7 +25,7 @@ static long check_vma(struct task_struct *task, struct vm_area_struct *vma, { if (vma->vm_file) bpf_probe_read_kernel_str(d_iname, DNAME_INLINE_LEN - 1, - vma->vm_file->f_path.dentry->d_iname); + vma->vm_file->f_path.dentry->d_shortname.string); /* check for VM_EXEC */ if (vma->vm_flags & VM_EXEC) diff --git a/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh b/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh index 384cfa3d38a6..92c2f0376c08 100755 --- a/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh +++ b/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh @@ -142,7 +142,7 @@ function pre_ethtool { } function check_table { - local path=$NSIM_DEV_DFS/ports/$port/udp_ports_table$1 + local path=$NSIM_DEV_DFS/ports/$port/udp_ports/table$1 local -n expected=$2 local last=$3 @@ -212,7 +212,7 @@ function check_tables { } function print_table { - local path=$NSIM_DEV_DFS/ports/$port/udp_ports_table$1 + local path=$NSIM_DEV_DFS/ports/$port/udp_ports/table$1 read -a have < $path tree $NSIM_DEV_DFS/ @@ -641,7 +641,7 @@ for port in 0 1; do NSIM_NETDEV=`get_netdev_name old_netdevs` ip link set dev $NSIM_NETDEV up - echo 110 > $NSIM_DEV_DFS/ports/$port/udp_ports_inject_error + echo 110 > $NSIM_DEV_DFS/ports/$port/udp_ports/inject_error msg="1 - create VxLANs v6" exp0=( 0 0 0 0 ) @@ -663,7 +663,7 @@ for port in 0 1; do new_geneve gnv0 20000 msg="2 - destroy GENEVE" - echo 2 > $NSIM_DEV_DFS/ports/$port/udp_ports_inject_error + echo 2 > $NSIM_DEV_DFS/ports/$port/udp_ports/inject_error exp1=( `mke 20000 2` 0 0 0 ) del_dev gnv0 @@ -764,7 +764,7 @@ for port in 0 1; do msg="create VxLANs v4" new_vxlan vxlan0 10000 $NSIM_NETDEV - echo 1 > $NSIM_DEV_DFS/ports/$port/udp_ports_reset + echo 1 > $NSIM_DEV_DFS/ports/$port/udp_ports/reset check_tables msg="NIC device goes down" @@ -775,7 +775,7 @@ for port in 0 1; do fi check_tables - echo 1 > $NSIM_DEV_DFS/ports/$port/udp_ports_reset + echo 1 > $NSIM_DEV_DFS/ports/$port/udp_ports/reset check_tables msg="NIC device goes up again" @@ -789,7 +789,7 @@ for port in 0 1; do del_dev vxlan0 check_tables - echo 1 > $NSIM_DEV_DFS/ports/$port/udp_ports_reset + echo 1 > $NSIM_DEV_DFS/ports/$port/udp_ports/reset check_tables msg="destroy NIC" @@ -896,7 +896,7 @@ msg="vacate VxLAN in overflow table" exp0=( `mke 10000 1` `mke 10004 1` 0 `mke 10003 1` ) del_dev vxlan2 -echo 1 > $NSIM_DEV_DFS/ports/$port/udp_ports_reset +echo 1 > $NSIM_DEV_DFS/ports/$port/udp_ports/reset check_tables msg="tunnels destroyed 2" diff --git a/tools/testing/selftests/exec/check-exec.c b/tools/testing/selftests/exec/check-exec.c index 4d3f4525e1e1..55bce47e56b7 100644 --- a/tools/testing/selftests/exec/check-exec.c +++ b/tools/testing/selftests/exec/check-exec.c @@ -22,6 +22,7 @@ #include <sys/prctl.h> #include <sys/socket.h> #include <sys/stat.h> +#include <sys/syscall.h> #include <sys/sysmacros.h> #include <unistd.h> @@ -31,6 +32,12 @@ #include "../kselftest_harness.h" +static int 
sys_execveat(int dirfd, const char *pathname, char *const argv[], + char *const envp[], int flags) +{ + return syscall(__NR_execveat, dirfd, pathname, argv, envp, flags); +} + static void drop_privileges(struct __test_metadata *const _metadata) { const unsigned int noroot = SECBIT_NOROOT | SECBIT_NOROOT_LOCKED; @@ -219,8 +226,8 @@ static void test_exec_fd(struct __test_metadata *_metadata, const int fd, * test framework as an error. With AT_EXECVE_CHECK, we only check a * potential successful execution. */ - access_ret = - execveat(fd, "", argv, NULL, AT_EMPTY_PATH | AT_EXECVE_CHECK); + access_ret = sys_execveat(fd, "", argv, NULL, + AT_EMPTY_PATH | AT_EXECVE_CHECK); access_errno = errno; if (err_code) { EXPECT_EQ(-1, access_ret); diff --git a/tools/testing/selftests/gpio/gpio-sim.sh b/tools/testing/selftests/gpio/gpio-sim.sh index 6fb66a687f17..bbc29ed9c60a 100755 --- a/tools/testing/selftests/gpio/gpio-sim.sh +++ b/tools/testing/selftests/gpio/gpio-sim.sh @@ -46,12 +46,6 @@ remove_chip() { rmdir $CONFIGFS_DIR/$CHIP || fail "Unable to remove the chip" } -configfs_cleanup() { - for CHIP in `ls $CONFIGFS_DIR/`; do - remove_chip $CHIP - done -} - create_chip() { local CHIP=$1 @@ -105,6 +99,13 @@ disable_chip() { echo 0 > $CONFIGFS_DIR/$CHIP/live || fail "Unable to disable the chip" } +configfs_cleanup() { + for CHIP in `ls $CONFIGFS_DIR/`; do + disable_chip $CHIP + remove_chip $CHIP + done +} + configfs_chip_name() { local CHIP=$1 local BANK=$2 @@ -181,6 +182,7 @@ create_chip chip create_bank chip bank enable_chip chip test -n `cat $CONFIGFS_DIR/chip/bank/chip_name` || fail "chip_name doesn't work" +disable_chip chip remove_chip chip echo "1.2. chip_name returns 'none' if the chip is still pending" @@ -195,6 +197,7 @@ create_chip chip create_bank chip bank enable_chip chip test -n `cat $CONFIGFS_DIR/chip/dev_name` || fail "dev_name doesn't work" +disable_chip chip remove_chip chip echo "2. Creating and configuring simulated chips" @@ -204,6 +207,7 @@ create_chip chip create_bank chip bank enable_chip chip test "`get_chip_num_lines chip bank`" = "1" || fail "default number of lines is not 1" +disable_chip chip remove_chip chip echo "2.2. Number of lines can be specified" @@ -212,6 +216,7 @@ create_bank chip bank set_num_lines chip bank 16 enable_chip chip test "`get_chip_num_lines chip bank`" = "16" || fail "number of lines is not 16" +disable_chip chip remove_chip chip echo "2.3. Label can be set" @@ -220,6 +225,7 @@ create_bank chip bank set_label chip bank foobar enable_chip chip test "`get_chip_label chip bank`" = "foobar" || fail "label is incorrect" +disable_chip chip remove_chip chip echo "2.4. Label can be left empty" @@ -227,6 +233,7 @@ create_chip chip create_bank chip bank enable_chip chip test -z "`cat $CONFIGFS_DIR/chip/bank/label`" || fail "label is not empty" +disable_chip chip remove_chip chip echo "2.5. Line names can be configured" @@ -238,6 +245,7 @@ set_line_name chip bank 2 bar enable_chip chip test "`get_line_name chip bank 0`" = "foo" || fail "line name is incorrect" test "`get_line_name chip bank 2`" = "bar" || fail "line name is incorrect" +disable_chip chip remove_chip chip echo "2.6. Line config can remain unused if offset is greater than number of lines" @@ -248,6 +256,7 @@ set_line_name chip bank 5 foobar enable_chip chip test "`get_line_name chip bank 0`" = "" || fail "line name is incorrect" test "`get_line_name chip bank 1`" = "" || fail "line name is incorrect" +disable_chip chip remove_chip chip echo "2.7. 
Line configfs directory names are sanitized" @@ -267,6 +276,7 @@ for CHIP in $CHIPS; do enable_chip $CHIP done for CHIP in $CHIPS; do + disable_chip $CHIP remove_chip $CHIP done @@ -278,6 +288,7 @@ echo foobar > $CONFIGFS_DIR/chip/bank/label 2> /dev/null && \ fail "Setting label of a live chip should fail" echo 8 > $CONFIGFS_DIR/chip/bank/num_lines 2> /dev/null && \ fail "Setting number of lines of a live chip should fail" +disable_chip chip remove_chip chip echo "2.10. Can't create line items when chip is live" @@ -285,6 +296,7 @@ create_chip chip create_bank chip bank enable_chip chip mkdir $CONFIGFS_DIR/chip/bank/line0 2> /dev/null && fail "Creating line item should fail" +disable_chip chip remove_chip chip echo "2.11. Probe errors are propagated to user-space" @@ -316,6 +328,7 @@ mkdir -p $CONFIGFS_DIR/chip/bank/line4/hog enable_chip chip $BASE_DIR/gpio-mockup-cdev -s 1 /dev/`configfs_chip_name chip bank` 4 2> /dev/null && \ fail "Setting the value of a hogged line shouldn't succeed" +disable_chip chip remove_chip chip echo "3. Controlling simulated chips" @@ -331,6 +344,7 @@ test "$?" = "1" || fail "pull set incorrectly" sysfs_set_pull chip bank 0 pull-down $BASE_DIR/gpio-mockup-cdev /dev/`configfs_chip_name chip bank` 1 test "$?" = "0" || fail "pull set incorrectly" +disable_chip chip remove_chip chip echo "3.2. Pull can be read from sysfs" @@ -344,6 +358,7 @@ SYSFS_PATH=/sys/devices/platform/$DEVNAME/$CHIPNAME/sim_gpio0/pull test `cat $SYSFS_PATH` = "pull-down" || fail "reading the pull failed" sysfs_set_pull chip bank 0 pull-up test `cat $SYSFS_PATH` = "pull-up" || fail "reading the pull failed" +disable_chip chip remove_chip chip echo "3.3. Incorrect input in sysfs is rejected" @@ -355,6 +370,7 @@ DEVNAME=`configfs_dev_name chip` CHIPNAME=`configfs_chip_name chip bank` SYSFS_PATH="/sys/devices/platform/$DEVNAME/$CHIPNAME/sim_gpio0/pull" echo foobar > $SYSFS_PATH 2> /dev/null && fail "invalid input not detected" +disable_chip chip remove_chip chip echo "3.4. Can't write to value" @@ -365,6 +381,7 @@ DEVNAME=`configfs_dev_name chip` CHIPNAME=`configfs_chip_name chip bank` SYSFS_PATH="/sys/devices/platform/$DEVNAME/$CHIPNAME/sim_gpio0/value" echo 1 > $SYSFS_PATH 2> /dev/null && fail "writing to 'value' succeeded unexpectedly" +disable_chip chip remove_chip chip echo "4. Simulated GPIO chips are functional" @@ -382,6 +399,7 @@ $BASE_DIR/gpio-mockup-cdev -s 1 /dev/`configfs_chip_name chip bank` 0 & sleep 0.1 # FIXME Any better way? test `cat $SYSFS_PATH` = "1" || fail "incorrect value read from sysfs" kill $! +disable_chip chip remove_chip chip echo "4.2. 
Bias settings work correctly" @@ -394,6 +412,7 @@ CHIPNAME=`configfs_chip_name chip bank` SYSFS_PATH="/sys/devices/platform/$DEVNAME/$CHIPNAME/sim_gpio0/value" $BASE_DIR/gpio-mockup-cdev -b pull-up /dev/`configfs_chip_name chip bank` 0 test `cat $SYSFS_PATH` = "1" || fail "bias setting does not work" +disable_chip chip remove_chip chip echo "GPIO $MODULE test PASS" diff --git a/tools/testing/selftests/landlock/fs_test.c b/tools/testing/selftests/landlock/fs_test.c index 2af86bd796ba..aa6f2c1cbec7 100644 --- a/tools/testing/selftests/landlock/fs_test.c +++ b/tools/testing/selftests/landlock/fs_test.c @@ -59,6 +59,12 @@ int open_tree(int dfd, const char *filename, unsigned int flags) } #endif +static int sys_execveat(int dirfd, const char *pathname, char *const argv[], + char *const envp[], int flags) +{ + return syscall(__NR_execveat, dirfd, pathname, argv, envp, flags); +} + #ifndef RENAME_EXCHANGE #define RENAME_EXCHANGE (1 << 1) #endif @@ -2024,8 +2030,8 @@ static void test_check_exec(struct __test_metadata *const _metadata, int ret; char *const argv[] = { (char *)path, NULL }; - ret = execveat(AT_FDCWD, path, argv, NULL, - AT_EMPTY_PATH | AT_EXECVE_CHECK); + ret = sys_execveat(AT_FDCWD, path, argv, NULL, + AT_EMPTY_PATH | AT_EXECVE_CHECK); if (err) { EXPECT_EQ(-1, ret); EXPECT_EQ(errno, err); diff --git a/tools/testing/selftests/net/bpf_offload.py b/tools/testing/selftests/net/bpf_offload.py index d10f420e4ef6..fd0d959914e4 100755 --- a/tools/testing/selftests/net/bpf_offload.py +++ b/tools/testing/selftests/net/bpf_offload.py @@ -215,12 +215,14 @@ def bpftool_map_list_wait(expected=0, n_retry=20, ns=""): raise Exception("Time out waiting for map counts to stabilize want %d, have %d" % (expected, nmaps)) def bpftool_prog_load(sample, file_name, maps=[], prog_type="xdp", dev=None, - fail=True, include_stderr=False): + fail=True, include_stderr=False, dev_bind=None): args = "prog load %s %s" % (os.path.join(bpf_test_dir, sample), file_name) if prog_type is not None: args += " type " + prog_type if dev is not None: args += " dev " + dev + elif dev_bind is not None: + args += " xdpmeta_dev " + dev_bind if len(maps): args += " map " + " map ".join(maps) @@ -980,6 +982,16 @@ try: rm("/sys/fs/bpf/offload") sim.wait_for_flush() + bpftool_prog_load("sample_ret0.bpf.o", "/sys/fs/bpf/devbound", + dev_bind=sim['ifname']) + devbound = bpf_pinned("/sys/fs/bpf/devbound") + start_test("Test dev-bound program in generic mode...") + ret, _, err = sim.set_xdp(devbound, "generic", fail=False, include_stderr=True) + fail(ret == 0, "devbound program in generic mode allowed") + check_extack(err, "Can't attach device-bound programs in generic mode.", args) + rm("/sys/fs/bpf/devbound") + sim.wait_for_flush() + start_test("Test XDP load failure...") sim.dfs["dev/bpf_bind_verifier_accept"] = 0 ret, _, err = bpftool_prog_load("sample_ret0.bpf.o", "/sys/fs/bpf/offload", diff --git a/tools/testing/selftests/net/lib/Makefile b/tools/testing/selftests/net/lib/Makefile index 18b9443454a9..bc6b6762baf3 100644 --- a/tools/testing/selftests/net/lib/Makefile +++ b/tools/testing/selftests/net/lib/Makefile @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 -CFLAGS = -Wall -Wl,--no-as-needed -O2 -g +CFLAGS += -Wall -Wl,--no-as-needed -O2 -g CFLAGS += -I../../../../../usr/include/ $(KHDR_INCLUDES) # Additional include paths needed by kselftest.h CFLAGS += -I../../ diff --git a/tools/testing/selftests/net/mptcp/Makefile b/tools/testing/selftests/net/mptcp/Makefile index 8e3fc05a5397..c76525fe2b84 100644 --- 
a/tools/testing/selftests/net/mptcp/Makefile +++ b/tools/testing/selftests/net/mptcp/Makefile @@ -2,7 +2,7 @@ top_srcdir = ../../../../.. -CFLAGS = -Wall -Wl,--no-as-needed -O2 -g -I$(top_srcdir)/usr/include $(KHDR_INCLUDES) +CFLAGS += -Wall -Wl,--no-as-needed -O2 -g -I$(top_srcdir)/usr/include $(KHDR_INCLUDES) TEST_PROGS := mptcp_connect.sh pm_netlink.sh mptcp_join.sh diag.sh \ simult_flows.sh mptcp_sockopt.sh userspace_pm.sh diff --git a/tools/testing/selftests/net/openvswitch/Makefile b/tools/testing/selftests/net/openvswitch/Makefile index 2f1508abc826..3fd1da2ec07d 100644 --- a/tools/testing/selftests/net/openvswitch/Makefile +++ b/tools/testing/selftests/net/openvswitch/Makefile @@ -2,7 +2,7 @@ top_srcdir = ../../../../.. -CFLAGS = -Wall -Wl,--no-as-needed -O2 -g -I$(top_srcdir)/usr/include $(KHDR_INCLUDES) +CFLAGS += -Wall -Wl,--no-as-needed -O2 -g -I$(top_srcdir)/usr/include $(KHDR_INCLUDES) TEST_PROGS := openvswitch.sh diff --git a/tools/testing/selftests/net/packetdrill/ksft_runner.sh b/tools/testing/selftests/net/packetdrill/ksft_runner.sh index e15c43b7359b..ef8b25a606d8 100755 --- a/tools/testing/selftests/net/packetdrill/ksft_runner.sh +++ b/tools/testing/selftests/net/packetdrill/ksft_runner.sh @@ -39,11 +39,13 @@ if [[ -n "${KSFT_MACHINE_SLOW}" ]]; then # xfail tests that are known flaky with dbg config, not fixable. # still run them for coverage (and expect 100% pass without dbg). declare -ar xfail_list=( + "tcp_eor_no-coalesce-retrans.pkt" "tcp_fast_recovery_prr-ss.*.pkt" + "tcp_slow_start_slow-start-after-win-update.pkt" "tcp_timestamping.*.pkt" "tcp_user_timeout_user-timeout-probe.pkt" "tcp_zerocopy_epoll_.*.pkt" - "tcp_tcp_info_tcp-info-*-limited.pkt" + "tcp_tcp_info_tcp-info-.*-limited.pkt" ) readonly xfail_regex="^($(printf '%s|' "${xfail_list[@]}"))$" [[ "$script" =~ ${xfail_regex} ]] && failfunc=ktap_test_xfail diff --git a/tools/testing/selftests/riscv/vector/.gitignore b/tools/testing/selftests/riscv/vector/.gitignore index 9ae7964491d5..7d9c87cd0649 100644 --- a/tools/testing/selftests/riscv/vector/.gitignore +++ b/tools/testing/selftests/riscv/vector/.gitignore @@ -1,3 +1,4 @@ vstate_exec_nolibc vstate_prctl -v_initval_nolibc +v_initval +v_exec_initval_nolibc diff --git a/tools/testing/selftests/riscv/vector/Makefile b/tools/testing/selftests/riscv/vector/Makefile index bfff0ff4f3be..6f7497f4e7b3 100644 --- a/tools/testing/selftests/riscv/vector/Makefile +++ b/tools/testing/selftests/riscv/vector/Makefile @@ -2,18 +2,27 @@ # Copyright (C) 2021 ARM Limited # Originally tools/testing/arm64/abi/Makefile -TEST_GEN_PROGS := vstate_prctl v_initval_nolibc -TEST_GEN_PROGS_EXTENDED := vstate_exec_nolibc +TEST_GEN_PROGS := v_initval vstate_prctl +TEST_GEN_PROGS_EXTENDED := vstate_exec_nolibc v_exec_initval_nolibc include ../../lib.mk -$(OUTPUT)/vstate_prctl: vstate_prctl.c ../hwprobe/sys_hwprobe.S +$(OUTPUT)/sys_hwprobe.o: ../hwprobe/sys_hwprobe.S + $(CC) -static -c -o$@ $(CFLAGS) $^ + +$(OUTPUT)/v_helpers.o: v_helpers.c + $(CC) -static -c -o$@ $(CFLAGS) $^ + +$(OUTPUT)/vstate_prctl: vstate_prctl.c $(OUTPUT)/sys_hwprobe.o $(OUTPUT)/v_helpers.o $(CC) -static -o$@ $(CFLAGS) $(LDFLAGS) $^ $(OUTPUT)/vstate_exec_nolibc: vstate_exec_nolibc.c $(CC) -nostdlib -static -include ../../../../include/nolibc/nolibc.h \ -Wall $(CFLAGS) $(LDFLAGS) $^ -o $@ -lgcc -$(OUTPUT)/v_initval_nolibc: v_initval_nolibc.c +$(OUTPUT)/v_initval: v_initval.c $(OUTPUT)/sys_hwprobe.o $(OUTPUT)/v_helpers.o + $(CC) -static -o$@ $(CFLAGS) $(LDFLAGS) $^ + +$(OUTPUT)/v_exec_initval_nolibc: 
v_exec_initval_nolibc.c $(CC) -nostdlib -static -include ../../../../include/nolibc/nolibc.h \ -Wall $(CFLAGS) $(LDFLAGS) $^ -o $@ -lgcc diff --git a/tools/testing/selftests/riscv/vector/v_exec_initval_nolibc.c b/tools/testing/selftests/riscv/vector/v_exec_initval_nolibc.c new file mode 100644 index 000000000000..35c0812e32de --- /dev/null +++ b/tools/testing/selftests/riscv/vector/v_exec_initval_nolibc.c @@ -0,0 +1,94 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Get values of vector registers as soon as the program starts to test if + * is properly cleaning the values before starting a new program. Vector + * registers are caller saved, so no function calls may happen before reading + * the values. To further ensure consistency, this file is compiled without + * libc and without auto-vectorization. + * + * To be "clean" all values must be either all ones or all zeroes. + */ + +#define __stringify_1(x...) #x +#define __stringify(x...) __stringify_1(x) + +int main(int argc, char **argv) +{ + char prev_value = 0, value; + unsigned long vl; + int first = 1; + + if (argc > 2 && strcmp(argv[2], "x")) + asm volatile ( + // 0 | zimm[10:0] | rs1 | 1 1 1 | rd |1010111| vsetvli + // vsetvli t4, x0, e8, m1, d1 + ".4byte 0b00000000000000000111111011010111\n\t" + "mv %[vl], t4\n\t" + : [vl] "=r" (vl) : : "t4" + ); + else + asm volatile ( + ".option push\n\t" + ".option arch, +v\n\t" + "vsetvli %[vl], x0, e8, m1, ta, ma\n\t" + ".option pop\n\t" + : [vl] "=r" (vl) + ); + +#define CHECK_VECTOR_REGISTER(register) ({ \ + for (int i = 0; i < vl; i++) { \ + asm volatile ( \ + ".option push\n\t" \ + ".option arch, +v\n\t" \ + "vmv.x.s %0, " __stringify(register) "\n\t" \ + "vsrl.vi " __stringify(register) ", " __stringify(register) ", 8\n\t" \ + ".option pop\n\t" \ + : "=r" (value)); \ + if (first) { \ + first = 0; \ + } else if (value != prev_value || !(value == 0x00 || value == 0xff)) { \ + printf("Register " __stringify(register) \ + " values not clean! 
value: %u\n", value); \ + exit(-1); \ + } \ + prev_value = value; \ + } \ +}) + + CHECK_VECTOR_REGISTER(v0); + CHECK_VECTOR_REGISTER(v1); + CHECK_VECTOR_REGISTER(v2); + CHECK_VECTOR_REGISTER(v3); + CHECK_VECTOR_REGISTER(v4); + CHECK_VECTOR_REGISTER(v5); + CHECK_VECTOR_REGISTER(v6); + CHECK_VECTOR_REGISTER(v7); + CHECK_VECTOR_REGISTER(v8); + CHECK_VECTOR_REGISTER(v9); + CHECK_VECTOR_REGISTER(v10); + CHECK_VECTOR_REGISTER(v11); + CHECK_VECTOR_REGISTER(v12); + CHECK_VECTOR_REGISTER(v13); + CHECK_VECTOR_REGISTER(v14); + CHECK_VECTOR_REGISTER(v15); + CHECK_VECTOR_REGISTER(v16); + CHECK_VECTOR_REGISTER(v17); + CHECK_VECTOR_REGISTER(v18); + CHECK_VECTOR_REGISTER(v19); + CHECK_VECTOR_REGISTER(v20); + CHECK_VECTOR_REGISTER(v21); + CHECK_VECTOR_REGISTER(v22); + CHECK_VECTOR_REGISTER(v23); + CHECK_VECTOR_REGISTER(v24); + CHECK_VECTOR_REGISTER(v25); + CHECK_VECTOR_REGISTER(v26); + CHECK_VECTOR_REGISTER(v27); + CHECK_VECTOR_REGISTER(v28); + CHECK_VECTOR_REGISTER(v29); + CHECK_VECTOR_REGISTER(v30); + CHECK_VECTOR_REGISTER(v31); + +#undef CHECK_VECTOR_REGISTER + + return 0; +} diff --git a/tools/testing/selftests/riscv/vector/v_helpers.c b/tools/testing/selftests/riscv/vector/v_helpers.c new file mode 100644 index 000000000000..01a8799dcb78 --- /dev/null +++ b/tools/testing/selftests/riscv/vector/v_helpers.c @@ -0,0 +1,68 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include "../hwprobe/hwprobe.h" +#include <asm/vendor/thead.h> +#include <stdbool.h> +#include <stdlib.h> +#include <stdio.h> +#include <unistd.h> +#include <sys/wait.h> + +bool is_xtheadvector_supported(void) +{ + struct riscv_hwprobe pair; + + pair.key = RISCV_HWPROBE_KEY_VENDOR_EXT_THEAD_0; + riscv_hwprobe(&pair, 1, 0, NULL, 0); + return pair.value & RISCV_HWPROBE_VENDOR_EXT_XTHEADVECTOR; +} + +bool is_vector_supported(void) +{ + struct riscv_hwprobe pair; + + pair.key = RISCV_HWPROBE_KEY_IMA_EXT_0; + riscv_hwprobe(&pair, 1, 0, NULL, 0); + return pair.value & RISCV_HWPROBE_EXT_ZVE32X; +} + +int launch_test(char *next_program, int test_inherit, int xtheadvector) +{ + char *exec_argv[4], *exec_envp[1]; + int rc, pid, status; + + pid = fork(); + if (pid < 0) { + printf("fork failed %d", pid); + return -1; + } + + if (!pid) { + exec_argv[0] = next_program; + exec_argv[1] = test_inherit != 0 ? "x" : NULL; + exec_argv[2] = xtheadvector != 0 ? 
"x" : NULL; + exec_argv[3] = NULL; + exec_envp[0] = NULL; + /* launch the program again to check inherit */ + rc = execve(next_program, exec_argv, exec_envp); + if (rc) { + perror("execve"); + printf("child execve failed %d\n", rc); + exit(-1); + } + } + + rc = waitpid(-1, &status, 0); + if (rc < 0) { + printf("waitpid failed\n"); + return -3; + } + + if ((WIFEXITED(status) && WEXITSTATUS(status) == -1) || + WIFSIGNALED(status)) { + printf("child exited abnormally\n"); + return -4; + } + + return WEXITSTATUS(status); +} diff --git a/tools/testing/selftests/riscv/vector/v_helpers.h b/tools/testing/selftests/riscv/vector/v_helpers.h new file mode 100644 index 000000000000..763cddfe26da --- /dev/null +++ b/tools/testing/selftests/riscv/vector/v_helpers.h @@ -0,0 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#include <stdbool.h> + +bool is_xtheadvector_supported(void); + +bool is_vector_supported(void); + +int launch_test(char *next_program, int test_inherit, int xtheadvector); diff --git a/tools/testing/selftests/riscv/vector/v_initval.c b/tools/testing/selftests/riscv/vector/v_initval.c new file mode 100644 index 000000000000..be9e1d18ad29 --- /dev/null +++ b/tools/testing/selftests/riscv/vector/v_initval.c @@ -0,0 +1,22 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include "../../kselftest_harness.h" +#include "v_helpers.h" + +#define NEXT_PROGRAM "./v_exec_initval_nolibc" + +TEST(v_initval) +{ + int xtheadvector = 0; + + if (!is_vector_supported()) { + if (is_xtheadvector_supported()) + xtheadvector = 1; + else + SKIP(return, "Vector not supported"); + } + + ASSERT_EQ(0, launch_test(NEXT_PROGRAM, 0, xtheadvector)); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/riscv/vector/v_initval_nolibc.c b/tools/testing/selftests/riscv/vector/v_initval_nolibc.c deleted file mode 100644 index 6174ffe016dc..000000000000 --- a/tools/testing/selftests/riscv/vector/v_initval_nolibc.c +++ /dev/null @@ -1,72 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only - -#include "../../kselftest.h" -#define MAX_VSIZE (8192 * 32) - -void dump(char *ptr, int size) -{ - int i = 0; - - for (i = 0; i < size; i++) { - if (i != 0) { - if (i % 16 == 0) - printf("\n"); - else if (i % 8 == 0) - printf(" "); - } - printf("%02x ", ptr[i]); - } - printf("\n"); -} - -int main(void) -{ - int i; - unsigned long vl; - char *datap, *tmp; - - ksft_set_plan(1); - - datap = malloc(MAX_VSIZE); - if (!datap) { - ksft_test_result_fail("fail to allocate memory for size = %d\n", MAX_VSIZE); - exit(-1); - } - - tmp = datap; - asm volatile ( - ".option push\n\t" - ".option arch, +v\n\t" - "vsetvli %0, x0, e8, m8, ta, ma\n\t" - "vse8.v v0, (%2)\n\t" - "add %1, %2, %0\n\t" - "vse8.v v8, (%1)\n\t" - "add %1, %1, %0\n\t" - "vse8.v v16, (%1)\n\t" - "add %1, %1, %0\n\t" - "vse8.v v24, (%1)\n\t" - ".option pop\n\t" - : "=&r" (vl), "=r" (tmp) : "r" (datap) : "memory"); - - ksft_print_msg("vl = %lu\n", vl); - - if (datap[0] != 0x00 && datap[0] != 0xff) { - ksft_test_result_fail("v-regesters are not properly initialized\n"); - dump(datap, vl * 4); - exit(-1); - } - - for (i = 1; i < vl * 4; i++) { - if (datap[i] != datap[0]) { - ksft_test_result_fail("detect stale values on v-regesters\n"); - dump(datap, vl * 4); - exit(-2); - } - } - - free(datap); - - ksft_test_result_pass("tests for v_initval_nolibc pass\n"); - ksft_exit_pass(); - return 0; -} diff --git a/tools/testing/selftests/riscv/vector/vstate_exec_nolibc.c b/tools/testing/selftests/riscv/vector/vstate_exec_nolibc.c index 1f9969bed235..7b7d6f21acb4 100644 --- 
a/tools/testing/selftests/riscv/vector/vstate_exec_nolibc.c +++ b/tools/testing/selftests/riscv/vector/vstate_exec_nolibc.c @@ -6,13 +6,16 @@ int main(int argc, char **argv) { - int rc, pid, status, test_inherit = 0; + int rc, pid, status, test_inherit = 0, xtheadvector = 0; long ctrl, ctrl_c; char *exec_argv[2], *exec_envp[2]; - if (argc > 1) + if (argc > 1 && strcmp(argv[1], "x")) test_inherit = 1; + if (argc > 2 && strcmp(argv[2], "x")) + xtheadvector = 1; + ctrl = my_syscall1(__NR_prctl, PR_RISCV_V_GET_CONTROL); if (ctrl < 0) { puts("PR_RISCV_V_GET_CONTROL is not supported\n"); @@ -53,11 +56,14 @@ int main(int argc, char **argv) puts("child's vstate_ctrl not equal to parent's\n"); exit(-1); } - asm volatile (".option push\n\t" - ".option arch, +v\n\t" - "vsetvli x0, x0, e32, m8, ta, ma\n\t" - ".option pop\n\t" - ); + if (xtheadvector) + asm volatile (".4byte 0x00007ed7"); + else + asm volatile (".option push\n\t" + ".option arch, +v\n\t" + "vsetvli x0, x0, e32, m8, ta, ma\n\t" + ".option pop\n\t" + ); exit(ctrl); } } diff --git a/tools/testing/selftests/riscv/vector/vstate_prctl.c b/tools/testing/selftests/riscv/vector/vstate_prctl.c index 40b3bffcbb40..62fbb17a0556 100644 --- a/tools/testing/selftests/riscv/vector/vstate_prctl.c +++ b/tools/testing/selftests/riscv/vector/vstate_prctl.c @@ -3,181 +3,244 @@ #include <unistd.h> #include <errno.h> #include <sys/wait.h> +#include <sys/types.h> +#include <stdlib.h> -#include "../hwprobe/hwprobe.h" -#include "../../kselftest.h" +#include "../../kselftest_harness.h" +#include "v_helpers.h" #define NEXT_PROGRAM "./vstate_exec_nolibc" -static int launch_test(int test_inherit) -{ - char *exec_argv[3], *exec_envp[1]; - int rc, pid, status; - - pid = fork(); - if (pid < 0) { - ksft_test_result_fail("fork failed %d", pid); - return -1; - } - if (!pid) { - exec_argv[0] = NEXT_PROGRAM; - exec_argv[1] = test_inherit != 0 ? 
"x" : NULL; - exec_argv[2] = NULL; - exec_envp[0] = NULL; - /* launch the program again to check inherit */ - rc = execve(NEXT_PROGRAM, exec_argv, exec_envp); - if (rc) { - perror("execve"); - ksft_test_result_fail("child execve failed %d\n", rc); - exit(-1); - } - } - - rc = waitpid(-1, &status, 0); - if (rc < 0) { - ksft_test_result_fail("waitpid failed\n"); - return -3; - } - - if ((WIFEXITED(status) && WEXITSTATUS(status) == -1) || - WIFSIGNALED(status)) { - ksft_test_result_fail("child exited abnormally\n"); - return -4; - } - - return WEXITSTATUS(status); -} - -int test_and_compare_child(long provided, long expected, int inherit) +int test_and_compare_child(long provided, long expected, int inherit, int xtheadvector) { int rc; rc = prctl(PR_RISCV_V_SET_CONTROL, provided); if (rc != 0) { - ksft_test_result_fail("prctl with provided arg %lx failed with code %d\n", - provided, rc); + printf("prctl with provided arg %lx failed with code %d\n", + provided, rc); return -1; } - rc = launch_test(inherit); + rc = launch_test(NEXT_PROGRAM, inherit, xtheadvector); if (rc != expected) { - ksft_test_result_fail("Test failed, check %d != %ld\n", rc, - expected); + printf("Test failed, check %d != %ld\n", rc, expected); return -2; } return 0; } -#define PR_RISCV_V_VSTATE_CTRL_CUR_SHIFT 0 -#define PR_RISCV_V_VSTATE_CTRL_NEXT_SHIFT 2 +#define PR_RISCV_V_VSTATE_CTRL_CUR_SHIFT 0 +#define PR_RISCV_V_VSTATE_CTRL_NEXT_SHIFT 2 -int main(void) +TEST(get_control_no_v) { - struct riscv_hwprobe pair; - long flag, expected; long rc; - ksft_set_plan(1); + if (is_vector_supported() || is_xtheadvector_supported()) + SKIP(return, "Test expects vector to be not supported"); - pair.key = RISCV_HWPROBE_KEY_IMA_EXT_0; - rc = riscv_hwprobe(&pair, 1, 0, NULL, 0); - if (rc < 0) { - ksft_test_result_fail("hwprobe() failed with %ld\n", rc); - return -1; - } + rc = prctl(PR_RISCV_V_GET_CONTROL); + EXPECT_EQ(-1, rc) + TH_LOG("GET_CONTROL should fail on kernel/hw without ZVE32X"); + EXPECT_EQ(EINVAL, errno) + TH_LOG("GET_CONTROL should fail on kernel/hw without ZVE32X"); +} - if (pair.key != RISCV_HWPROBE_KEY_IMA_EXT_0) { - ksft_test_result_fail("hwprobe cannot probe RISCV_HWPROBE_KEY_IMA_EXT_0\n"); - return -2; - } +TEST(set_control_no_v) +{ + long rc; - if (!(pair.value & RISCV_HWPROBE_EXT_ZVE32X)) { - rc = prctl(PR_RISCV_V_GET_CONTROL); - if (rc != -1 || errno != EINVAL) { - ksft_test_result_fail("GET_CONTROL should fail on kernel/hw without ZVE32X\n"); - return -3; - } - - rc = prctl(PR_RISCV_V_SET_CONTROL, PR_RISCV_V_VSTATE_CTRL_ON); - if (rc != -1 || errno != EINVAL) { - ksft_test_result_fail("SET_CONTROL should fail on kernel/hw without ZVE32X\n"); - return -4; - } - - ksft_test_result_skip("Vector not supported\n"); - return 0; - } + if (is_vector_supported() || is_xtheadvector_supported()) + SKIP(return, "Test expects vector to be not supported"); + + rc = prctl(PR_RISCV_V_SET_CONTROL, PR_RISCV_V_VSTATE_CTRL_ON); + EXPECT_EQ(-1, rc) + TH_LOG("SET_CONTROL should fail on kernel/hw without ZVE32X"); + EXPECT_EQ(EINVAL, errno) + TH_LOG("SET_CONTROL should fail on kernel/hw without ZVE32X"); +} + +TEST(vstate_on_current) +{ + long flag; + long rc; + + if (!is_vector_supported() && !is_xtheadvector_supported()) + SKIP(return, "Vector not supported"); flag = PR_RISCV_V_VSTATE_CTRL_ON; rc = prctl(PR_RISCV_V_SET_CONTROL, flag); - if (rc != 0) { - ksft_test_result_fail("Enabling V for current should always success\n"); - return -5; - } + EXPECT_EQ(0, rc) TH_LOG("Enabling V for current should always succeed"); +} + 
+TEST(vstate_off_eperm) +{ + long flag; + long rc; + + if (!is_vector_supported() && !is_xtheadvector_supported()) + SKIP(return, "Vector not supported"); flag = PR_RISCV_V_VSTATE_CTRL_OFF; rc = prctl(PR_RISCV_V_SET_CONTROL, flag); - if (rc != -1 || errno != EPERM) { - ksft_test_result_fail("Disabling current's V alive must fail with EPERM(%d)\n", - errno); - return -5; + EXPECT_EQ(EPERM, errno) + TH_LOG("Disabling V in current thread with V enabled must fail with EPERM(%d)", errno); + EXPECT_EQ(-1, rc) + TH_LOG("Disabling V in current thread with V enabled must fail with EPERM(%d)", errno); +} + +TEST(vstate_on_no_nesting) +{ + long flag; + int xtheadvector = 0; + + if (!is_vector_supported()) { + if (is_xtheadvector_supported()) + xtheadvector = 1; + else + SKIP(return, "Vector not supported"); } /* Turn on next's vector explicitly and test */ flag = PR_RISCV_V_VSTATE_CTRL_ON << PR_RISCV_V_VSTATE_CTRL_NEXT_SHIFT; - if (test_and_compare_child(flag, PR_RISCV_V_VSTATE_CTRL_ON, 0)) - return -6; + + EXPECT_EQ(0, test_and_compare_child(flag, PR_RISCV_V_VSTATE_CTRL_ON, 0, xtheadvector)); +} + +TEST(vstate_off_nesting) +{ + long flag; + int xtheadvector = 0; + + if (!is_vector_supported()) { + if (is_xtheadvector_supported()) + xtheadvector = 1; + else + SKIP(return, "Vector not supported"); + } /* Turn off next's vector explicitly and test */ flag = PR_RISCV_V_VSTATE_CTRL_OFF << PR_RISCV_V_VSTATE_CTRL_NEXT_SHIFT; - if (test_and_compare_child(flag, PR_RISCV_V_VSTATE_CTRL_OFF, 0)) - return -7; + + EXPECT_EQ(0, test_and_compare_child(flag, PR_RISCV_V_VSTATE_CTRL_OFF, 1, xtheadvector)); +} + +TEST(vstate_on_inherit_no_nesting) +{ + long flag, expected; + int xtheadvector = 0; + + if (!is_vector_supported()) { + if (is_xtheadvector_supported()) + xtheadvector = 1; + else + SKIP(return, "Vector not supported"); + } + + /* Turn on next's vector explicitly and test no inherit */ + flag = PR_RISCV_V_VSTATE_CTRL_ON << PR_RISCV_V_VSTATE_CTRL_NEXT_SHIFT; + flag |= PR_RISCV_V_VSTATE_CTRL_INHERIT; + expected = flag | PR_RISCV_V_VSTATE_CTRL_ON; + + EXPECT_EQ(0, test_and_compare_child(flag, expected, 0, xtheadvector)); +} + +TEST(vstate_on_inherit) +{ + long flag, expected; + int xtheadvector = 0; + + if (!is_vector_supported()) { + if (is_xtheadvector_supported()) + xtheadvector = 1; + else + SKIP(return, "Vector not supported"); + } /* Turn on next's vector explicitly and test inherit */ flag = PR_RISCV_V_VSTATE_CTRL_ON << PR_RISCV_V_VSTATE_CTRL_NEXT_SHIFT; flag |= PR_RISCV_V_VSTATE_CTRL_INHERIT; expected = flag | PR_RISCV_V_VSTATE_CTRL_ON; - if (test_and_compare_child(flag, expected, 0)) - return -8; - if (test_and_compare_child(flag, expected, 1)) - return -9; + EXPECT_EQ(0, test_and_compare_child(flag, expected, 1, xtheadvector)); +} + +TEST(vstate_off_inherit_no_nesting) +{ + long flag, expected; + int xtheadvector = 0; + + if (!is_vector_supported()) { + if (is_xtheadvector_supported()) + xtheadvector = 1; + else + SKIP(return, "Vector not supported"); + } + /* Turn off next's vector explicitly and test no inherit */ + flag = PR_RISCV_V_VSTATE_CTRL_OFF << PR_RISCV_V_VSTATE_CTRL_NEXT_SHIFT; + flag |= PR_RISCV_V_VSTATE_CTRL_INHERIT; + expected = flag | PR_RISCV_V_VSTATE_CTRL_OFF; + + EXPECT_EQ(0, test_and_compare_child(flag, expected, 0, xtheadvector)); +} + +TEST(vstate_off_inherit) +{ + long flag, expected; + int xtheadvector = 0; + + if (!is_vector_supported()) { + if (is_xtheadvector_supported()) + xtheadvector = 1; + else + SKIP(return, "Vector not supported"); + } /* Turn off next's vector 
explicitly and test inherit */ flag = PR_RISCV_V_VSTATE_CTRL_OFF << PR_RISCV_V_VSTATE_CTRL_NEXT_SHIFT; flag |= PR_RISCV_V_VSTATE_CTRL_INHERIT; expected = flag | PR_RISCV_V_VSTATE_CTRL_OFF; - if (test_and_compare_child(flag, expected, 0)) - return -10; - if (test_and_compare_child(flag, expected, 1)) - return -11; + EXPECT_EQ(0, test_and_compare_child(flag, expected, 1, xtheadvector)); +} + +/* arguments should fail with EINVAL */ +TEST(inval_set_control_1) +{ + int rc; + + if (!is_vector_supported() && !is_xtheadvector_supported()) + SKIP(return, "Vector not supported"); - /* arguments should fail with EINVAL */ rc = prctl(PR_RISCV_V_SET_CONTROL, 0xff0); - if (rc != -1 || errno != EINVAL) { - ksft_test_result_fail("Undefined control argument should return EINVAL\n"); - return -12; - } + EXPECT_EQ(-1, rc); + EXPECT_EQ(EINVAL, errno); +} + +/* arguments should fail with EINVAL */ +TEST(inval_set_control_2) +{ + int rc; + + if (!is_vector_supported() && !is_xtheadvector_supported()) + SKIP(return, "Vector not supported"); rc = prctl(PR_RISCV_V_SET_CONTROL, 0x3); - if (rc != -1 || errno != EINVAL) { - ksft_test_result_fail("Undefined control argument should return EINVAL\n"); - return -12; - } + EXPECT_EQ(-1, rc); + EXPECT_EQ(EINVAL, errno); +} - rc = prctl(PR_RISCV_V_SET_CONTROL, 0xc); - if (rc != -1 || errno != EINVAL) { - ksft_test_result_fail("Undefined control argument should return EINVAL\n"); - return -12; - } +/* arguments should fail with EINVAL */ +TEST(inval_set_control_3) +{ + int rc; - rc = prctl(PR_RISCV_V_SET_CONTROL, 0xc); - if (rc != -1 || errno != EINVAL) { - ksft_test_result_fail("Undefined control argument should return EINVAL\n"); - return -12; - } + if (!is_vector_supported() && !is_xtheadvector_supported()) + SKIP(return, "Vector not supported"); - ksft_test_result_pass("tests for riscv_v_vstate_ctrl pass\n"); - ksft_exit_pass(); - return 0; + rc = prctl(PR_RISCV_V_SET_CONTROL, 0xc); + EXPECT_EQ(-1, rc); + EXPECT_EQ(EINVAL, errno); } + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/x86/lam.c b/tools/testing/selftests/x86/lam.c index 0ea4f6813930..4d4a76532dc9 100644 --- a/tools/testing/selftests/x86/lam.c +++ b/tools/testing/selftests/x86/lam.c @@ -237,7 +237,7 @@ static uint64_t set_metadata(uint64_t src, unsigned long lam) * both pointers should point to the same address. * * @return: - * 0: value on the pointer with metadate and value on original are same + * 0: value on the pointer with metadata and value on original are same * 1: not same. */ static int handle_lam_test(void *src, unsigned int lam) diff --git a/tools/testing/vsock/util.c b/tools/testing/vsock/util.c index 7058dc614c25..de25892f865f 100644 --- a/tools/testing/vsock/util.c +++ b/tools/testing/vsock/util.c @@ -96,41 +96,57 @@ void vsock_wait_remote_close(int fd) close(epollfd); } -/* Bind to <bind_port>, connect to <cid, port> and return the file descriptor. */ -int vsock_bind_connect(unsigned int cid, unsigned int port, unsigned int bind_port, int type) +/* Create socket <type>, bind to <cid, port> and return the file descriptor. 
*/ +int vsock_bind(unsigned int cid, unsigned int port, int type) { - struct sockaddr_vm sa_client = { - .svm_family = AF_VSOCK, - .svm_cid = VMADDR_CID_ANY, - .svm_port = bind_port, - }; - struct sockaddr_vm sa_server = { + struct sockaddr_vm sa = { .svm_family = AF_VSOCK, .svm_cid = cid, .svm_port = port, }; + int fd; - int client_fd, ret; - - client_fd = socket(AF_VSOCK, type, 0); - if (client_fd < 0) { + fd = socket(AF_VSOCK, type, 0); + if (fd < 0) { perror("socket"); exit(EXIT_FAILURE); } - if (bind(client_fd, (struct sockaddr *)&sa_client, sizeof(sa_client))) { + if (bind(fd, (struct sockaddr *)&sa, sizeof(sa))) { perror("bind"); exit(EXIT_FAILURE); } + return fd; +} + +int vsock_connect_fd(int fd, unsigned int cid, unsigned int port) +{ + struct sockaddr_vm sa = { + .svm_family = AF_VSOCK, + .svm_cid = cid, + .svm_port = port, + }; + int ret; + timeout_begin(TIMEOUT); do { - ret = connect(client_fd, (struct sockaddr *)&sa_server, sizeof(sa_server)); + ret = connect(fd, (struct sockaddr *)&sa, sizeof(sa)); timeout_check("connect"); } while (ret < 0 && errno == EINTR); timeout_end(); - if (ret < 0) { + return ret; +} + +/* Bind to <bind_port>, connect to <cid, port> and return the file descriptor. */ +int vsock_bind_connect(unsigned int cid, unsigned int port, unsigned int bind_port, int type) +{ + int client_fd; + + client_fd = vsock_bind(VMADDR_CID_ANY, bind_port, type); + + if (vsock_connect_fd(client_fd, cid, port)) { perror("connect"); exit(EXIT_FAILURE); } @@ -141,17 +157,6 @@ int vsock_bind_connect(unsigned int cid, unsigned int port, unsigned int bind_po /* Connect to <cid, port> and return the file descriptor. */ int vsock_connect(unsigned int cid, unsigned int port, int type) { - union { - struct sockaddr sa; - struct sockaddr_vm svm; - } addr = { - .svm = { - .svm_family = AF_VSOCK, - .svm_port = port, - .svm_cid = cid, - }, - }; - int ret; int fd; control_expectln("LISTENING"); @@ -162,20 +167,14 @@ int vsock_connect(unsigned int cid, unsigned int port, int type) exit(EXIT_FAILURE); } - timeout_begin(TIMEOUT); - do { - ret = connect(fd, &addr.sa, sizeof(addr.svm)); - timeout_check("connect"); - } while (ret < 0 && errno == EINTR); - timeout_end(); - - if (ret < 0) { + if (vsock_connect_fd(fd, cid, port)) { int old_errno = errno; close(fd); fd = -1; errno = old_errno; } + return fd; } @@ -192,28 +191,9 @@ int vsock_seqpacket_connect(unsigned int cid, unsigned int port) /* Listen on <cid, port> and return the file descriptor. 
*/ static int vsock_listen(unsigned int cid, unsigned int port, int type) { - union { - struct sockaddr sa; - struct sockaddr_vm svm; - } addr = { - .svm = { - .svm_family = AF_VSOCK, - .svm_port = port, - .svm_cid = cid, - }, - }; int fd; - fd = socket(AF_VSOCK, type, 0); - if (fd < 0) { - perror("socket"); - exit(EXIT_FAILURE); - } - - if (bind(fd, &addr.sa, sizeof(addr.svm)) < 0) { - perror("bind"); - exit(EXIT_FAILURE); - } + fd = vsock_bind(cid, port, type); if (listen(fd, 1) < 0) { perror("listen"); diff --git a/tools/testing/vsock/util.h b/tools/testing/vsock/util.h index e62f46b2b92a..d1f765ce3eee 100644 --- a/tools/testing/vsock/util.h +++ b/tools/testing/vsock/util.h @@ -39,10 +39,12 @@ struct test_case { void init_signals(void); unsigned int parse_cid(const char *str); unsigned int parse_port(const char *str); +int vsock_connect_fd(int fd, unsigned int cid, unsigned int port); int vsock_connect(unsigned int cid, unsigned int port, int type); int vsock_accept(unsigned int cid, unsigned int port, struct sockaddr_vm *clientaddrp, int type); int vsock_stream_connect(unsigned int cid, unsigned int port); +int vsock_bind(unsigned int cid, unsigned int port, int type); int vsock_bind_connect(unsigned int cid, unsigned int port, unsigned int bind_port, int type); int vsock_seqpacket_connect(unsigned int cid, unsigned int port); diff --git a/tools/testing/vsock/vsock_test.c b/tools/testing/vsock/vsock_test.c index 1eebbc0d5f61..dfff8b288265 100644 --- a/tools/testing/vsock/vsock_test.c +++ b/tools/testing/vsock/vsock_test.c @@ -113,24 +113,9 @@ static void test_stream_bind_only_client(const struct test_opts *opts) static void test_stream_bind_only_server(const struct test_opts *opts) { - union { - struct sockaddr sa; - struct sockaddr_vm svm; - } addr = { - .svm = { - .svm_family = AF_VSOCK, - .svm_port = opts->peer_port, - .svm_cid = VMADDR_CID_ANY, - }, - }; int fd; - fd = socket(AF_VSOCK, SOCK_STREAM, 0); - - if (bind(fd, &addr.sa, sizeof(addr.svm)) < 0) { - perror("bind"); - exit(EXIT_FAILURE); - } + fd = vsock_bind(VMADDR_CID_ANY, opts->peer_port, SOCK_STREAM); /* Notify the client that the server is ready */ control_writeln("BIND"); @@ -1708,6 +1693,101 @@ static void test_stream_msgzcopy_leak_zcskb_server(const struct test_opts *opts) close(fd); } +#define MAX_PORT_RETRIES 24 /* net/vmw_vsock/af_vsock.c */ + +/* Test attempts to trigger a transport release for an unbound socket. This can + * lead to a reference count mishandling. + */ +static void test_stream_transport_uaf_client(const struct test_opts *opts) +{ + int sockets[MAX_PORT_RETRIES]; + struct sockaddr_vm addr; + int fd, i, alen; + + fd = vsock_bind(VMADDR_CID_ANY, VMADDR_PORT_ANY, SOCK_STREAM); + + alen = sizeof(addr); + if (getsockname(fd, (struct sockaddr *)&addr, &alen)) { + perror("getsockname"); + exit(EXIT_FAILURE); + } + + for (i = 0; i < MAX_PORT_RETRIES; ++i) + sockets[i] = vsock_bind(VMADDR_CID_ANY, ++addr.svm_port, + SOCK_STREAM); + + close(fd); + fd = socket(AF_VSOCK, SOCK_STREAM, 0); + if (fd < 0) { + perror("socket"); + exit(EXIT_FAILURE); + } + + if (!vsock_connect_fd(fd, addr.svm_cid, addr.svm_port)) { + perror("Unexpected connect() #1 success"); + exit(EXIT_FAILURE); + } + + /* Vulnerable system may crash now. 
*/ + if (!vsock_connect_fd(fd, VMADDR_CID_HOST, VMADDR_PORT_ANY)) { + perror("Unexpected connect() #2 success"); + exit(EXIT_FAILURE); + } + + close(fd); + while (i--) + close(sockets[i]); + + control_writeln("DONE"); +} + +static void test_stream_transport_uaf_server(const struct test_opts *opts) +{ + control_expectln("DONE"); +} + +static void test_stream_connect_retry_client(const struct test_opts *opts) +{ + int fd; + + fd = socket(AF_VSOCK, SOCK_STREAM, 0); + if (fd < 0) { + perror("socket"); + exit(EXIT_FAILURE); + } + + if (!vsock_connect_fd(fd, opts->peer_cid, opts->peer_port)) { + fprintf(stderr, "Unexpected connect() #1 success\n"); + exit(EXIT_FAILURE); + } + + control_writeln("LISTEN"); + control_expectln("LISTENING"); + + if (vsock_connect_fd(fd, opts->peer_cid, opts->peer_port)) { + perror("connect() #2"); + exit(EXIT_FAILURE); + } + + close(fd); +} + +static void test_stream_connect_retry_server(const struct test_opts *opts) +{ + int fd; + + control_expectln("LISTEN"); + + fd = vsock_stream_accept(VMADDR_CID_ANY, opts->peer_port, NULL); + if (fd < 0) { + perror("accept"); + exit(EXIT_FAILURE); + } + + vsock_wait_remote_close(fd); + close(fd); +} + static struct test_case test_cases[] = { { .name = "SOCK_STREAM connection reset", @@ -1853,6 +1933,16 @@ static struct test_case test_cases[] = { .run_client = test_stream_msgzcopy_leak_zcskb_client, .run_server = test_stream_msgzcopy_leak_zcskb_server, }, + { + .name = "SOCK_STREAM transport release use-after-free", + .run_client = test_stream_transport_uaf_client, + .run_server = test_stream_transport_uaf_server, + }, + { + .name = "SOCK_STREAM retry failed connect()", + .run_client = test_stream_connect_retry_client, + .run_server = test_stream_connect_retry_server, + }, {}, };