diff options
621 files changed, 14337 insertions, 4338 deletions
diff --git a/Documentation/admin-guide/sysctl/net.rst b/Documentation/admin-guide/sysctl/net.rst index 4877563241f3..c7525942f12c 100644 --- a/Documentation/admin-guide/sysctl/net.rst +++ b/Documentation/admin-guide/sysctl/net.rst @@ -71,6 +71,7 @@ two flavors of JITs, the newer eBPF JIT currently supported on: - s390x - riscv64 - riscv32 + - loongarch64 And the older cBPF JIT supported on the following archs: diff --git a/Documentation/arch/arm64/silicon-errata.rst b/Documentation/arch/arm64/silicon-errata.rst index e96f057ea2a0..f47f63bcf67c 100644 --- a/Documentation/arch/arm64/silicon-errata.rst +++ b/Documentation/arch/arm64/silicon-errata.rst @@ -71,6 +71,8 @@ stable kernels. +----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A510 | #2658417 | ARM64_ERRATUM_2658417 | +----------------+-----------------+-----------------+-----------------------------+ +| ARM | Cortex-A520 | #2966298 | ARM64_ERRATUM_2966298 | ++----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A53 | #826319 | ARM64_ERRATUM_826319 | +----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A53 | #827319 | ARM64_ERRATUM_827319 | diff --git a/Documentation/bpf/libbpf/program_types.rst b/Documentation/bpf/libbpf/program_types.rst index ad4d4d5eecb0..63bb88846e50 100644 --- a/Documentation/bpf/libbpf/program_types.rst +++ b/Documentation/bpf/libbpf/program_types.rst @@ -56,6 +56,16 @@ described in more detail in the footnotes. | | ``BPF_CGROUP_UDP6_RECVMSG`` | ``cgroup/recvmsg6`` | | + +----------------------------------------+----------------------------------+-----------+ | | ``BPF_CGROUP_UDP6_SENDMSG`` | ``cgroup/sendmsg6`` | | +| +----------------------------------------+----------------------------------+-----------+ +| | ``BPF_CGROUP_UNIX_CONNECT`` | ``cgroup/connect_unix`` | | +| +----------------------------------------+----------------------------------+-----------+ +| | ``BPF_CGROUP_UNIX_SENDMSG`` | ``cgroup/sendmsg_unix`` | | +| +----------------------------------------+----------------------------------+-----------+ +| | ``BPF_CGROUP_UNIX_RECVMSG`` | ``cgroup/recvmsg_unix`` | | +| +----------------------------------------+----------------------------------+-----------+ +| | ``BPF_CGROUP_UNIX_GETPEERNAME`` | ``cgroup/getpeername_unix`` | | +| +----------------------------------------+----------------------------------+-----------+ +| | ``BPF_CGROUP_UNIX_GETSOCKNAME`` | ``cgroup/getsockname_unix`` | | +-------------------------------------------+----------------------------------------+----------------------------------+-----------+ | ``BPF_PROG_TYPE_CGROUP_SOCK`` | ``BPF_CGROUP_INET4_POST_BIND`` | ``cgroup/post_bind4`` | | + +----------------------------------------+----------------------------------+-----------+ diff --git a/Documentation/devicetree/bindings/bus/fsl,imx8qxp-pixel-link-msi-bus.yaml b/Documentation/devicetree/bindings/bus/fsl,imx8qxp-pixel-link-msi-bus.yaml index b568d0ce438d..7e1ffc551046 100644 --- a/Documentation/devicetree/bindings/bus/fsl,imx8qxp-pixel-link-msi-bus.yaml +++ b/Documentation/devicetree/bindings/bus/fsl,imx8qxp-pixel-link-msi-bus.yaml @@ -73,9 +73,6 @@ patternProperties: "^.*@[0-9a-f]+$": description: Devices attached to the bus type: object - properties: - reg: - maxItems: 1 required: - reg diff --git a/Documentation/devicetree/bindings/cache/andestech,ax45mp-cache.yaml b/Documentation/devicetree/bindings/cache/andestech,ax45mp-cache.yaml index 9ab5f0c435d4..d2cbe49f4e15 100644 --- a/Documentation/devicetree/bindings/cache/andestech,ax45mp-cache.yaml +++ b/Documentation/devicetree/bindings/cache/andestech,ax45mp-cache.yaml @@ -69,7 +69,7 @@ examples: - | #include <dt-bindings/interrupt-controller/irq.h> - cache-controller@2010000 { + cache-controller@13400000 { compatible = "andestech,ax45mp-cache", "cache"; reg = <0x13400000 0x100000>; interrupts = <508 IRQ_TYPE_LEVEL_HIGH>; diff --git a/Documentation/devicetree/bindings/display/imx/fsl,imx6-hdmi.yaml b/Documentation/devicetree/bindings/display/imx/fsl,imx6-hdmi.yaml index af7fe9c4d196..7979cf07f119 100644 --- a/Documentation/devicetree/bindings/display/imx/fsl,imx6-hdmi.yaml +++ b/Documentation/devicetree/bindings/display/imx/fsl,imx6-hdmi.yaml @@ -87,7 +87,7 @@ required: - interrupts - ports -additionalProperties: false +unevaluatedProperties: false examples: - | diff --git a/Documentation/devicetree/bindings/i3c/i3c.yaml b/Documentation/devicetree/bindings/i3c/i3c.yaml index ab69f4115de4..d9483fbd2454 100644 --- a/Documentation/devicetree/bindings/i3c/i3c.yaml +++ b/Documentation/devicetree/bindings/i3c/i3c.yaml @@ -55,6 +55,12 @@ properties: May not be supported by all controllers. + mctp-controller: + type: boolean + description: | + Indicates that the system is accessible via this bus as an endpoint for + MCTP over I3C transport. + required: - "#address-cells" - "#size-cells" diff --git a/Documentation/devicetree/bindings/interrupt-controller/arm,gic-v3.yaml b/Documentation/devicetree/bindings/interrupt-controller/arm,gic-v3.yaml index 2bc38479a41e..0f4a062c9d6f 100644 --- a/Documentation/devicetree/bindings/interrupt-controller/arm,gic-v3.yaml +++ b/Documentation/devicetree/bindings/interrupt-controller/arm,gic-v3.yaml @@ -106,6 +106,12 @@ properties: $ref: /schemas/types.yaml#/definitions/uint32 maximum: 4096 + dma-noncoherent: + description: + Present if the GIC redistributors permit programming shareability + and cacheability attributes but are connected to a non-coherent + downstream interconnect. + msi-controller: description: Only present if the Message Based Interrupt functionality is @@ -193,6 +199,12 @@ patternProperties: compatible: const: arm,gic-v3-its + dma-noncoherent: + description: + Present if the GIC ITS permits programming shareability and + cacheability attributes but is connected to a non-coherent + downstream interconnect. + msi-controller: true "#msi-cells": diff --git a/Documentation/devicetree/bindings/interrupt-controller/renesas,irqc.yaml b/Documentation/devicetree/bindings/interrupt-controller/renesas,irqc.yaml index 95033cb514fb..b417341fc8ae 100644 --- a/Documentation/devicetree/bindings/interrupt-controller/renesas,irqc.yaml +++ b/Documentation/devicetree/bindings/interrupt-controller/renesas,irqc.yaml @@ -37,6 +37,7 @@ properties: - renesas,intc-ex-r8a77990 # R-Car E3 - renesas,intc-ex-r8a77995 # R-Car D3 - renesas,intc-ex-r8a779a0 # R-Car V3U + - renesas,intc-ex-r8a779f0 # R-Car S4-8 - renesas,intc-ex-r8a779g0 # R-Car V4H - const: renesas,irqc diff --git a/Documentation/devicetree/bindings/interrupt-controller/renesas,rzg2l-irqc.yaml b/Documentation/devicetree/bindings/interrupt-controller/renesas,rzg2l-irqc.yaml index 33b90e975e33..2ef3081eaaf3 100644 --- a/Documentation/devicetree/bindings/interrupt-controller/renesas,rzg2l-irqc.yaml +++ b/Documentation/devicetree/bindings/interrupt-controller/renesas,rzg2l-irqc.yaml @@ -19,20 +19,19 @@ description: | - NMI edge select (NMI is not treated as NMI exception and supports fall edge and stand-up edge detection interrupts) -allOf: - - $ref: /schemas/interrupt-controller.yaml# - properties: compatible: items: - enum: + - renesas,r9a07g043u-irqc # RZ/G2UL - renesas,r9a07g044-irqc # RZ/G2{L,LC} - renesas,r9a07g054-irqc # RZ/V2L - const: renesas,rzg2l-irqc '#interrupt-cells': - description: The first cell should contain external interrupt number (IRQ0-7) and the - second cell is used to specify the flag. + description: The first cell should contain a macro RZG2L_{NMI,IRQX} included in the + include/dt-bindings/interrupt-controller/irqc-rzg2l.h and the second + cell is used to specify the flag. const: 2 '#address-cells': @@ -44,7 +43,96 @@ properties: maxItems: 1 interrupts: - maxItems: 41 + minItems: 41 + items: + - description: NMI interrupt + - description: IRQ0 interrupt + - description: IRQ1 interrupt + - description: IRQ2 interrupt + - description: IRQ3 interrupt + - description: IRQ4 interrupt + - description: IRQ5 interrupt + - description: IRQ6 interrupt + - description: IRQ7 interrupt + - description: GPIO interrupt, TINT0 + - description: GPIO interrupt, TINT1 + - description: GPIO interrupt, TINT2 + - description: GPIO interrupt, TINT3 + - description: GPIO interrupt, TINT4 + - description: GPIO interrupt, TINT5 + - description: GPIO interrupt, TINT6 + - description: GPIO interrupt, TINT7 + - description: GPIO interrupt, TINT8 + - description: GPIO interrupt, TINT9 + - description: GPIO interrupt, TINT10 + - description: GPIO interrupt, TINT11 + - description: GPIO interrupt, TINT12 + - description: GPIO interrupt, TINT13 + - description: GPIO interrupt, TINT14 + - description: GPIO interrupt, TINT15 + - description: GPIO interrupt, TINT16 + - description: GPIO interrupt, TINT17 + - description: GPIO interrupt, TINT18 + - description: GPIO interrupt, TINT19 + - description: GPIO interrupt, TINT20 + - description: GPIO interrupt, TINT21 + - description: GPIO interrupt, TINT22 + - description: GPIO interrupt, TINT23 + - description: GPIO interrupt, TINT24 + - description: GPIO interrupt, TINT25 + - description: GPIO interrupt, TINT26 + - description: GPIO interrupt, TINT27 + - description: GPIO interrupt, TINT28 + - description: GPIO interrupt, TINT29 + - description: GPIO interrupt, TINT30 + - description: GPIO interrupt, TINT31 + - description: Bus error interrupt + + interrupt-names: + minItems: 41 + items: + - const: nmi + - const: irq0 + - const: irq1 + - const: irq2 + - const: irq3 + - const: irq4 + - const: irq5 + - const: irq6 + - const: irq7 + - const: tint0 + - const: tint1 + - const: tint2 + - const: tint3 + - const: tint4 + - const: tint5 + - const: tint6 + - const: tint7 + - const: tint8 + - const: tint9 + - const: tint10 + - const: tint11 + - const: tint12 + - const: tint13 + - const: tint14 + - const: tint15 + - const: tint16 + - const: tint17 + - const: tint18 + - const: tint19 + - const: tint20 + - const: tint21 + - const: tint22 + - const: tint23 + - const: tint24 + - const: tint25 + - const: tint26 + - const: tint27 + - const: tint28 + - const: tint29 + - const: tint30 + - const: tint31 + - const: bus-err clocks: maxItems: 2 @@ -72,6 +160,23 @@ required: - power-domains - resets +allOf: + - $ref: /schemas/interrupt-controller.yaml# + + - if: + properties: + compatible: + contains: + const: renesas,r9a07g043u-irqc + then: + properties: + interrupts: + minItems: 42 + interrupt-names: + minItems: 42 + required: + - interrupt-names + unevaluatedProperties: false examples: @@ -80,55 +185,66 @@ examples: #include <dt-bindings/clock/r9a07g044-cpg.h> irqc: interrupt-controller@110a0000 { - compatible = "renesas,r9a07g044-irqc", "renesas,rzg2l-irqc"; - reg = <0x110a0000 0x10000>; - #interrupt-cells = <2>; - #address-cells = <0>; - interrupt-controller; - interrupts = <GIC_SPI 0 IRQ_TYPE_LEVEL_HIGH>, - <GIC_SPI 1 IRQ_TYPE_LEVEL_HIGH>, - <GIC_SPI 2 IRQ_TYPE_LEVEL_HIGH>, - <GIC_SPI 3 IRQ_TYPE_LEVEL_HIGH>, - <GIC_SPI 4 IRQ_TYPE_LEVEL_HIGH>, - <GIC_SPI 5 IRQ_TYPE_LEVEL_HIGH>, - <GIC_SPI 6 IRQ_TYPE_LEVEL_HIGH>, - <GIC_SPI 7 IRQ_TYPE_LEVEL_HIGH>, - <GIC_SPI 8 IRQ_TYPE_LEVEL_HIGH>, - <GIC_SPI 444 IRQ_TYPE_LEVEL_HIGH>, - <GIC_SPI 445 IRQ_TYPE_LEVEL_HIGH>, - <GIC_SPI 446 IRQ_TYPE_LEVEL_HIGH>, - <GIC_SPI 447 IRQ_TYPE_LEVEL_HIGH>, - <GIC_SPI 448 IRQ_TYPE_LEVEL_HIGH>, - <GIC_SPI 449 IRQ_TYPE_LEVEL_HIGH>, - <GIC_SPI 450 IRQ_TYPE_LEVEL_HIGH>, - <GIC_SPI 451 IRQ_TYPE_LEVEL_HIGH>, - <GIC_SPI 452 IRQ_TYPE_LEVEL_HIGH>, - <GIC_SPI 453 IRQ_TYPE_LEVEL_HIGH>, - <GIC_SPI 454 IRQ_TYPE_LEVEL_HIGH>, - <GIC_SPI 455 IRQ_TYPE_LEVEL_HIGH>, - <GIC_SPI 456 IRQ_TYPE_LEVEL_HIGH>, - <GIC_SPI 457 IRQ_TYPE_LEVEL_HIGH>, - <GIC_SPI 458 IRQ_TYPE_LEVEL_HIGH>, - <GIC_SPI 459 IRQ_TYPE_LEVEL_HIGH>, - <GIC_SPI 460 IRQ_TYPE_LEVEL_HIGH>, - <GIC_SPI 461 IRQ_TYPE_LEVEL_HIGH>, - <GIC_SPI 462 IRQ_TYPE_LEVEL_HIGH>, - <GIC_SPI 463 IRQ_TYPE_LEVEL_HIGH>, - <GIC_SPI 464 IRQ_TYPE_LEVEL_HIGH>, - <GIC_SPI 465 IRQ_TYPE_LEVEL_HIGH>, - <GIC_SPI 466 IRQ_TYPE_LEVEL_HIGH>, - <GIC_SPI 467 IRQ_TYPE_LEVEL_HIGH>, - <GIC_SPI 468 IRQ_TYPE_LEVEL_HIGH>, - <GIC_SPI 469 IRQ_TYPE_LEVEL_HIGH>, - <GIC_SPI 470 IRQ_TYPE_LEVEL_HIGH>, - <GIC_SPI 471 IRQ_TYPE_LEVEL_HIGH>, - <GIC_SPI 472 IRQ_TYPE_LEVEL_HIGH>, - <GIC_SPI 473 IRQ_TYPE_LEVEL_HIGH>, - <GIC_SPI 474 IRQ_TYPE_LEVEL_HIGH>, - <GIC_SPI 475 IRQ_TYPE_LEVEL_HIGH>; - clocks = <&cpg CPG_MOD R9A07G044_IA55_CLK>, - <&cpg CPG_MOD R9A07G044_IA55_PCLK>; - clock-names = "clk", "pclk"; - power-domains = <&cpg>; - resets = <&cpg R9A07G044_IA55_RESETN>; + compatible = "renesas,r9a07g044-irqc", "renesas,rzg2l-irqc"; + reg = <0x110a0000 0x10000>; + #interrupt-cells = <2>; + #address-cells = <0>; + interrupt-controller; + interrupts = <GIC_SPI 0 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 1 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 2 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 3 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 4 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 5 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 6 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 7 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 8 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 444 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 445 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 446 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 447 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 448 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 449 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 450 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 451 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 452 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 453 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 454 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 455 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 456 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 457 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 458 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 459 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 460 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 461 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 462 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 463 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 464 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 465 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 466 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 467 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 468 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 469 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 470 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 471 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 472 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 473 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 474 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 475 IRQ_TYPE_LEVEL_HIGH>; + interrupt-names = "nmi", + "irq0", "irq1", "irq2", "irq3", + "irq4", "irq5", "irq6", "irq7", + "tint0", "tint1", "tint2", "tint3", + "tint4", "tint5", "tint6", "tint7", + "tint8", "tint9", "tint10", "tint11", + "tint12", "tint13", "tint14", "tint15", + "tint16", "tint17", "tint18", "tint19", + "tint20", "tint21", "tint22", "tint23", + "tint24", "tint25", "tint26", "tint27", + "tint28", "tint29", "tint30", "tint31"; + clocks = <&cpg CPG_MOD R9A07G044_IA55_CLK>, + <&cpg CPG_MOD R9A07G044_IA55_PCLK>; + clock-names = "clk", "pclk"; + power-domains = <&cpg>; + resets = <&cpg R9A07G044_IA55_RESETN>; }; diff --git a/Documentation/devicetree/bindings/media/i2c/sony,imx415.yaml b/Documentation/devicetree/bindings/media/i2c/sony,imx415.yaml index ffccf5f3c9e3..642f9b15d359 100644 --- a/Documentation/devicetree/bindings/media/i2c/sony,imx415.yaml +++ b/Documentation/devicetree/bindings/media/i2c/sony,imx415.yaml @@ -54,6 +54,7 @@ properties: port: $ref: /schemas/graph.yaml#/$defs/port-base + unevaluatedProperties: false properties: endpoint: diff --git a/Documentation/devicetree/bindings/media/i2c/toshiba,tc358746.yaml b/Documentation/devicetree/bindings/media/i2c/toshiba,tc358746.yaml index c5cab549ee8e..1c476b635b69 100644 --- a/Documentation/devicetree/bindings/media/i2c/toshiba,tc358746.yaml +++ b/Documentation/devicetree/bindings/media/i2c/toshiba,tc358746.yaml @@ -69,6 +69,7 @@ properties: properties: port@0: $ref: /schemas/graph.yaml#/$defs/port-base + unevaluatedProperties: false description: Input port properties: @@ -89,6 +90,7 @@ properties: port@1: $ref: /schemas/graph.yaml#/$defs/port-base + unevaluatedProperties: false description: Output port properties: diff --git a/Documentation/devicetree/bindings/media/nxp,imx7-csi.yaml b/Documentation/devicetree/bindings/media/nxp,imx7-csi.yaml index 358019e85d90..326284e151f6 100644 --- a/Documentation/devicetree/bindings/media/nxp,imx7-csi.yaml +++ b/Documentation/devicetree/bindings/media/nxp,imx7-csi.yaml @@ -59,7 +59,6 @@ allOf: compatible: contains: enum: - - fsl,imx8mq-csi - fsl,imx8mm-csi then: required: diff --git a/Documentation/devicetree/bindings/media/renesas,vin.yaml b/Documentation/devicetree/bindings/media/renesas,vin.yaml index 324703bfb1bd..5539d0f8e74d 100644 --- a/Documentation/devicetree/bindings/media/renesas,vin.yaml +++ b/Documentation/devicetree/bindings/media/renesas,vin.yaml @@ -95,7 +95,7 @@ properties: synchronization is selected. default: 1 - field-active-even: true + field-even-active: true bus-width: true @@ -144,7 +144,7 @@ properties: synchronization is selected. default: 1 - field-active-even: true + field-even-active: true bus-width: true diff --git a/Documentation/devicetree/bindings/media/samsung,fimc.yaml b/Documentation/devicetree/bindings/media/samsung,fimc.yaml index 79ff6d83a9fd..b3486c38a05b 100644 --- a/Documentation/devicetree/bindings/media/samsung,fimc.yaml +++ b/Documentation/devicetree/bindings/media/samsung,fimc.yaml @@ -57,6 +57,7 @@ properties: patternProperties: "^port@[01]$": $ref: /schemas/graph.yaml#/$defs/port-base + unevaluatedProperties: false description: Camera A and camera B inputs. diff --git a/Documentation/devicetree/bindings/pci/brcm,iproc-pcie.yaml b/Documentation/devicetree/bindings/pci/brcm,iproc-pcie.yaml index 0972868735fc..0e07ab61a48d 100644 --- a/Documentation/devicetree/bindings/pci/brcm,iproc-pcie.yaml +++ b/Documentation/devicetree/bindings/pci/brcm,iproc-pcie.yaml @@ -12,7 +12,6 @@ maintainers: allOf: - $ref: /schemas/pci/pci-bus.yaml# - - $ref: /schemas/interrupt-controller/msi-controller.yaml# properties: compatible: @@ -34,13 +33,6 @@ properties: description: > Base address and length of the PCIe controller I/O register space - interrupt-map: true - - interrupt-map-mask: true - - "#interrupt-cells": - const: 1 - ranges: minItems: 1 maxItems: 2 @@ -54,16 +46,8 @@ properties: items: - const: pcie-phy - bus-range: true - dma-coherent: true - "#address-cells": true - - "#size-cells": true - - device_type: true - brcm,pcie-ob: type: boolean description: > @@ -78,20 +62,24 @@ properties: msi: type: object + $ref: /schemas/interrupt-controller/msi-controller.yaml# + unevaluatedProperties: false + properties: compatible: items: - const: brcm,iproc-msi - msi-parent: true + interrupts: + maxItems: 4 - msi-controller: true + brcm,pcie-msi-inten: + type: boolean + description: + Needs to be present for some older iProc platforms that require the + interrupt enable registers to be set explicitly to enable MSI - brcm,pcie-msi-inten: - type: boolean - description: > - Needs to be present for some older iProc platforms that require the - interrupt enable registers to be set explicitly to enable MSI + msi-parent: true dependencies: brcm,pcie-ob-axi-offset: ["brcm,pcie-ob"] @@ -117,68 +105,69 @@ unevaluatedProperties: false examples: - | - #include <dt-bindings/interrupt-controller/arm-gic.h> + #include <dt-bindings/interrupt-controller/arm-gic.h> - bus { - #address-cells = <1>; - #size-cells = <1>; - pcie0: pcie@18012000 { - compatible = "brcm,iproc-pcie"; - reg = <0x18012000 0x1000>; + gic: interrupt-controller { + interrupt-controller; + #interrupt-cells = <3>; + }; - #interrupt-cells = <1>; - interrupt-map-mask = <0 0 0 0>; - interrupt-map = <0 0 0 0 &gic GIC_SPI 100 IRQ_TYPE_NONE>; + pcie@18012000 { + compatible = "brcm,iproc-pcie"; + reg = <0x18012000 0x1000>; - linux,pci-domain = <0>; + #interrupt-cells = <1>; + interrupt-map-mask = <0 0 0 0>; + interrupt-map = <0 0 0 0 &gic GIC_SPI 100 IRQ_TYPE_NONE>; - bus-range = <0x00 0xff>; + linux,pci-domain = <0>; - #address-cells = <3>; - #size-cells = <2>; - device_type = "pci"; - ranges = <0x81000000 0 0 0x28000000 0 0x00010000>, - <0x82000000 0 0x20000000 0x20000000 0 0x04000000>; + bus-range = <0x00 0xff>; - phys = <&phy 0 5>; - phy-names = "pcie-phy"; + #address-cells = <3>; + #size-cells = <2>; + device_type = "pci"; + ranges = <0x81000000 0 0 0x28000000 0 0x00010000>, + <0x82000000 0 0x20000000 0x20000000 0 0x04000000>; - brcm,pcie-ob; - brcm,pcie-ob-axi-offset = <0x00000000>; + phys = <&phy 0 5>; + phy-names = "pcie-phy"; - msi-parent = <&msi0>; + brcm,pcie-ob; + brcm,pcie-ob-axi-offset = <0x00000000>; - /* iProc event queue based MSI */ - msi0: msi { - compatible = "brcm,iproc-msi"; - msi-controller; - interrupt-parent = <&gic>; - interrupts = <GIC_SPI 96 IRQ_TYPE_NONE>, - <GIC_SPI 97 IRQ_TYPE_NONE>, - <GIC_SPI 98 IRQ_TYPE_NONE>, - <GIC_SPI 99 IRQ_TYPE_NONE>; - }; - }; + msi-parent = <&msi0>; - pcie1: pcie@18013000 { - compatible = "brcm,iproc-pcie"; - reg = <0x18013000 0x1000>; + /* iProc event queue based MSI */ + msi0: msi { + compatible = "brcm,iproc-msi"; + msi-controller; + interrupt-parent = <&gic>; + interrupts = <GIC_SPI 96 IRQ_TYPE_NONE>, + <GIC_SPI 97 IRQ_TYPE_NONE>, + <GIC_SPI 98 IRQ_TYPE_NONE>, + <GIC_SPI 99 IRQ_TYPE_NONE>; + }; + }; + - | + pcie@18013000 { + compatible = "brcm,iproc-pcie"; + reg = <0x18013000 0x1000>; - #interrupt-cells = <1>; - interrupt-map-mask = <0 0 0 0>; - interrupt-map = <0 0 0 0 &gic GIC_SPI 106 IRQ_TYPE_NONE>; + #interrupt-cells = <1>; + interrupt-map-mask = <0 0 0 0>; + interrupt-map = <0 0 0 0 &gic GIC_SPI 106 IRQ_TYPE_NONE>; - linux,pci-domain = <1>; + linux,pci-domain = <1>; - bus-range = <0x00 0xff>; + bus-range = <0x00 0xff>; - #address-cells = <3>; - #size-cells = <2>; - device_type = "pci"; - ranges = <0x81000000 0 0 0x48000000 0 0x00010000>, - <0x82000000 0 0x40000000 0x40000000 0 0x04000000>; + #address-cells = <3>; + #size-cells = <2>; + device_type = "pci"; + ranges = <0x81000000 0 0 0x48000000 0 0x00010000>, + <0x82000000 0 0x40000000 0x40000000 0 0x04000000>; - phys = <&phy 1 6>; - phy-names = "pcie-phy"; - }; + phys = <&phy 1 6>; + phy-names = "pcie-phy"; }; diff --git a/Documentation/devicetree/bindings/riscv/cpus.yaml b/Documentation/devicetree/bindings/riscv/cpus.yaml index 38c0b5213736..97e8441eda1c 100644 --- a/Documentation/devicetree/bindings/riscv/cpus.yaml +++ b/Documentation/devicetree/bindings/riscv/cpus.yaml @@ -91,6 +91,7 @@ properties: interrupt-controller: type: object + additionalProperties: false description: Describes the CPU's local interrupt controller properties: diff --git a/Documentation/devicetree/bindings/sound/fsl,micfil.yaml b/Documentation/devicetree/bindings/sound/fsl,micfil.yaml index 4b99a18c79a0..b7e605835639 100644 --- a/Documentation/devicetree/bindings/sound/fsl,micfil.yaml +++ b/Documentation/devicetree/bindings/sound/fsl,micfil.yaml @@ -56,6 +56,9 @@ properties: - const: clkext3 minItems: 2 + "#sound-dai-cells": + const: 0 + required: - compatible - reg diff --git a/Documentation/devicetree/bindings/sound/rockchip-spdif.yaml b/Documentation/devicetree/bindings/sound/rockchip-spdif.yaml index 4f51b2fa82db..c3c989ef2a2c 100644 --- a/Documentation/devicetree/bindings/sound/rockchip-spdif.yaml +++ b/Documentation/devicetree/bindings/sound/rockchip-spdif.yaml @@ -26,6 +26,7 @@ properties: - const: rockchip,rk3568-spdif - items: - enum: + - rockchip,rk3128-spdif - rockchip,rk3188-spdif - rockchip,rk3288-spdif - rockchip,rk3308-spdif diff --git a/Documentation/devicetree/bindings/trivial-devices.yaml b/Documentation/devicetree/bindings/trivial-devices.yaml index cd58179ae337..430a814f64a5 100644 --- a/Documentation/devicetree/bindings/trivial-devices.yaml +++ b/Documentation/devicetree/bindings/trivial-devices.yaml @@ -232,7 +232,7 @@ properties: # MEMSIC magnetometer - memsic,mmc35240 # MEMSIC 3-axis accelerometer - - memsic,mx4005 + - memsic,mxc4005 # MEMSIC 2-axis 8-bit digital accelerometer - memsic,mxc6225 # MEMSIC 2-axis 8-bit digital accelerometer diff --git a/Documentation/driver-api/dpll.rst b/Documentation/driver-api/dpll.rst index 69670deb8c4e..e3d593841aa7 100644 --- a/Documentation/driver-api/dpll.rst +++ b/Documentation/driver-api/dpll.rst @@ -173,6 +173,47 @@ in order to configure active input of a MUX-type pin, the user needs to request desired pin state of the child pin on the parent pin, as described in the ``MUX-type pins`` chapter. +Phase offset measurement and adjustment +======================================== + +Device may provide ability to measure a phase difference between signals +on a pin and its parent dpll device. If pin-dpll phase offset measurement +is supported, it shall be provided with ``DPLL_A_PIN_PHASE_OFFSET`` +attribute for each parent dpll device. + +Device may also provide ability to adjust a signal phase on a pin. +If pin phase adjustment is supported, minimal and maximal values that pin +handle shall be provide to the user on ``DPLL_CMD_PIN_GET`` respond +with ``DPLL_A_PIN_PHASE_ADJUST_MIN`` and ``DPLL_A_PIN_PHASE_ADJUST_MAX`` +attributes. Configured phase adjust value is provided with +``DPLL_A_PIN_PHASE_ADJUST`` attribute of a pin, and value change can be +requested with the same attribute with ``DPLL_CMD_PIN_SET`` command. + + =============================== ====================================== + ``DPLL_A_PIN_ID`` configured pin id + ``DPLL_A_PIN_PHASE_ADJUST_MIN`` attr minimum value of phase adjustment + ``DPLL_A_PIN_PHASE_ADJUST_MAX`` attr maximum value of phase adjustment + ``DPLL_A_PIN_PHASE_ADJUST`` attr configured value of phase + adjustment on parent dpll device + ``DPLL_A_PIN_PARENT_DEVICE`` nested attribute for requesting + configuration on given parent dpll + device + ``DPLL_A_PIN_PARENT_ID`` parent dpll device id + ``DPLL_A_PIN_PHASE_OFFSET`` attr measured phase difference + between a pin and parent dpll device + =============================== ====================================== + +All phase related values are provided in pico seconds, which represents +time difference between signals phase. The negative value means that +phase of signal on pin is earlier in time than dpll's signal. Positive +value means that phase of signal on pin is later in time than signal of +a dpll. + +Phase adjust (also min and max) values are integers, but measured phase +offset values are fractional with 3-digit decimal places and shell be +divided with ``DPLL_PIN_PHASE_OFFSET_DIVIDER`` to get integer part and +modulo divided to get fractional part. + Configuration commands group ============================ @@ -263,6 +304,12 @@ according to attribute purpose. frequencies ``DPLL_A_PIN_ANY_FREQUENCY_MIN`` attr minimum value of frequency ``DPLL_A_PIN_ANY_FREQUENCY_MAX`` attr maximum value of frequency + ``DPLL_A_PIN_PHASE_ADJUST_MIN`` attr minimum value of phase + adjustment + ``DPLL_A_PIN_PHASE_ADJUST_MAX`` attr maximum value of phase + adjustment + ``DPLL_A_PIN_PHASE_ADJUST`` attr configured value of phase + adjustment on parent device ``DPLL_A_PIN_PARENT_DEVICE`` nested attr for each parent device the pin is connected with ``DPLL_A_PIN_PARENT_ID`` attr parent dpll device id @@ -270,8 +317,10 @@ according to attribute purpose. dpll device ``DPLL_A_PIN_STATE`` attr state of pin on the parent dpll device - ``DPLL_A_PIN_DIRECTION`` attr direction of a pin on the + ``DPLL_A_PIN_DIRECTION`` attr direction of a pin on the parent dpll device + ``DPLL_A_PIN_PHASE_OFFSET`` attr measured phase difference + between a pin and parent dpll ``DPLL_A_PIN_PARENT_PIN`` nested attr for each parent pin the pin is connected with ``DPLL_A_PIN_PARENT_ID`` attr parent pin id @@ -284,6 +333,8 @@ according to attribute purpose. ``DPLL_CMD_PIN_SET`` command to set pins configuration ``DPLL_A_PIN_ID`` attr unique a pin ID ``DPLL_A_PIN_FREQUENCY`` attr requested frequency of a pin + ``DPLL_A_PIN_PHASE_ADJUST`` attr requested value of phase + adjustment on parent device ``DPLL_A_PIN_PARENT_DEVICE`` nested attr for each parent dpll device configuration request ``DPLL_A_PIN_PARENT_ID`` attr parent dpll device id diff --git a/Documentation/filesystems/erofs.rst b/Documentation/filesystems/erofs.rst index 4654ee57c1d5..f200d7874495 100644 --- a/Documentation/filesystems/erofs.rst +++ b/Documentation/filesystems/erofs.rst @@ -58,12 +58,14 @@ Here are the main features of EROFS: - Support extended attributes as an option; + - Support a bloom filter that speeds up negative extended attribute lookups; + - Support POSIX.1e ACLs by using extended attributes; - Support transparent data compression as an option: - LZ4 and MicroLZMA algorithms can be used on a per-file basis; In addition, - inplace decompression is also supported to avoid bounce compressed buffers - and page cache thrashing. + LZ4, MicroLZMA and DEFLATE algorithms can be used on a per-file basis; In + addition, inplace decompression is also supported to avoid bounce compressed + buffers and unnecessary page cache thrashing. - Support chunk-based data deduplication and rolling-hash compressed data deduplication; @@ -268,6 +270,38 @@ details.) By the way, chunk-based files are all uncompressed for now. +Long extended attribute name prefixes +------------------------------------- +There are use cases where extended attributes with different values can have +only a few common prefixes (such as overlayfs xattrs). The predefined prefixes +work inefficiently in both image size and runtime performance in such cases. + +The long xattr name prefixes feature is introduced to address this issue. The +overall idea is that, apart from the existing predefined prefixes, the xattr +entry could also refer to user-specified long xattr name prefixes, e.g. +"trusted.overlay.". + +When referring to a long xattr name prefix, the highest bit (bit 7) of +erofs_xattr_entry.e_name_index is set, while the lower bits (bit 0-6) as a whole +represent the index of the referred long name prefix among all long name +prefixes. Therefore, only the trailing part of the name apart from the long +xattr name prefix is stored in erofs_xattr_entry.e_name, which could be empty if +the full xattr name matches exactly as its long xattr name prefix. + +All long xattr prefixes are stored one by one in the packed inode as long as +the packed inode is valid, or in the meta inode otherwise. The +xattr_prefix_count (of the on-disk superblock) indicates the total number of +long xattr name prefixes, while (xattr_prefix_start * 4) indicates the start +offset of long name prefixes in the packed/meta inode. Note that, long extended +attribute name prefixes are disabled if xattr_prefix_count is 0. + +Each long name prefix is stored in the format: ALIGN({__le16 len, data}, 4), +where len represents the total size of the data part. The data part is actually +represented by 'struct erofs_xattr_long_prefix', where base_index represents the +index of the predefined xattr name prefix, e.g. EROFS_XATTR_INDEX_TRUSTED for +"trusted.overlay." long name prefix, while the infix string keeps the string +after stripping the short prefix, e.g. "overlay." for the example above. + Data compression ---------------- EROFS implements fixed-sized output compression which generates fixed-sized diff --git a/Documentation/netlink/genetlink-c.yaml b/Documentation/netlink/genetlink-c.yaml index 9806c44f604c..f9366aaddd21 100644 --- a/Documentation/netlink/genetlink-c.yaml +++ b/Documentation/netlink/genetlink-c.yaml @@ -26,10 +26,6 @@ properties: type: string doc: type: string - version: - description: Generic Netlink family version. Default is 1. - type: integer - minimum: 1 protocol: description: Schema compatibility level. Default is "genetlink". enum: [ genetlink, genetlink-c ] @@ -142,7 +138,7 @@ properties: type: array items: type: object - required: [ name, type ] + required: [ name ] additionalProperties: False properties: name: @@ -215,6 +211,18 @@ properties: not: required: [ name-prefix ] + # type property is only required if not in subset definition + if: + properties: + subset-of: + not: + type: string + then: + properties: + attributes: + items: + required: [ type ] + operations: description: Operations supported by the protocol. type: object diff --git a/Documentation/netlink/genetlink-legacy.yaml b/Documentation/netlink/genetlink-legacy.yaml index 12a0a045605d..a6a490333a1a 100644 --- a/Documentation/netlink/genetlink-legacy.yaml +++ b/Documentation/netlink/genetlink-legacy.yaml @@ -26,10 +26,6 @@ properties: type: string doc: type: string - version: - description: Generic Netlink family version. Default is 1. - type: integer - minimum: 1 protocol: description: Schema compatibility level. Default is "genetlink". enum: [ genetlink, genetlink-c, genetlink-legacy ] # Trim @@ -53,6 +49,10 @@ properties: Defines if the input policy in the kernel is global, per-operation, or split per operation type. Default is split. enum: [ split, per-op, global ] + version: + description: Generic Netlink family version. Default is 1. + type: integer + minimum: 1 # End genetlink-legacy definitions: @@ -180,7 +180,7 @@ properties: type: array items: type: object - required: [ name, type ] + required: [ name ] additionalProperties: False properties: name: @@ -254,6 +254,18 @@ properties: not: required: [ name-prefix ] + # type property is only required if not in subset definition + if: + properties: + subset-of: + not: + type: string + then: + properties: + attributes: + items: + required: [ type ] + operations: description: Operations supported by the protocol. type: object diff --git a/Documentation/netlink/genetlink.yaml b/Documentation/netlink/genetlink.yaml index 3d338c48bf21..2b788e607a14 100644 --- a/Documentation/netlink/genetlink.yaml +++ b/Documentation/netlink/genetlink.yaml @@ -26,10 +26,6 @@ properties: type: string doc: type: string - version: - description: Generic Netlink family version. Default is 1. - type: integer - minimum: 1 protocol: description: Schema compatibility level. Default is "genetlink". enum: [ genetlink ] @@ -115,7 +111,7 @@ properties: type: array items: type: object - required: [ name, type ] + required: [ name ] additionalProperties: False properties: name: @@ -184,6 +180,18 @@ properties: not: required: [ name-prefix ] + # type property is only required if not in subset definition + if: + properties: + subset-of: + not: + type: string + then: + properties: + attributes: + items: + required: [ type ] + operations: description: Operations supported by the protocol. type: object diff --git a/Documentation/netlink/netlink-raw.yaml b/Documentation/netlink/netlink-raw.yaml index 896797876414..d976851b80f8 100644 --- a/Documentation/netlink/netlink-raw.yaml +++ b/Documentation/netlink/netlink-raw.yaml @@ -187,7 +187,7 @@ properties: type: array items: type: object - required: [ name, type ] + required: [ name ] additionalProperties: False properties: name: @@ -261,6 +261,18 @@ properties: not: required: [ name-prefix ] + # type property is only required if not in subset definition + if: + properties: + subset-of: + not: + type: string + then: + properties: + attributes: + items: + required: [ type ] + operations: description: Operations supported by the protocol. type: object diff --git a/Documentation/netlink/specs/devlink.yaml b/Documentation/netlink/specs/devlink.yaml index d1ebcd927149..86a12c5bcff1 100644 --- a/Documentation/netlink/specs/devlink.yaml +++ b/Documentation/netlink/specs/devlink.yaml @@ -199,54 +199,44 @@ attribute-sets: attributes: - name: reload-stats - type: nest - name: remote-reload-stats - type: nest - name: dl-reload-stats subset-of: devlink attributes: - name: reload-action-info - type: nest - name: dl-reload-act-info subset-of: devlink attributes: - name: reload-action - type: u8 - name: reload-action-stats - type: nest - name: dl-reload-act-stats subset-of: devlink attributes: - name: reload-stats-entry - type: nest - name: dl-reload-stats-entry subset-of: devlink attributes: - name: reload-stats-limit - type: u8 - name: reload-stats-value - type: u32 - name: dl-info-version subset-of: devlink attributes: - name: info-version-name - type: string - name: info-version-value - type: string operations: enum-model: directional diff --git a/Documentation/netlink/specs/dpll.yaml b/Documentation/netlink/specs/dpll.yaml index 8b86b28b47a6..cf8abe1c0550 100644 --- a/Documentation/netlink/specs/dpll.yaml +++ b/Documentation/netlink/specs/dpll.yaml @@ -164,6 +164,18 @@ definitions: - name: state-can-change doc: pin state can be changed + - + type: const + name: phase-offset-divider + value: 1000 + doc: | + phase offset divider allows userspace to calculate a value of + measured signal phase difference between a pin and dpll device + as a fractional value with three digit decimal precision. + Value of (DPLL_A_PHASE_OFFSET / DPLL_PHASE_OFFSET_DIVIDER) is an + integer part of a measured phase offset value. + Value of (DPLL_A_PHASE_OFFSET % DPLL_PHASE_OFFSET_DIVIDER) is a + fractional part of a measured phase offset value. attribute-sets: - @@ -272,42 +284,48 @@ attribute-sets: type: nest multi-attr: true nested-attributes: pin-parent-pin + - + name: phase-adjust-min + type: s32 + - + name: phase-adjust-max + type: s32 + - + name: phase-adjust + type: s32 + - + name: phase-offset + type: s64 - name: pin-parent-device subset-of: pin attributes: - name: parent-id - type: u32 - name: direction - type: u32 - name: prio - type: u32 - name: state - type: u32 + - + name: phase-offset - name: pin-parent-pin subset-of: pin attributes: - name: parent-id - type: u32 - name: state - type: u32 - name: frequency-range subset-of: pin attributes: - name: frequency-min - type: u64 - name: frequency-max - type: u64 operations: enum-name: dpll_cmd @@ -439,6 +457,9 @@ operations: - capabilities - parent-device - parent-pin + - phase-adjust-min + - phase-adjust-max + - phase-adjust dump: pre: dpll-lock-dumpit @@ -466,6 +487,7 @@ operations: - state - parent-device - parent-pin + - phase-adjust - name: pin-create-ntf doc: Notification about pin appearing diff --git a/Documentation/netlink/specs/ethtool.yaml b/Documentation/netlink/specs/ethtool.yaml index 837b565577ca..5c7a65b009b4 100644 --- a/Documentation/netlink/specs/ethtool.yaml +++ b/Documentation/netlink/specs/ethtool.yaml @@ -818,13 +818,10 @@ attribute-sets: attributes: - name: hist-bkt-low - type: u32 - name: hist-bkt-hi - type: u32 - name: hist-val - type: u64 - name: stats attributes: diff --git a/Documentation/networking/filter.rst b/Documentation/networking/filter.rst index f69da5074860..7d8c5380492f 100644 --- a/Documentation/networking/filter.rst +++ b/Documentation/networking/filter.rst @@ -650,8 +650,8 @@ before a conversion to the new layout is being done behind the scenes! Currently, the classic BPF format is being used for JITing on most 32-bit architectures, whereas x86-64, aarch64, s390x, powerpc64, -sparc64, arm32, riscv64, riscv32 perform JIT compilation from eBPF -instruction set. +sparc64, arm32, riscv64, riscv32, loongarch64 perform JIT compilation +from eBPF instruction set. Testing ------- diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst index 5b75c3f7a137..2ffc5ad10295 100644 --- a/Documentation/networking/index.rst +++ b/Documentation/networking/index.rst @@ -59,7 +59,6 @@ Contents: gtp ila ioam6-sysctl - ipddp ip_dynaddr ipsec ip-sysctl diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst index f7dfde3b09a9..e7ec9026e5db 100644 --- a/Documentation/networking/ip-sysctl.rst +++ b/Documentation/networking/ip-sysctl.rst @@ -1183,6 +1183,19 @@ tcp_plb_cong_thresh - INTEGER Default: 128 +tcp_pingpong_thresh - INTEGER + The number of estimated data replies sent for estimated incoming data + requests that must happen before TCP considers that a connection is a + "ping-pong" (request-response) connection for which delayed + acknowledgments can provide benefits. + + This threshold is 1 by default, but some applications may need a higher + threshold for optimal performance. + + Possible Values: 1 - 255 + + Default: 1 + UDP variables ============= diff --git a/Documentation/networking/ipddp.rst b/Documentation/networking/ipddp.rst deleted file mode 100644 index be7091b77927..000000000000 --- a/Documentation/networking/ipddp.rst +++ /dev/null @@ -1,78 +0,0 @@ -.. SPDX-License-Identifier: GPL-2.0 - -========================================================= -AppleTalk-IP Decapsulation and AppleTalk-IP Encapsulation -========================================================= - -Documentation ipddp.c - -This file is written by Jay Schulist <jschlst@samba.org> - -Introduction ------------- - -AppleTalk-IP (IPDDP) is the method computers connected to AppleTalk -networks can use to communicate via IP. AppleTalk-IP is simply IP datagrams -inside AppleTalk packets. - -Through this driver you can either allow your Linux box to communicate -IP over an AppleTalk network or you can provide IP gatewaying functions -for your AppleTalk users. - -You can currently encapsulate or decapsulate AppleTalk-IP on LocalTalk, -EtherTalk and PPPTalk. The only limit on the protocol is that of what -kernel AppleTalk layer and drivers are available. - -Each mode requires its own user space software. - -Compiling AppleTalk-IP Decapsulation/Encapsulation -================================================== - -AppleTalk-IP decapsulation needs to be compiled into your kernel. You -will need to turn on AppleTalk-IP driver support. Then you will need to -select ONE of the two options; IP to AppleTalk-IP encapsulation support or -AppleTalk-IP to IP decapsulation support. If you compile the driver -statically you will only be able to use the driver for the function you have -enabled in the kernel. If you compile the driver as a module you can -select what mode you want it to run in via a module loading param. -ipddp_mode=1 for AppleTalk-IP encapsulation and ipddp_mode=2 for -AppleTalk-IP to IP decapsulation. - -Basic instructions for user space tools -======================================= - -I will briefly describe the operation of the tools, but you will -need to consult the supporting documentation for each set of tools. - -Decapsulation - You will need to download a software package called -MacGate. In this distribution there will be a tool called MacRoute -which enables you to add routes to the kernel for your Macs by hand. -Also the tool MacRegGateWay is included to register the -proper IP Gateway and IP addresses for your machine. Included in this -distribution is a patch to netatalk-1.4b2+asun2.0a17.2 (available from -ftp.u.washington.edu/pub/user-supported/asun/) this patch is optional -but it allows automatic adding and deleting of routes for Macs. (Handy -for locations with large Mac installations) - -Encapsulation - You will need to download a software daemon called ipddpd. -This software expects there to be an AppleTalk-IP gateway on the network. -You will also need to add the proper routes to route your Linux box's IP -traffic out the ipddp interface. - -Common Uses of ipddp.c ----------------------- -Of course AppleTalk-IP decapsulation and encapsulation, but specifically -decapsulation is being used most for connecting LocalTalk networks to -IP networks. Although it has been used on EtherTalk networks to allow -Macs that are only able to tunnel IP over EtherTalk. - -Encapsulation has been used to allow a Linux box stuck on a LocalTalk -network to use IP. It should work equally well if you are stuck on an -EtherTalk only network. - -Further Assistance -------------------- -You can contact me (Jay Schulist <jschlst@samba.org>) with any -questions regarding decapsulation or encapsulation. Bradford W. Johnson -<johns393@maroon.tc.umn.edu> originally wrote the ipddp.c driver for IP -encapsulation in AppleTalk. diff --git a/Documentation/networking/msg_zerocopy.rst b/Documentation/networking/msg_zerocopy.rst index b3ea96af9b49..78fb70e748b7 100644 --- a/Documentation/networking/msg_zerocopy.rst +++ b/Documentation/networking/msg_zerocopy.rst @@ -7,7 +7,8 @@ Intro ===== The MSG_ZEROCOPY flag enables copy avoidance for socket send calls. -The feature is currently implemented for TCP and UDP sockets. +The feature is currently implemented for TCP, UDP and VSOCK (with +virtio transport) sockets. Opportunity and Caveats @@ -174,7 +175,9 @@ read_notification() call in the previous snippet. A notification is encoded in the standard error format, sock_extended_err. The level and type fields in the control data are protocol family -specific, IP_RECVERR or IPV6_RECVERR. +specific, IP_RECVERR or IPV6_RECVERR (for TCP or UDP socket). +For VSOCK socket, cmsg_level will be SOL_VSOCK and cmsg_type will be +VSOCK_RECVERR. Error origin is the new type SO_EE_ORIGIN_ZEROCOPY. ee_errno is zero, as explained before, to avoid blocking read and write system calls on @@ -235,12 +238,15 @@ Implementation Loopback -------- +For TCP and UDP: Data sent to local sockets can be queued indefinitely if the receive process does not read its socket. Unbound notification latency is not acceptable. For this reason all packets generated with MSG_ZEROCOPY that are looped to a local socket will incur a deferred copy. This includes looping onto packet sockets (e.g., tcpdump) and tun devices. +For VSOCK: +Data path sent to local sockets is the same as for non-local sockets. Testing ======= @@ -254,3 +260,6 @@ instance when run with msg_zerocopy.sh between a veth pair across namespaces, the test will not show any improvement. For testing, the loopback restriction can be temporarily relaxed by making skb_orphan_frags_rx identical to skb_orphan_frags. + +For VSOCK type of socket example can be found in +tools/testing/vsock/vsock_test_zerocopy.c. diff --git a/Documentation/networking/netconsole.rst b/Documentation/networking/netconsole.rst index 7a9de0568e84..390730a74332 100644 --- a/Documentation/networking/netconsole.rst +++ b/Documentation/networking/netconsole.rst @@ -99,9 +99,6 @@ Dynamic reconfiguration: Dynamic reconfigurability is a useful addition to netconsole that enables remote logging targets to be dynamically added, removed, or have their parameters reconfigured at runtime from a configfs-based userspace interface. -[ Note that the parameters of netconsole targets that were specified/created -from the boot/module option are not exposed via this interface, and hence -cannot be modified dynamically. ] To include this feature, select CONFIG_NETCONSOLE_DYNAMIC when building the netconsole module (or kernel, if netconsole is built-in). @@ -155,6 +152,25 @@ You can also update the local interface dynamically. This is especially useful if you want to use interfaces that have newly come up (and may not have existed when netconsole was loaded / initialized). +Netconsole targets defined at boot time (or module load time) with the +`netconsole=` param are assigned the name `cmdline<index>`. For example, the +first target in the parameter is named `cmdline0`. You can control and modify +these targets by creating configfs directories with the matching name. + +Let's suppose you have two netconsole targets defined at boot time:: + + netconsole=4444@10.0.0.1/eth1,9353@10.0.0.2/12:34:56:78:9a:bc;4444@10.0.0.1/eth1,9353@10.0.0.3/12:34:56:78:9a:bc + +You can modify these targets in runtime by creating the following targets:: + + mkdir cmdline0 + cat cmdline0/remote_ip + 10.0.0.2 + + mkdir cmdline1 + cat cmdline1/remote_ip + 10.0.0.3 + Extended console: ================= diff --git a/Documentation/process/7.AdvancedTopics.rst b/Documentation/process/7.AdvancedTopics.rst index bf7cbfb4caa5..43291704338e 100644 --- a/Documentation/process/7.AdvancedTopics.rst +++ b/Documentation/process/7.AdvancedTopics.rst @@ -146,6 +146,7 @@ pull. The git request-pull command can be helpful in this regard; it will format the request as other developers expect, and will also check to be sure that you have remembered to push those changes to the public server. +.. _development_advancedtopics_reviews: Reviewing patches ----------------- @@ -167,6 +168,12 @@ comments as questions rather than criticisms. Asking "how does the lock get released in this path?" will always work better than stating "the locking here is wrong." +Another technique that is useful in case of a disagreement is to ask for others +to chime in. If a discussion reaches a stalemate after a few exchanges, +then call for opinions of other reviewers or maintainers. Often those in +agreement with a reviewer remain silent unless called upon. +The opinion of multiple people carries exponentially more weight. + Different developers will review code from different points of view. Some are mostly concerned with coding style and whether code lines have trailing white space. Others will focus primarily on whether the change implemented @@ -176,3 +183,14 @@ security issues, duplication of code found elsewhere, adequate documentation, adverse effects on performance, user-space ABI changes, etc. All types of review, if they lead to better code going into the kernel, are welcome and worthwhile. + +There is no strict requirement to use specific tags like ``Reviewed-by``. +In fact reviews in plain English are more informative and encouraged +even when a tag is provided, e.g. "I looked at aspects A, B and C of this +submission and it looks good to me." +Some form of a review message or reply is obviously necessary otherwise +maintainers will not know that the reviewer has looked at the patch at all! + +Last but not least patch review may become a negative process, focused +on pointing out problems. Please throw in a compliment once in a while, +particularly for newbies! diff --git a/Documentation/process/maintainer-netdev.rst b/Documentation/process/maintainer-netdev.rst index 09dcf6377c27..7feacc20835e 100644 --- a/Documentation/process/maintainer-netdev.rst +++ b/Documentation/process/maintainer-netdev.rst @@ -441,6 +441,21 @@ in a way which would break what would normally be considered uAPI. new ``netdevsim`` features must be accompanied by selftests under ``tools/testing/selftests/``. +Reviewer guidance +----------------- + +Reviewing other people's patches on the list is highly encouraged, +regardless of the level of expertise. For general guidance and +helpful tips please see :ref:`development_advancedtopics_reviews`. + +It's safe to assume that netdev maintainers know the community and the level +of expertise of the reviewers. The reviewers should not be concerned about +their comments impeding or derailing the patch flow. + +Less experienced reviewers are highly encouraged to do more in-depth +review of submissions and not focus exclusively on trivial or subjective +matters like code formatting, tags etc. + Testimonials / feedback ----------------------- diff --git a/MAINTAINERS b/MAINTAINERS index 1bd96045beb8..ed33b9df8b3d 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1459,7 +1459,6 @@ F: drivers/hwmon/applesmc.c APPLETALK NETWORK LAYER L: netdev@vger.kernel.org S: Odd fixes -F: drivers/net/appletalk/ F: include/linux/atalk.h F: include/uapi/linux/atalk.h F: net/appletalk/ @@ -1584,6 +1583,17 @@ F: arch/arm/include/asm/arch_timer.h F: arch/arm64/include/asm/arch_timer.h F: drivers/clocksource/arm_arch_timer.c +ARM GENERIC INTERRUPT CONTROLLER DRIVERS +M: Marc Zyngier <maz@kernel.org> +L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) +S: Maintained +F: Documentation/devicetree/bindings/interrupt-controller/arm,gic* +F: arch/arm/include/asm/arch_gicv3.h +F: arch/arm64/include/asm/arch_gicv3.h +F: drivers/irqchip/irq-gic*.[ch] +F: include/linux/irqchip/arm-gic*.h +F: include/linux/irqchip/arm-vgic-info.h + ARM HDLCD DRM DRIVER M: Liviu Dudau <liviu.dudau@arm.com> S: Supported @@ -2210,21 +2220,28 @@ F: arch/arm/boot/dts/ti/omap/omap3-igep* ARM/INTEL IXP4XX ARM ARCHITECTURE M: Linus Walleij <linusw@kernel.org> M: Imre Kaloz <kaloz@openwrt.org> -M: Krzysztof Halasa <khalasa@piap.pl> L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Maintained F: Documentation/devicetree/bindings/arm/intel-ixp4xx.yaml F: Documentation/devicetree/bindings/gpio/intel,ixp4xx-gpio.txt F: Documentation/devicetree/bindings/interrupt-controller/intel,ixp4xx-interrupt.yaml F: Documentation/devicetree/bindings/memory-controllers/intel,ixp4xx-expansion* +F: Documentation/devicetree/bindings/rng/intel,ixp46x-rng.yaml F: Documentation/devicetree/bindings/timer/intel,ixp4xx-timer.yaml F: arch/arm/boot/dts/intel/ixp/ F: arch/arm/mach-ixp4xx/ F: drivers/bus/intel-ixp4xx-eb.c +F: drivers/char/hw_random/ixp4xx-rng.c F: drivers/clocksource/timer-ixp4xx.c F: drivers/crypto/intel/ixp4xx/ixp4xx_crypto.c F: drivers/gpio/gpio-ixp4xx.c F: drivers/irqchip/irq-ixp4xx.c +F: drivers/net/ethernet/xscale/ixp4xx_eth.c +F: drivers/net/wan/ixp4xx_hss.c +F: drivers/soc/ixp4xx/ixp4xx-npe.c +F: drivers/soc/ixp4xx/ixp4xx-qmgr.c +F: include/linux/soc/ixp4xx/npe.h +F: include/linux/soc/ixp4xx/qmgr.h ARM/INTEL KEEMBAY ARCHITECTURE M: Paul J. Murphy <paul.j.murphy@intel.com> @@ -2326,7 +2343,7 @@ F: drivers/rtc/rtc-mt7622.c ARM/Mediatek SoC support M: Matthias Brugger <matthias.bgg@gmail.com> -R: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com> +M: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com> L: linux-kernel@vger.kernel.org L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) L: linux-mediatek@lists.infradead.org (moderated for non-subscribers) @@ -5978,8 +5995,8 @@ F: include/linux/devm-helpers.h DEVICE-MAPPER (LVM) M: Alasdair Kergon <agk@redhat.com> M: Mike Snitzer <snitzer@kernel.org> -M: dm-devel@redhat.com -L: dm-devel@redhat.com +M: dm-devel@lists.linux.dev +L: dm-devel@lists.linux.dev S: Maintained W: http://sources.redhat.com/dm Q: http://patchwork.kernel.org/project/dm-devel/list/ @@ -10624,22 +10641,6 @@ L: linux-crypto@vger.kernel.org S: Maintained F: drivers/crypto/intel/ixp4xx/ixp4xx_crypto.c -INTEL IXP4XX QMGR, NPE, ETHERNET and HSS SUPPORT -M: Krzysztof Halasa <khalasa@piap.pl> -S: Maintained -F: drivers/net/ethernet/xscale/ixp4xx_eth.c -F: drivers/net/wan/ixp4xx_hss.c -F: drivers/soc/ixp4xx/ixp4xx-npe.c -F: drivers/soc/ixp4xx/ixp4xx-qmgr.c -F: include/linux/soc/ixp4xx/npe.h -F: include/linux/soc/ixp4xx/qmgr.h - -INTEL IXP4XX RANDOM NUMBER GENERATOR SUPPORT -M: Deepak Saxena <dsaxena@plexity.net> -S: Maintained -F: Documentation/devicetree/bindings/rng/intel,ixp46x-rng.yaml -F: drivers/char/hw_random/ixp4xx-rng.c - INTEL KEEM BAY DRM DRIVER M: Anitha Chrisanthus <anitha.chrisanthus@intel.com> M: Edmund Dea <edmund.j.dea@intel.com> @@ -11065,7 +11066,7 @@ F: Documentation/devicetree/bindings/sound/irondevice,* F: sound/soc/codecs/sma* IRQ DOMAINS (IRQ NUMBER MAPPING LIBRARY) -M: Marc Zyngier <maz@kernel.org> +M: Thomas Gleixner <tglx@linutronix.de> S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git irq/core F: Documentation/core-api/irq/irq-domain.rst @@ -11084,7 +11085,6 @@ F: lib/group_cpus.c IRQCHIP DRIVERS M: Thomas Gleixner <tglx@linutronix.de> -M: Marc Zyngier <maz@kernel.org> L: linux-kernel@vger.kernel.org S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git irq/core @@ -20490,6 +20490,7 @@ F: include/dt-bindings/clock/starfive?jh71*.h STARFIVE JH71X0 PINCTRL DRIVERS M: Emil Renner Berthing <kernel@esmil.dk> M: Jianlong Huang <jianlong.huang@starfivetech.com> +M: Hal Feng <hal.feng@starfivetech.com> L: linux-gpio@vger.kernel.org S: Maintained F: Documentation/devicetree/bindings/pinctrl/starfive,jh71*.yaml @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 6 SUBLEVEL = 0 -EXTRAVERSION = -rc4 +EXTRAVERSION = -rc5 NAME = Hurr durr I'ma ninja sloth # *DOCUMENTATION* diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index b10515c0200b..78f20e632712 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1037,6 +1037,19 @@ config ARM64_ERRATUM_2645198 If unsure, say Y. +config ARM64_ERRATUM_2966298 + bool "Cortex-A520: 2966298: workaround for speculatively executed unprivileged load" + default y + help + This option adds the workaround for ARM Cortex-A520 erratum 2966298. + + On an affected Cortex-A520 core, a speculatively executed unprivileged + load might leak data from a privileged level via a cache side channel. + + Work around this problem by executing a TLBI before returning to EL0. + + If unsure, say Y. + config CAVIUM_ERRATUM_22375 bool "Cavium erratum 22375, 24313" default y diff --git a/arch/arm64/boot/dts/freescale/imx93.dtsi b/arch/arm64/boot/dts/freescale/imx93.dtsi index 6f85a05ee7e1..dcf6e4846ac9 100644 --- a/arch/arm64/boot/dts/freescale/imx93.dtsi +++ b/arch/arm64/boot/dts/freescale/imx93.dtsi @@ -185,7 +185,7 @@ #size-cells = <1>; ranges; - anomix_ns_gpr: syscon@44210000 { + aonmix_ns_gpr: syscon@44210000 { compatible = "fsl,imx93-aonmix-ns-syscfg", "syscon"; reg = <0x44210000 0x1000>; }; @@ -319,6 +319,7 @@ assigned-clock-parents = <&clk IMX93_CLK_SYS_PLL_PFD1_DIV2>; assigned-clock-rates = <40000000>; fsl,clk-source = /bits/ 8 <0>; + fsl,stop-mode = <&aonmix_ns_gpr 0x14 0>; status = "disabled"; }; @@ -591,6 +592,7 @@ assigned-clock-parents = <&clk IMX93_CLK_SYS_PLL_PFD1_DIV2>; assigned-clock-rates = <40000000>; fsl,clk-source = /bits/ 8 <0>; + fsl,stop-mode = <&wakeupmix_gpr 0x0c 2>; status = "disabled"; }; diff --git a/arch/arm64/boot/dts/mediatek/mt7622.dtsi b/arch/arm64/boot/dts/mediatek/mt7622.dtsi index 36ef2dbe8add..3ee9266fa8e9 100644 --- a/arch/arm64/boot/dts/mediatek/mt7622.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt7622.dtsi @@ -905,7 +905,7 @@ status = "disabled"; }; - sata_phy: t-phy@1a243000 { + sata_phy: t-phy { compatible = "mediatek,mt7622-tphy", "mediatek,generic-tphy-v1"; #address-cells = <2>; diff --git a/arch/arm64/boot/dts/mediatek/mt7986a.dtsi b/arch/arm64/boot/dts/mediatek/mt7986a.dtsi index 68539ea788df..24eda00e320d 100644 --- a/arch/arm64/boot/dts/mediatek/mt7986a.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt7986a.dtsi @@ -434,7 +434,7 @@ }; }; - pcie_phy: t-phy@11c00000 { + pcie_phy: t-phy { compatible = "mediatek,mt7986-tphy", "mediatek,generic-tphy-v2"; #address-cells = <2>; diff --git a/arch/arm64/boot/dts/mediatek/mt8195-demo.dts b/arch/arm64/boot/dts/mediatek/mt8195-demo.dts index b2485ddfd33b..5d635085fe3f 100644 --- a/arch/arm64/boot/dts/mediatek/mt8195-demo.dts +++ b/arch/arm64/boot/dts/mediatek/mt8195-demo.dts @@ -48,7 +48,7 @@ memory@40000000 { device_type = "memory"; - reg = <0 0x40000000 0 0x80000000>; + reg = <0 0x40000000 0x2 0x00000000>; }; reserved-memory { @@ -56,13 +56,8 @@ #size-cells = <2>; ranges; - /* 2 MiB reserved for ARM Trusted Firmware (BL31) */ - bl31_secmon_reserved: secmon@54600000 { - no-map; - reg = <0 0x54600000 0x0 0x200000>; - }; - - /* 12 MiB reserved for OP-TEE (BL32) + /* + * 12 MiB reserved for OP-TEE (BL32) * +-----------------------+ 0x43e0_0000 * | SHMEM 2MiB | * +-----------------------+ 0x43c0_0000 @@ -75,6 +70,34 @@ no-map; reg = <0 0x43200000 0 0x00c00000>; }; + + scp_mem: memory@50000000 { + compatible = "shared-dma-pool"; + reg = <0 0x50000000 0 0x2900000>; + no-map; + }; + + vpu_mem: memory@53000000 { + compatible = "shared-dma-pool"; + reg = <0 0x53000000 0 0x1400000>; /* 20 MB */ + }; + + /* 2 MiB reserved for ARM Trusted Firmware (BL31) */ + bl31_secmon_mem: memory@54600000 { + no-map; + reg = <0 0x54600000 0x0 0x200000>; + }; + + snd_dma_mem: memory@60000000 { + compatible = "shared-dma-pool"; + reg = <0 0x60000000 0 0x1100000>; + no-map; + }; + + apu_mem: memory@62000000 { + compatible = "shared-dma-pool"; + reg = <0 0x62000000 0 0x1400000>; /* 20 MB */ + }; }; }; diff --git a/arch/arm64/boot/dts/mediatek/mt8195.dtsi b/arch/arm64/boot/dts/mediatek/mt8195.dtsi index a9e52b50c8c4..54c674c45b49 100644 --- a/arch/arm64/boot/dts/mediatek/mt8195.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8195.dtsi @@ -313,6 +313,7 @@ interrupts = <GIC_SPI 18 IRQ_TYPE_LEVEL_HIGH 0>; cpus = <&cpu0>, <&cpu1>, <&cpu2>, <&cpu3>, <&cpu4>, <&cpu5>, <&cpu6>, <&cpu7>; + status = "fail"; }; dmic_codec: dmic-codec { diff --git a/arch/arm64/boot/dts/qcom/sm8150.dtsi b/arch/arm64/boot/dts/qcom/sm8150.dtsi index a7c3020a5de4..06c53000bb74 100644 --- a/arch/arm64/boot/dts/qcom/sm8150.dtsi +++ b/arch/arm64/boot/dts/qcom/sm8150.dtsi @@ -3958,7 +3958,7 @@ pdc: interrupt-controller@b220000 { compatible = "qcom,sm8150-pdc", "qcom,pdc"; - reg = <0 0x0b220000 0 0x400>; + reg = <0 0x0b220000 0 0x30000>; qcom,pdc-ranges = <0 480 94>, <94 609 31>, <125 63 1>; #interrupt-cells = <2>; diff --git a/arch/arm64/include/asm/acpi.h b/arch/arm64/include/asm/acpi.h index 4d537d56eb84..6792a1f83f2a 100644 --- a/arch/arm64/include/asm/acpi.h +++ b/arch/arm64/include/asm/acpi.h @@ -9,6 +9,7 @@ #ifndef _ASM_ACPI_H #define _ASM_ACPI_H +#include <linux/cpuidle.h> #include <linux/efi.h> #include <linux/memblock.h> #include <linux/psci.h> @@ -44,6 +45,24 @@ #define ACPI_MADT_GICC_TRBE (offsetof(struct acpi_madt_generic_interrupt, \ trbe_interrupt) + sizeof(u16)) +/* + * Arm® Functional Fixed Hardware Specification Version 1.2. + * Table 2: Arm Architecture context loss flags + */ +#define CPUIDLE_CORE_CTXT BIT(0) /* Core context Lost */ + +static inline unsigned int arch_get_idle_state_flags(u32 arch_flags) +{ + if (arch_flags & CPUIDLE_CORE_CTXT) + return CPUIDLE_FLAG_TIMER_STOP; + + return 0; +} +#define arch_get_idle_state_flags arch_get_idle_state_flags + +#define CPUIDLE_TRACE_CTXT BIT(1) /* Trace context loss */ +#define CPUIDLE_GICR_CTXT BIT(2) /* GICR */ +#define CPUIDLE_GICD_CTXT BIT(3) /* GICD */ /* Basic configuration for ACPI */ #ifdef CONFIG_ACPI diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index 5f6f84837a49..74d00feb62f0 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -79,6 +79,7 @@ #define ARM_CPU_PART_CORTEX_A78AE 0xD42 #define ARM_CPU_PART_CORTEX_X1 0xD44 #define ARM_CPU_PART_CORTEX_A510 0xD46 +#define ARM_CPU_PART_CORTEX_A520 0xD80 #define ARM_CPU_PART_CORTEX_A710 0xD47 #define ARM_CPU_PART_CORTEX_A715 0xD4D #define ARM_CPU_PART_CORTEX_X2 0xD48 @@ -148,6 +149,7 @@ #define MIDR_CORTEX_A78AE MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A78AE) #define MIDR_CORTEX_X1 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X1) #define MIDR_CORTEX_A510 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A510) +#define MIDR_CORTEX_A520 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A520) #define MIDR_CORTEX_A710 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A710) #define MIDR_CORTEX_A715 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A715) #define MIDR_CORTEX_X2 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X2) diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index be66e94a21bd..5706e74c5578 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -730,6 +730,14 @@ const struct arm64_cpu_capabilities arm64_errata[] = { .cpu_enable = cpu_clear_bf16_from_user_emulation, }, #endif +#ifdef CONFIG_ARM64_ERRATUM_2966298 + { + .desc = "ARM erratum 2966298", + .capability = ARM64_WORKAROUND_2966298, + /* Cortex-A520 r0p0 - r0p1 */ + ERRATA_MIDR_REV_RANGE(MIDR_CORTEX_A520, 0, 0, 1), + }, +#endif #ifdef CONFIG_AMPERE_ERRATUM_AC03_CPU_38 { .desc = "AmpereOne erratum AC03_CPU_38", diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 6ad61de03d0a..a6030913cd58 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -428,6 +428,10 @@ alternative_else_nop_endif ldp x28, x29, [sp, #16 * 14] .if \el == 0 +alternative_if ARM64_WORKAROUND_2966298 + tlbi vale1, xzr + dsb nsh +alternative_else_nop_endif alternative_if_not ARM64_UNMAP_KERNEL_AT_EL0 ldr lr, [sp, #S_LR] add sp, sp, #PT_REGS_SIZE // restore sp diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps index c3f06fdef609..dea3dc89234b 100644 --- a/arch/arm64/tools/cpucaps +++ b/arch/arm64/tools/cpucaps @@ -84,6 +84,7 @@ WORKAROUND_2077057 WORKAROUND_2457168 WORKAROUND_2645198 WORKAROUND_2658417 +WORKAROUND_2966298 WORKAROUND_AMPERE_AC03_CPU_38 WORKAROUND_TRBE_OVERWRITE_FILL_MODE WORKAROUND_TSB_FLUSH_FAILURE diff --git a/arch/parisc/include/asm/ldcw.h b/arch/parisc/include/asm/ldcw.h index 6d28b5514699..ee9e071859b2 100644 --- a/arch/parisc/include/asm/ldcw.h +++ b/arch/parisc/include/asm/ldcw.h @@ -2,39 +2,42 @@ #ifndef __PARISC_LDCW_H #define __PARISC_LDCW_H -#ifndef CONFIG_PA20 /* Because kmalloc only guarantees 8-byte alignment for kmalloc'd data, and GCC only guarantees 8-byte alignment for stack locals, we can't be assured of 16-byte alignment for atomic lock data even if we specify "__attribute ((aligned(16)))" in the type declaration. So, we use a struct containing an array of four ints for the atomic lock type and dynamically select the 16-byte aligned int from the array - for the semaphore. */ + for the semaphore. */ + +/* From: "Jim Hull" <jim.hull of hp.com> + I've attached a summary of the change, but basically, for PA 2.0, as + long as the ",CO" (coherent operation) completer is implemented, then the + 16-byte alignment requirement for ldcw and ldcd is relaxed, and instead + they only require "natural" alignment (4-byte for ldcw, 8-byte for + ldcd). + + Although the cache control hint is accepted by all PA 2.0 processors, + it is only implemented on PA8800/PA8900 CPUs. Prior PA8X00 CPUs still + require 16-byte alignment. If the address is unaligned, the operation + of the instruction is undefined. The ldcw instruction does not generate + unaligned data reference traps so misaligned accesses are not detected. + This hid the problem for years. So, restore the 16-byte alignment dropped + by Kyle McMartin in "Remove __ldcw_align for PA-RISC 2.0 processors". */ #define __PA_LDCW_ALIGNMENT 16 -#define __PA_LDCW_ALIGN_ORDER 4 #define __ldcw_align(a) ({ \ unsigned long __ret = (unsigned long) &(a)->lock[0]; \ __ret = (__ret + __PA_LDCW_ALIGNMENT - 1) \ & ~(__PA_LDCW_ALIGNMENT - 1); \ (volatile unsigned int *) __ret; \ }) -#define __LDCW "ldcw" -#else /*CONFIG_PA20*/ -/* From: "Jim Hull" <jim.hull of hp.com> - I've attached a summary of the change, but basically, for PA 2.0, as - long as the ",CO" (coherent operation) completer is specified, then the - 16-byte alignment requirement for ldcw and ldcd is relaxed, and instead - they only require "natural" alignment (4-byte for ldcw, 8-byte for - ldcd). */ - -#define __PA_LDCW_ALIGNMENT 4 -#define __PA_LDCW_ALIGN_ORDER 2 -#define __ldcw_align(a) (&(a)->slock) +#ifdef CONFIG_PA20 #define __LDCW "ldcw,co" - -#endif /*!CONFIG_PA20*/ +#else +#define __LDCW "ldcw" +#endif /* LDCW, the only atomic read-write operation PA-RISC has. *sigh*. We don't explicitly expose that "*a" may be written as reload diff --git a/arch/parisc/include/asm/spinlock_types.h b/arch/parisc/include/asm/spinlock_types.h index efd06a897c6a..7b986b09dba8 100644 --- a/arch/parisc/include/asm/spinlock_types.h +++ b/arch/parisc/include/asm/spinlock_types.h @@ -9,15 +9,10 @@ #ifndef __ASSEMBLY__ typedef struct { -#ifdef CONFIG_PA20 - volatile unsigned int slock; -# define __ARCH_SPIN_LOCK_UNLOCKED { __ARCH_SPIN_LOCK_UNLOCKED_VAL } -#else volatile unsigned int lock[4]; # define __ARCH_SPIN_LOCK_UNLOCKED \ { { __ARCH_SPIN_LOCK_UNLOCKED_VAL, __ARCH_SPIN_LOCK_UNLOCKED_VAL, \ __ARCH_SPIN_LOCK_UNLOCKED_VAL, __ARCH_SPIN_LOCK_UNLOCKED_VAL } } -#endif } arch_spinlock_t; diff --git a/arch/parisc/kernel/smp.c b/arch/parisc/kernel/smp.c index 4098f9a0964b..2019c1f04bd0 100644 --- a/arch/parisc/kernel/smp.c +++ b/arch/parisc/kernel/smp.c @@ -440,7 +440,9 @@ int __cpu_up(unsigned int cpu, struct task_struct *tidle) if (cpu_online(cpu)) return 0; - if (num_online_cpus() < setup_max_cpus && smp_boot_one_cpu(cpu, tidle)) + if (num_online_cpus() < nr_cpu_ids && + num_online_cpus() < setup_max_cpus && + smp_boot_one_cpu(cpu, tidle)) return -EIO; return cpu_online(cpu) ? 0 : -EIO; diff --git a/arch/riscv/net/bpf_jit_comp64.c b/arch/riscv/net/bpf_jit_comp64.c index ecd3ae6f4116..8581693e62d3 100644 --- a/arch/riscv/net/bpf_jit_comp64.c +++ b/arch/riscv/net/bpf_jit_comp64.c @@ -245,7 +245,7 @@ static void __build_epilogue(bool is_tail_call, struct rv_jit_context *ctx) emit_addi(RV_REG_SP, RV_REG_SP, stack_adjust, ctx); /* Set return value. */ if (!is_tail_call) - emit_mv(RV_REG_A0, RV_REG_A5, ctx); + emit_addiw(RV_REG_A0, RV_REG_A5, 0, ctx); emit_jalr(RV_REG_ZERO, is_tail_call ? RV_REG_T3 : RV_REG_RA, is_tail_call ? (RV_FENTRY_NINSNS + 1) * 4 : 0, /* skip reserved nops and TCC init */ ctx); @@ -759,8 +759,10 @@ static int invoke_bpf_prog(struct bpf_tramp_link *l, int args_off, int retval_of if (ret) return ret; - if (save_ret) - emit_sd(RV_REG_FP, -retval_off, regmap[BPF_REG_0], ctx); + if (save_ret) { + emit_sd(RV_REG_FP, -retval_off, RV_REG_A0, ctx); + emit_sd(RV_REG_FP, -(retval_off - 8), regmap[BPF_REG_0], ctx); + } /* update branch with beqz */ if (ctx->insns) { @@ -853,7 +855,7 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, save_ret = flags & (BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_RET_FENTRY_RET); if (save_ret) { - stack_size += 8; + stack_size += 16; /* Save both A5 (BPF R0) and A0 */ retval_off = stack_size; } @@ -957,6 +959,7 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, if (ret) goto out; emit_sd(RV_REG_FP, -retval_off, RV_REG_A0, ctx); + emit_sd(RV_REG_FP, -(retval_off - 8), regmap[BPF_REG_0], ctx); im->ip_after_call = ctx->insns + ctx->ninsns; /* 2 nops reserved for auipc+jalr pair */ emit(rv_nop(), ctx); @@ -988,8 +991,10 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, if (flags & BPF_TRAMP_F_RESTORE_REGS) restore_args(nregs, args_off, ctx); - if (save_ret) + if (save_ret) { emit_ld(RV_REG_A0, -retval_off, RV_REG_FP, ctx); + emit_ld(regmap[BPF_REG_0], -(retval_off - 8), RV_REG_FP, ctx); + } emit_ld(RV_REG_S1, -sreg_off, RV_REG_FP, ctx); @@ -1515,7 +1520,8 @@ out_be: if (ret) return ret; - emit_mv(bpf_to_rv_reg(BPF_REG_0, ctx), RV_REG_A0, ctx); + if (insn->src_reg != BPF_PSEUDO_CALL) + emit_mv(bpf_to_rv_reg(BPF_REG_0, ctx), RV_REG_A0, ctx); break; } /* tail call */ diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index e6a643f63ebf..bf06b7283f0c 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -670,15 +670,18 @@ static void bpf_jit_epilogue(struct bpf_jit *jit, u32 stack_depth) static int get_probe_mem_regno(const u8 *insn) { /* - * insn must point to llgc, llgh, llgf or lg, which have destination - * register at the same position. + * insn must point to llgc, llgh, llgf, lg, lgb, lgh or lgf, which have + * destination register at the same position. */ - if (insn[0] != 0xe3) /* common llgc, llgh, llgf and lg prefix */ + if (insn[0] != 0xe3) /* common prefix */ return -1; if (insn[5] != 0x90 && /* llgc */ insn[5] != 0x91 && /* llgh */ insn[5] != 0x16 && /* llgf */ - insn[5] != 0x04) /* lg */ + insn[5] != 0x04 && /* lg */ + insn[5] != 0x77 && /* lgb */ + insn[5] != 0x15 && /* lgh */ + insn[5] != 0x14) /* lgf */ return -1; return insn[1] >> 4; } @@ -776,6 +779,7 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i, bool extra_pass, u32 stack_depth) { struct bpf_insn *insn = &fp->insnsi[i]; + s16 branch_oc_off = insn->off; u32 dst_reg = insn->dst_reg; u32 src_reg = insn->src_reg; int last, insn_count = 1; @@ -788,22 +792,55 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int err; if (BPF_CLASS(insn->code) == BPF_LDX && - BPF_MODE(insn->code) == BPF_PROBE_MEM) + (BPF_MODE(insn->code) == BPF_PROBE_MEM || + BPF_MODE(insn->code) == BPF_PROBE_MEMSX)) probe_prg = jit->prg; switch (insn->code) { /* * BPF_MOV */ - case BPF_ALU | BPF_MOV | BPF_X: /* dst = (u32) src */ - /* llgfr %dst,%src */ - EMIT4(0xb9160000, dst_reg, src_reg); - if (insn_is_zext(&insn[1])) - insn_count = 2; + case BPF_ALU | BPF_MOV | BPF_X: + switch (insn->off) { + case 0: /* DST = (u32) SRC */ + /* llgfr %dst,%src */ + EMIT4(0xb9160000, dst_reg, src_reg); + if (insn_is_zext(&insn[1])) + insn_count = 2; + break; + case 8: /* DST = (u32)(s8) SRC */ + /* lbr %dst,%src */ + EMIT4(0xb9260000, dst_reg, src_reg); + /* llgfr %dst,%dst */ + EMIT4(0xb9160000, dst_reg, dst_reg); + break; + case 16: /* DST = (u32)(s16) SRC */ + /* lhr %dst,%src */ + EMIT4(0xb9270000, dst_reg, src_reg); + /* llgfr %dst,%dst */ + EMIT4(0xb9160000, dst_reg, dst_reg); + break; + } break; - case BPF_ALU64 | BPF_MOV | BPF_X: /* dst = src */ - /* lgr %dst,%src */ - EMIT4(0xb9040000, dst_reg, src_reg); + case BPF_ALU64 | BPF_MOV | BPF_X: + switch (insn->off) { + case 0: /* DST = SRC */ + /* lgr %dst,%src */ + EMIT4(0xb9040000, dst_reg, src_reg); + break; + case 8: /* DST = (s8) SRC */ + /* lgbr %dst,%src */ + EMIT4(0xb9060000, dst_reg, src_reg); + break; + case 16: /* DST = (s16) SRC */ + /* lghr %dst,%src */ + EMIT4(0xb9070000, dst_reg, src_reg); + break; + case 32: /* DST = (s32) SRC */ + /* lgfr %dst,%src */ + EMIT4(0xb9140000, dst_reg, src_reg); + break; + } break; case BPF_ALU | BPF_MOV | BPF_K: /* dst = (u32) imm */ /* llilf %dst,imm */ @@ -912,66 +949,115 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, /* * BPF_DIV / BPF_MOD */ - case BPF_ALU | BPF_DIV | BPF_X: /* dst = (u32) dst / (u32) src */ - case BPF_ALU | BPF_MOD | BPF_X: /* dst = (u32) dst % (u32) src */ + case BPF_ALU | BPF_DIV | BPF_X: + case BPF_ALU | BPF_MOD | BPF_X: { int rc_reg = BPF_OP(insn->code) == BPF_DIV ? REG_W1 : REG_W0; - /* lhi %w0,0 */ - EMIT4_IMM(0xa7080000, REG_W0, 0); - /* lr %w1,%dst */ - EMIT2(0x1800, REG_W1, dst_reg); - /* dlr %w0,%src */ - EMIT4(0xb9970000, REG_W0, src_reg); + switch (off) { + case 0: /* dst = (u32) dst {/,%} (u32) src */ + /* xr %w0,%w0 */ + EMIT2(0x1700, REG_W0, REG_W0); + /* lr %w1,%dst */ + EMIT2(0x1800, REG_W1, dst_reg); + /* dlr %w0,%src */ + EMIT4(0xb9970000, REG_W0, src_reg); + break; + case 1: /* dst = (u32) ((s32) dst {/,%} (s32) src) */ + /* lgfr %r1,%dst */ + EMIT4(0xb9140000, REG_W1, dst_reg); + /* dsgfr %r0,%src */ + EMIT4(0xb91d0000, REG_W0, src_reg); + break; + } /* llgfr %dst,%rc */ EMIT4(0xb9160000, dst_reg, rc_reg); if (insn_is_zext(&insn[1])) insn_count = 2; break; } - case BPF_ALU64 | BPF_DIV | BPF_X: /* dst = dst / src */ - case BPF_ALU64 | BPF_MOD | BPF_X: /* dst = dst % src */ + case BPF_ALU64 | BPF_DIV | BPF_X: + case BPF_ALU64 | BPF_MOD | BPF_X: { int rc_reg = BPF_OP(insn->code) == BPF_DIV ? REG_W1 : REG_W0; - /* lghi %w0,0 */ - EMIT4_IMM(0xa7090000, REG_W0, 0); - /* lgr %w1,%dst */ - EMIT4(0xb9040000, REG_W1, dst_reg); - /* dlgr %w0,%dst */ - EMIT4(0xb9870000, REG_W0, src_reg); + switch (off) { + case 0: /* dst = dst {/,%} src */ + /* lghi %w0,0 */ + EMIT4_IMM(0xa7090000, REG_W0, 0); + /* lgr %w1,%dst */ + EMIT4(0xb9040000, REG_W1, dst_reg); + /* dlgr %w0,%src */ + EMIT4(0xb9870000, REG_W0, src_reg); + break; + case 1: /* dst = (s64) dst {/,%} (s64) src */ + /* lgr %w1,%dst */ + EMIT4(0xb9040000, REG_W1, dst_reg); + /* dsgr %w0,%src */ + EMIT4(0xb90d0000, REG_W0, src_reg); + break; + } /* lgr %dst,%rc */ EMIT4(0xb9040000, dst_reg, rc_reg); break; } - case BPF_ALU | BPF_DIV | BPF_K: /* dst = (u32) dst / (u32) imm */ - case BPF_ALU | BPF_MOD | BPF_K: /* dst = (u32) dst % (u32) imm */ + case BPF_ALU | BPF_DIV | BPF_K: + case BPF_ALU | BPF_MOD | BPF_K: { int rc_reg = BPF_OP(insn->code) == BPF_DIV ? REG_W1 : REG_W0; if (imm == 1) { if (BPF_OP(insn->code) == BPF_MOD) - /* lhgi %dst,0 */ + /* lghi %dst,0 */ EMIT4_IMM(0xa7090000, dst_reg, 0); else EMIT_ZERO(dst_reg); break; } - /* lhi %w0,0 */ - EMIT4_IMM(0xa7080000, REG_W0, 0); - /* lr %w1,%dst */ - EMIT2(0x1800, REG_W1, dst_reg); if (!is_first_pass(jit) && can_use_ldisp_for_lit32(jit)) { - /* dl %w0,<d(imm)>(%l) */ - EMIT6_DISP_LH(0xe3000000, 0x0097, REG_W0, REG_0, REG_L, - EMIT_CONST_U32(imm)); + switch (off) { + case 0: /* dst = (u32) dst {/,%} (u32) imm */ + /* xr %w0,%w0 */ + EMIT2(0x1700, REG_W0, REG_W0); + /* lr %w1,%dst */ + EMIT2(0x1800, REG_W1, dst_reg); + /* dl %w0,<d(imm)>(%l) */ + EMIT6_DISP_LH(0xe3000000, 0x0097, REG_W0, REG_0, + REG_L, EMIT_CONST_U32(imm)); + break; + case 1: /* dst = (s32) dst {/,%} (s32) imm */ + /* lgfr %r1,%dst */ + EMIT4(0xb9140000, REG_W1, dst_reg); + /* dsgf %r0,<d(imm)>(%l) */ + EMIT6_DISP_LH(0xe3000000, 0x001d, REG_W0, REG_0, + REG_L, EMIT_CONST_U32(imm)); + break; + } } else { - /* lgfrl %dst,imm */ - EMIT6_PCREL_RILB(0xc40c0000, dst_reg, - _EMIT_CONST_U32(imm)); - jit->seen |= SEEN_LITERAL; - /* dlr %w0,%dst */ - EMIT4(0xb9970000, REG_W0, dst_reg); + switch (off) { + case 0: /* dst = (u32) dst {/,%} (u32) imm */ + /* xr %w0,%w0 */ + EMIT2(0x1700, REG_W0, REG_W0); + /* lr %w1,%dst */ + EMIT2(0x1800, REG_W1, dst_reg); + /* lrl %dst,imm */ + EMIT6_PCREL_RILB(0xc40d0000, dst_reg, + _EMIT_CONST_U32(imm)); + jit->seen |= SEEN_LITERAL; + /* dlr %w0,%dst */ + EMIT4(0xb9970000, REG_W0, dst_reg); + break; + case 1: /* dst = (s32) dst {/,%} (s32) imm */ + /* lgfr %w1,%dst */ + EMIT4(0xb9140000, REG_W1, dst_reg); + /* lgfrl %dst,imm */ + EMIT6_PCREL_RILB(0xc40c0000, dst_reg, + _EMIT_CONST_U32(imm)); + jit->seen |= SEEN_LITERAL; + /* dsgr %w0,%dst */ + EMIT4(0xb90d0000, REG_W0, dst_reg); + break; + } } /* llgfr %dst,%rc */ EMIT4(0xb9160000, dst_reg, rc_reg); @@ -979,8 +1065,8 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, insn_count = 2; break; } - case BPF_ALU64 | BPF_DIV | BPF_K: /* dst = dst / imm */ - case BPF_ALU64 | BPF_MOD | BPF_K: /* dst = dst % imm */ + case BPF_ALU64 | BPF_DIV | BPF_K: + case BPF_ALU64 | BPF_MOD | BPF_K: { int rc_reg = BPF_OP(insn->code) == BPF_DIV ? REG_W1 : REG_W0; @@ -990,21 +1076,50 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, EMIT4_IMM(0xa7090000, dst_reg, 0); break; } - /* lghi %w0,0 */ - EMIT4_IMM(0xa7090000, REG_W0, 0); - /* lgr %w1,%dst */ - EMIT4(0xb9040000, REG_W1, dst_reg); if (!is_first_pass(jit) && can_use_ldisp_for_lit64(jit)) { - /* dlg %w0,<d(imm)>(%l) */ - EMIT6_DISP_LH(0xe3000000, 0x0087, REG_W0, REG_0, REG_L, - EMIT_CONST_U64(imm)); + switch (off) { + case 0: /* dst = dst {/,%} imm */ + /* lghi %w0,0 */ + EMIT4_IMM(0xa7090000, REG_W0, 0); + /* lgr %w1,%dst */ + EMIT4(0xb9040000, REG_W1, dst_reg); + /* dlg %w0,<d(imm)>(%l) */ + EMIT6_DISP_LH(0xe3000000, 0x0087, REG_W0, REG_0, + REG_L, EMIT_CONST_U64(imm)); + break; + case 1: /* dst = (s64) dst {/,%} (s64) imm */ + /* lgr %w1,%dst */ + EMIT4(0xb9040000, REG_W1, dst_reg); + /* dsg %w0,<d(imm)>(%l) */ + EMIT6_DISP_LH(0xe3000000, 0x000d, REG_W0, REG_0, + REG_L, EMIT_CONST_U64(imm)); + break; + } } else { - /* lgrl %dst,imm */ - EMIT6_PCREL_RILB(0xc4080000, dst_reg, - _EMIT_CONST_U64(imm)); - jit->seen |= SEEN_LITERAL; - /* dlgr %w0,%dst */ - EMIT4(0xb9870000, REG_W0, dst_reg); + switch (off) { + case 0: /* dst = dst {/,%} imm */ + /* lghi %w0,0 */ + EMIT4_IMM(0xa7090000, REG_W0, 0); + /* lgr %w1,%dst */ + EMIT4(0xb9040000, REG_W1, dst_reg); + /* lgrl %dst,imm */ + EMIT6_PCREL_RILB(0xc4080000, dst_reg, + _EMIT_CONST_U64(imm)); + jit->seen |= SEEN_LITERAL; + /* dlgr %w0,%dst */ + EMIT4(0xb9870000, REG_W0, dst_reg); + break; + case 1: /* dst = (s64) dst {/,%} (s64) imm */ + /* lgr %w1,%dst */ + EMIT4(0xb9040000, REG_W1, dst_reg); + /* lgrl %dst,imm */ + EMIT6_PCREL_RILB(0xc4080000, dst_reg, + _EMIT_CONST_U64(imm)); + jit->seen |= SEEN_LITERAL; + /* dsgr %w0,%dst */ + EMIT4(0xb90d0000, REG_W0, dst_reg); + break; + } } /* lgr %dst,%rc */ EMIT4(0xb9040000, dst_reg, rc_reg); @@ -1217,6 +1332,7 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, } break; case BPF_ALU | BPF_END | BPF_FROM_LE: + case BPF_ALU64 | BPF_END | BPF_FROM_LE: switch (imm) { case 16: /* dst = (u16) cpu_to_le16(dst) */ /* lrvr %dst,%dst */ @@ -1374,6 +1490,12 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, if (insn_is_zext(&insn[1])) insn_count = 2; break; + case BPF_LDX | BPF_MEMSX | BPF_B: /* dst = *(s8 *)(ul) (src + off) */ + case BPF_LDX | BPF_PROBE_MEMSX | BPF_B: + /* lgb %dst,0(off,%src) */ + EMIT6_DISP_LH(0xe3000000, 0x0077, dst_reg, src_reg, REG_0, off); + jit->seen |= SEEN_MEM; + break; case BPF_LDX | BPF_MEM | BPF_H: /* dst = *(u16 *)(ul) (src + off) */ case BPF_LDX | BPF_PROBE_MEM | BPF_H: /* llgh %dst,0(off,%src) */ @@ -1382,6 +1504,12 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, if (insn_is_zext(&insn[1])) insn_count = 2; break; + case BPF_LDX | BPF_MEMSX | BPF_H: /* dst = *(s16 *)(ul) (src + off) */ + case BPF_LDX | BPF_PROBE_MEMSX | BPF_H: + /* lgh %dst,0(off,%src) */ + EMIT6_DISP_LH(0xe3000000, 0x0015, dst_reg, src_reg, REG_0, off); + jit->seen |= SEEN_MEM; + break; case BPF_LDX | BPF_MEM | BPF_W: /* dst = *(u32 *)(ul) (src + off) */ case BPF_LDX | BPF_PROBE_MEM | BPF_W: /* llgf %dst,off(%src) */ @@ -1390,6 +1518,12 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, if (insn_is_zext(&insn[1])) insn_count = 2; break; + case BPF_LDX | BPF_MEMSX | BPF_W: /* dst = *(s32 *)(ul) (src + off) */ + case BPF_LDX | BPF_PROBE_MEMSX | BPF_W: + /* lgf %dst,off(%src) */ + jit->seen |= SEEN_MEM; + EMIT6_DISP_LH(0xe3000000, 0x0014, dst_reg, src_reg, REG_0, off); + break; case BPF_LDX | BPF_MEM | BPF_DW: /* dst = *(u64 *)(ul) (src + off) */ case BPF_LDX | BPF_PROBE_MEM | BPF_DW: /* lg %dst,0(off,%src) */ @@ -1570,6 +1704,9 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, * instruction itself (loop) and for BPF with offset 0 we * branch to the instruction behind the branch. */ + case BPF_JMP32 | BPF_JA: /* if (true) */ + branch_oc_off = imm; + fallthrough; case BPF_JMP | BPF_JA: /* if (true) */ mask = 0xf000; /* j */ goto branch_oc; @@ -1738,14 +1875,16 @@ branch_xu: break; branch_oc: if (!is_first_pass(jit) && - can_use_rel(jit, addrs[i + off + 1])) { + can_use_rel(jit, addrs[i + branch_oc_off + 1])) { /* brc mask,off */ EMIT4_PCREL_RIC(0xa7040000, - mask >> 12, addrs[i + off + 1]); + mask >> 12, + addrs[i + branch_oc_off + 1]); } else { /* brcl mask,off */ EMIT6_PCREL_RILC(0xc0040000, - mask >> 12, addrs[i + off + 1]); + mask >> 12, + addrs[i + branch_oc_off + 1]); } break; } @@ -2066,6 +2205,7 @@ struct bpf_tramp_jit { * func_addr's original caller */ int stack_size; /* Trampoline stack size */ + int backchain_off; /* Offset of backchain */ int stack_args_off; /* Offset of stack arguments for calling * func_addr, has to be at the top */ @@ -2086,9 +2226,10 @@ struct bpf_tramp_jit { * for __bpf_prog_enter() return value and * func_addr respectively */ - int r14_off; /* Offset of saved %r14 */ int run_ctx_off; /* Offset of struct bpf_tramp_run_ctx */ int tccnt_off; /* Offset of saved tailcall counter */ + int r14_off; /* Offset of saved %r14, has to be at the + * bottom */ int do_fexit; /* do_fexit: label */ }; @@ -2247,8 +2388,12 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, * Calculate the stack layout. */ - /* Reserve STACK_FRAME_OVERHEAD bytes for the callees. */ + /* + * Allocate STACK_FRAME_OVERHEAD bytes for the callees. As the s390x + * ABI requires, put our backchain at the end of the allocated memory. + */ tjit->stack_size = STACK_FRAME_OVERHEAD; + tjit->backchain_off = tjit->stack_size - sizeof(u64); tjit->stack_args_off = alloc_stack(tjit, nr_stack_args * sizeof(u64)); tjit->reg_args_off = alloc_stack(tjit, nr_reg_args * sizeof(u64)); tjit->ip_off = alloc_stack(tjit, sizeof(u64)); @@ -2256,16 +2401,25 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, tjit->bpf_args_off = alloc_stack(tjit, nr_bpf_args * sizeof(u64)); tjit->retval_off = alloc_stack(tjit, sizeof(u64)); tjit->r7_r8_off = alloc_stack(tjit, 2 * sizeof(u64)); - tjit->r14_off = alloc_stack(tjit, sizeof(u64)); tjit->run_ctx_off = alloc_stack(tjit, sizeof(struct bpf_tramp_run_ctx)); tjit->tccnt_off = alloc_stack(tjit, sizeof(u64)); - /* The caller has already reserved STACK_FRAME_OVERHEAD bytes. */ - tjit->stack_size -= STACK_FRAME_OVERHEAD; + tjit->r14_off = alloc_stack(tjit, sizeof(u64) * 2); + /* + * In accordance with the s390x ABI, the caller has allocated + * STACK_FRAME_OVERHEAD bytes for us. 8 of them contain the caller's + * backchain, and the rest we can use. + */ + tjit->stack_size -= STACK_FRAME_OVERHEAD - sizeof(u64); tjit->orig_stack_args_off = tjit->stack_size + STACK_FRAME_OVERHEAD; + /* lgr %r1,%r15 */ + EMIT4(0xb9040000, REG_1, REG_15); /* aghi %r15,-stack_size */ EMIT4_IMM(0xa70b0000, REG_15, -tjit->stack_size); + /* stg %r1,backchain_off(%r15) */ + EMIT6_DISP_LH(0xe3000000, 0x0024, REG_1, REG_0, REG_15, + tjit->backchain_off); /* mvc tccnt_off(4,%r15),stack_size+STK_OFF_TCCNT(%r15) */ _EMIT6(0xd203f000 | tjit->tccnt_off, 0xf000 | (tjit->stack_size + STK_OFF_TCCNT)); diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index 783ed339f341..21556ad87f4b 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -7,6 +7,8 @@ * Author : K. Y. Srinivasan <kys@microsoft.com> */ +#define pr_fmt(fmt) "Hyper-V: " fmt + #include <linux/efi.h> #include <linux/types.h> #include <linux/bitfield.h> @@ -191,7 +193,7 @@ void set_hv_tscchange_cb(void (*cb)(void)) struct hv_tsc_emulation_control emu_ctrl = {.enabled = 1}; if (!hv_reenlightenment_available()) { - pr_warn("Hyper-V: reenlightenment support is unavailable\n"); + pr_warn("reenlightenment support is unavailable\n"); return; } @@ -394,6 +396,7 @@ static void __init hv_get_partition_id(void) local_irq_restore(flags); } +#if IS_ENABLED(CONFIG_HYPERV_VTL_MODE) static u8 __init get_vtl(void) { u64 control = HV_HYPERCALL_REP_COMP_1 | HVCALL_GET_VP_REGISTERS; @@ -416,13 +419,16 @@ static u8 __init get_vtl(void) if (hv_result_success(ret)) { ret = output->as64.low & HV_X64_VTL_MASK; } else { - pr_err("Failed to get VTL(%lld) and set VTL to zero by default.\n", ret); - ret = 0; + pr_err("Failed to get VTL(error: %lld) exiting...\n", ret); + BUG(); } local_irq_restore(flags); return ret; } +#else +static inline u8 get_vtl(void) { return 0; } +#endif /* * This function is to be invoked early in the boot sequence after the @@ -564,7 +570,7 @@ skip_hypercall_pg_init: if (cpu_feature_enabled(X86_FEATURE_IBT) && *(u32 *)hv_hypercall_pg != gen_endbr()) { setup_clear_cpu_cap(X86_FEATURE_IBT); - pr_warn("Hyper-V: Disabling IBT because of Hyper-V bug\n"); + pr_warn("Disabling IBT because of Hyper-V bug\n"); } #endif @@ -604,8 +610,10 @@ skip_hypercall_pg_init: hv_query_ext_cap(0); /* Find the VTL */ - if (!ms_hyperv.paravisor_present && hv_isolation_type_snp()) - ms_hyperv.vtl = get_vtl(); + ms_hyperv.vtl = get_vtl(); + + if (ms_hyperv.vtl > 0) /* non default VTL */ + hv_vtl_early_init(); return; diff --git a/arch/x86/hyperv/hv_vtl.c b/arch/x86/hyperv/hv_vtl.c index 36a562218010..999f5ac82fe9 100644 --- a/arch/x86/hyperv/hv_vtl.c +++ b/arch/x86/hyperv/hv_vtl.c @@ -215,7 +215,7 @@ static int hv_vtl_wakeup_secondary_cpu(int apicid, unsigned long start_eip) return hv_vtl_bringup_vcpu(vp_id, start_eip); } -static int __init hv_vtl_early_init(void) +int __init hv_vtl_early_init(void) { /* * `boot_cpu_has` returns the runtime feature support, @@ -230,4 +230,3 @@ static int __init hv_vtl_early_init(void) return 0; } -early_initcall(hv_vtl_early_init); diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index 033b53f993c6..896445edc6a8 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -340,8 +340,10 @@ static inline u64 hv_get_non_nested_register(unsigned int reg) { return 0; } #ifdef CONFIG_HYPERV_VTL_MODE void __init hv_vtl_init_platform(void); +int __init hv_vtl_early_init(void); #else static inline void __init hv_vtl_init_platform(void) {} +static inline int __init hv_vtl_early_init(void) { return 0; } #endif #include <asm-generic/mshyperv.h> diff --git a/arch/x86/kernel/sev-shared.c b/arch/x86/kernel/sev-shared.c index 2eabccde94fb..dcf325b7b022 100644 --- a/arch/x86/kernel/sev-shared.c +++ b/arch/x86/kernel/sev-shared.c @@ -256,7 +256,7 @@ static int __sev_cpuid_hv(u32 fn, int reg_idx, u32 *reg) return 0; } -static int sev_cpuid_hv(struct cpuid_leaf *leaf) +static int __sev_cpuid_hv_msr(struct cpuid_leaf *leaf) { int ret; @@ -279,6 +279,45 @@ static int sev_cpuid_hv(struct cpuid_leaf *leaf) return ret; } +static int __sev_cpuid_hv_ghcb(struct ghcb *ghcb, struct es_em_ctxt *ctxt, struct cpuid_leaf *leaf) +{ + u32 cr4 = native_read_cr4(); + int ret; + + ghcb_set_rax(ghcb, leaf->fn); + ghcb_set_rcx(ghcb, leaf->subfn); + + if (cr4 & X86_CR4_OSXSAVE) + /* Safe to read xcr0 */ + ghcb_set_xcr0(ghcb, xgetbv(XCR_XFEATURE_ENABLED_MASK)); + else + /* xgetbv will cause #UD - use reset value for xcr0 */ + ghcb_set_xcr0(ghcb, 1); + + ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_CPUID, 0, 0); + if (ret != ES_OK) + return ret; + + if (!(ghcb_rax_is_valid(ghcb) && + ghcb_rbx_is_valid(ghcb) && + ghcb_rcx_is_valid(ghcb) && + ghcb_rdx_is_valid(ghcb))) + return ES_VMM_ERROR; + + leaf->eax = ghcb->save.rax; + leaf->ebx = ghcb->save.rbx; + leaf->ecx = ghcb->save.rcx; + leaf->edx = ghcb->save.rdx; + + return ES_OK; +} + +static int sev_cpuid_hv(struct ghcb *ghcb, struct es_em_ctxt *ctxt, struct cpuid_leaf *leaf) +{ + return ghcb ? __sev_cpuid_hv_ghcb(ghcb, ctxt, leaf) + : __sev_cpuid_hv_msr(leaf); +} + /* * This may be called early while still running on the initial identity * mapping. Use RIP-relative addressing to obtain the correct address @@ -388,19 +427,20 @@ snp_cpuid_get_validated_func(struct cpuid_leaf *leaf) return false; } -static void snp_cpuid_hv(struct cpuid_leaf *leaf) +static void snp_cpuid_hv(struct ghcb *ghcb, struct es_em_ctxt *ctxt, struct cpuid_leaf *leaf) { - if (sev_cpuid_hv(leaf)) + if (sev_cpuid_hv(ghcb, ctxt, leaf)) sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_CPUID_HV); } -static int snp_cpuid_postprocess(struct cpuid_leaf *leaf) +static int snp_cpuid_postprocess(struct ghcb *ghcb, struct es_em_ctxt *ctxt, + struct cpuid_leaf *leaf) { struct cpuid_leaf leaf_hv = *leaf; switch (leaf->fn) { case 0x1: - snp_cpuid_hv(&leaf_hv); + snp_cpuid_hv(ghcb, ctxt, &leaf_hv); /* initial APIC ID */ leaf->ebx = (leaf_hv.ebx & GENMASK(31, 24)) | (leaf->ebx & GENMASK(23, 0)); @@ -419,7 +459,7 @@ static int snp_cpuid_postprocess(struct cpuid_leaf *leaf) break; case 0xB: leaf_hv.subfn = 0; - snp_cpuid_hv(&leaf_hv); + snp_cpuid_hv(ghcb, ctxt, &leaf_hv); /* extended APIC ID */ leaf->edx = leaf_hv.edx; @@ -467,7 +507,7 @@ static int snp_cpuid_postprocess(struct cpuid_leaf *leaf) } break; case 0x8000001E: - snp_cpuid_hv(&leaf_hv); + snp_cpuid_hv(ghcb, ctxt, &leaf_hv); /* extended APIC ID */ leaf->eax = leaf_hv.eax; @@ -488,7 +528,7 @@ static int snp_cpuid_postprocess(struct cpuid_leaf *leaf) * Returns -EOPNOTSUPP if feature not enabled. Any other non-zero return value * should be treated as fatal by caller. */ -static int snp_cpuid(struct cpuid_leaf *leaf) +static int snp_cpuid(struct ghcb *ghcb, struct es_em_ctxt *ctxt, struct cpuid_leaf *leaf) { const struct snp_cpuid_table *cpuid_table = snp_cpuid_get_table(); @@ -522,7 +562,7 @@ static int snp_cpuid(struct cpuid_leaf *leaf) return 0; } - return snp_cpuid_postprocess(leaf); + return snp_cpuid_postprocess(ghcb, ctxt, leaf); } /* @@ -544,14 +584,14 @@ void __init do_vc_no_ghcb(struct pt_regs *regs, unsigned long exit_code) leaf.fn = fn; leaf.subfn = subfn; - ret = snp_cpuid(&leaf); + ret = snp_cpuid(NULL, NULL, &leaf); if (!ret) goto cpuid_done; if (ret != -EOPNOTSUPP) goto fail; - if (sev_cpuid_hv(&leaf)) + if (__sev_cpuid_hv_msr(&leaf)) goto fail; cpuid_done: @@ -848,14 +888,15 @@ static enum es_result vc_handle_ioio(struct ghcb *ghcb, struct es_em_ctxt *ctxt) return ret; } -static int vc_handle_cpuid_snp(struct pt_regs *regs) +static int vc_handle_cpuid_snp(struct ghcb *ghcb, struct es_em_ctxt *ctxt) { + struct pt_regs *regs = ctxt->regs; struct cpuid_leaf leaf; int ret; leaf.fn = regs->ax; leaf.subfn = regs->cx; - ret = snp_cpuid(&leaf); + ret = snp_cpuid(ghcb, ctxt, &leaf); if (!ret) { regs->ax = leaf.eax; regs->bx = leaf.ebx; @@ -874,7 +915,7 @@ static enum es_result vc_handle_cpuid(struct ghcb *ghcb, enum es_result ret; int snp_cpuid_ret; - snp_cpuid_ret = vc_handle_cpuid_snp(regs); + snp_cpuid_ret = vc_handle_cpuid_snp(ghcb, ctxt); if (!snp_cpuid_ret) return ES_OK; if (snp_cpuid_ret != -EOPNOTSUPP) diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c index 2787826d9f60..d8c1e3be74c0 100644 --- a/arch/x86/kernel/sev.c +++ b/arch/x86/kernel/sev.c @@ -868,8 +868,7 @@ void snp_set_memory_private(unsigned long vaddr, unsigned long npages) void snp_accept_memory(phys_addr_t start, phys_addr_t end) { - unsigned long vaddr; - unsigned int npages; + unsigned long vaddr, npages; if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) return; diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c index dc615ef6550a..3a34a8c425fe 100644 --- a/drivers/acpi/processor_idle.c +++ b/drivers/acpi/processor_idle.c @@ -1217,8 +1217,7 @@ static int acpi_processor_setup_lpi_states(struct acpi_processor *pr) strscpy(state->desc, lpi->desc, CPUIDLE_DESC_LEN); state->exit_latency = lpi->wake_latency; state->target_residency = lpi->min_residency; - if (lpi->arch_flags) - state->flags |= CPUIDLE_FLAG_TIMER_STOP; + state->flags |= arch_get_idle_state_flags(lpi->arch_flags); if (i != 0 && lpi->entry_method == ACPI_CSTATE_FFH) state->flags |= CPUIDLE_FLAG_RCU_IDLE; state->enter = acpi_idle_lpi_enter; diff --git a/drivers/ata/pata_parport/fit3.c b/drivers/ata/pata_parport/fit3.c index bad7aa920cdc..d2b81cf2e16d 100644 --- a/drivers/ata/pata_parport/fit3.c +++ b/drivers/ata/pata_parport/fit3.c @@ -9,11 +9,6 @@ * * The TD-2000 and certain older devices use a different protocol. * Try the fit2 protocol module with them. - * - * NB: The FIT adapters do not appear to support the control - * registers. So, we map ALT_STATUS to STATUS and NO-OP writes - * to the device control register - this means that IDE reset - * will not work on these devices. */ #include <linux/module.h> @@ -37,8 +32,7 @@ static void fit3_write_regr(struct pi_adapter *pi, int cont, int regr, int val) { - if (cont == 1) - return; + regr += cont << 3; switch (pi->mode) { case 0: @@ -59,11 +53,7 @@ static int fit3_read_regr(struct pi_adapter *pi, int cont, int regr) { int a, b; - if (cont) { - if (regr != 6) - return 0xff; - regr = 7; - } + regr += cont << 3; switch (pi->mode) { case 0: diff --git a/drivers/ata/pata_parport/pata_parport.c b/drivers/ata/pata_parport/pata_parport.c index 1af64d435d3c..a7adfdcb5e27 100644 --- a/drivers/ata/pata_parport/pata_parport.c +++ b/drivers/ata/pata_parport/pata_parport.c @@ -51,6 +51,13 @@ static void pata_parport_dev_select(struct ata_port *ap, unsigned int device) ata_sff_pause(ap); } +static void pata_parport_set_devctl(struct ata_port *ap, u8 ctl) +{ + struct pi_adapter *pi = ap->host->private_data; + + pi->proto->write_regr(pi, 1, 6, ctl); +} + static bool pata_parport_devchk(struct ata_port *ap, unsigned int device) { struct pi_adapter *pi = ap->host->private_data; @@ -64,7 +71,7 @@ static bool pata_parport_devchk(struct ata_port *ap, unsigned int device) pi->proto->write_regr(pi, 0, ATA_REG_NSECT, 0xaa); pi->proto->write_regr(pi, 0, ATA_REG_LBAL, 0x55); - pi->proto->write_regr(pi, 0, ATA_REG_NSECT, 055); + pi->proto->write_regr(pi, 0, ATA_REG_NSECT, 0x55); pi->proto->write_regr(pi, 0, ATA_REG_LBAL, 0xaa); nsect = pi->proto->read_regr(pi, 0, ATA_REG_NSECT); @@ -73,6 +80,72 @@ static bool pata_parport_devchk(struct ata_port *ap, unsigned int device) return (nsect == 0x55) && (lbal == 0xaa); } +static int pata_parport_wait_after_reset(struct ata_link *link, + unsigned int devmask, + unsigned long deadline) +{ + struct ata_port *ap = link->ap; + struct pi_adapter *pi = ap->host->private_data; + unsigned int dev0 = devmask & (1 << 0); + unsigned int dev1 = devmask & (1 << 1); + int rc, ret = 0; + + ata_msleep(ap, ATA_WAIT_AFTER_RESET); + + /* always check readiness of the master device */ + rc = ata_sff_wait_ready(link, deadline); + if (rc) { + /* + * some adapters return bogus values if master device is not + * present, so don't abort now if a slave device is present + */ + if (!dev1) + return rc; + ret = -ENODEV; + } + + /* + * if device 1 was found in ata_devchk, wait for register + * access briefly, then wait for BSY to clear. + */ + if (dev1) { + int i; + + pata_parport_dev_select(ap, 1); + + /* + * Wait for register access. Some ATAPI devices fail + * to set nsect/lbal after reset, so don't waste too + * much time on it. We're gonna wait for !BSY anyway. + */ + for (i = 0; i < 2; i++) { + u8 nsect, lbal; + + nsect = pi->proto->read_regr(pi, 0, ATA_REG_NSECT); + lbal = pi->proto->read_regr(pi, 0, ATA_REG_LBAL); + if (nsect == 1 && lbal == 1) + break; + /* give drive a breather */ + ata_msleep(ap, 50); + } + + rc = ata_sff_wait_ready(link, deadline); + if (rc) { + if (rc != -ENODEV) + return rc; + ret = rc; + } + } + + pata_parport_dev_select(ap, 0); + if (dev1) + pata_parport_dev_select(ap, 1); + if (dev0) + pata_parport_dev_select(ap, 0); + + return ret; +} + static int pata_parport_bus_softreset(struct ata_port *ap, unsigned int devmask, unsigned long deadline) { @@ -87,7 +160,7 @@ static int pata_parport_bus_softreset(struct ata_port *ap, unsigned int devmask, ap->last_ctl = ap->ctl; /* wait the port to become ready */ - return ata_sff_wait_after_reset(&ap->link, devmask, deadline); + return pata_parport_wait_after_reset(&ap->link, devmask, deadline); } static int pata_parport_softreset(struct ata_link *link, unsigned int *classes, @@ -252,6 +325,7 @@ static struct ata_port_operations pata_parport_port_ops = { .hardreset = NULL, .sff_dev_select = pata_parport_dev_select, + .sff_set_devctl = pata_parport_set_devctl, .sff_check_status = pata_parport_check_status, .sff_check_altstatus = pata_parport_check_altstatus, .sff_tf_load = pata_parport_tf_load, diff --git a/drivers/atm/fore200e.c b/drivers/atm/fore200e.c index fb2be3574c26..50d8ce20ae5b 100644 --- a/drivers/atm/fore200e.c +++ b/drivers/atm/fore200e.c @@ -36,7 +36,7 @@ #ifdef CONFIG_SBUS #include <linux/of.h> -#include <linux/of_device.h> +#include <linux/platform_device.h> #include <asm/idprom.h> #include <asm/openprom.h> #include <asm/oplib.h> @@ -2520,18 +2520,12 @@ static int fore200e_init(struct fore200e *fore200e, struct device *parent) } #ifdef CONFIG_SBUS -static const struct of_device_id fore200e_sba_match[]; static int fore200e_sba_probe(struct platform_device *op) { - const struct of_device_id *match; struct fore200e *fore200e; static int index = 0; int err; - match = of_match_device(fore200e_sba_match, &op->dev); - if (!match) - return -EINVAL; - fore200e = kzalloc(sizeof(struct fore200e), GFP_KERNEL); if (!fore200e) return -ENOMEM; diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index df1cd0f718b8..800f131222fc 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -1436,8 +1436,9 @@ static int nbd_start_device_ioctl(struct nbd_device *nbd) static void nbd_clear_sock_ioctl(struct nbd_device *nbd) { - blk_mark_disk_dead(nbd->disk); nbd_clear_sock(nbd); + disk_force_media_change(nbd->disk); + nbd_bdev_reset(nbd); if (test_and_clear_bit(NBD_RT_HAS_CONFIG_REF, &nbd->config->runtime_flags)) nbd_config_put(nbd); diff --git a/drivers/dpll/dpll_netlink.c b/drivers/dpll/dpll_netlink.c index e20daba6896a..a6dc3997bf5c 100644 --- a/drivers/dpll/dpll_netlink.c +++ b/drivers/dpll/dpll_netlink.c @@ -213,6 +213,53 @@ dpll_msg_add_pin_direction(struct sk_buff *msg, struct dpll_pin *pin, } static int +dpll_msg_add_pin_phase_adjust(struct sk_buff *msg, struct dpll_pin *pin, + struct dpll_pin_ref *ref, + struct netlink_ext_ack *extack) +{ + const struct dpll_pin_ops *ops = dpll_pin_ops(ref); + struct dpll_device *dpll = ref->dpll; + s32 phase_adjust; + int ret; + + if (!ops->phase_adjust_get) + return 0; + ret = ops->phase_adjust_get(pin, dpll_pin_on_dpll_priv(dpll, pin), + dpll, dpll_priv(dpll), + &phase_adjust, extack); + if (ret) + return ret; + if (nla_put_s32(msg, DPLL_A_PIN_PHASE_ADJUST, phase_adjust)) + return -EMSGSIZE; + + return 0; +} + +static int +dpll_msg_add_phase_offset(struct sk_buff *msg, struct dpll_pin *pin, + struct dpll_pin_ref *ref, + struct netlink_ext_ack *extack) +{ + const struct dpll_pin_ops *ops = dpll_pin_ops(ref); + struct dpll_device *dpll = ref->dpll; + s64 phase_offset; + int ret; + + if (!ops->phase_offset_get) + return 0; + ret = ops->phase_offset_get(pin, dpll_pin_on_dpll_priv(dpll, pin), + dpll, dpll_priv(dpll), &phase_offset, + extack); + if (ret) + return ret; + if (nla_put_64bit(msg, DPLL_A_PIN_PHASE_OFFSET, sizeof(phase_offset), + &phase_offset, DPLL_A_PIN_PAD)) + return -EMSGSIZE; + + return 0; +} + +static int dpll_msg_add_pin_freq(struct sk_buff *msg, struct dpll_pin *pin, struct dpll_pin_ref *ref, struct netlink_ext_ack *extack) { @@ -332,6 +379,9 @@ dpll_msg_add_pin_dplls(struct sk_buff *msg, struct dpll_pin *pin, ret = dpll_msg_add_pin_direction(msg, pin, ref, extack); if (ret) goto nest_cancel; + ret = dpll_msg_add_phase_offset(msg, pin, ref, extack); + if (ret) + goto nest_cancel; nla_nest_end(msg, attr); } @@ -379,6 +429,15 @@ dpll_cmd_pin_get_one(struct sk_buff *msg, struct dpll_pin *pin, ret = dpll_msg_add_pin_freq(msg, pin, ref, extack); if (ret) return ret; + if (nla_put_s32(msg, DPLL_A_PIN_PHASE_ADJUST_MIN, + prop->phase_range.min)) + return -EMSGSIZE; + if (nla_put_s32(msg, DPLL_A_PIN_PHASE_ADJUST_MAX, + prop->phase_range.max)) + return -EMSGSIZE; + ret = dpll_msg_add_pin_phase_adjust(msg, pin, ref, extack); + if (ret) + return ret; if (xa_empty(&pin->parent_refs)) ret = dpll_msg_add_pin_dplls(msg, pin, extack); else @@ -416,7 +475,7 @@ dpll_device_get_one(struct dpll_device *dpll, struct sk_buff *msg, if (nla_put_u32(msg, DPLL_A_TYPE, dpll->type)) return -EMSGSIZE; - return ret; + return 0; } static int @@ -556,8 +615,10 @@ static int dpll_pin_freq_set(struct dpll_pin *pin, struct nlattr *a, struct netlink_ext_ack *extack) { - u64 freq = nla_get_u64(a); - struct dpll_pin_ref *ref; + u64 freq = nla_get_u64(a), old_freq; + struct dpll_pin_ref *ref, *failed; + const struct dpll_pin_ops *ops; + struct dpll_device *dpll; unsigned long i; int ret; @@ -567,19 +628,51 @@ dpll_pin_freq_set(struct dpll_pin *pin, struct nlattr *a, } xa_for_each(&pin->dpll_refs, i, ref) { - const struct dpll_pin_ops *ops = dpll_pin_ops(ref); - struct dpll_device *dpll = ref->dpll; - - if (!ops->frequency_set) + ops = dpll_pin_ops(ref); + if (!ops->frequency_set || !ops->frequency_get) { + NL_SET_ERR_MSG(extack, "frequency set not supported by the device"); return -EOPNOTSUPP; + } + } + ref = dpll_xa_ref_dpll_first(&pin->dpll_refs); + ops = dpll_pin_ops(ref); + dpll = ref->dpll; + ret = ops->frequency_get(pin, dpll_pin_on_dpll_priv(dpll, pin), dpll, + dpll_priv(dpll), &old_freq, extack); + if (ret) { + NL_SET_ERR_MSG(extack, "unable to get old frequency value"); + return ret; + } + if (freq == old_freq) + return 0; + + xa_for_each(&pin->dpll_refs, i, ref) { + ops = dpll_pin_ops(ref); + dpll = ref->dpll; ret = ops->frequency_set(pin, dpll_pin_on_dpll_priv(dpll, pin), dpll, dpll_priv(dpll), freq, extack); - if (ret) - return ret; + if (ret) { + failed = ref; + NL_SET_ERR_MSG_FMT(extack, "frequency set failed for dpll_id:%u", + dpll->id); + goto rollback; + } } __dpll_pin_change_ntf(pin); return 0; + +rollback: + xa_for_each(&pin->dpll_refs, i, ref) { + if (ref == failed) + break; + ops = dpll_pin_ops(ref); + dpll = ref->dpll; + if (ops->frequency_set(pin, dpll_pin_on_dpll_priv(dpll, pin), + dpll, dpll_priv(dpll), old_freq, extack)) + NL_SET_ERR_MSG(extack, "set frequency rollback failed"); + } + return ret; } static int @@ -706,6 +799,78 @@ dpll_pin_direction_set(struct dpll_pin *pin, struct dpll_device *dpll, } static int +dpll_pin_phase_adj_set(struct dpll_pin *pin, struct nlattr *phase_adj_attr, + struct netlink_ext_ack *extack) +{ + struct dpll_pin_ref *ref, *failed; + const struct dpll_pin_ops *ops; + s32 phase_adj, old_phase_adj; + struct dpll_device *dpll; + unsigned long i; + int ret; + + phase_adj = nla_get_s32(phase_adj_attr); + if (phase_adj > pin->prop->phase_range.max || + phase_adj < pin->prop->phase_range.min) { + NL_SET_ERR_MSG_ATTR(extack, phase_adj_attr, + "phase adjust value not supported"); + return -EINVAL; + } + + xa_for_each(&pin->dpll_refs, i, ref) { + ops = dpll_pin_ops(ref); + if (!ops->phase_adjust_set || !ops->phase_adjust_get) { + NL_SET_ERR_MSG(extack, "phase adjust not supported"); + return -EOPNOTSUPP; + } + } + ref = dpll_xa_ref_dpll_first(&pin->dpll_refs); + ops = dpll_pin_ops(ref); + dpll = ref->dpll; + ret = ops->phase_adjust_get(pin, dpll_pin_on_dpll_priv(dpll, pin), + dpll, dpll_priv(dpll), &old_phase_adj, + extack); + if (ret) { + NL_SET_ERR_MSG(extack, "unable to get old phase adjust value"); + return ret; + } + if (phase_adj == old_phase_adj) + return 0; + + xa_for_each(&pin->dpll_refs, i, ref) { + ops = dpll_pin_ops(ref); + dpll = ref->dpll; + ret = ops->phase_adjust_set(pin, + dpll_pin_on_dpll_priv(dpll, pin), + dpll, dpll_priv(dpll), phase_adj, + extack); + if (ret) { + failed = ref; + NL_SET_ERR_MSG_FMT(extack, + "phase adjust set failed for dpll_id:%u", + dpll->id); + goto rollback; + } + } + __dpll_pin_change_ntf(pin); + + return 0; + +rollback: + xa_for_each(&pin->dpll_refs, i, ref) { + if (ref == failed) + break; + ops = dpll_pin_ops(ref); + dpll = ref->dpll; + if (ops->phase_adjust_set(pin, dpll_pin_on_dpll_priv(dpll, pin), + dpll, dpll_priv(dpll), old_phase_adj, + extack)) + NL_SET_ERR_MSG(extack, "set phase adjust rollback failed"); + } + return ret; +} + +static int dpll_pin_parent_device_set(struct dpll_pin *pin, struct nlattr *parent_nest, struct netlink_ext_ack *extack) { @@ -793,6 +958,11 @@ dpll_pin_set_from_nlattr(struct dpll_pin *pin, struct genl_info *info) if (ret) return ret; break; + case DPLL_A_PIN_PHASE_ADJUST: + ret = dpll_pin_phase_adj_set(pin, a, info->extack); + if (ret) + return ret; + break; case DPLL_A_PIN_PARENT_DEVICE: ret = dpll_pin_parent_device_set(pin, a, info->extack); if (ret) diff --git a/drivers/dpll/dpll_nl.c b/drivers/dpll/dpll_nl.c index 14064c8c783b..eaee5be7aa64 100644 --- a/drivers/dpll/dpll_nl.c +++ b/drivers/dpll/dpll_nl.c @@ -11,11 +11,12 @@ #include <uapi/linux/dpll.h> /* Common nested types */ -const struct nla_policy dpll_pin_parent_device_nl_policy[DPLL_A_PIN_STATE + 1] = { +const struct nla_policy dpll_pin_parent_device_nl_policy[DPLL_A_PIN_PHASE_OFFSET + 1] = { [DPLL_A_PIN_PARENT_ID] = { .type = NLA_U32, }, [DPLL_A_PIN_DIRECTION] = NLA_POLICY_RANGE(NLA_U32, 1, 2), [DPLL_A_PIN_PRIO] = { .type = NLA_U32, }, [DPLL_A_PIN_STATE] = NLA_POLICY_RANGE(NLA_U32, 1, 3), + [DPLL_A_PIN_PHASE_OFFSET] = { .type = NLA_S64, }, }; const struct nla_policy dpll_pin_parent_pin_nl_policy[DPLL_A_PIN_STATE + 1] = { @@ -61,7 +62,7 @@ static const struct nla_policy dpll_pin_get_dump_nl_policy[DPLL_A_PIN_ID + 1] = }; /* DPLL_CMD_PIN_SET - do */ -static const struct nla_policy dpll_pin_set_nl_policy[DPLL_A_PIN_PARENT_PIN + 1] = { +static const struct nla_policy dpll_pin_set_nl_policy[DPLL_A_PIN_PHASE_ADJUST + 1] = { [DPLL_A_PIN_ID] = { .type = NLA_U32, }, [DPLL_A_PIN_FREQUENCY] = { .type = NLA_U64, }, [DPLL_A_PIN_DIRECTION] = NLA_POLICY_RANGE(NLA_U32, 1, 2), @@ -69,6 +70,7 @@ static const struct nla_policy dpll_pin_set_nl_policy[DPLL_A_PIN_PARENT_PIN + 1] [DPLL_A_PIN_STATE] = NLA_POLICY_RANGE(NLA_U32, 1, 3), [DPLL_A_PIN_PARENT_DEVICE] = NLA_POLICY_NESTED(dpll_pin_parent_device_nl_policy), [DPLL_A_PIN_PARENT_PIN] = NLA_POLICY_NESTED(dpll_pin_parent_pin_nl_policy), + [DPLL_A_PIN_PHASE_ADJUST] = { .type = NLA_S32, }, }; /* Ops table for dpll */ @@ -140,7 +142,7 @@ static const struct genl_split_ops dpll_nl_ops[] = { .doit = dpll_nl_pin_set_doit, .post_doit = dpll_pin_post_doit, .policy = dpll_pin_set_nl_policy, - .maxattr = DPLL_A_PIN_PARENT_PIN, + .maxattr = DPLL_A_PIN_PHASE_ADJUST, .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, }, }; diff --git a/drivers/dpll/dpll_nl.h b/drivers/dpll/dpll_nl.h index 1f67aaed4742..92d4c9c4f788 100644 --- a/drivers/dpll/dpll_nl.h +++ b/drivers/dpll/dpll_nl.h @@ -12,7 +12,7 @@ #include <uapi/linux/dpll.h> /* Common nested types */ -extern const struct nla_policy dpll_pin_parent_device_nl_policy[DPLL_A_PIN_STATE + 1]; +extern const struct nla_policy dpll_pin_parent_device_nl_policy[DPLL_A_PIN_PHASE_OFFSET + 1]; extern const struct nla_policy dpll_pin_parent_pin_nl_policy[DPLL_A_PIN_STATE + 1]; int dpll_lock_doit(const struct genl_split_ops *ops, struct sk_buff *skb, diff --git a/drivers/gpio/gpio-aspeed.c b/drivers/gpio/gpio-aspeed.c index da33bbbdacb9..58f107194fda 100644 --- a/drivers/gpio/gpio-aspeed.c +++ b/drivers/gpio/gpio-aspeed.c @@ -973,7 +973,7 @@ static int aspeed_gpio_set_config(struct gpio_chip *chip, unsigned int offset, else if (param == PIN_CONFIG_BIAS_DISABLE || param == PIN_CONFIG_BIAS_PULL_DOWN || param == PIN_CONFIG_DRIVE_STRENGTH) - return pinctrl_gpio_set_config(offset, config); + return pinctrl_gpio_set_config(chip->base + offset, config); else if (param == PIN_CONFIG_DRIVE_OPEN_DRAIN || param == PIN_CONFIG_DRIVE_OPEN_SOURCE) /* Return -ENOTSUPP to trigger emulation, as per datasheet */ diff --git a/drivers/gpio/gpio-pxa.c b/drivers/gpio/gpio-pxa.c index 7e9f7a32d3ee..cae9661862fe 100644 --- a/drivers/gpio/gpio-pxa.c +++ b/drivers/gpio/gpio-pxa.c @@ -237,6 +237,7 @@ static bool pxa_gpio_has_pinctrl(void) switch (gpio_type) { case PXA3XX_GPIO: case MMP2_GPIO: + case MMP_GPIO: return false; default: diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 30c4f5cca02c..2b8356699f23 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2093,7 +2093,7 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev) adev->flags |= AMD_IS_PX; if (!(adev->flags & AMD_IS_APU)) { - parent = pci_upstream_bridge(adev->pdev); + parent = pcie_find_root_port(adev->pdev); adev->has_pr3 = parent ? pci_pr3_present(parent) : false; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c index 9c66d98af6d8..7cd0dfaeee20 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c @@ -170,6 +170,7 @@ int amdgpu_fru_get_product_info(struct amdgpu_device *adev) csum += pia[size - 1]; if (csum) { DRM_ERROR("Bad Product Info Area checksum: 0x%02x", csum); + kfree(pia); return -EIO; } diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c index c435f7632e8e..5ee87965a078 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c @@ -157,7 +157,7 @@ void dcn20_update_clocks_update_dentist(struct clk_mgr_internal *clk_mgr, struct int32_t N; int32_t j; - if (!pipe_ctx->stream) + if (!resource_is_pipe_type(pipe_ctx, OTG_MASTER)) continue; /* Virtual encoders don't have this function */ if (!stream_enc->funcs->get_fifo_cal_average_level) @@ -188,7 +188,7 @@ void dcn20_update_clocks_update_dentist(struct clk_mgr_internal *clk_mgr, struct int32_t N; int32_t j; - if (!pipe_ctx->stream) + if (!resource_is_pipe_type(pipe_ctx, OTG_MASTER)) continue; /* Virtual encoders don't have this function */ if (!stream_enc->funcs->get_fifo_cal_average_level) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c index 984b52923534..e9345f6554db 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c @@ -355,7 +355,7 @@ static void dcn32_update_clocks_update_dentist( int32_t N; int32_t j; - if (!pipe_ctx->stream) + if (!resource_is_pipe_type(pipe_ctx, OTG_MASTER)) continue; /* Virtual encoders don't have this function */ if (!stream_enc->funcs->get_fifo_cal_average_level) @@ -401,7 +401,7 @@ static void dcn32_update_clocks_update_dentist( int32_t N; int32_t j; - if (!pipe_ctx->stream) + if (!resource_is_pipe_type(pipe_ctx, OTG_MASTER)) continue; /* Virtual encoders don't have this function */ if (!stream_enc->funcs->get_fifo_cal_average_level) diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c index 41147da54458..8bb2da13826f 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c @@ -2040,6 +2040,7 @@ static int default_attr_update(struct amdgpu_device *adev, struct amdgpu_device_ case IP_VERSION(11, 0, 0): case IP_VERSION(11, 0, 1): case IP_VERSION(11, 0, 2): + case IP_VERSION(11, 0, 3): *states = ATTR_STATE_SUPPORTED; break; default: diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c index 4bb289f9b4b8..da2860da6018 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c @@ -2082,36 +2082,41 @@ static int sienna_cichlid_display_disable_memory_clock_switch(struct smu_context return ret; } +#define MAX(a, b) ((a) > (b) ? (a) : (b)) + static int sienna_cichlid_update_pcie_parameters(struct smu_context *smu, uint32_t pcie_gen_cap, uint32_t pcie_width_cap) { struct smu_11_0_dpm_context *dpm_context = smu->smu_dpm.dpm_context; struct smu_11_0_pcie_table *pcie_table = &dpm_context->dpm_tables.pcie_table; - u32 smu_pcie_arg; + uint8_t *table_member1, *table_member2; + uint32_t min_gen_speed, max_gen_speed; + uint32_t min_lane_width, max_lane_width; + uint32_t smu_pcie_arg; int ret, i; - /* PCIE gen speed and lane width override */ - if (!amdgpu_device_pcie_dynamic_switching_supported()) { - if (pcie_table->pcie_gen[NUM_LINK_LEVELS - 1] < pcie_gen_cap) - pcie_gen_cap = pcie_table->pcie_gen[NUM_LINK_LEVELS - 1]; + GET_PPTABLE_MEMBER(PcieGenSpeed, &table_member1); + GET_PPTABLE_MEMBER(PcieLaneCount, &table_member2); - if (pcie_table->pcie_lane[NUM_LINK_LEVELS - 1] < pcie_width_cap) - pcie_width_cap = pcie_table->pcie_lane[NUM_LINK_LEVELS - 1]; + min_gen_speed = MAX(0, table_member1[0]); + max_gen_speed = MIN(pcie_gen_cap, table_member1[1]); + min_gen_speed = min_gen_speed > max_gen_speed ? + max_gen_speed : min_gen_speed; + min_lane_width = MAX(1, table_member2[0]); + max_lane_width = MIN(pcie_width_cap, table_member2[1]); + min_lane_width = min_lane_width > max_lane_width ? + max_lane_width : min_lane_width; - /* Force all levels to use the same settings */ - for (i = 0; i < NUM_LINK_LEVELS; i++) { - pcie_table->pcie_gen[i] = pcie_gen_cap; - pcie_table->pcie_lane[i] = pcie_width_cap; - } + if (!amdgpu_device_pcie_dynamic_switching_supported()) { + pcie_table->pcie_gen[0] = max_gen_speed; + pcie_table->pcie_lane[0] = max_lane_width; } else { - for (i = 0; i < NUM_LINK_LEVELS; i++) { - if (pcie_table->pcie_gen[i] > pcie_gen_cap) - pcie_table->pcie_gen[i] = pcie_gen_cap; - if (pcie_table->pcie_lane[i] > pcie_width_cap) - pcie_table->pcie_lane[i] = pcie_width_cap; - } + pcie_table->pcie_gen[0] = min_gen_speed; + pcie_table->pcie_lane[0] = min_lane_width; } + pcie_table->pcie_gen[1] = max_gen_speed; + pcie_table->pcie_lane[1] = max_lane_width; for (i = 0; i < NUM_LINK_LEVELS; i++) { smu_pcie_arg = (i << 16 | diff --git a/drivers/gpu/drm/drm_panel_orientation_quirks.c b/drivers/gpu/drm/drm_panel_orientation_quirks.c index 0cb646cb04ee..d5c15292ae93 100644 --- a/drivers/gpu/drm/drm_panel_orientation_quirks.c +++ b/drivers/gpu/drm/drm_panel_orientation_quirks.c @@ -38,6 +38,14 @@ static const struct drm_dmi_panel_orientation_data gpd_micropc = { .orientation = DRM_MODE_PANEL_ORIENTATION_RIGHT_UP, }; +static const struct drm_dmi_panel_orientation_data gpd_onemix2s = { + .width = 1200, + .height = 1920, + .bios_dates = (const char * const []){ "05/21/2018", "10/26/2018", + "03/04/2019", NULL }, + .orientation = DRM_MODE_PANEL_ORIENTATION_RIGHT_UP, +}; + static const struct drm_dmi_panel_orientation_data gpd_pocket = { .width = 1200, .height = 1920, @@ -401,6 +409,14 @@ static const struct dmi_system_id orientation_data[] = { DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "LTH17"), }, .driver_data = (void *)&lcd800x1280_rightside_up, + }, { /* One Mix 2S (generic strings, also match on bios date) */ + .matches = { + DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Default string"), + DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Default string"), + DMI_EXACT_MATCH(DMI_BOARD_VENDOR, "Default string"), + DMI_EXACT_MATCH(DMI_BOARD_NAME, "Default string"), + }, + .driver_data = (void *)&gpd_onemix2s, }, {} }; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c index 6b6d22c19411..0ba955611dfb 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c @@ -198,7 +198,7 @@ static void flush_tlb_invalidate(struct drm_i915_gem_object *obj) for_each_gt(gt, i915, id) { if (!obj->mm.tlb[id]) - return; + continue; intel_gt_invalidate_tlb_full(gt, obj->mm.tlb[id]); obj->mm.tlb[id] = 0; diff --git a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c index a4ff55aa5e55..7ad36198aab2 100644 --- a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c @@ -271,8 +271,17 @@ int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode) if (GRAPHICS_VER_FULL(rq->i915) >= IP_VER(12, 70)) bit_group_0 |= PIPE_CONTROL_CCS_FLUSH; + /* + * L3 fabric flush is needed for AUX CCS invalidation + * which happens as part of pipe-control so we can + * ignore PIPE_CONTROL_FLUSH_L3. Also PIPE_CONTROL_FLUSH_L3 + * deals with Protected Memory which is not needed for + * AUX CCS invalidation and lead to unwanted side effects. + */ + if (mode & EMIT_FLUSH) + bit_group_1 |= PIPE_CONTROL_FLUSH_L3; + bit_group_1 |= PIPE_CONTROL_TILE_CACHE_FLUSH; - bit_group_1 |= PIPE_CONTROL_FLUSH_L3; bit_group_1 |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH; bit_group_1 |= PIPE_CONTROL_DEPTH_CACHE_FLUSH; /* Wa_1409600907:tgl,adl-p */ diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 1f65bb33dd21..a8551ce322de 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1199,6 +1199,13 @@ int i915_gem_init(struct drm_i915_private *dev_priv) goto err_unlock; } + /* + * Register engines early to ensure the engine list is in its final + * rb-tree form, lowering the amount of code that has to deal with + * the intermediate llist state. + */ + intel_engines_driver_register(dev_priv); + return 0; /* @@ -1246,8 +1253,6 @@ err_unlock: void i915_gem_driver_register(struct drm_i915_private *i915) { i915_gem_driver_register__shrinker(i915); - - intel_engines_driver_register(i915); } void i915_gem_driver_unregister(struct drm_i915_private *i915) diff --git a/drivers/gpu/drm/nouveau/nouveau_abi16.c b/drivers/gpu/drm/nouveau/nouveau_abi16.c index 30afbec9e3b1..2edd7bb13fae 100644 --- a/drivers/gpu/drm/nouveau/nouveau_abi16.c +++ b/drivers/gpu/drm/nouveau/nouveau_abi16.c @@ -31,6 +31,7 @@ #include "nouveau_drv.h" #include "nouveau_dma.h" +#include "nouveau_exec.h" #include "nouveau_gem.h" #include "nouveau_chan.h" #include "nouveau_abi16.h" @@ -183,6 +184,20 @@ nouveau_abi16_fini(struct nouveau_abi16 *abi16) cli->abi16 = NULL; } +static inline int +getparam_dma_ib_max(struct nvif_device *device) +{ + const struct nvif_mclass dmas[] = { + { NV03_CHANNEL_DMA, 0 }, + { NV10_CHANNEL_DMA, 0 }, + { NV17_CHANNEL_DMA, 0 }, + { NV40_CHANNEL_DMA, 0 }, + {} + }; + + return nvif_mclass(&device->object, dmas) < 0 ? NV50_DMA_IB_MAX : 0; +} + int nouveau_abi16_ioctl_getparam(ABI16_IOCTL_ARGS) { @@ -247,6 +262,12 @@ nouveau_abi16_ioctl_getparam(ABI16_IOCTL_ARGS) case NOUVEAU_GETPARAM_GRAPH_UNITS: getparam->value = nvkm_gr_units(gr); break; + case NOUVEAU_GETPARAM_EXEC_PUSH_MAX: { + int ib_max = getparam_dma_ib_max(device); + + getparam->value = nouveau_exec_push_max_from_ib_max(ib_max); + break; + } default: NV_PRINTK(dbg, cli, "unknown parameter %lld\n", getparam->param); return -EINVAL; diff --git a/drivers/gpu/drm/nouveau/nouveau_chan.c b/drivers/gpu/drm/nouveau/nouveau_chan.c index bb3d6e5c122f..7c97b2886807 100644 --- a/drivers/gpu/drm/nouveau/nouveau_chan.c +++ b/drivers/gpu/drm/nouveau/nouveau_chan.c @@ -257,10 +257,7 @@ static int nouveau_channel_ctor(struct nouveau_drm *drm, struct nvif_device *device, bool priv, u64 runm, struct nouveau_channel **pchan) { - static const struct { - s32 oclass; - int version; - } hosts[] = { + const struct nvif_mclass hosts[] = { { AMPERE_CHANNEL_GPFIFO_B, 0 }, { AMPERE_CHANNEL_GPFIFO_A, 0 }, { TURING_CHANNEL_GPFIFO_A, 0 }, @@ -443,9 +440,11 @@ nouveau_channel_init(struct nouveau_channel *chan, u32 vram, u32 gart) } /* initialise dma tracking parameters */ - switch (chan->user.oclass & 0x00ff) { - case 0x006b: - case 0x006e: + switch (chan->user.oclass) { + case NV03_CHANNEL_DMA: + case NV10_CHANNEL_DMA: + case NV17_CHANNEL_DMA: + case NV40_CHANNEL_DMA: chan->user_put = 0x40; chan->user_get = 0x44; chan->dma.max = (0x10000 / 4) - 2; @@ -455,7 +454,7 @@ nouveau_channel_init(struct nouveau_channel *chan, u32 vram, u32 gart) chan->user_get = 0x44; chan->user_get_hi = 0x60; chan->dma.ib_base = 0x10000 / 4; - chan->dma.ib_max = (0x02000 / 8) - 1; + chan->dma.ib_max = NV50_DMA_IB_MAX; chan->dma.ib_put = 0; chan->dma.ib_free = chan->dma.ib_max - chan->dma.ib_put; chan->dma.max = chan->dma.ib_base; diff --git a/drivers/gpu/drm/nouveau/nouveau_dma.h b/drivers/gpu/drm/nouveau/nouveau_dma.h index 1744d95b233e..c52cda82353e 100644 --- a/drivers/gpu/drm/nouveau/nouveau_dma.h +++ b/drivers/gpu/drm/nouveau/nouveau_dma.h @@ -49,6 +49,9 @@ void nv50_dma_push(struct nouveau_channel *, u64 addr, u32 length, /* Maximum push buffer size. */ #define NV50_DMA_PUSH_MAX_LENGTH 0x7fffff +/* Maximum IBs per ring. */ +#define NV50_DMA_IB_MAX ((0x02000 / 8) - 1) + /* Object handles - for stuff that's doesn't use handle == oclass. */ enum { NvDmaFB = 0x80000002, diff --git a/drivers/gpu/drm/nouveau/nouveau_exec.c b/drivers/gpu/drm/nouveau/nouveau_exec.c index 5dda94e1318c..c1837ba95fb5 100644 --- a/drivers/gpu/drm/nouveau/nouveau_exec.c +++ b/drivers/gpu/drm/nouveau/nouveau_exec.c @@ -379,7 +379,7 @@ nouveau_exec_ioctl_exec(struct drm_device *dev, struct nouveau_channel *chan = NULL; struct nouveau_exec_job_args args = {}; struct drm_nouveau_exec *req = data; - int ret = 0; + int push_max, ret = 0; if (unlikely(!abi16)) return -ENOMEM; @@ -404,9 +404,10 @@ nouveau_exec_ioctl_exec(struct drm_device *dev, if (!chan->dma.ib_max) return nouveau_abi16_put(abi16, -ENOSYS); - if (unlikely(req->push_count > NOUVEAU_GEM_MAX_PUSH)) { + push_max = nouveau_exec_push_max_from_ib_max(chan->dma.ib_max); + if (unlikely(req->push_count > push_max)) { NV_PRINTK(err, cli, "pushbuf push count exceeds limit: %d max %d\n", - req->push_count, NOUVEAU_GEM_MAX_PUSH); + req->push_count, push_max); return nouveau_abi16_put(abi16, -EINVAL); } diff --git a/drivers/gpu/drm/nouveau/nouveau_exec.h b/drivers/gpu/drm/nouveau/nouveau_exec.h index 778cacd90f65..5488d337bcc0 100644 --- a/drivers/gpu/drm/nouveau/nouveau_exec.h +++ b/drivers/gpu/drm/nouveau/nouveau_exec.h @@ -51,4 +51,14 @@ int nouveau_exec_job_init(struct nouveau_exec_job **job, int nouveau_exec_ioctl_exec(struct drm_device *dev, void *data, struct drm_file *file_priv); +static inline unsigned int +nouveau_exec_push_max_from_ib_max(int ib_max) +{ + /* Limit the number of IBs per job to half the size of the ring in order + * to avoid the ring running dry between submissions and preserve one + * more slot for the job's HW fence. + */ + return ib_max > 1 ? ib_max / 2 - 1 : 0; +} + #endif diff --git a/drivers/gpu/drm/tests/drm_kunit_helpers.c b/drivers/gpu/drm/tests/drm_kunit_helpers.c index c1dfbfcaa000..bccb33b900f3 100644 --- a/drivers/gpu/drm/tests/drm_kunit_helpers.c +++ b/drivers/gpu/drm/tests/drm_kunit_helpers.c @@ -118,7 +118,7 @@ void drm_kunit_helper_free_device(struct kunit *test, struct device *dev) kunit_release_action(test, kunit_action_platform_driver_unregister, - pdev); + &fake_platform_driver); } EXPORT_SYMBOL_GPL(drm_kunit_helper_free_device); diff --git a/drivers/hid/Kconfig b/drivers/hid/Kconfig index 0cea301cc9a9..790aa908e2a7 100644 --- a/drivers/hid/Kconfig +++ b/drivers/hid/Kconfig @@ -799,6 +799,8 @@ config HID_NVIDIA_SHIELD tristate "NVIDIA SHIELD devices" depends on USB_HID depends on BT_HIDP + depends on LEDS_CLASS + select POWER_SUPPLY help Support for NVIDIA SHIELD accessories. diff --git a/drivers/hid/hid-holtek-kbd.c b/drivers/hid/hid-holtek-kbd.c index 403506b9697e..b346d68a06f5 100644 --- a/drivers/hid/hid-holtek-kbd.c +++ b/drivers/hid/hid-holtek-kbd.c @@ -130,6 +130,10 @@ static int holtek_kbd_input_event(struct input_dev *dev, unsigned int type, return -ENODEV; boot_hid = usb_get_intfdata(boot_interface); + if (list_empty(&boot_hid->inputs)) { + hid_err(hid, "no inputs found\n"); + return -ENODEV; + } boot_hid_input = list_first_entry(&boot_hid->inputs, struct hid_input, list); diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 7e499992a793..e4d2dfd5d253 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -425,6 +425,7 @@ #define I2C_DEVICE_ID_HP_SPECTRE_X360_13T_AW100 0x29F5 #define I2C_DEVICE_ID_HP_SPECTRE_X360_14T_EA100_V1 0x2BED #define I2C_DEVICE_ID_HP_SPECTRE_X360_14T_EA100_V2 0x2BEE +#define I2C_DEVICE_ID_HP_ENVY_X360_15_EU0556NG 0x2D02 #define USB_VENDOR_ID_ELECOM 0x056e #define USB_DEVICE_ID_ELECOM_BM084 0x0061 diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c index 0235cc1690a1..c8b20d44b147 100644 --- a/drivers/hid/hid-input.c +++ b/drivers/hid/hid-input.c @@ -409,6 +409,8 @@ static const struct hid_device_id hid_battery_quirks[] = { HID_BATTERY_QUIRK_IGNORE }, { HID_I2C_DEVICE(USB_VENDOR_ID_ELAN, I2C_DEVICE_ID_HP_SPECTRE_X360_14T_EA100_V2), HID_BATTERY_QUIRK_IGNORE }, + { HID_I2C_DEVICE(USB_VENDOR_ID_ELAN, I2C_DEVICE_ID_HP_ENVY_X360_15_EU0556NG), + HID_BATTERY_QUIRK_IGNORE }, {} }; diff --git a/drivers/hid/hid-logitech-hidpp.c b/drivers/hid/hid-logitech-hidpp.c index 05f5b5f588a2..a209d51bd247 100644 --- a/drivers/hid/hid-logitech-hidpp.c +++ b/drivers/hid/hid-logitech-hidpp.c @@ -4515,7 +4515,8 @@ static int hidpp_probe(struct hid_device *hdev, const struct hid_device_id *id) goto hid_hw_init_fail; } - hidpp_connect_event(hidpp); + schedule_work(&hidpp->work); + flush_work(&hidpp->work); if (will_restart) { /* Reset the HID node state */ @@ -4677,6 +4678,8 @@ static const struct hid_device_id hidpp_devices[] = { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, 0xb008) }, { /* MX Master mouse over Bluetooth */ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, 0xb012) }, + { /* M720 Triathlon mouse over Bluetooth */ + HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, 0xb015) }, { /* MX Ergo trackball over Bluetooth */ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, 0xb01d) }, { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, 0xb01e) }, diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c index 521b2ffb4244..8db4ae05febc 100644 --- a/drivers/hid/hid-multitouch.c +++ b/drivers/hid/hid-multitouch.c @@ -2146,6 +2146,10 @@ static const struct hid_device_id mt_devices[] = { /* Synaptics devices */ { .driver_data = MT_CLS_WIN_8_FORCE_MULTI_INPUT, HID_DEVICE(BUS_I2C, HID_GROUP_MULTITOUCH_WIN_8, + USB_VENDOR_ID_SYNAPTICS, 0xcd7e) }, + + { .driver_data = MT_CLS_WIN_8_FORCE_MULTI_INPUT, + HID_DEVICE(BUS_I2C, HID_GROUP_MULTITOUCH_WIN_8, USB_VENDOR_ID_SYNAPTICS, 0xce08) }, { .driver_data = MT_CLS_WIN_8_FORCE_MULTI_INPUT, diff --git a/drivers/hid/hid-nintendo.c b/drivers/hid/hid-nintendo.c index 250f5d2f888a..10468f727e5b 100644 --- a/drivers/hid/hid-nintendo.c +++ b/drivers/hid/hid-nintendo.c @@ -2088,7 +2088,9 @@ static int joycon_read_info(struct joycon_ctlr *ctlr) struct joycon_input_report *report; req.subcmd_id = JC_SUBCMD_REQ_DEV_INFO; + mutex_lock(&ctlr->output_mutex); ret = joycon_send_subcmd(ctlr, &req, 0, HZ); + mutex_unlock(&ctlr->output_mutex); if (ret) { hid_err(ctlr->hdev, "Failed to get joycon info; ret=%d\n", ret); return ret; @@ -2117,6 +2119,85 @@ static int joycon_read_info(struct joycon_ctlr *ctlr) return 0; } +static int joycon_init(struct hid_device *hdev) +{ + struct joycon_ctlr *ctlr = hid_get_drvdata(hdev); + int ret = 0; + + mutex_lock(&ctlr->output_mutex); + /* if handshake command fails, assume ble pro controller */ + if ((jc_type_is_procon(ctlr) || jc_type_is_chrggrip(ctlr)) && + !joycon_send_usb(ctlr, JC_USB_CMD_HANDSHAKE, HZ)) { + hid_dbg(hdev, "detected USB controller\n"); + /* set baudrate for improved latency */ + ret = joycon_send_usb(ctlr, JC_USB_CMD_BAUDRATE_3M, HZ); + if (ret) { + hid_err(hdev, "Failed to set baudrate; ret=%d\n", ret); + goto out_unlock; + } + /* handshake */ + ret = joycon_send_usb(ctlr, JC_USB_CMD_HANDSHAKE, HZ); + if (ret) { + hid_err(hdev, "Failed handshake; ret=%d\n", ret); + goto out_unlock; + } + /* + * Set no timeout (to keep controller in USB mode). + * This doesn't send a response, so ignore the timeout. + */ + joycon_send_usb(ctlr, JC_USB_CMD_NO_TIMEOUT, HZ/10); + } else if (jc_type_is_chrggrip(ctlr)) { + hid_err(hdev, "Failed charging grip handshake\n"); + ret = -ETIMEDOUT; + goto out_unlock; + } + + /* get controller calibration data, and parse it */ + ret = joycon_request_calibration(ctlr); + if (ret) { + /* + * We can function with default calibration, but it may be + * inaccurate. Provide a warning, and continue on. + */ + hid_warn(hdev, "Analog stick positions may be inaccurate\n"); + } + + /* get IMU calibration data, and parse it */ + ret = joycon_request_imu_calibration(ctlr); + if (ret) { + /* + * We can function with default calibration, but it may be + * inaccurate. Provide a warning, and continue on. + */ + hid_warn(hdev, "Unable to read IMU calibration data\n"); + } + + /* Set the reporting mode to 0x30, which is the full report mode */ + ret = joycon_set_report_mode(ctlr); + if (ret) { + hid_err(hdev, "Failed to set report mode; ret=%d\n", ret); + goto out_unlock; + } + + /* Enable rumble */ + ret = joycon_enable_rumble(ctlr); + if (ret) { + hid_err(hdev, "Failed to enable rumble; ret=%d\n", ret); + goto out_unlock; + } + + /* Enable the IMU */ + ret = joycon_enable_imu(ctlr); + if (ret) { + hid_err(hdev, "Failed to enable the IMU; ret=%d\n", ret); + goto out_unlock; + } + +out_unlock: + mutex_unlock(&ctlr->output_mutex); + return ret; +} + /* Common handler for parsing inputs */ static int joycon_ctlr_read_handler(struct joycon_ctlr *ctlr, u8 *data, int size) @@ -2248,85 +2329,19 @@ static int nintendo_hid_probe(struct hid_device *hdev, hid_device_io_start(hdev); - /* Initialize the controller */ - mutex_lock(&ctlr->output_mutex); - /* if handshake command fails, assume ble pro controller */ - if ((jc_type_is_procon(ctlr) || jc_type_is_chrggrip(ctlr)) && - !joycon_send_usb(ctlr, JC_USB_CMD_HANDSHAKE, HZ)) { - hid_dbg(hdev, "detected USB controller\n"); - /* set baudrate for improved latency */ - ret = joycon_send_usb(ctlr, JC_USB_CMD_BAUDRATE_3M, HZ); - if (ret) { - hid_err(hdev, "Failed to set baudrate; ret=%d\n", ret); - goto err_mutex; - } - /* handshake */ - ret = joycon_send_usb(ctlr, JC_USB_CMD_HANDSHAKE, HZ); - if (ret) { - hid_err(hdev, "Failed handshake; ret=%d\n", ret); - goto err_mutex; - } - /* - * Set no timeout (to keep controller in USB mode). - * This doesn't send a response, so ignore the timeout. - */ - joycon_send_usb(ctlr, JC_USB_CMD_NO_TIMEOUT, HZ/10); - } else if (jc_type_is_chrggrip(ctlr)) { - hid_err(hdev, "Failed charging grip handshake\n"); - ret = -ETIMEDOUT; - goto err_mutex; - } - - /* get controller calibration data, and parse it */ - ret = joycon_request_calibration(ctlr); + ret = joycon_init(hdev); if (ret) { - /* - * We can function with default calibration, but it may be - * inaccurate. Provide a warning, and continue on. - */ - hid_warn(hdev, "Analog stick positions may be inaccurate\n"); - } - - /* get IMU calibration data, and parse it */ - ret = joycon_request_imu_calibration(ctlr); - if (ret) { - /* - * We can function with default calibration, but it may be - * inaccurate. Provide a warning, and continue on. - */ - hid_warn(hdev, "Unable to read IMU calibration data\n"); - } - - /* Set the reporting mode to 0x30, which is the full report mode */ - ret = joycon_set_report_mode(ctlr); - if (ret) { - hid_err(hdev, "Failed to set report mode; ret=%d\n", ret); - goto err_mutex; - } - - /* Enable rumble */ - ret = joycon_enable_rumble(ctlr); - if (ret) { - hid_err(hdev, "Failed to enable rumble; ret=%d\n", ret); - goto err_mutex; - } - - /* Enable the IMU */ - ret = joycon_enable_imu(ctlr); - if (ret) { - hid_err(hdev, "Failed to enable the IMU; ret=%d\n", ret); - goto err_mutex; + hid_err(hdev, "Failed to initialize controller; ret=%d\n", ret); + goto err_close; } ret = joycon_read_info(ctlr); if (ret) { hid_err(hdev, "Failed to retrieve controller info; ret=%d\n", ret); - goto err_mutex; + goto err_close; } - mutex_unlock(&ctlr->output_mutex); - /* Initialize the leds */ ret = joycon_leds_create(ctlr); if (ret) { @@ -2352,8 +2367,6 @@ static int nintendo_hid_probe(struct hid_device *hdev, hid_dbg(hdev, "probe - success\n"); return 0; -err_mutex: - mutex_unlock(&ctlr->output_mutex); err_close: hid_hw_close(hdev); err_stop: @@ -2383,6 +2396,20 @@ static void nintendo_hid_remove(struct hid_device *hdev) hid_hw_stop(hdev); } +#ifdef CONFIG_PM + +static int nintendo_hid_resume(struct hid_device *hdev) +{ + int ret = joycon_init(hdev); + + if (ret) + hid_err(hdev, "Failed to restore controller after resume"); + + return ret; +} + +#endif + static const struct hid_device_id nintendo_hid_devices[] = { { HID_USB_DEVICE(USB_VENDOR_ID_NINTENDO, USB_DEVICE_ID_NINTENDO_PROCON) }, @@ -2404,6 +2431,10 @@ static struct hid_driver nintendo_hid_driver = { .probe = nintendo_hid_probe, .remove = nintendo_hid_remove, .raw_event = nintendo_hid_event, + +#ifdef CONFIG_PM + .resume = nintendo_hid_resume, +#endif }; module_hid_driver(nintendo_hid_driver); diff --git a/drivers/hid/hid-nvidia-shield.c b/drivers/hid/hid-nvidia-shield.c index 9a3576dbf421..c463e54decbc 100644 --- a/drivers/hid/hid-nvidia-shield.c +++ b/drivers/hid/hid-nvidia-shield.c @@ -801,7 +801,7 @@ static inline int thunderstrike_led_create(struct thunderstrike *ts) led->name = devm_kasprintf(&ts->base.hdev->dev, GFP_KERNEL, "thunderstrike%d:blue:led", ts->id); led->max_brightness = 1; - led->flags = LED_CORE_SUSPENDRESUME; + led->flags = LED_CORE_SUSPENDRESUME | LED_RETAIN_AT_SHUTDOWN; led->brightness_get = &thunderstrike_led_get_brightness; led->brightness_set = &thunderstrike_led_set_brightness; @@ -1058,7 +1058,7 @@ static int shield_probe(struct hid_device *hdev, const struct hid_device_id *id) ret = hid_hw_start(hdev, HID_CONNECT_HIDINPUT); if (ret) { hid_err(hdev, "Failed to start HID device\n"); - goto err_haptics; + goto err_ts_create; } ret = hid_hw_open(hdev); @@ -1073,9 +1073,12 @@ static int shield_probe(struct hid_device *hdev, const struct hid_device_id *id) err_stop: hid_hw_stop(hdev); -err_haptics: +err_ts_create: + power_supply_unregister(ts->base.battery_dev.psy); if (ts->haptics_dev) input_unregister_device(ts->haptics_dev); + led_classdev_unregister(&ts->led_dev); + ida_free(&thunderstrike_ida, ts->id); return ret; } diff --git a/drivers/hid/hid-sony.c b/drivers/hid/hid-sony.c index dd942061fd77..ebc0aa4e4345 100644 --- a/drivers/hid/hid-sony.c +++ b/drivers/hid/hid-sony.c @@ -2155,6 +2155,8 @@ static int sony_probe(struct hid_device *hdev, const struct hid_device_id *id) return ret; err: + usb_free_urb(sc->ghl_urb); + hid_hw_stop(hdev); return ret; } diff --git a/drivers/hid/hid-steelseries.c b/drivers/hid/hid-steelseries.c index 43d2cf7153d7..b3edadf42d6d 100644 --- a/drivers/hid/hid-steelseries.c +++ b/drivers/hid/hid-steelseries.c @@ -390,7 +390,7 @@ static int steelseries_headset_arctis_1_fetch_battery(struct hid_device *hdev) ret = hid_hw_raw_request(hdev, arctis_1_battery_request[0], write_buf, sizeof(arctis_1_battery_request), HID_OUTPUT_REPORT, HID_REQ_SET_REPORT); - if (ret < sizeof(arctis_1_battery_request)) { + if (ret < (int)sizeof(arctis_1_battery_request)) { hid_err(hdev, "hid_hw_raw_request() failed with %d\n", ret); ret = -ENODATA; } diff --git a/drivers/hid/i2c-hid/i2c-hid-core.c b/drivers/hid/i2c-hid/i2c-hid-core.c index 9601c0605fd9..2735cd585af0 100644 --- a/drivers/hid/i2c-hid/i2c-hid-core.c +++ b/drivers/hid/i2c-hid/i2c-hid-core.c @@ -998,45 +998,29 @@ static int i2c_hid_core_resume(struct i2c_hid *ihid) return hid_driver_reset_resume(hid); } -/** - * __do_i2c_hid_core_initial_power_up() - First time power up of the i2c-hid device. - * @ihid: The ihid object created during probe. - * - * This function is called at probe time. - * - * The initial power on is where we do some basic validation that the device - * exists, where we fetch the HID descriptor, and where we create the actual - * HID devices. - * - * Return: 0 or error code. +/* + * Check that the device exists and parse the HID descriptor. */ -static int __do_i2c_hid_core_initial_power_up(struct i2c_hid *ihid) +static int __i2c_hid_core_probe(struct i2c_hid *ihid) { struct i2c_client *client = ihid->client; struct hid_device *hid = ihid->hid; int ret; - ret = i2c_hid_core_power_up(ihid); - if (ret) - return ret; - /* Make sure there is something at this address */ ret = i2c_smbus_read_byte(client); if (ret < 0) { i2c_hid_dbg(ihid, "nothing at this address: %d\n", ret); - ret = -ENXIO; - goto err; + return -ENXIO; } ret = i2c_hid_fetch_hid_descriptor(ihid); if (ret < 0) { dev_err(&client->dev, "Failed to fetch the HID Descriptor\n"); - goto err; + return ret; } - enable_irq(client->irq); - hid->version = le16_to_cpu(ihid->hdesc.bcdVersion); hid->vendor = le16_to_cpu(ihid->hdesc.wVendorID); hid->product = le16_to_cpu(ihid->hdesc.wProductID); @@ -1050,17 +1034,49 @@ static int __do_i2c_hid_core_initial_power_up(struct i2c_hid *ihid) ihid->quirks = i2c_hid_lookup_quirk(hid->vendor, hid->product); + return 0; +} + +static int i2c_hid_core_register_hid(struct i2c_hid *ihid) +{ + struct i2c_client *client = ihid->client; + struct hid_device *hid = ihid->hid; + int ret; + + enable_irq(client->irq); + ret = hid_add_device(hid); if (ret) { if (ret != -ENODEV) hid_err(client, "can't add hid device: %d\n", ret); - goto err; + disable_irq(client->irq); + return ret; } return 0; +} + +static int i2c_hid_core_probe_panel_follower(struct i2c_hid *ihid) +{ + int ret; + + ret = i2c_hid_core_power_up(ihid); + if (ret) + return ret; -err: + ret = __i2c_hid_core_probe(ihid); + if (ret) + goto err_power_down; + + ret = i2c_hid_core_register_hid(ihid); + if (ret) + goto err_power_down; + + return 0; + +err_power_down: i2c_hid_core_power_down(ihid); + return ret; } @@ -1077,7 +1093,7 @@ static void ihid_core_panel_prepare_work(struct work_struct *work) * steps. */ if (!hid->version) - ret = __do_i2c_hid_core_initial_power_up(ihid); + ret = i2c_hid_core_probe_panel_follower(ihid); else ret = i2c_hid_core_resume(ihid); @@ -1136,7 +1152,6 @@ static int i2c_hid_core_register_panel_follower(struct i2c_hid *ihid) struct device *dev = &ihid->client->dev; int ret; - ihid->is_panel_follower = true; ihid->panel_follower.funcs = &i2c_hid_core_panel_follower_funcs; /* @@ -1156,30 +1171,6 @@ static int i2c_hid_core_register_panel_follower(struct i2c_hid *ihid) return 0; } -static int i2c_hid_core_initial_power_up(struct i2c_hid *ihid) -{ - /* - * If we're a panel follower, we'll register and do our initial power - * up when the panel turns on; otherwise we do it right away. - */ - if (drm_is_panel_follower(&ihid->client->dev)) - return i2c_hid_core_register_panel_follower(ihid); - else - return __do_i2c_hid_core_initial_power_up(ihid); -} - -static void i2c_hid_core_final_power_down(struct i2c_hid *ihid) -{ - /* - * If we're a follower, the act of unfollowing will cause us to be - * powered down. Otherwise we need to manually do it. - */ - if (ihid->is_panel_follower) - drm_panel_remove_follower(&ihid->panel_follower); - else - i2c_hid_core_suspend(ihid, true); -} - int i2c_hid_core_probe(struct i2c_client *client, struct i2chid_ops *ops, u16 hid_descriptor_address, u32 quirks) { @@ -1211,6 +1202,7 @@ int i2c_hid_core_probe(struct i2c_client *client, struct i2chid_ops *ops, ihid->ops = ops; ihid->client = client; ihid->wHIDDescRegister = cpu_to_le16(hid_descriptor_address); + ihid->is_panel_follower = drm_is_panel_follower(&client->dev); init_waitqueue_head(&ihid->wait); mutex_init(&ihid->reset_lock); @@ -1224,14 +1216,10 @@ int i2c_hid_core_probe(struct i2c_client *client, struct i2chid_ops *ops, return ret; device_enable_async_suspend(&client->dev); - ret = i2c_hid_init_irq(client); - if (ret < 0) - goto err_buffers_allocated; - hid = hid_allocate_device(); if (IS_ERR(hid)) { ret = PTR_ERR(hid); - goto err_irq; + goto err_free_buffers; } ihid->hid = hid; @@ -1242,19 +1230,42 @@ int i2c_hid_core_probe(struct i2c_client *client, struct i2chid_ops *ops, hid->bus = BUS_I2C; hid->initial_quirks = quirks; - ret = i2c_hid_core_initial_power_up(ihid); + /* Power on and probe unless device is a panel follower. */ + if (!ihid->is_panel_follower) { + ret = i2c_hid_core_power_up(ihid); + if (ret < 0) + goto err_destroy_device; + + ret = __i2c_hid_core_probe(ihid); + if (ret < 0) + goto err_power_down; + } + + ret = i2c_hid_init_irq(client); + if (ret < 0) + goto err_power_down; + + /* + * If we're a panel follower, we'll register when the panel turns on; + * otherwise we do it right away. + */ + if (ihid->is_panel_follower) + ret = i2c_hid_core_register_panel_follower(ihid); + else + ret = i2c_hid_core_register_hid(ihid); if (ret) - goto err_mem_free; + goto err_free_irq; return 0; -err_mem_free: - hid_destroy_device(hid); - -err_irq: +err_free_irq: free_irq(client->irq, ihid); - -err_buffers_allocated: +err_power_down: + if (!ihid->is_panel_follower) + i2c_hid_core_power_down(ihid); +err_destroy_device: + hid_destroy_device(hid); +err_free_buffers: i2c_hid_free_buffers(ihid); return ret; @@ -1266,7 +1277,14 @@ void i2c_hid_core_remove(struct i2c_client *client) struct i2c_hid *ihid = i2c_get_clientdata(client); struct hid_device *hid; - i2c_hid_core_final_power_down(ihid); + /* + * If we're a follower, the act of unfollowing will cause us to be + * powered down. Otherwise we need to manually do it. + */ + if (ihid->is_panel_follower) + drm_panel_remove_follower(&ihid->panel_follower); + else + i2c_hid_core_suspend(ihid, true); hid = ihid->hid; hid_destroy_device(hid); diff --git a/drivers/hid/intel-ish-hid/ipc/pci-ish.c b/drivers/hid/intel-ish-hid/ipc/pci-ish.c index 55cb25038e63..710fda5f19e1 100644 --- a/drivers/hid/intel-ish-hid/ipc/pci-ish.c +++ b/drivers/hid/intel-ish-hid/ipc/pci-ish.c @@ -133,6 +133,14 @@ static int enable_gpe(struct device *dev) } wakeup = &adev->wakeup; + /* + * Call acpi_disable_gpe(), so that reference count + * gpe_event_info->runtime_count doesn't overflow. + * When gpe_event_info->runtime_count = 0, the call + * to acpi_disable_gpe() simply return. + */ + acpi_disable_gpe(wakeup->gpe_device, wakeup->gpe_number); + acpi_sts = acpi_enable_gpe(wakeup->gpe_device, wakeup->gpe_number); if (ACPI_FAILURE(acpi_sts)) { dev_err(dev, "enable ose_gpe failed\n"); diff --git a/drivers/i3c/master.c b/drivers/i3c/master.c index 87283e4a4607..959ec5269376 100644 --- a/drivers/i3c/master.c +++ b/drivers/i3c/master.c @@ -22,6 +22,7 @@ static DEFINE_IDR(i3c_bus_idr); static DEFINE_MUTEX(i3c_core_lock); static int __i3c_first_dynamic_bus_num; +static BLOCKING_NOTIFIER_HEAD(i3c_bus_notifier); /** * i3c_bus_maintenance_lock - Lock the bus for a maintenance operation @@ -453,6 +454,36 @@ static int i3c_bus_init(struct i3c_bus *i3cbus, struct device_node *np) return 0; } +void i3c_for_each_bus_locked(int (*fn)(struct i3c_bus *bus, void *data), + void *data) +{ + struct i3c_bus *bus; + int id; + + mutex_lock(&i3c_core_lock); + idr_for_each_entry(&i3c_bus_idr, bus, id) + fn(bus, data); + mutex_unlock(&i3c_core_lock); +} +EXPORT_SYMBOL_GPL(i3c_for_each_bus_locked); + +int i3c_register_notifier(struct notifier_block *nb) +{ + return blocking_notifier_chain_register(&i3c_bus_notifier, nb); +} +EXPORT_SYMBOL_GPL(i3c_register_notifier); + +int i3c_unregister_notifier(struct notifier_block *nb) +{ + return blocking_notifier_chain_unregister(&i3c_bus_notifier, nb); +} +EXPORT_SYMBOL_GPL(i3c_unregister_notifier); + +static void i3c_bus_notify(struct i3c_bus *bus, unsigned int action) +{ + blocking_notifier_call_chain(&i3c_bus_notifier, action, bus); +} + static const char * const i3c_bus_mode_strings[] = { [I3C_BUS_MODE_PURE] = "pure", [I3C_BUS_MODE_MIXED_FAST] = "mixed-fast", @@ -2682,6 +2713,8 @@ int i3c_master_register(struct i3c_master_controller *master, if (ret) goto err_del_dev; + i3c_bus_notify(i3cbus, I3C_NOTIFY_BUS_ADD); + /* * We're done initializing the bus and the controller, we can now * register I3C devices discovered during the initial DAA. @@ -2714,6 +2747,8 @@ EXPORT_SYMBOL_GPL(i3c_master_register); */ void i3c_master_unregister(struct i3c_master_controller *master) { + i3c_bus_notify(&master->bus, I3C_NOTIFY_BUS_REMOVE); + i3c_master_i2c_adapter_cleanup(master); i3c_master_unregister_i3c_devs(master); i3c_master_bus_cleanup(master); diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index c343edf2f664..1e2cd7c8716e 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -4968,7 +4968,7 @@ static int cma_iboe_join_multicast(struct rdma_id_private *id_priv, int err = 0; struct sockaddr *addr = (struct sockaddr *)&mc->addr; struct net_device *ndev = NULL; - struct ib_sa_multicast ib; + struct ib_sa_multicast ib = {}; enum ib_gid_type gid_type; bool send_only; diff --git a/drivers/infiniband/core/cma_configfs.c b/drivers/infiniband/core/cma_configfs.c index 7b68b3ea979f..f2fb2d8a6597 100644 --- a/drivers/infiniband/core/cma_configfs.c +++ b/drivers/infiniband/core/cma_configfs.c @@ -217,7 +217,7 @@ static int make_cma_ports(struct cma_dev_group *cma_dev_group, return -ENOMEM; for (i = 0; i < ports_num; i++) { - char port_str[10]; + char port_str[11]; ports[i].port_num = i + 1; snprintf(port_str, sizeof(port_str), "%u", i + 1); diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c index d5d3e4f0de77..6d1dbc978759 100644 --- a/drivers/infiniband/core/nldev.c +++ b/drivers/infiniband/core/nldev.c @@ -2529,6 +2529,7 @@ static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = { }, [RDMA_NLDEV_CMD_SYS_SET] = { .doit = nldev_set_sys_set_doit, + .flags = RDMA_NL_ADMIN_PERM, }, [RDMA_NLDEV_CMD_STAT_SET] = { .doit = nldev_stat_set_doit, diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index bf800f8cb3e4..495d5a5d0373 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -546,7 +546,7 @@ static ssize_t verify_hdr(struct ib_uverbs_cmd_hdr *hdr, if (hdr->in_words * 4 != count) return -EINVAL; - if (count < method_elm->req_size + sizeof(hdr)) { + if (count < method_elm->req_size + sizeof(*hdr)) { /* * rdma-core v18 and v19 have a bug where they send DESTROY_CQ * with a 16 byte write instead of 24. Old kernels didn't diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index 0848c2c2ffcf..faa88d12ee86 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -910,6 +910,10 @@ int bnxt_re_destroy_qp(struct ib_qp *ib_qp, struct ib_udata *udata) list_del(&qp->list); mutex_unlock(&rdev->qp_lock); atomic_dec(&rdev->stats.res.qp_count); + if (qp->qplib_qp.type == CMDQ_CREATE_QP_TYPE_RC) + atomic_dec(&rdev->stats.res.rc_qp_count); + else if (qp->qplib_qp.type == CMDQ_CREATE_QP_TYPE_UD) + atomic_dec(&rdev->stats.res.ud_qp_count); ib_umem_release(qp->rumem); ib_umem_release(qp->sumem); diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c index c8c4017fe405..e47b4ca64d33 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c @@ -665,7 +665,6 @@ static int bnxt_qplib_process_qp_event(struct bnxt_qplib_rcfw *rcfw, blocked = cookie & RCFW_CMD_IS_BLOCKING; cookie &= RCFW_MAX_COOKIE_VALUE; crsqe = &rcfw->crsqe_tbl[cookie]; - crsqe->is_in_used = false; if (WARN_ONCE(test_bit(FIRMWARE_STALL_DETECTED, &rcfw->cmdq.flags), @@ -681,8 +680,14 @@ static int bnxt_qplib_process_qp_event(struct bnxt_qplib_rcfw *rcfw, atomic_dec(&rcfw->timeout_send); if (crsqe->is_waiter_alive) { - if (crsqe->resp) + if (crsqe->resp) { memcpy(crsqe->resp, qp_event, sizeof(*qp_event)); + /* Insert write memory barrier to ensure that + * response data is copied before clearing the + * flags + */ + smp_wmb(); + } if (!blocked) wait_cmds++; } @@ -694,6 +699,8 @@ static int bnxt_qplib_process_qp_event(struct bnxt_qplib_rcfw *rcfw, if (!is_waiter_alive) crsqe->resp = NULL; + crsqe->is_in_used = false; + hwq->cons += req_size; /* This is a case to handle below scenario - diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index ced615b5ea09..040ba2224f9f 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -1965,6 +1965,9 @@ static int send_fw_act_open_req(struct c4iw_ep *ep, unsigned int atid) int win; skb = get_skb(NULL, sizeof(*req), GFP_KERNEL); + if (!skb) + return -ENOMEM; + req = __skb_put_zero(skb, sizeof(*req)); req->op_compl = htonl(WR_OP_V(FW_OFLD_CONNECTION_WR)); req->len16_pkd = htonl(FW_WR_LEN16_V(DIV_ROUND_UP(sizeof(*req), 16))); diff --git a/drivers/infiniband/hw/erdma/erdma_verbs.c b/drivers/infiniband/hw/erdma/erdma_verbs.c index dcccb6015232..c317947563fb 100644 --- a/drivers/infiniband/hw/erdma/erdma_verbs.c +++ b/drivers/infiniband/hw/erdma/erdma_verbs.c @@ -133,8 +133,8 @@ static int create_qp_cmd(struct erdma_ucontext *uctx, struct erdma_qp *qp) static int regmr_cmd(struct erdma_dev *dev, struct erdma_mr *mr) { struct erdma_pd *pd = to_epd(mr->ibmr.pd); + u32 mtt_level = ERDMA_MR_MTT_0LEVEL; struct erdma_cmdq_reg_mr_req req; - u32 mtt_level; erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA, CMDQ_OPCODE_REG_MR); @@ -147,10 +147,9 @@ static int regmr_cmd(struct erdma_dev *dev, struct erdma_mr *mr) req.phy_addr[0] = sg_dma_address(mr->mem.mtt->sglist); mtt_level = mr->mem.mtt->level; } - } else { + } else if (mr->type != ERDMA_MR_TYPE_DMA) { memcpy(req.phy_addr, mr->mem.mtt->buf, MTT_SIZE(mr->mem.page_cnt)); - mtt_level = ERDMA_MR_MTT_0LEVEL; } req.cfg0 = FIELD_PREP(ERDMA_CMD_MR_VALID_MASK, mr->valid) | @@ -655,7 +654,7 @@ static struct erdma_mtt *erdma_create_scatter_mtt(struct erdma_dev *dev, mtt = kzalloc(sizeof(*mtt), GFP_KERNEL); if (!mtt) - return NULL; + return ERR_PTR(-ENOMEM); mtt->size = ALIGN(size, PAGE_SIZE); mtt->buf = vzalloc(mtt->size); diff --git a/drivers/infiniband/hw/mlx4/sysfs.c b/drivers/infiniband/hw/mlx4/sysfs.c index 24ee79aa2122..88f534cf690e 100644 --- a/drivers/infiniband/hw/mlx4/sysfs.c +++ b/drivers/infiniband/hw/mlx4/sysfs.c @@ -223,7 +223,7 @@ void del_sysfs_port_mcg_attr(struct mlx4_ib_dev *device, int port_num, static int add_port_entries(struct mlx4_ib_dev *device, int port_num) { int i; - char buff[11]; + char buff[12]; struct mlx4_ib_iov_port *port = NULL; int ret = 0 ; struct ib_port_attr attr; diff --git a/drivers/infiniband/hw/mlx5/fs.c b/drivers/infiniband/hw/mlx5/fs.c index 1e419e080b53..520034acf73a 100644 --- a/drivers/infiniband/hw/mlx5/fs.c +++ b/drivers/infiniband/hw/mlx5/fs.c @@ -2470,8 +2470,8 @@ destroy_res: mlx5_steering_anchor_destroy_res(ft_prio); put_flow_table: put_flow_table(dev, ft_prio, true); - mutex_unlock(&dev->flow_db->lock); free_obj: + mutex_unlock(&dev->flow_db->lock); kfree(obj); return err; diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index aed5cdea50e6..e0e0451777ee 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -24,6 +24,7 @@ #include <linux/mlx5/vport.h> #include <linux/mlx5/fs.h> #include <linux/mlx5/eswitch.h> +#include <linux/mlx5/driver.h> #include <linux/list.h> #include <rdma/ib_smi.h> #include <rdma/ib_umem_odp.h> @@ -2084,7 +2085,7 @@ static inline char *mmap_cmd2str(enum mlx5_ib_mmap_cmd cmd) case MLX5_IB_MMAP_DEVICE_MEM: return "Device Memory"; default: - return NULL; + return "Unknown"; } } @@ -3175,6 +3176,13 @@ static void mlx5_ib_unbind_slave_port(struct mlx5_ib_dev *ibdev, lockdep_assert_held(&mlx5_ib_multiport_mutex); + mlx5_core_mp_event_replay(ibdev->mdev, + MLX5_DRIVER_EVENT_AFFILIATION_REMOVED, + NULL); + mlx5_core_mp_event_replay(mpi->mdev, + MLX5_DRIVER_EVENT_AFFILIATION_REMOVED, + NULL); + mlx5_ib_cleanup_cong_debugfs(ibdev, port_num); spin_lock(&port->mp.mpi_lock); @@ -3226,6 +3234,7 @@ static bool mlx5_ib_bind_slave_port(struct mlx5_ib_dev *ibdev, struct mlx5_ib_multiport_info *mpi) { u32 port_num = mlx5_core_native_port_num(mpi->mdev) - 1; + u64 key; int err; lockdep_assert_held(&mlx5_ib_multiport_mutex); @@ -3254,6 +3263,14 @@ static bool mlx5_ib_bind_slave_port(struct mlx5_ib_dev *ibdev, mlx5_ib_init_cong_debugfs(ibdev, port_num); + key = ibdev->ib_dev.index; + mlx5_core_mp_event_replay(mpi->mdev, + MLX5_DRIVER_EVENT_AFFILIATION_DONE, + &key); + mlx5_core_mp_event_replay(ibdev->mdev, + MLX5_DRIVER_EVENT_AFFILIATION_DONE, + &key); + return true; unbind: diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 3e345ef380f1..8a3762d9ff58 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -301,7 +301,8 @@ static int get_mkc_octo_size(unsigned int access_mode, unsigned int ndescs) static void set_cache_mkc(struct mlx5_cache_ent *ent, void *mkc) { - set_mkc_access_pd_addr_fields(mkc, 0, 0, ent->dev->umrc.pd); + set_mkc_access_pd_addr_fields(mkc, ent->rb_key.access_flags, 0, + ent->dev->umrc.pd); MLX5_SET(mkc, mkc, free, 1); MLX5_SET(mkc, mkc, umr_en, 1); MLX5_SET(mkc, mkc, access_mode_1_0, ent->rb_key.access_mode & 0x3); @@ -1024,19 +1025,26 @@ void mlx5_mkey_cache_cleanup(struct mlx5_ib_dev *dev) if (!dev->cache.wq) return; - cancel_delayed_work_sync(&dev->cache.remove_ent_dwork); mutex_lock(&dev->cache.rb_lock); for (node = rb_first(root); node; node = rb_next(node)) { ent = rb_entry(node, struct mlx5_cache_ent, node); xa_lock_irq(&ent->mkeys); ent->disabled = true; xa_unlock_irq(&ent->mkeys); - cancel_delayed_work_sync(&ent->dwork); } + mutex_unlock(&dev->cache.rb_lock); + + /* + * After all entries are disabled and will not reschedule on WQ, + * flush it and all async commands. + */ + flush_workqueue(dev->cache.wq); mlx5_mkey_cache_debugfs_cleanup(dev); mlx5_cmd_cleanup_async_ctx(&dev->async_ctx); + /* At this point all entries are disabled and have no concurrent work. */ + mutex_lock(&dev->cache.rb_lock); node = rb_first(root); while (node) { ent = rb_entry(node, struct mlx5_cache_ent, node); diff --git a/drivers/infiniband/sw/siw/siw_cm.c b/drivers/infiniband/sw/siw/siw_cm.c index a2605178f4ed..43e776073f49 100644 --- a/drivers/infiniband/sw/siw/siw_cm.c +++ b/drivers/infiniband/sw/siw/siw_cm.c @@ -976,6 +976,7 @@ static void siw_accept_newconn(struct siw_cep *cep) siw_cep_put(cep); new_cep->listen_cep = NULL; if (rv) { + siw_cancel_mpatimer(new_cep); siw_cep_set_free(new_cep); goto error; } @@ -1100,9 +1101,12 @@ static void siw_cm_work_handler(struct work_struct *w) /* * Socket close before MPA request received. */ - siw_dbg_cep(cep, "no mpareq: drop listener\n"); - siw_cep_put(cep->listen_cep); - cep->listen_cep = NULL; + if (cep->listen_cep) { + siw_dbg_cep(cep, + "no mpareq: drop listener\n"); + siw_cep_put(cep->listen_cep); + cep->listen_cep = NULL; + } } } release_cep = 1; @@ -1227,7 +1231,11 @@ static void siw_cm_llp_data_ready(struct sock *sk) if (!cep) goto out; - siw_dbg_cep(cep, "state: %d\n", cep->state); + siw_dbg_cep(cep, "cep state: %d, socket state %d\n", + cep->state, sk->sk_state); + + if (sk->sk_state != TCP_ESTABLISHED) + goto out; switch (cep->state) { case SIW_EPSTATE_RDMA_MODE: diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c index ed25061fac62..7f84d9866cef 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -488,7 +488,7 @@ poll_more: if (unlikely(ib_req_notify_cq(priv->recv_cq, IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS)) && - napi_reschedule(napi)) + napi_schedule(napi)) goto poll_more; } @@ -518,7 +518,7 @@ poll_more: napi_complete(napi); if (unlikely(ib_req_notify_cq(priv->send_cq, IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS)) && - napi_reschedule(napi)) + napi_schedule(napi)) goto poll_more; } return n < 0 ? 0 : n; diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 1574218764e0..2916e77f589b 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -2784,7 +2784,6 @@ static int srp_abort(struct scsi_cmnd *scmnd) u32 tag; u16 ch_idx; struct srp_rdma_ch *ch; - int ret; shost_printk(KERN_ERR, target->scsi_host, "SRP abort called\n"); @@ -2798,19 +2797,14 @@ static int srp_abort(struct scsi_cmnd *scmnd) shost_printk(KERN_ERR, target->scsi_host, "Sending SRP abort for tag %#x\n", tag); if (srp_send_tsk_mgmt(ch, tag, scmnd->device->lun, - SRP_TSK_ABORT_TASK, NULL) == 0) - ret = SUCCESS; - else if (target->rport->state == SRP_RPORT_LOST) - ret = FAST_IO_FAIL; - else - ret = FAILED; - if (ret == SUCCESS) { + SRP_TSK_ABORT_TASK, NULL) == 0) { srp_free_req(ch, req, scmnd, 0); - scmnd->result = DID_ABORT << 16; - scsi_done(scmnd); + return SUCCESS; } + if (target->rport->state == SRP_RPORT_LOST) + return FAST_IO_FAIL; - return ret; + return FAILED; } static int srp_reset_device(struct scsi_cmnd *scmnd) diff --git a/drivers/irqchip/irq-gic-common.h b/drivers/irqchip/irq-gic-common.h index 3db4592cda1c..f407cce9ecaa 100644 --- a/drivers/irqchip/irq-gic-common.h +++ b/drivers/irqchip/irq-gic-common.h @@ -29,4 +29,8 @@ void gic_enable_quirks(u32 iidr, const struct gic_quirk *quirks, void gic_enable_of_quirks(const struct device_node *np, const struct gic_quirk *quirks, void *data); +#define RDIST_FLAGS_PROPBASE_NEEDS_FLUSHING (1 << 0) +#define RDIST_FLAGS_RD_TABLES_PREALLOCATED (1 << 1) +#define RDIST_FLAGS_FORCE_NON_SHAREABLE (1 << 2) + #endif /* _IRQ_GIC_COMMON_H */ diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index e0c2b10d154d..75a2dd550625 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -44,10 +44,6 @@ #define ITS_FLAGS_WORKAROUND_CAVIUM_23144 (1ULL << 2) #define ITS_FLAGS_FORCE_NON_SHAREABLE (1ULL << 3) -#define RDIST_FLAGS_PROPBASE_NEEDS_FLUSHING (1 << 0) -#define RDIST_FLAGS_RD_TABLES_PREALLOCATED (1 << 1) -#define RDIST_FLAGS_FORCE_NON_SHAREABLE (1 << 2) - #define RD_LOCAL_LPI_ENABLED BIT(0) #define RD_LOCAL_PENDTABLE_PREALLOCATED BIT(1) #define RD_LOCAL_MEMRESERVE_DONE BIT(2) @@ -4754,6 +4750,14 @@ static bool __maybe_unused its_enable_rk3588001(void *data) return true; } +static bool its_set_non_coherent(void *data) +{ + struct its_node *its = data; + + its->flags |= ITS_FLAGS_FORCE_NON_SHAREABLE; + return true; +} + static const struct gic_quirk its_quirks[] = { #ifdef CONFIG_CAVIUM_ERRATUM_22375 { @@ -4809,6 +4813,11 @@ static const struct gic_quirk its_quirks[] = { }, #endif { + .desc = "ITS: non-coherent attribute", + .property = "dma-noncoherent", + .init = its_set_non_coherent, + }, + { } }; @@ -4817,6 +4826,10 @@ static void its_enable_quirks(struct its_node *its) u32 iidr = readl_relaxed(its->base + GITS_IIDR); gic_enable_quirks(iidr, its_quirks, its); + + if (is_of_node(its->fwnode_handle)) + gic_enable_of_quirks(to_of_node(its->fwnode_handle), + its_quirks, its); } static int its_save_disable(void) @@ -4952,7 +4965,7 @@ out_unmap: return NULL; } -static int its_init_domain(struct fwnode_handle *handle, struct its_node *its) +static int its_init_domain(struct its_node *its) { struct irq_domain *inner_domain; struct msi_domain_info *info; @@ -4966,7 +4979,7 @@ static int its_init_domain(struct fwnode_handle *handle, struct its_node *its) inner_domain = irq_domain_create_hierarchy(its_parent, its->msi_domain_flags, 0, - handle, &its_domain_ops, + its->fwnode_handle, &its_domain_ops, info); if (!inner_domain) { kfree(info); @@ -5017,8 +5030,7 @@ static int its_init_vpe_domain(void) return 0; } -static int __init its_compute_its_list_map(struct resource *res, - void __iomem *its_base) +static int __init its_compute_its_list_map(struct its_node *its) { int its_number; u32 ctlr; @@ -5032,15 +5044,15 @@ static int __init its_compute_its_list_map(struct resource *res, its_number = find_first_zero_bit(&its_list_map, GICv4_ITS_LIST_MAX); if (its_number >= GICv4_ITS_LIST_MAX) { pr_err("ITS@%pa: No ITSList entry available!\n", - &res->start); + &its->phys_base); return -EINVAL; } - ctlr = readl_relaxed(its_base + GITS_CTLR); + ctlr = readl_relaxed(its->base + GITS_CTLR); ctlr &= ~GITS_CTLR_ITS_NUMBER; ctlr |= its_number << GITS_CTLR_ITS_NUMBER_SHIFT; - writel_relaxed(ctlr, its_base + GITS_CTLR); - ctlr = readl_relaxed(its_base + GITS_CTLR); + writel_relaxed(ctlr, its->base + GITS_CTLR); + ctlr = readl_relaxed(its->base + GITS_CTLR); if ((ctlr & GITS_CTLR_ITS_NUMBER) != (its_number << GITS_CTLR_ITS_NUMBER_SHIFT)) { its_number = ctlr & GITS_CTLR_ITS_NUMBER; its_number >>= GITS_CTLR_ITS_NUMBER_SHIFT; @@ -5048,75 +5060,50 @@ static int __init its_compute_its_list_map(struct resource *res, if (test_and_set_bit(its_number, &its_list_map)) { pr_err("ITS@%pa: Duplicate ITSList entry %d\n", - &res->start, its_number); + &its->phys_base, its_number); return -EINVAL; } return its_number; } -static int __init its_probe_one(struct resource *res, - struct fwnode_handle *handle, int numa_node) +static int __init its_probe_one(struct its_node *its) { - struct its_node *its; - void __iomem *its_base; - u64 baser, tmp, typer; + u64 baser, tmp; struct page *page; u32 ctlr; int err; - its_base = its_map_one(res, &err); - if (!its_base) - return err; - - pr_info("ITS %pR\n", res); - - its = kzalloc(sizeof(*its), GFP_KERNEL); - if (!its) { - err = -ENOMEM; - goto out_unmap; - } - - raw_spin_lock_init(&its->lock); - mutex_init(&its->dev_alloc_lock); - INIT_LIST_HEAD(&its->entry); - INIT_LIST_HEAD(&its->its_device_list); - typer = gic_read_typer(its_base + GITS_TYPER); - its->typer = typer; - its->base = its_base; - its->phys_base = res->start; if (is_v4(its)) { - if (!(typer & GITS_TYPER_VMOVP)) { - err = its_compute_its_list_map(res, its_base); + if (!(its->typer & GITS_TYPER_VMOVP)) { + err = its_compute_its_list_map(its); if (err < 0) - goto out_free_its; + goto out; its->list_nr = err; pr_info("ITS@%pa: Using ITS number %d\n", - &res->start, err); + &its->phys_base, err); } else { - pr_info("ITS@%pa: Single VMOVP capable\n", &res->start); + pr_info("ITS@%pa: Single VMOVP capable\n", &its->phys_base); } if (is_v4_1(its)) { - u32 svpet = FIELD_GET(GITS_TYPER_SVPET, typer); + u32 svpet = FIELD_GET(GITS_TYPER_SVPET, its->typer); - its->sgir_base = ioremap(res->start + SZ_128K, SZ_64K); + its->sgir_base = ioremap(its->phys_base + SZ_128K, SZ_64K); if (!its->sgir_base) { err = -ENOMEM; - goto out_free_its; + goto out; } - its->mpidr = readl_relaxed(its_base + GITS_MPIDR); + its->mpidr = readl_relaxed(its->base + GITS_MPIDR); pr_info("ITS@%pa: Using GICv4.1 mode %08x %08x\n", - &res->start, its->mpidr, svpet); + &its->phys_base, its->mpidr, svpet); } } - its->numa_node = numa_node; - page = alloc_pages_node(its->numa_node, GFP_KERNEL | __GFP_ZERO, get_order(ITS_CMD_QUEUE_SZ)); if (!page) { @@ -5125,12 +5112,9 @@ static int __init its_probe_one(struct resource *res, } its->cmd_base = (void *)page_address(page); its->cmd_write = its->cmd_base; - its->fwnode_handle = handle; its->get_msi_base = its_irq_get_msi_base; its->msi_domain_flags = IRQ_DOMAIN_FLAG_ISOLATED_MSI; - its_enable_quirks(its); - err = its_alloc_tables(its); if (err) goto out_free_cmd; @@ -5174,7 +5158,7 @@ static int __init its_probe_one(struct resource *res, ctlr |= GITS_CTLR_ImDe; writel_relaxed(ctlr, its->base + GITS_CTLR); - err = its_init_domain(handle, its); + err = its_init_domain(its); if (err) goto out_free_tables; @@ -5191,11 +5175,8 @@ out_free_cmd: out_unmap_sgir: if (its->sgir_base) iounmap(its->sgir_base); -out_free_its: - kfree(its); -out_unmap: - iounmap(its_base); - pr_err("ITS@%pa: failed probing (%d)\n", &res->start, err); +out: + pr_err("ITS@%pa: failed probing (%d)\n", &its->phys_base, err); return err; } @@ -5356,10 +5337,53 @@ static const struct of_device_id its_device_id[] = { {}, }; +static struct its_node __init *its_node_init(struct resource *res, + struct fwnode_handle *handle, int numa_node) +{ + void __iomem *its_base; + struct its_node *its; + int err; + + its_base = its_map_one(res, &err); + if (!its_base) + return NULL; + + pr_info("ITS %pR\n", res); + + its = kzalloc(sizeof(*its), GFP_KERNEL); + if (!its) + goto out_unmap; + + raw_spin_lock_init(&its->lock); + mutex_init(&its->dev_alloc_lock); + INIT_LIST_HEAD(&its->entry); + INIT_LIST_HEAD(&its->its_device_list); + + its->typer = gic_read_typer(its_base + GITS_TYPER); + its->base = its_base; + its->phys_base = res->start; + + its->numa_node = numa_node; + its->fwnode_handle = handle; + + return its; + +out_unmap: + iounmap(its_base); + return NULL; +} + +static void its_node_destroy(struct its_node *its) +{ + iounmap(its->base); + kfree(its); +} + static int __init its_of_probe(struct device_node *node) { struct device_node *np; struct resource res; + int err; /* * Make sure *all* the ITS are reset before we probe any, as @@ -5369,8 +5393,6 @@ static int __init its_of_probe(struct device_node *node) */ for (np = of_find_matching_node(node, its_device_id); np; np = of_find_matching_node(np, its_device_id)) { - int err; - if (!of_device_is_available(np) || !of_property_read_bool(np, "msi-controller") || of_address_to_resource(np, 0, &res)) @@ -5383,6 +5405,8 @@ static int __init its_of_probe(struct device_node *node) for (np = of_find_matching_node(node, its_device_id); np; np = of_find_matching_node(np, its_device_id)) { + struct its_node *its; + if (!of_device_is_available(np)) continue; if (!of_property_read_bool(np, "msi-controller")) { @@ -5396,7 +5420,17 @@ static int __init its_of_probe(struct device_node *node) continue; } - its_probe_one(&res, &np->fwnode, of_node_to_nid(np)); + + its = its_node_init(&res, &np->fwnode, of_node_to_nid(np)); + if (!its) + return -ENOMEM; + + its_enable_quirks(its); + err = its_probe_one(its); + if (err) { + its_node_destroy(its); + return err; + } } return 0; } @@ -5508,6 +5542,7 @@ static int __init gic_acpi_parse_madt_its(union acpi_subtable_headers *header, { struct acpi_madt_generic_translator *its_entry; struct fwnode_handle *dom_handle; + struct its_node *its; struct resource res; int err; @@ -5532,11 +5567,18 @@ static int __init gic_acpi_parse_madt_its(union acpi_subtable_headers *header, goto dom_err; } - err = its_probe_one(&res, dom_handle, - acpi_get_its_numa_node(its_entry->translation_id)); + its = its_node_init(&res, dom_handle, + acpi_get_its_numa_node(its_entry->translation_id)); + if (!its) { + err = -ENOMEM; + goto node_err; + } + + err = its_probe_one(its); if (!err) return 0; +node_err: iort_deregister_domain_token(its_entry->translation_id); dom_err: irq_domain_free_fwnode(dom_handle); diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c index eedfa8e9f077..f59ac9586b7b 100644 --- a/drivers/irqchip/irq-gic-v3.c +++ b/drivers/irqchip/irq-gic-v3.c @@ -1857,6 +1857,14 @@ static bool gic_enable_quirk_arm64_2941627(void *data) return true; } +static bool rd_set_non_coherent(void *data) +{ + struct gic_chip_data *d = data; + + d->rdists.flags |= RDIST_FLAGS_FORCE_NON_SHAREABLE; + return true; +} + static const struct gic_quirk gic_quirks[] = { { .desc = "GICv3: Qualcomm MSM8996 broken firmware", @@ -1924,6 +1932,11 @@ static const struct gic_quirk gic_quirks[] = { .init = gic_enable_quirk_arm64_2941627, }, { + .desc = "GICv3: non-coherent attribute", + .property = "dma-noncoherent", + .init = rd_set_non_coherent, + }, + { } }; diff --git a/drivers/irqchip/irq-renesas-rzg2l.c b/drivers/irqchip/irq-renesas-rzg2l.c index 4bbfa2b0a4df..96f4e322ed6b 100644 --- a/drivers/irqchip/irq-renesas-rzg2l.c +++ b/drivers/irqchip/irq-renesas-rzg2l.c @@ -118,7 +118,7 @@ static void rzg2l_irqc_irq_disable(struct irq_data *d) raw_spin_lock(&priv->lock); reg = readl_relaxed(priv->base + TSSR(tssr_index)); - reg &= ~(TSSEL_MASK << tssr_offset); + reg &= ~(TSSEL_MASK << TSSEL_SHIFT(tssr_offset)); writel_relaxed(reg, priv->base + TSSR(tssr_index)); raw_spin_unlock(&priv->lock); } @@ -130,8 +130,8 @@ static void rzg2l_irqc_irq_enable(struct irq_data *d) unsigned int hw_irq = irqd_to_hwirq(d); if (hw_irq >= IRQC_TINT_START && hw_irq < IRQC_NUM_IRQ) { + unsigned long tint = (uintptr_t)irq_data_get_irq_chip_data(d); struct rzg2l_irqc_priv *priv = irq_data_to_priv(d); - unsigned long tint = (uintptr_t)d->chip_data; u32 offset = hw_irq - IRQC_TINT_START; u32 tssr_offset = TSSR_OFFSET(offset); u8 tssr_index = TSSR_INDEX(offset); diff --git a/drivers/irqchip/irq-riscv-intc.c b/drivers/irqchip/irq-riscv-intc.c index 4adeee1bc391..e8d01b14ccdd 100644 --- a/drivers/irqchip/irq-riscv-intc.c +++ b/drivers/irqchip/irq-riscv-intc.c @@ -155,8 +155,16 @@ static int __init riscv_intc_init(struct device_node *node, * for each INTC DT node. We only need to do INTC initialization * for the INTC DT node belonging to boot CPU (or boot HART). */ - if (riscv_hartid_to_cpuid(hartid) != smp_processor_id()) + if (riscv_hartid_to_cpuid(hartid) != smp_processor_id()) { + /* + * The INTC nodes of each CPU are suppliers for downstream + * interrupt controllers (such as PLIC, IMSIC and APLIC + * direct-mode) so we should mark an INTC node as initialized + * if we are not creating IRQ domain for it. + */ + fwnode_dev_initialized(of_fwnode_handle(node), true); return 0; + } return riscv_intc_init_common(of_node_to_fwnode(node)); } diff --git a/drivers/irqchip/irq-stm32-exti.c b/drivers/irqchip/irq-stm32-exti.c index d8ba5fba7450..971240e2e31b 100644 --- a/drivers/irqchip/irq-stm32-exti.c +++ b/drivers/irqchip/irq-stm32-exti.c @@ -460,6 +460,7 @@ static const struct irq_domain_ops irq_exti_domain_ops = { .map = irq_map_generic_chip, .alloc = stm32_exti_alloc, .free = stm32_exti_free, + .xlate = irq_domain_xlate_twocell, }; static void stm32_irq_ack(struct irq_data *d) diff --git a/drivers/irqchip/qcom-pdc.c b/drivers/irqchip/qcom-pdc.c index a32c0d28d038..74b2f124116e 100644 --- a/drivers/irqchip/qcom-pdc.c +++ b/drivers/irqchip/qcom-pdc.c @@ -22,9 +22,20 @@ #define PDC_MAX_GPIO_IRQS 256 +/* Valid only on HW version < 3.2 */ #define IRQ_ENABLE_BANK 0x10 #define IRQ_i_CFG 0x110 +/* Valid only on HW version >= 3.2 */ +#define IRQ_i_CFG_IRQ_ENABLE 3 + +#define IRQ_i_CFG_TYPE_MASK GENMASK(2, 0) + +#define PDC_VERSION_REG 0x1000 + +/* Notable PDC versions */ +#define PDC_VERSION_3_2 0x30200 + struct pdc_pin_region { u32 pin_base; u32 parent_base; @@ -37,6 +48,7 @@ static DEFINE_RAW_SPINLOCK(pdc_lock); static void __iomem *pdc_base; static struct pdc_pin_region *pdc_region; static int pdc_region_cnt; +static unsigned int pdc_version; static void pdc_reg_write(int reg, u32 i, u32 val) { @@ -48,20 +60,32 @@ static u32 pdc_reg_read(int reg, u32 i) return readl_relaxed(pdc_base + reg + i * sizeof(u32)); } -static void pdc_enable_intr(struct irq_data *d, bool on) +static void __pdc_enable_intr(int pin_out, bool on) { - int pin_out = d->hwirq; unsigned long enable; - unsigned long flags; - u32 index, mask; - index = pin_out / 32; - mask = pin_out % 32; + if (pdc_version < PDC_VERSION_3_2) { + u32 index, mask; + + index = pin_out / 32; + mask = pin_out % 32; + + enable = pdc_reg_read(IRQ_ENABLE_BANK, index); + __assign_bit(mask, &enable, on); + pdc_reg_write(IRQ_ENABLE_BANK, index, enable); + } else { + enable = pdc_reg_read(IRQ_i_CFG, pin_out); + __assign_bit(IRQ_i_CFG_IRQ_ENABLE, &enable, on); + pdc_reg_write(IRQ_i_CFG, pin_out, enable); + } +} + +static void pdc_enable_intr(struct irq_data *d, bool on) +{ + unsigned long flags; raw_spin_lock_irqsave(&pdc_lock, flags); - enable = pdc_reg_read(IRQ_ENABLE_BANK, index); - __assign_bit(mask, &enable, on); - pdc_reg_write(IRQ_ENABLE_BANK, index, enable); + __pdc_enable_intr(d->hwirq, on); raw_spin_unlock_irqrestore(&pdc_lock, flags); } @@ -142,6 +166,7 @@ static int qcom_pdc_gic_set_type(struct irq_data *d, unsigned int type) } old_pdc_type = pdc_reg_read(IRQ_i_CFG, d->hwirq); + pdc_type |= (old_pdc_type & ~IRQ_i_CFG_TYPE_MASK); pdc_reg_write(IRQ_i_CFG, d->hwirq, pdc_type); ret = irq_chip_set_type_parent(d, type); @@ -246,7 +271,6 @@ static const struct irq_domain_ops qcom_pdc_ops = { static int pdc_setup_pin_mapping(struct device_node *np) { int ret, n, i; - u32 irq_index, reg_index, val; n = of_property_count_elems_of_size(np, "qcom,pdc-ranges", sizeof(u32)); if (n <= 0 || n % 3) @@ -276,29 +300,38 @@ static int pdc_setup_pin_mapping(struct device_node *np) if (ret) return ret; - for (i = 0; i < pdc_region[n].cnt; i++) { - reg_index = (i + pdc_region[n].pin_base) >> 5; - irq_index = (i + pdc_region[n].pin_base) & 0x1f; - val = pdc_reg_read(IRQ_ENABLE_BANK, reg_index); - val &= ~BIT(irq_index); - pdc_reg_write(IRQ_ENABLE_BANK, reg_index, val); - } + for (i = 0; i < pdc_region[n].cnt; i++) + __pdc_enable_intr(i + pdc_region[n].pin_base, 0); } return 0; } +#define QCOM_PDC_SIZE 0x30000 + static int qcom_pdc_init(struct device_node *node, struct device_node *parent) { struct irq_domain *parent_domain, *pdc_domain; + resource_size_t res_size; + struct resource res; int ret; - pdc_base = of_iomap(node, 0); + /* compat with old sm8150 DT which had very small region for PDC */ + if (of_address_to_resource(node, 0, &res)) + return -EINVAL; + + res_size = max_t(resource_size_t, resource_size(&res), QCOM_PDC_SIZE); + if (res_size > resource_size(&res)) + pr_warn("%pOF: invalid reg size, please fix DT\n", node); + + pdc_base = ioremap(res.start, res_size); if (!pdc_base) { pr_err("%pOF: unable to map PDC registers\n", node); return -ENXIO; } + pdc_version = pdc_reg_read(PDC_VERSION_REG, 0); + parent_domain = irq_find_host(parent); if (!parent_domain) { pr_err("%pOF: unable to find PDC's parent domain\n", node); diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index f2662c21a6df..5315fd261c23 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -753,7 +753,8 @@ static int crypt_iv_eboiv_gen(struct crypt_config *cc, u8 *iv, int err; u8 *buf; - reqsize = ALIGN(crypto_skcipher_reqsize(tfm), __alignof__(__le64)); + reqsize = sizeof(*req) + crypto_skcipher_reqsize(tfm); + reqsize = ALIGN(reqsize, __alignof__(__le64)); req = kmalloc(reqsize + cc->iv_size, GFP_NOIO); if (!req) diff --git a/drivers/md/dm-zoned-target.c b/drivers/md/dm-zoned-target.c index ad8e670a2f9b..b487f7acc860 100644 --- a/drivers/md/dm-zoned-target.c +++ b/drivers/md/dm-zoned-target.c @@ -748,17 +748,16 @@ err: /* * Cleanup zoned device information. */ -static void dmz_put_zoned_device(struct dm_target *ti) +static void dmz_put_zoned_devices(struct dm_target *ti) { struct dmz_target *dmz = ti->private; int i; - for (i = 0; i < dmz->nr_ddevs; i++) { - if (dmz->ddev[i]) { + for (i = 0; i < dmz->nr_ddevs; i++) + if (dmz->ddev[i]) dm_put_device(ti, dmz->ddev[i]); - dmz->ddev[i] = NULL; - } - } + + kfree(dmz->ddev); } static int dmz_fixup_devices(struct dm_target *ti) @@ -948,7 +947,7 @@ err_bio: err_meta: dmz_dtr_metadata(dmz->metadata); err_dev: - dmz_put_zoned_device(ti); + dmz_put_zoned_devices(ti); err: kfree(dmz->dev); kfree(dmz); @@ -978,7 +977,7 @@ static void dmz_dtr(struct dm_target *ti) bioset_exit(&dmz->bio_set); - dmz_put_zoned_device(ti); + dmz_put_zoned_devices(ti); mutex_destroy(&dmz->chunk_lock); diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 4cb9c608ee19..284cd71bcc68 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -854,6 +854,13 @@ struct stripe_head *raid5_get_active_stripe(struct r5conf *conf, set_bit(R5_INACTIVE_BLOCKED, &conf->cache_state); r5l_wake_reclaim(conf->log, 0); + + /* release batch_last before wait to avoid risk of deadlock */ + if (ctx && ctx->batch_last) { + raid5_release_stripe(ctx->batch_last); + ctx->batch_last = NULL; + } + wait_event_lock_irq(conf->wait_for_stripe, is_inactive_blocked(conf, hash), *(conf->hash_locks + hash)); diff --git a/drivers/media/pci/intel/Kconfig b/drivers/media/pci/intel/Kconfig index e113902fa806..ee4684159d3d 100644 --- a/drivers/media/pci/intel/Kconfig +++ b/drivers/media/pci/intel/Kconfig @@ -1,11 +1,19 @@ # SPDX-License-Identifier: GPL-2.0-only + +source "drivers/media/pci/intel/ipu3/Kconfig" +source "drivers/media/pci/intel/ivsc/Kconfig" + config IPU_BRIDGE - tristate + tristate "Intel IPU Bridge" depends on I2C && ACPI help - This is a helper module for the IPU bridge, which can be - used by ipu3 and other drivers. In order to handle module - dependencies, this is selected by each driver that needs it. + The IPU bridge is a helper library for Intel IPU drivers to + function on systems shipped with Windows. -source "drivers/media/pci/intel/ipu3/Kconfig" -source "drivers/media/pci/intel/ivsc/Kconfig" + Currently used by the ipu3-cio2 and atomisp drivers. + + Supported systems include: + + - Microsoft Surface models (except Surface Pro 3) + - The Lenovo Miix line (for example the 510, 520, 710 and 720) + - Dell 7285 diff --git a/drivers/media/pci/intel/ipu3/Kconfig b/drivers/media/pci/intel/ipu3/Kconfig index 0951545eab21..c0a250daa927 100644 --- a/drivers/media/pci/intel/ipu3/Kconfig +++ b/drivers/media/pci/intel/ipu3/Kconfig @@ -2,13 +2,13 @@ config VIDEO_IPU3_CIO2 tristate "Intel ipu3-cio2 driver" depends on VIDEO_DEV && PCI + depends on IPU_BRIDGE || !IPU_BRIDGE depends on ACPI || COMPILE_TEST depends on X86 select MEDIA_CONTROLLER select VIDEO_V4L2_SUBDEV_API select V4L2_FWNODE select VIDEOBUF2_DMA_SG - select IPU_BRIDGE if CIO2_BRIDGE help This is the Intel IPU3 CIO2 CSI-2 receiver unit, found in Intel @@ -18,22 +18,3 @@ config VIDEO_IPU3_CIO2 Say Y or M here if you have a Skylake/Kaby Lake SoC with MIPI CSI-2 connected camera. The module will be called ipu3-cio2. - -config CIO2_BRIDGE - bool "IPU3 CIO2 Sensors Bridge" - depends on VIDEO_IPU3_CIO2 && ACPI - depends on I2C - help - This extension provides an API for the ipu3-cio2 driver to create - connections to cameras that are hidden in the SSDB buffer in ACPI. - It can be used to enable support for cameras in detachable / hybrid - devices that ship with Windows. - - Say Y here if your device is a detachable / hybrid laptop that comes - with Windows installed by the OEM, for example: - - - Microsoft Surface models (except Surface Pro 3) - - The Lenovo Miix line (for example the 510, 520, 710 and 720) - - Dell 7285 - - If in doubt, say N here. diff --git a/drivers/media/pci/intel/ivsc/Kconfig b/drivers/media/pci/intel/ivsc/Kconfig index 212753450576..a8cb981544f7 100644 --- a/drivers/media/pci/intel/ivsc/Kconfig +++ b/drivers/media/pci/intel/ivsc/Kconfig @@ -6,7 +6,7 @@ config INTEL_VSC depends on INTEL_MEI && ACPI && VIDEO_DEV select MEDIA_CONTROLLER select VIDEO_V4L2_SUBDEV_API - select V4L2_ASYNC + select V4L2_FWNODE help This adds support for Intel Visual Sensing Controller (IVSC). diff --git a/drivers/media/platform/intel/pxa_camera.c b/drivers/media/platform/intel/pxa_camera.c index 6e6caf50e11e..59b89e421dc2 100644 --- a/drivers/media/platform/intel/pxa_camera.c +++ b/drivers/media/platform/intel/pxa_camera.c @@ -2398,7 +2398,7 @@ static int pxa_camera_probe(struct platform_device *pdev) PXA_CAM_DRV_NAME, pcdev); if (err) { dev_err(&pdev->dev, "Camera interrupt register failed\n"); - goto exit_v4l2_device_unregister; + goto exit_deactivate; } pcdev->notifier.ops = &pxa_camera_sensor_ops; diff --git a/drivers/media/platform/mediatek/vcodec/encoder/venc_vpu_if.c b/drivers/media/platform/mediatek/vcodec/encoder/venc_vpu_if.c index d299cc2962a5..ae6290d28f8e 100644 --- a/drivers/media/platform/mediatek/vcodec/encoder/venc_vpu_if.c +++ b/drivers/media/platform/mediatek/vcodec/encoder/venc_vpu_if.c @@ -138,7 +138,8 @@ int vpu_enc_init(struct venc_vpu_inst *vpu) vpu->ctx->vpu_inst = vpu; status = mtk_vcodec_fw_ipi_register(vpu->ctx->dev->fw_handler, vpu->id, - vpu_enc_ipi_handler, "venc", NULL); + vpu_enc_ipi_handler, "venc", + vpu->ctx->dev); if (status) { mtk_venc_err(vpu->ctx, "vpu_ipi_register fail %d", status); diff --git a/drivers/net/Makefile b/drivers/net/Makefile index e26f98f897c5..8a83db32509d 100644 --- a/drivers/net/Makefile +++ b/drivers/net/Makefile @@ -45,7 +45,6 @@ obj-$(CONFIG_MHI_NET) += mhi_net.o # Networking Drivers # obj-$(CONFIG_ARCNET) += arcnet/ -obj-$(CONFIG_DEV_APPLETALK) += appletalk/ obj-$(CONFIG_CAIF) += caif/ obj-$(CONFIG_CAN) += can/ obj-$(CONFIG_NET_DSA) += dsa/ diff --git a/drivers/net/appletalk/Kconfig b/drivers/net/appletalk/Kconfig deleted file mode 100644 index b94f731e4576..000000000000 --- a/drivers/net/appletalk/Kconfig +++ /dev/null @@ -1,72 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only -# -# Appletalk driver configuration -# -config ATALK - tristate "Appletalk protocol support" - select LLC - help - AppleTalk is the protocol that Apple computers can use to communicate - on a network. If your Linux box is connected to such a network and you - wish to connect to it, say Y. You will need to use the netatalk package - so that your Linux box can act as a print and file server for Macs as - well as access AppleTalk printers. Check out - <http://www.zettabyte.net/netatalk/> on the WWW for details. - EtherTalk is the name used for AppleTalk over Ethernet and the - cheaper and slower LocalTalk is AppleTalk over a proprietary Apple - network using serial links. EtherTalk and LocalTalk are fully - supported by Linux. - - General information about how to connect Linux, Windows machines and - Macs is on the WWW at <http://www.eats.com/linux_mac_win.html>. The - NET3-4-HOWTO, available from - <http://www.tldp.org/docs.html#howto>, contains valuable - information as well. - - To compile this driver as a module, choose M here: the module will be - called appletalk. You almost certainly want to compile it as a - module so you can restart your AppleTalk stack without rebooting - your machine. I hear that the GNU boycott of Apple is over, so - even politically correct people are allowed to say Y here. - -config DEV_APPLETALK - tristate "Appletalk interfaces support" - depends on ATALK - help - AppleTalk is the protocol that Apple computers can use to communicate - on a network. If your Linux box is connected to such a network, and wish - to do IP over it, or you have a LocalTalk card and wish to use it to - connect to the AppleTalk network, say Y. - -config IPDDP - tristate "Appletalk-IP driver support" - depends on DEV_APPLETALK && ATALK - help - This allows IP networking for users who only have AppleTalk - networking available. This feature is experimental. With this - driver, you can encapsulate IP inside AppleTalk (e.g. if your Linux - box is stuck on an AppleTalk only network) or decapsulate (e.g. if - you want your Linux box to act as an Internet gateway for a zoo of - AppleTalk connected Macs). Please see the file - <file:Documentation/networking/ipddp.rst> for more information. - - If you say Y here, the AppleTalk-IP support will be compiled into - the kernel. In this case, you can either use encapsulation or - decapsulation, but not both. With the following two questions, you - decide which one you want. - - To compile the AppleTalk-IP support as a module, choose M here: the - module will be called ipddp. - In this case, you will be able to use both encapsulation and - decapsulation simultaneously, by loading two copies of the module - and specifying different values for the module option ipddp_mode. - -config IPDDP_ENCAP - bool "IP to Appletalk-IP Encapsulation support" - depends on IPDDP - help - If you say Y here, the AppleTalk-IP code will be able to encapsulate - IP packets inside AppleTalk frames; this is useful if your Linux box - is stuck on an AppleTalk network (which hopefully contains a - decapsulator somewhere). Please see - <file:Documentation/networking/ipddp.rst> for more information. diff --git a/drivers/net/appletalk/Makefile b/drivers/net/appletalk/Makefile deleted file mode 100644 index d8c7b23ec7ff..000000000000 --- a/drivers/net/appletalk/Makefile +++ /dev/null @@ -1,6 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only -# -# Makefile for drivers/net/appletalk -# - -obj-$(CONFIG_IPDDP) += ipddp.o diff --git a/drivers/net/appletalk/ipddp.c b/drivers/net/appletalk/ipddp.c deleted file mode 100644 index d558535390f9..000000000000 --- a/drivers/net/appletalk/ipddp.c +++ /dev/null @@ -1,345 +0,0 @@ -/* - * ipddp.c: IP to Appletalk-IP Encapsulation driver for Linux - * Appletalk-IP to IP Decapsulation driver for Linux - * - * Authors: - * - DDP-IP Encap by: Bradford W. Johnson <johns393@maroon.tc.umn.edu> - * - DDP-IP Decap by: Jay Schulist <jschlst@samba.org> - * - * Derived from: - * - Almost all code already existed in net/appletalk/ddp.c I just - * moved/reorginized it into a driver file. Original IP-over-DDP code - * was done by Bradford W. Johnson <johns393@maroon.tc.umn.edu> - * - skeleton.c: A network driver outline for linux. - * Written 1993-94 by Donald Becker. - * - dummy.c: A dummy net driver. By Nick Holloway. - * - MacGate: A user space Daemon for Appletalk-IP Decap for - * Linux by Jay Schulist <jschlst@samba.org> - * - * Copyright 1993 United States Government as represented by the - * Director, National Security Agency. - * - * This software may be used and distributed according to the terms - * of the GNU General Public License, incorporated herein by reference. - */ - -#include <linux/compat.h> -#include <linux/module.h> -#include <linux/kernel.h> -#include <linux/init.h> -#include <linux/netdevice.h> -#include <linux/etherdevice.h> -#include <linux/ip.h> -#include <linux/atalk.h> -#include <linux/if_arp.h> -#include <linux/slab.h> -#include <net/route.h> -#include <linux/uaccess.h> - -#include "ipddp.h" /* Our stuff */ - -static const char version[] = KERN_INFO "ipddp.c:v0.01 8/28/97 Bradford W. Johnson <johns393@maroon.tc.umn.edu>\n"; - -static struct ipddp_route *ipddp_route_list; -static DEFINE_SPINLOCK(ipddp_route_lock); - -#ifdef CONFIG_IPDDP_ENCAP -static int ipddp_mode = IPDDP_ENCAP; -#else -static int ipddp_mode = IPDDP_DECAP; -#endif - -/* Index to functions, as function prototypes. */ -static netdev_tx_t ipddp_xmit(struct sk_buff *skb, - struct net_device *dev); -static int ipddp_create(struct ipddp_route *new_rt); -static int ipddp_delete(struct ipddp_route *rt); -static struct ipddp_route* __ipddp_find_route(struct ipddp_route *rt); -static int ipddp_siocdevprivate(struct net_device *dev, struct ifreq *ifr, - void __user *data, int cmd); - -static const struct net_device_ops ipddp_netdev_ops = { - .ndo_start_xmit = ipddp_xmit, - .ndo_siocdevprivate = ipddp_siocdevprivate, - .ndo_set_mac_address = eth_mac_addr, - .ndo_validate_addr = eth_validate_addr, -}; - -static struct net_device * __init ipddp_init(void) -{ - static unsigned version_printed; - struct net_device *dev; - int err; - - dev = alloc_etherdev(0); - if (!dev) - return ERR_PTR(-ENOMEM); - - netif_keep_dst(dev); - strcpy(dev->name, "ipddp%d"); - - if (version_printed++ == 0) - printk(version); - - /* Initialize the device structure. */ - dev->netdev_ops = &ipddp_netdev_ops; - - dev->type = ARPHRD_IPDDP; /* IP over DDP tunnel */ - dev->mtu = 585; - dev->flags |= IFF_NOARP; - - /* - * The worst case header we will need is currently a - * ethernet header (14 bytes) and a ddp header (sizeof ddpehdr+1) - * We send over SNAP so that takes another 8 bytes. - */ - dev->hard_header_len = 14+8+sizeof(struct ddpehdr)+1; - - err = register_netdev(dev); - if (err) { - free_netdev(dev); - return ERR_PTR(err); - } - - /* Let the user now what mode we are in */ - if(ipddp_mode == IPDDP_ENCAP) - printk("%s: Appletalk-IP Encap. mode by Bradford W. Johnson <johns393@maroon.tc.umn.edu>\n", - dev->name); - if(ipddp_mode == IPDDP_DECAP) - printk("%s: Appletalk-IP Decap. mode by Jay Schulist <jschlst@samba.org>\n", - dev->name); - - return dev; -} - - -/* - * Transmit LLAP/ELAP frame using aarp_send_ddp. - */ -static netdev_tx_t ipddp_xmit(struct sk_buff *skb, struct net_device *dev) -{ - struct rtable *rtable = skb_rtable(skb); - __be32 paddr = 0; - struct ddpehdr *ddp; - struct ipddp_route *rt; - struct atalk_addr *our_addr; - - if (rtable->rt_gw_family == AF_INET) - paddr = rtable->rt_gw4; - - spin_lock(&ipddp_route_lock); - - /* - * Find appropriate route to use, based only on IP number. - */ - for(rt = ipddp_route_list; rt != NULL; rt = rt->next) - { - if(rt->ip == paddr) - break; - } - if(rt == NULL) { - spin_unlock(&ipddp_route_lock); - return NETDEV_TX_OK; - } - - our_addr = atalk_find_dev_addr(rt->dev); - - if(ipddp_mode == IPDDP_DECAP) - /* - * Pull off the excess room that should not be there. - * This is due to a hard-header problem. This is the - * quick fix for now though, till it breaks. - */ - skb_pull(skb, 35-(sizeof(struct ddpehdr)+1)); - - /* Create the Extended DDP header */ - ddp = (struct ddpehdr *)skb->data; - ddp->deh_len_hops = htons(skb->len + (1<<10)); - ddp->deh_sum = 0; - - /* - * For Localtalk we need aarp_send_ddp to strip the - * long DDP header and place a shot DDP header on it. - */ - if(rt->dev->type == ARPHRD_LOCALTLK) - { - ddp->deh_dnet = 0; /* FIXME more hops?? */ - ddp->deh_snet = 0; - } - else - { - ddp->deh_dnet = rt->at.s_net; /* FIXME more hops?? */ - ddp->deh_snet = our_addr->s_net; - } - ddp->deh_dnode = rt->at.s_node; - ddp->deh_snode = our_addr->s_node; - ddp->deh_dport = 72; - ddp->deh_sport = 72; - - *((__u8 *)(ddp+1)) = 22; /* ddp type = IP */ - - skb->protocol = htons(ETH_P_ATALK); /* Protocol has changed */ - - dev->stats.tx_packets++; - dev->stats.tx_bytes += skb->len; - - aarp_send_ddp(rt->dev, skb, &rt->at, NULL); - - spin_unlock(&ipddp_route_lock); - - return NETDEV_TX_OK; -} - -/* - * Create a routing entry. We first verify that the - * record does not already exist. If it does we return -EEXIST - */ -static int ipddp_create(struct ipddp_route *new_rt) -{ - struct ipddp_route *rt = kzalloc(sizeof(*rt), GFP_KERNEL); - - if (rt == NULL) - return -ENOMEM; - - rt->ip = new_rt->ip; - rt->at = new_rt->at; - rt->next = NULL; - if ((rt->dev = atrtr_get_dev(&rt->at)) == NULL) { - kfree(rt); - return -ENETUNREACH; - } - - spin_lock_bh(&ipddp_route_lock); - if (__ipddp_find_route(rt)) { - spin_unlock_bh(&ipddp_route_lock); - kfree(rt); - return -EEXIST; - } - - rt->next = ipddp_route_list; - ipddp_route_list = rt; - - spin_unlock_bh(&ipddp_route_lock); - - return 0; -} - -/* - * Delete a route, we only delete a FULL match. - * If route does not exist we return -ENOENT. - */ -static int ipddp_delete(struct ipddp_route *rt) -{ - struct ipddp_route **r = &ipddp_route_list; - struct ipddp_route *tmp; - - spin_lock_bh(&ipddp_route_lock); - while((tmp = *r) != NULL) - { - if(tmp->ip == rt->ip && - tmp->at.s_net == rt->at.s_net && - tmp->at.s_node == rt->at.s_node) - { - *r = tmp->next; - spin_unlock_bh(&ipddp_route_lock); - kfree(tmp); - return 0; - } - r = &tmp->next; - } - - spin_unlock_bh(&ipddp_route_lock); - return -ENOENT; -} - -/* - * Find a routing entry, we only return a FULL match - */ -static struct ipddp_route* __ipddp_find_route(struct ipddp_route *rt) -{ - struct ipddp_route *f; - - for(f = ipddp_route_list; f != NULL; f = f->next) - { - if(f->ip == rt->ip && - f->at.s_net == rt->at.s_net && - f->at.s_node == rt->at.s_node) - return f; - } - - return NULL; -} - -static int ipddp_siocdevprivate(struct net_device *dev, struct ifreq *ifr, - void __user *data, int cmd) -{ - struct ipddp_route rcp, rcp2, *rp; - - if (in_compat_syscall()) - return -EOPNOTSUPP; - - if(!capable(CAP_NET_ADMIN)) - return -EPERM; - - if (copy_from_user(&rcp, data, sizeof(rcp))) - return -EFAULT; - - switch(cmd) - { - case SIOCADDIPDDPRT: - return ipddp_create(&rcp); - - case SIOCFINDIPDDPRT: - spin_lock_bh(&ipddp_route_lock); - rp = __ipddp_find_route(&rcp); - if (rp) { - memset(&rcp2, 0, sizeof(rcp2)); - rcp2.ip = rp->ip; - rcp2.at = rp->at; - rcp2.flags = rp->flags; - } - spin_unlock_bh(&ipddp_route_lock); - - if (rp) { - if (copy_to_user(data, &rcp2, - sizeof(struct ipddp_route))) - return -EFAULT; - return 0; - } else - return -ENOENT; - - case SIOCDELIPDDPRT: - return ipddp_delete(&rcp); - - default: - return -EINVAL; - } -} - -static struct net_device *dev_ipddp; - -MODULE_LICENSE("GPL"); -module_param(ipddp_mode, int, 0); - -static int __init ipddp_init_module(void) -{ - dev_ipddp = ipddp_init(); - return PTR_ERR_OR_ZERO(dev_ipddp); -} - -static void __exit ipddp_cleanup_module(void) -{ - struct ipddp_route *p; - - unregister_netdev(dev_ipddp); - free_netdev(dev_ipddp); - - while (ipddp_route_list) { - p = ipddp_route_list->next; - kfree(ipddp_route_list); - ipddp_route_list = p; - } -} - -module_init(ipddp_init_module); -module_exit(ipddp_cleanup_module); diff --git a/drivers/net/appletalk/ipddp.h b/drivers/net/appletalk/ipddp.h deleted file mode 100644 index 9a8e45a46925..000000000000 --- a/drivers/net/appletalk/ipddp.h +++ /dev/null @@ -1,28 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * ipddp.h: Header for IP-over-DDP driver for Linux. - */ - -#ifndef __LINUX_IPDDP_H -#define __LINUX_IPDDP_H - -#ifdef __KERNEL__ - -#define SIOCADDIPDDPRT (SIOCDEVPRIVATE) -#define SIOCDELIPDDPRT (SIOCDEVPRIVATE+1) -#define SIOCFINDIPDDPRT (SIOCDEVPRIVATE+2) - -struct ipddp_route -{ - struct net_device *dev; /* Carrier device */ - __be32 ip; /* IP address */ - struct atalk_addr at; /* Gateway appletalk address */ - int flags; - struct ipddp_route *next; -}; - -#define IPDDP_ENCAP 1 -#define IPDDP_DECAP 2 - -#endif /* __KERNEL__ */ -#endif /* __LINUX_IPDDP_H */ diff --git a/drivers/net/bareudp.c b/drivers/net/bareudp.c index 683203f87ae2..47a9c2a5583c 100644 --- a/drivers/net/bareudp.c +++ b/drivers/net/bareudp.c @@ -306,8 +306,10 @@ static int bareudp_xmit_skb(struct sk_buff *skb, struct net_device *dev, if (!sock) return -ESHUTDOWN; - rt = ip_route_output_tunnel(skb, dev, bareudp->net, &saddr, info, - IPPROTO_UDP, use_cache); + rt = udp_tunnel_dst_lookup(skb, dev, bareudp->net, 0, &saddr, &info->key, + 0, 0, key->tos, + use_cache ? + (struct dst_cache *)&info->dst_cache : NULL); if (IS_ERR(rt)) return PTR_ERR(rt); @@ -483,8 +485,9 @@ static int bareudp_fill_metadata_dst(struct net_device *dev, struct rtable *rt; __be32 saddr; - rt = ip_route_output_tunnel(skb, dev, bareudp->net, &saddr, - info, IPPROTO_UDP, use_cache); + rt = udp_tunnel_dst_lookup(skb, dev, bareudp->net, 0, &saddr, + &info->key, 0, 0, info->key.tos, + use_cache ? &info->dst_cache : NULL); if (IS_ERR(rt)) return PTR_ERR(rt); diff --git a/drivers/net/can/Kconfig b/drivers/net/can/Kconfig index 8d6fc0852bf7..eb410714afc2 100644 --- a/drivers/net/can/Kconfig +++ b/drivers/net/can/Kconfig @@ -191,7 +191,7 @@ config CAN_SLCAN config CAN_SUN4I tristate "Allwinner A10 CAN controller" - depends on MACH_SUN4I || MACH_SUN7I || RISCV || COMPILE_TEST + depends on MACH_SUN4I || MACH_SUN7I || (RISCV && ARCH_SUNXI) || COMPILE_TEST help Say Y here if you want to use CAN controller found on Allwinner A10/A20/D1 SoCs. diff --git a/drivers/net/can/dev/rx-offload.c b/drivers/net/can/dev/rx-offload.c index 77091f7d1fa7..46e7b6db4a1e 100644 --- a/drivers/net/can/dev/rx-offload.c +++ b/drivers/net/can/dev/rx-offload.c @@ -67,7 +67,7 @@ static int can_rx_offload_napi_poll(struct napi_struct *napi, int quota) /* Check if there was another interrupt */ if (!skb_queue_empty(&offload->skb_queue)) - napi_reschedule(&offload->napi); + napi_schedule(&offload->napi); } return work_done; diff --git a/drivers/net/can/flexcan/flexcan-core.c b/drivers/net/can/flexcan/flexcan-core.c index add39e922b89..d15f85a40c1e 100644 --- a/drivers/net/can/flexcan/flexcan-core.c +++ b/drivers/net/can/flexcan/flexcan-core.c @@ -348,7 +348,7 @@ static struct flexcan_devtype_data fsl_imx8mp_devtype_data = { static struct flexcan_devtype_data fsl_imx93_devtype_data = { .quirks = FLEXCAN_QUIRK_DISABLE_RXFG | FLEXCAN_QUIRK_ENABLE_EACEN_RRS | FLEXCAN_QUIRK_DISABLE_MECR | FLEXCAN_QUIRK_USE_RX_MAILBOX | - FLEXCAN_QUIRK_BROKEN_PERR_STATE | FLEXCAN_QUIRK_AUTO_STOP_MODE | + FLEXCAN_QUIRK_BROKEN_PERR_STATE | FLEXCAN_QUIRK_SETUP_STOP_MODE_GPR | FLEXCAN_QUIRK_SUPPORT_FD | FLEXCAN_QUIRK_SUPPORT_ECC | FLEXCAN_QUIRK_SUPPORT_RX_MAILBOX | FLEXCAN_QUIRK_SUPPORT_RX_MAILBOX_RTR, @@ -544,11 +544,6 @@ static inline int flexcan_enter_stop_mode(struct flexcan_priv *priv) } else if (priv->devtype_data.quirks & FLEXCAN_QUIRK_SETUP_STOP_MODE_GPR) { regmap_update_bits(priv->stm.gpr, priv->stm.req_gpr, 1 << priv->stm.req_bit, 1 << priv->stm.req_bit); - } else if (priv->devtype_data.quirks & FLEXCAN_QUIRK_AUTO_STOP_MODE) { - /* For the auto stop mode, software do nothing, hardware will cover - * all the operation automatically after system go into low power mode. - */ - return 0; } return flexcan_low_power_enter_ack(priv); @@ -574,12 +569,6 @@ static inline int flexcan_exit_stop_mode(struct flexcan_priv *priv) reg_mcr &= ~FLEXCAN_MCR_SLF_WAK; priv->write(reg_mcr, ®s->mcr); - /* For the auto stop mode, hardware will exist stop mode - * automatically after system go out of low power mode. - */ - if (priv->devtype_data.quirks & FLEXCAN_QUIRK_AUTO_STOP_MODE) - return 0; - return flexcan_low_power_exit_ack(priv); } @@ -1994,13 +1983,18 @@ static int flexcan_setup_stop_mode(struct platform_device *pdev) ret = flexcan_setup_stop_mode_scfw(pdev); else if (priv->devtype_data.quirks & FLEXCAN_QUIRK_SETUP_STOP_MODE_GPR) ret = flexcan_setup_stop_mode_gpr(pdev); - else if (priv->devtype_data.quirks & FLEXCAN_QUIRK_AUTO_STOP_MODE) - ret = 0; else /* return 0 directly if doesn't support stop mode feature */ return 0; - if (ret) + /* If ret is -EINVAL, this means SoC claim to support stop mode, but + * dts file lack the stop mode property definition. For this case, + * directly return 0, this will skip the wakeup capable setting and + * will not block the driver probe. + */ + if (ret == -EINVAL) + return 0; + else if (ret) return ret; device_set_wakeup_capable(&pdev->dev, true); @@ -2320,16 +2314,8 @@ static int __maybe_unused flexcan_noirq_suspend(struct device *device) if (netif_running(dev)) { int err; - if (device_may_wakeup(device)) { + if (device_may_wakeup(device)) flexcan_enable_wakeup_irq(priv, true); - /* For auto stop mode, need to keep the clock on before - * system go into low power mode. After system go into - * low power mode, hardware will config the flexcan into - * stop mode, and gate off the clock automatically. - */ - if (priv->devtype_data.quirks & FLEXCAN_QUIRK_AUTO_STOP_MODE) - return 0; - } err = pm_runtime_force_suspend(device); if (err) @@ -2347,15 +2333,9 @@ static int __maybe_unused flexcan_noirq_resume(struct device *device) if (netif_running(dev)) { int err; - /* For the wakeup in auto stop mode, no need to gate on the - * clock here, hardware will do this automatically. - */ - if (!(device_may_wakeup(device) && - priv->devtype_data.quirks & FLEXCAN_QUIRK_AUTO_STOP_MODE)) { - err = pm_runtime_force_resume(device); - if (err) - return err; - } + err = pm_runtime_force_resume(device); + if (err) + return err; if (device_may_wakeup(device)) flexcan_enable_wakeup_irq(priv, false); diff --git a/drivers/net/can/flexcan/flexcan.h b/drivers/net/can/flexcan/flexcan.h index 91402977780b..025c3417031f 100644 --- a/drivers/net/can/flexcan/flexcan.h +++ b/drivers/net/can/flexcan/flexcan.h @@ -68,8 +68,6 @@ #define FLEXCAN_QUIRK_SUPPORT_RX_MAILBOX_RTR BIT(15) /* Device supports RX via FIFO */ #define FLEXCAN_QUIRK_SUPPORT_RX_FIFO BIT(16) -/* auto enter stop mode to support wakeup */ -#define FLEXCAN_QUIRK_AUTO_STOP_MODE BIT(17) struct flexcan_devtype_data { u32 quirks; /* quirks needed for different IP cores */ diff --git a/drivers/net/can/m_can/tcan4x5x-core.c b/drivers/net/can/m_can/tcan4x5x-core.c index 8a4143809d33..ae8c42f5debd 100644 --- a/drivers/net/can/m_can/tcan4x5x-core.c +++ b/drivers/net/can/m_can/tcan4x5x-core.c @@ -125,7 +125,7 @@ static const struct tcan4x5x_version_info tcan4x5x_versions[] = { }, [TCAN4553] = { .name = "4553", - .id2_register = 0x32353534, + .id2_register = 0x33353534, }, /* generic version with no id2_register at the end */ [TCAN4X5X] = { diff --git a/drivers/net/can/sja1000/sja1000.c b/drivers/net/can/sja1000/sja1000.c index 7ffa6c04765e..ddb3247948ad 100644 --- a/drivers/net/can/sja1000/sja1000.c +++ b/drivers/net/can/sja1000/sja1000.c @@ -392,7 +392,13 @@ static irqreturn_t sja1000_reset_interrupt(int irq, void *dev_id) struct net_device *dev = (struct net_device *)dev_id; netdev_dbg(dev, "performing a soft reset upon overrun\n"); - sja1000_start(dev); + + netif_tx_lock(dev); + + can_free_echo_skb(dev, 0, NULL); + sja1000_set_mode(dev, CAN_MODE_START); + + netif_tx_unlock(dev); return IRQ_HANDLED; } diff --git a/drivers/net/dsa/dsa_loop.c b/drivers/net/dsa/dsa_loop.c index 5b139f2206b6..c70ed67cc188 100644 --- a/drivers/net/dsa/dsa_loop.c +++ b/drivers/net/dsa/dsa_loop.c @@ -277,6 +277,14 @@ static int dsa_loop_port_max_mtu(struct dsa_switch *ds, int port) return ETH_MAX_MTU; } +static void dsa_loop_phylink_get_caps(struct dsa_switch *dsa, int port, + struct phylink_config *config) +{ + bitmap_fill(config->supported_interfaces, PHY_INTERFACE_MODE_MAX); + __clear_bit(PHY_INTERFACE_MODE_NA, config->supported_interfaces); + config->mac_capabilities = ~0; +} + static const struct dsa_switch_ops dsa_loop_driver = { .get_tag_protocol = dsa_loop_get_protocol, .setup = dsa_loop_setup, @@ -295,6 +303,7 @@ static const struct dsa_switch_ops dsa_loop_driver = { .port_vlan_del = dsa_loop_port_vlan_del, .port_change_mtu = dsa_loop_port_change_mtu, .port_max_mtu = dsa_loop_port_max_mtu, + .phylink_get_caps = dsa_loop_phylink_get_caps, }; static int dsa_loop_drv_probe(struct mdio_device *mdiodev) diff --git a/drivers/net/dsa/lantiq_gswip.c b/drivers/net/dsa/lantiq_gswip.c index 25abf2caf5fb..1a2d5797bf98 100644 --- a/drivers/net/dsa/lantiq_gswip.c +++ b/drivers/net/dsa/lantiq_gswip.c @@ -1759,8 +1759,7 @@ static void gswip_get_strings(struct dsa_switch *ds, int port, u32 stringset, return; for (i = 0; i < ARRAY_SIZE(gswip_rmon_cnt); i++) - strncpy(data + i * ETH_GSTRING_LEN, gswip_rmon_cnt[i].name, - ETH_GSTRING_LEN); + ethtool_sprintf(&data, "%s", gswip_rmon_cnt[i].name); } static u32 gswip_bcm_ram_entry_read(struct gswip_priv *priv, u32 table, diff --git a/drivers/net/dsa/microchip/ksz9477_acl.c b/drivers/net/dsa/microchip/ksz9477_acl.c index 06d74c19eb94..7ba778df63ac 100644 --- a/drivers/net/dsa/microchip/ksz9477_acl.c +++ b/drivers/net/dsa/microchip/ksz9477_acl.c @@ -420,10 +420,6 @@ static int ksz9477_validate_and_get_src_count(struct ksz_device *dev, int port, return -EINVAL; } - /* Nothing to do */ - if (src_idx == dst_idx) - return 0; - /* Validate if the source entries are contiguous */ ret = ksz9477_acl_get_cont_entr(dev, port, src_idx); if (ret < 0) @@ -556,6 +552,10 @@ static int ksz9477_acl_move_entries(struct ksz_device *dev, int port, struct ksz9477_acl_entries *acles = &acl->acles; int src_count, ret, dst_count; + /* Nothing to do */ + if (src_idx == dst_idx) + return 0; + ret = ksz9477_validate_and_get_src_count(dev, port, src_idx, dst_idx, &src_count, &dst_count); if (ret) diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c index 0d62c69dfbb6..ecf5d3deb36e 100644 --- a/drivers/net/dsa/mt7530.c +++ b/drivers/net/dsa/mt7530.c @@ -836,8 +836,7 @@ mt7530_get_strings(struct dsa_switch *ds, int port, u32 stringset, return; for (i = 0; i < ARRAY_SIZE(mt7530_mib); i++) - strncpy(data + i * ETH_GSTRING_LEN, mt7530_mib[i].name, - ETH_GSTRING_LEN); + ethtool_sprintf(&data, "%s", mt7530_mib[i].name); } static void diff --git a/drivers/net/dsa/qca/qca8k-8xxx.c b/drivers/net/dsa/qca/qca8k-8xxx.c index de1dc22cf683..4ce68e655a63 100644 --- a/drivers/net/dsa/qca/qca8k-8xxx.c +++ b/drivers/net/dsa/qca/qca8k-8xxx.c @@ -505,8 +505,8 @@ qca8k_bulk_read(void *ctx, const void *reg_buf, size_t reg_len, void *val_buf, size_t val_len) { int i, count = val_len / sizeof(u32), ret; - u32 reg = *(u32 *)reg_buf & U16_MAX; struct qca8k_priv *priv = ctx; + u32 reg = *(u16 *)reg_buf; if (priv->mgmt_master && !qca8k_read_eth(priv, reg, val_buf, val_len)) @@ -527,8 +527,8 @@ qca8k_bulk_gather_write(void *ctx, const void *reg_buf, size_t reg_len, const void *val_buf, size_t val_len) { int i, count = val_len / sizeof(u32), ret; - u32 reg = *(u32 *)reg_buf & U16_MAX; struct qca8k_priv *priv = ctx; + u32 reg = *(u16 *)reg_buf; u32 *val = (u32 *)val_buf; if (priv->mgmt_master && @@ -666,6 +666,15 @@ qca8k_phy_eth_command(struct qca8k_priv *priv, bool read, int phy, goto err_read_skb; } + /* It seems that accessing the switch's internal PHYs via management + * packets still uses the MDIO bus within the switch internally, and + * these accesses can conflict with external MDIO accesses to other + * devices on the MDIO bus. + * We therefore need to lock the MDIO bus onto which the switch is + * connected. + */ + mutex_lock(&priv->bus->mdio_lock); + /* Actually start the request: * 1. Send mdio master packet * 2. Busy Wait for mdio master command @@ -678,6 +687,7 @@ qca8k_phy_eth_command(struct qca8k_priv *priv, bool read, int phy, mgmt_master = priv->mgmt_master; if (!mgmt_master) { mutex_unlock(&mgmt_eth_data->mutex); + mutex_unlock(&priv->bus->mdio_lock); ret = -EINVAL; goto err_mgmt_master; } @@ -765,6 +775,7 @@ exit: QCA8K_ETHERNET_TIMEOUT); mutex_unlock(&mgmt_eth_data->mutex); + mutex_unlock(&priv->bus->mdio_lock); return ret; diff --git a/drivers/net/dsa/qca/qca8k-common.c b/drivers/net/dsa/qca/qca8k-common.c index fce04ce12cf9..9ff0a3c1cb91 100644 --- a/drivers/net/dsa/qca/qca8k-common.c +++ b/drivers/net/dsa/qca/qca8k-common.c @@ -487,8 +487,7 @@ void qca8k_get_strings(struct dsa_switch *ds, int port, u32 stringset, return; for (i = 0; i < priv->info->mib_count; i++) - strncpy(data + i * ETH_GSTRING_LEN, ar8327_mib[i].name, - ETH_GSTRING_LEN); + ethtool_sprintf(&data, "%s", ar8327_mib[i].name); } void qca8k_get_ethtool_stats(struct dsa_switch *ds, int port, diff --git a/drivers/net/dsa/realtek/rtl8365mb.c b/drivers/net/dsa/realtek/rtl8365mb.c index 41ea3b5a42b1..d171c18dd354 100644 --- a/drivers/net/dsa/realtek/rtl8365mb.c +++ b/drivers/net/dsa/realtek/rtl8365mb.c @@ -1303,8 +1303,7 @@ static void rtl8365mb_get_strings(struct dsa_switch *ds, int port, u32 stringset for (i = 0; i < RTL8365MB_MIB_END; i++) { struct rtl8365mb_mib_counter *mib = &rtl8365mb_mib_counters[i]; - - strncpy(data + i * ETH_GSTRING_LEN, mib->name, ETH_GSTRING_LEN); + ethtool_sprintf(&data, "%s", mib->name); } } diff --git a/drivers/net/dsa/realtek/rtl8366-core.c b/drivers/net/dsa/realtek/rtl8366-core.c index dc5f75be3017..82e267b8fddb 100644 --- a/drivers/net/dsa/realtek/rtl8366-core.c +++ b/drivers/net/dsa/realtek/rtl8366-core.c @@ -395,17 +395,13 @@ void rtl8366_get_strings(struct dsa_switch *ds, int port, u32 stringset, uint8_t *data) { struct realtek_priv *priv = ds->priv; - struct rtl8366_mib_counter *mib; int i; if (port >= priv->num_ports) return; - for (i = 0; i < priv->num_mib_counters; i++) { - mib = &priv->mib_counters[i]; - strncpy(data + i * ETH_GSTRING_LEN, - mib->name, ETH_GSTRING_LEN); - } + for (i = 0; i < priv->num_mib_counters; i++) + ethtool_sprintf(&data, "%s", priv->mib_counters[i].name); } EXPORT_SYMBOL_GPL(rtl8366_get_strings); diff --git a/drivers/net/dsa/vitesse-vsc73xx-core.c b/drivers/net/dsa/vitesse-vsc73xx-core.c index 4f09e7438f3b..e6f29e4e508c 100644 --- a/drivers/net/dsa/vitesse-vsc73xx-core.c +++ b/drivers/net/dsa/vitesse-vsc73xx-core.c @@ -928,7 +928,8 @@ static void vsc73xx_get_strings(struct dsa_switch *ds, int port, u32 stringset, const struct vsc73xx_counter *cnt; struct vsc73xx *vsc = ds->priv; u8 indices[6]; - int i, j; + u8 *buf = data; + int i; u32 val; int ret; @@ -948,10 +949,7 @@ static void vsc73xx_get_strings(struct dsa_switch *ds, int port, u32 stringset, indices[5] = ((val >> 26) & 0x1f); /* TX counter 2 */ /* The first counters is the RX octets */ - j = 0; - strncpy(data + j * ETH_GSTRING_LEN, - "RxEtherStatsOctets", ETH_GSTRING_LEN); - j++; + ethtool_sprintf(&buf, "RxEtherStatsOctets"); /* Each port supports recording 3 RX counters and 3 TX counters, * figure out what counters we use in this set-up and return the @@ -961,23 +959,16 @@ static void vsc73xx_get_strings(struct dsa_switch *ds, int port, u32 stringset, */ for (i = 0; i < 3; i++) { cnt = vsc73xx_find_counter(vsc, indices[i], false); - if (cnt) - strncpy(data + j * ETH_GSTRING_LEN, - cnt->name, ETH_GSTRING_LEN); - j++; + ethtool_sprintf(&buf, "%s", cnt ? cnt->name : ""); } /* TX stats begins with the number of TX octets */ - strncpy(data + j * ETH_GSTRING_LEN, - "TxEtherStatsOctets", ETH_GSTRING_LEN); - j++; + ethtool_sprintf(&buf, "TxEtherStatsOctets"); for (i = 3; i < 6; i++) { cnt = vsc73xx_find_counter(vsc, indices[i], true); - if (cnt) - strncpy(data + j * ETH_GSTRING_LEN, - cnt->name, ETH_GSTRING_LEN); - j++; + ethtool_sprintf(&buf, "%s", cnt ? cnt->name : ""); + } } @@ -1037,6 +1028,31 @@ static int vsc73xx_get_max_mtu(struct dsa_switch *ds, int port) return 9600 - ETH_HLEN - ETH_FCS_LEN; } +static void vsc73xx_phylink_get_caps(struct dsa_switch *dsa, int port, + struct phylink_config *config) +{ + unsigned long *interfaces = config->supported_interfaces; + + if (port == 5) + return; + + if (port == CPU_PORT) { + __set_bit(PHY_INTERFACE_MODE_MII, interfaces); + __set_bit(PHY_INTERFACE_MODE_REVMII, interfaces); + __set_bit(PHY_INTERFACE_MODE_GMII, interfaces); + __set_bit(PHY_INTERFACE_MODE_RGMII, interfaces); + } + + if (port <= 4) { + /* Internal PHYs */ + __set_bit(PHY_INTERFACE_MODE_INTERNAL, interfaces); + /* phylib default */ + __set_bit(PHY_INTERFACE_MODE_GMII, interfaces); + } + + config->mac_capabilities = MAC_SYM_PAUSE | MAC_10 | MAC_100 | MAC_1000; +} + static const struct dsa_switch_ops vsc73xx_ds_ops = { .get_tag_protocol = vsc73xx_get_tag_protocol, .setup = vsc73xx_setup, @@ -1050,6 +1066,7 @@ static const struct dsa_switch_ops vsc73xx_ds_ops = { .port_disable = vsc73xx_port_disable, .port_change_mtu = vsc73xx_change_mtu, .port_max_mtu = vsc73xx_get_max_mtu, + .phylink_get_caps = vsc73xx_phylink_get_caps, }; static int vsc73xx_gpio_get(struct gpio_chip *chip, unsigned int offset) diff --git a/drivers/net/ethernet/altera/altera_tse.h b/drivers/net/ethernet/altera/altera_tse.h index db5eed06e92d..82f2363a45cd 100644 --- a/drivers/net/ethernet/altera/altera_tse.h +++ b/drivers/net/ethernet/altera/altera_tse.h @@ -472,7 +472,7 @@ struct altera_tse_private { /* ethtool msglvl option */ u32 msg_enable; - struct altera_dmaops *dmaops; + const struct altera_dmaops *dmaops; struct phylink *phylink; struct phylink_config phylink_config; diff --git a/drivers/net/ethernet/altera/altera_tse_main.c b/drivers/net/ethernet/altera/altera_tse_main.c index 1b1799985d1d..1c8763be0e4b 100644 --- a/drivers/net/ethernet/altera/altera_tse_main.c +++ b/drivers/net/ethernet/altera/altera_tse_main.c @@ -29,13 +29,13 @@ #include <linux/mii.h> #include <linux/mdio/mdio-regmap.h> #include <linux/netdevice.h> -#include <linux/of_device.h> +#include <linux/of.h> #include <linux/of_mdio.h> #include <linux/of_net.h> -#include <linux/of_platform.h> #include <linux/pcs-lynx.h> #include <linux/phy.h> #include <linux/platform_device.h> +#include <linux/property.h> #include <linux/regmap.h> #include <linux/skbuff.h> #include <asm/cacheflush.h> @@ -82,8 +82,6 @@ MODULE_PARM_DESC(dma_tx_num, "Number of descriptors in the TX list"); #define TXQUEUESTOP_THRESHHOLD 2 -static const struct of_device_id altera_tse_ids[]; - static inline u32 tse_tx_avail(struct altera_tse_private *priv) { return priv->tx_cons + priv->tx_ring_size - priv->tx_prod - 1; @@ -1133,7 +1131,6 @@ static int request_and_map(struct platform_device *pdev, const char *name, */ static int altera_tse_probe(struct platform_device *pdev) { - const struct of_device_id *of_id = NULL; struct regmap_config pcs_regmap_cfg; struct altera_tse_private *priv; struct mdio_regmap_config mrc; @@ -1159,11 +1156,7 @@ static int altera_tse_probe(struct platform_device *pdev) priv->dev = ndev; priv->msg_enable = netif_msg_init(debug, default_msg_level); - of_id = of_match_device(altera_tse_ids, &pdev->dev); - - if (of_id) - priv->dmaops = (struct altera_dmaops *)of_id->data; - + priv->dmaops = device_get_match_data(&pdev->dev); if (priv->dmaops && priv->dmaops->altera_dtype == ALTERA_DTYPE_SGDMA) { diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-platform.c b/drivers/net/ethernet/amd/xgbe/xgbe-platform.c index 91842a5e161b..9131020d06af 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-platform.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-platform.c @@ -123,9 +123,7 @@ #include <linux/io.h> #include <linux/of.h> #include <linux/of_net.h> -#include <linux/of_address.h> #include <linux/of_platform.h> -#include <linux/of_device.h> #include <linux/clk.h> #include <linux/property.h> #include <linux/acpi.h> @@ -135,17 +133,6 @@ #include "xgbe-common.h" #ifdef CONFIG_ACPI -static const struct acpi_device_id xgbe_acpi_match[]; - -static struct xgbe_version_data *xgbe_acpi_vdata(struct xgbe_prv_data *pdata) -{ - const struct acpi_device_id *id; - - id = acpi_match_device(xgbe_acpi_match, pdata->dev); - - return id ? (struct xgbe_version_data *)id->driver_data : NULL; -} - static int xgbe_acpi_support(struct xgbe_prv_data *pdata) { struct device *dev = pdata->dev; @@ -173,11 +160,6 @@ static int xgbe_acpi_support(struct xgbe_prv_data *pdata) return 0; } #else /* CONFIG_ACPI */ -static struct xgbe_version_data *xgbe_acpi_vdata(struct xgbe_prv_data *pdata) -{ - return NULL; -} - static int xgbe_acpi_support(struct xgbe_prv_data *pdata) { return -EINVAL; @@ -185,17 +167,6 @@ static int xgbe_acpi_support(struct xgbe_prv_data *pdata) #endif /* CONFIG_ACPI */ #ifdef CONFIG_OF -static const struct of_device_id xgbe_of_match[]; - -static struct xgbe_version_data *xgbe_of_vdata(struct xgbe_prv_data *pdata) -{ - const struct of_device_id *id; - - id = of_match_device(xgbe_of_match, pdata->dev); - - return id ? (struct xgbe_version_data *)id->data : NULL; -} - static int xgbe_of_support(struct xgbe_prv_data *pdata) { struct device *dev = pdata->dev; @@ -244,11 +215,6 @@ static struct platform_device *xgbe_of_get_phy_pdev(struct xgbe_prv_data *pdata) return phy_pdev; } #else /* CONFIG_OF */ -static struct xgbe_version_data *xgbe_of_vdata(struct xgbe_prv_data *pdata) -{ - return NULL; -} - static int xgbe_of_support(struct xgbe_prv_data *pdata) { return -EINVAL; @@ -290,12 +256,6 @@ static struct platform_device *xgbe_get_phy_pdev(struct xgbe_prv_data *pdata) return phy_pdev; } -static struct xgbe_version_data *xgbe_get_vdata(struct xgbe_prv_data *pdata) -{ - return pdata->use_acpi ? xgbe_acpi_vdata(pdata) - : xgbe_of_vdata(pdata); -} - static int xgbe_platform_probe(struct platform_device *pdev) { struct xgbe_prv_data *pdata; @@ -321,7 +281,7 @@ static int xgbe_platform_probe(struct platform_device *pdev) pdata->use_acpi = dev->of_node ? 0 : 1; /* Get the version data */ - pdata->vdata = xgbe_get_vdata(pdata); + pdata->vdata = (struct xgbe_version_data *)device_get_match_data(dev); phy_pdev = xgbe_get_phy_pdev(pdata); if (!phy_pdev) { diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c index b5d9f9a55b7f..56f2b3c229af 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c @@ -2018,7 +2018,6 @@ static int xgene_enet_probe(struct platform_device *pdev) struct xgene_enet_pdata *pdata; struct device *dev = &pdev->dev; void (*link_state)(struct work_struct *); - const struct of_device_id *of_id; int ret; ndev = alloc_etherdev_mqs(sizeof(struct xgene_enet_pdata), @@ -2039,19 +2038,7 @@ static int xgene_enet_probe(struct platform_device *pdev) NETIF_F_GRO | NETIF_F_SG; - of_id = of_match_device(xgene_enet_of_match, &pdev->dev); - if (of_id) { - pdata->enet_id = (uintptr_t)of_id->data; - } -#ifdef CONFIG_ACPI - else { - const struct acpi_device_id *acpi_id; - - acpi_id = acpi_match_device(xgene_enet_acpi_match, &pdev->dev); - if (acpi_id) - pdata->enet_id = (enum xgene_enet_id) acpi_id->driver_data; - } -#endif + pdata->enet_id = (enum xgene_enet_id)device_get_match_data(&pdev->dev); if (!pdata->enet_id) { ret = -ENODEV; goto err; diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_main.h b/drivers/net/ethernet/apm/xgene/xgene_enet_main.h index 643f5e646740..bce2c19e3f22 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_main.h +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_main.h @@ -15,9 +15,10 @@ #include <linux/efi.h> #include <linux/irq.h> #include <linux/io.h> -#include <linux/of_platform.h> +#include <linux/of.h> #include <linux/of_net.h> #include <linux/of_mdio.h> +#include <linux/platform_device.h> #include <linux/mdio/mdio-xgene.h> #include <linux/module.h> #include <net/ip.h> diff --git a/drivers/net/ethernet/atheros/atlx/atl1.c b/drivers/net/ethernet/atheros/atlx/atl1.c index 02aa6fd8ebc2..a9014d7932db 100644 --- a/drivers/net/ethernet/atheros/atlx/atl1.c +++ b/drivers/net/ethernet/atheros/atlx/atl1.c @@ -2446,7 +2446,7 @@ static int atl1_rings_clean(struct napi_struct *napi, int budget) static inline int atl1_sched_rings_clean(struct atl1_adapter* adapter) { - if (!napi_schedule_prep(&adapter->napi)) + if (!napi_schedule(&adapter->napi)) /* It is possible in case even the RX/TX ints are disabled via IMR * register the ISR bits are set anyway (but do not produce IRQ). * To handle such situation the napi functions used to check is @@ -2454,8 +2454,6 @@ static inline int atl1_sched_rings_clean(struct atl1_adapter* adapter) */ return 0; - __napi_schedule(&adapter->napi); - /* * Disable RX/TX ints via IMR register if it is * allowed. NAPI handler must reenable them in same diff --git a/drivers/net/ethernet/broadcom/bcm63xx_enet.c b/drivers/net/ethernet/broadcom/bcm63xx_enet.c index 105afde5dbe1..3196c4dea076 100644 --- a/drivers/net/ethernet/broadcom/bcm63xx_enet.c +++ b/drivers/net/ethernet/broadcom/bcm63xx_enet.c @@ -2530,8 +2530,8 @@ static int bcm_enetsw_get_sset_count(struct net_device *netdev, static void bcm_enetsw_get_drvinfo(struct net_device *netdev, struct ethtool_drvinfo *drvinfo) { - strncpy(drvinfo->driver, bcm_enet_driver_name, sizeof(drvinfo->driver)); - strncpy(drvinfo->bus_info, "bcm63xx", sizeof(drvinfo->bus_info)); + strscpy(drvinfo->driver, bcm_enet_driver_name, sizeof(drvinfo->driver)); + strscpy(drvinfo->bus_info, "bcm63xx", sizeof(drvinfo->bus_info)); } static void bcm_enetsw_get_ethtool_stats(struct net_device *netdev, diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index 91f3a7e78d65..9282403d1bf6 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -3247,23 +3247,6 @@ static irqreturn_t bcmgenet_wol_isr(int irq, void *dev_id) return IRQ_HANDLED; } -#ifdef CONFIG_NET_POLL_CONTROLLER -static void bcmgenet_poll_controller(struct net_device *dev) -{ - struct bcmgenet_priv *priv = netdev_priv(dev); - - /* Invoke the main RX/TX interrupt handler */ - disable_irq(priv->irq0); - bcmgenet_isr0(priv->irq0, priv); - enable_irq(priv->irq0); - - /* And the interrupt handler for RX/TX priority queues */ - disable_irq(priv->irq1); - bcmgenet_isr1(priv->irq1, priv); - enable_irq(priv->irq1); -} -#endif - static void bcmgenet_umac_reset(struct bcmgenet_priv *priv) { u32 reg; @@ -3720,9 +3703,6 @@ static const struct net_device_ops bcmgenet_netdev_ops = { .ndo_set_mac_address = bcmgenet_set_mac_addr, .ndo_eth_ioctl = phy_do_ioctl_running, .ndo_set_features = bcmgenet_set_features, -#ifdef CONFIG_NET_POLL_CONTROLLER - .ndo_poll_controller = bcmgenet_poll_controller, -#endif .ndo_get_stats = bcmgenet_get_stats, .ndo_change_carrier = bcmgenet_change_carrier, }; diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index 14b311196b8f..59896bbf9e73 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -6314,6 +6314,46 @@ err_out: return -EOPNOTSUPP; } +static void tg3_hwclock_to_timestamp(struct tg3 *tp, u64 hwclock, + struct skb_shared_hwtstamps *timestamp) +{ + memset(timestamp, 0, sizeof(struct skb_shared_hwtstamps)); + timestamp->hwtstamp = ns_to_ktime((hwclock & TG3_TSTAMP_MASK) + + tp->ptp_adjust); +} + +static void tg3_read_tx_tstamp(struct tg3 *tp, u64 *hwclock) +{ + *hwclock = tr32(TG3_TX_TSTAMP_LSB); + *hwclock |= (u64)tr32(TG3_TX_TSTAMP_MSB) << 32; +} + +static long tg3_ptp_ts_aux_work(struct ptp_clock_info *ptp) +{ + struct tg3 *tp = container_of(ptp, struct tg3, ptp_info); + struct skb_shared_hwtstamps timestamp; + u64 hwclock; + + if (tp->ptp_txts_retrycnt > 2) + goto done; + + tg3_read_tx_tstamp(tp, &hwclock); + + if (hwclock != tp->pre_tx_ts) { + tg3_hwclock_to_timestamp(tp, hwclock, ×tamp); + skb_tstamp_tx(tp->tx_tstamp_skb, ×tamp); + goto done; + } + tp->ptp_txts_retrycnt++; + return HZ / 10; +done: + dev_consume_skb_any(tp->tx_tstamp_skb); + tp->tx_tstamp_skb = NULL; + tp->ptp_txts_retrycnt = 0; + tp->pre_tx_ts = 0; + return -1; +} + static const struct ptp_clock_info tg3_ptp_caps = { .owner = THIS_MODULE, .name = "tg3 clock", @@ -6325,19 +6365,12 @@ static const struct ptp_clock_info tg3_ptp_caps = { .pps = 0, .adjfine = tg3_ptp_adjfine, .adjtime = tg3_ptp_adjtime, + .do_aux_work = tg3_ptp_ts_aux_work, .gettimex64 = tg3_ptp_gettimex, .settime64 = tg3_ptp_settime, .enable = tg3_ptp_enable, }; -static void tg3_hwclock_to_timestamp(struct tg3 *tp, u64 hwclock, - struct skb_shared_hwtstamps *timestamp) -{ - memset(timestamp, 0, sizeof(struct skb_shared_hwtstamps)); - timestamp->hwtstamp = ns_to_ktime((hwclock & TG3_TSTAMP_MASK) + - tp->ptp_adjust); -} - /* tp->lock must be held */ static void tg3_ptp_init(struct tg3 *tp) { @@ -6368,6 +6401,8 @@ static void tg3_ptp_fini(struct tg3 *tp) ptp_clock_unregister(tp->ptp_clock); tp->ptp_clock = NULL; tp->ptp_adjust = 0; + dev_consume_skb_any(tp->tx_tstamp_skb); + tp->tx_tstamp_skb = NULL; } static inline int tg3_irq_sync(struct tg3 *tp) @@ -6538,6 +6573,7 @@ static void tg3_tx(struct tg3_napi *tnapi) while (sw_idx != hw_idx) { struct tg3_tx_ring_info *ri = &tnapi->tx_buffers[sw_idx]; + bool complete_skb_later = false; struct sk_buff *skb = ri->skb; int i, tx_bug = 0; @@ -6548,12 +6584,17 @@ static void tg3_tx(struct tg3_napi *tnapi) if (tnapi->tx_ring[sw_idx].len_flags & TXD_FLAG_HWTSTAMP) { struct skb_shared_hwtstamps timestamp; - u64 hwclock = tr32(TG3_TX_TSTAMP_LSB); - hwclock |= (u64)tr32(TG3_TX_TSTAMP_MSB) << 32; - - tg3_hwclock_to_timestamp(tp, hwclock, ×tamp); + u64 hwclock; - skb_tstamp_tx(skb, ×tamp); + tg3_read_tx_tstamp(tp, &hwclock); + if (hwclock != tp->pre_tx_ts) { + tg3_hwclock_to_timestamp(tp, hwclock, ×tamp); + skb_tstamp_tx(skb, ×tamp); + tp->pre_tx_ts = 0; + } else { + tp->tx_tstamp_skb = skb; + complete_skb_later = true; + } } dma_unmap_single(&tp->pdev->dev, dma_unmap_addr(ri, mapping), @@ -6591,7 +6632,10 @@ static void tg3_tx(struct tg3_napi *tnapi) pkts_compl++; bytes_compl += skb->len; - dev_consume_skb_any(skb); + if (!complete_skb_later) + dev_consume_skb_any(skb); + else + ptp_schedule_worker(tp->ptp_clock, 0); if (unlikely(tx_bug)) { tg3_tx_recover(tp); @@ -8028,8 +8072,13 @@ static netdev_tx_t tg3_start_xmit(struct sk_buff *skb, struct net_device *dev) if ((unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) && tg3_flag(tp, TX_TSTAMP_EN)) { - skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; - base_flags |= TXD_FLAG_HWTSTAMP; + tg3_full_lock(tp, 0); + if (!tp->pre_tx_ts) { + skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; + base_flags |= TXD_FLAG_HWTSTAMP; + tg3_read_tx_tstamp(tp, &tp->pre_tx_ts); + } + tg3_full_unlock(tp); } len = skb_headlen(skb); diff --git a/drivers/net/ethernet/broadcom/tg3.h b/drivers/net/ethernet/broadcom/tg3.h index 1000c894064f..ae5c01bd1110 100644 --- a/drivers/net/ethernet/broadcom/tg3.h +++ b/drivers/net/ethernet/broadcom/tg3.h @@ -3190,6 +3190,7 @@ struct tg3 { struct ptp_clock_info ptp_info; struct ptp_clock *ptp_clock; s64 ptp_adjust; + u8 ptp_txts_retrycnt; /* begin "tx thread" cacheline section */ void (*write32_tx_mbox) (struct tg3 *, u32, @@ -3372,6 +3373,8 @@ struct tg3 { struct tg3_hw_stats *hw_stats; dma_addr_t stats_mapping; struct work_struct reset_task; + struct sk_buff *tx_tstamp_skb; + u64 pre_tx_ts; int nvram_lock_cnt; u32 nvram_size; diff --git a/drivers/net/ethernet/brocade/bna/bfa_ioc.c b/drivers/net/ethernet/brocade/bna/bfa_ioc.c index b07522ac3e74..9c80ab07a735 100644 --- a/drivers/net/ethernet/brocade/bna/bfa_ioc.c +++ b/drivers/net/ethernet/brocade/bna/bfa_ioc.c @@ -2839,7 +2839,7 @@ bfa_ioc_get_adapter_optrom_ver(struct bfa_ioc *ioc, char *optrom_ver) static void bfa_ioc_get_adapter_manufacturer(struct bfa_ioc *ioc, char *manufacturer) { - strncpy(manufacturer, BFA_MFG_NAME, BFA_ADAPTER_MFG_NAME_LEN); + strscpy_pad(manufacturer, BFA_MFG_NAME, BFA_ADAPTER_MFG_NAME_LEN); } static void diff --git a/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c b/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c index 9d56181a301f..d3e07b6ed5e1 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c @@ -442,10 +442,11 @@ lio_get_drvinfo(struct net_device *netdev, struct ethtool_drvinfo *drvinfo) oct = lio->oct_dev; memset(drvinfo, 0, sizeof(struct ethtool_drvinfo)); - strcpy(drvinfo->driver, "liquidio"); - strncpy(drvinfo->fw_version, oct->fw_info.liquidio_firmware_version, - ETHTOOL_FWVERS_LEN); - strncpy(drvinfo->bus_info, pci_name(oct->pci_dev), 32); + strscpy(drvinfo->driver, "liquidio", sizeof(drvinfo->driver)); + strscpy(drvinfo->fw_version, oct->fw_info.liquidio_firmware_version, + sizeof(drvinfo->fw_version)); + strscpy(drvinfo->bus_info, pci_name(oct->pci_dev), + sizeof(drvinfo->bus_info)); } static void @@ -458,10 +459,11 @@ lio_get_vf_drvinfo(struct net_device *netdev, struct ethtool_drvinfo *drvinfo) oct = lio->oct_dev; memset(drvinfo, 0, sizeof(struct ethtool_drvinfo)); - strcpy(drvinfo->driver, "liquidio_vf"); - strncpy(drvinfo->fw_version, oct->fw_info.liquidio_firmware_version, - ETHTOOL_FWVERS_LEN); - strncpy(drvinfo->bus_info, pci_name(oct->pci_dev), 32); + strscpy(drvinfo->driver, "liquidio_vf", sizeof(drvinfo->driver)); + strscpy(drvinfo->fw_version, oct->fw_info.liquidio_firmware_version, + sizeof(drvinfo->fw_version)); + strscpy(drvinfo->bus_info, pci_name(oct->pci_dev), + sizeof(drvinfo->bus_info)); } static int diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c b/drivers/net/ethernet/cavium/liquidio/lio_main.c index 100daadbea2a..34f02a8ec2ca 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_main.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c @@ -1689,7 +1689,7 @@ static int load_firmware(struct octeon_device *oct) if (fw_type_is_auto()) { tmp_fw_type = LIO_FW_NAME_TYPE_NIC; - strncpy(fw_type, tmp_fw_type, sizeof(fw_type)); + strscpy_pad(fw_type, tmp_fw_type, sizeof(fw_type)); } else { tmp_fw_type = fw_type; } diff --git a/drivers/net/ethernet/cavium/liquidio/lio_vf_rep.c b/drivers/net/ethernet/cavium/liquidio/lio_vf_rep.c index 600de587d7a9..aa6c0dfb6f1c 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_vf_rep.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_vf_rep.c @@ -638,7 +638,8 @@ lio_vf_rep_netdev_event(struct notifier_block *nb, memset(&rep_cfg, 0, sizeof(rep_cfg)); rep_cfg.req_type = LIO_VF_REP_REQ_DEVNAME; rep_cfg.ifidx = vf_rep->ifidx; - strncpy(rep_cfg.rep_name.name, ndev->name, LIO_IF_NAME_SIZE); + strscpy(rep_cfg.rep_name.name, ndev->name, + sizeof(rep_cfg.rep_name.name)); ret = lio_vf_rep_send_soft_command(oct, &rep_cfg, sizeof(rep_cfg), NULL, 0); diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_device.c b/drivers/net/ethernet/cavium/liquidio/octeon_device.c index 364f4f912dc2..6b6cb73482d7 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_device.c +++ b/drivers/net/ethernet/cavium/liquidio/octeon_device.c @@ -1217,10 +1217,10 @@ int octeon_core_drv_init(struct octeon_recv_info *recv_info, void *buf) goto core_drv_init_err; } - strncpy(app_name, + strscpy(app_name, get_oct_app_string( (u32)recv_pkt->rh.r_core_drv_init.app_mode), - sizeof(app_name) - 1); + sizeof(app_name)); oct->app_mode = (u32)recv_pkt->rh.r_core_drv_init.app_mode; if (recv_pkt->rh.r_core_drv_init.app_mode == CVM_DRV_NIC_APP) { oct->fw_info.max_nic_ports = @@ -1257,9 +1257,10 @@ int octeon_core_drv_init(struct octeon_recv_info *recv_info, void *buf) memcpy(cs, get_rbd( recv_pkt->buffer_ptr[0]) + OCT_DROQ_INFO_SIZE, sizeof(*cs)); - strncpy(oct->boardinfo.name, cs->boardname, OCT_BOARD_NAME); - strncpy(oct->boardinfo.serial_number, cs->board_serial_number, - OCT_SERIAL_LEN); + strscpy(oct->boardinfo.name, cs->boardname, + sizeof(oct->boardinfo.name)); + strscpy(oct->boardinfo.serial_number, cs->board_serial_number, + sizeof(oct->boardinfo.serial_number)); octeon_swap_8B_data((u64 *)cs, (sizeof(*cs) >> 3)); diff --git a/drivers/net/ethernet/chelsio/cxgb3/sge.c b/drivers/net/ethernet/chelsio/cxgb3/sge.c index 2e9a74fe0970..dfe4e0102960 100644 --- a/drivers/net/ethernet/chelsio/cxgb3/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb3/sge.c @@ -2674,12 +2674,7 @@ static int rspq_check_napi(struct sge_qset *qs) { struct sge_rspq *q = &qs->rspq; - if (!napi_is_scheduled(&qs->napi) && - is_new_response(&q->desc[q->cidx], q)) { - napi_schedule(&qs->napi); - return 1; - } - return 0; + return is_new_response(&q->desc[q->cidx], q) && napi_schedule(&qs->napi); } /* diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c b/drivers/net/ethernet/chelsio/cxgb4/sge.c index 98dd78551d89..b5ff2e1a9975 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c @@ -4261,7 +4261,7 @@ static void sge_rx_timer_cb(struct timer_list *t) if (fl_starving(adap, fl)) { rxq = container_of(fl, struct sge_eth_rxq, fl); - if (napi_reschedule(&rxq->rspq.napi)) + if (napi_schedule(&rxq->rspq.napi)) fl->starving++; else set_bit(id, s->starving_fl); diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/sge.c b/drivers/net/ethernet/chelsio/cxgb4vf/sge.c index 2d0cf76fb3c5..5b1d746e6563 100644 --- a/drivers/net/ethernet/chelsio/cxgb4vf/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb4vf/sge.c @@ -2094,7 +2094,7 @@ static void sge_rx_timer_cb(struct timer_list *t) struct sge_eth_rxq *rxq; rxq = container_of(fl, struct sge_eth_rxq, fl); - if (napi_reschedule(&rxq->rspq.napi)) + if (napi_schedule(&rxq->rspq.napi)) fl->starving++; else set_bit(id, s->starving_fl); diff --git a/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c b/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c index bcdc7fc2f427..6482728794dd 100644 --- a/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c +++ b/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c @@ -361,9 +361,7 @@ static void chcr_ktls_dev_del(struct net_device *netdev, struct tls_context *tls_ctx, enum tls_offload_ctx_dir direction) { - struct chcr_ktls_ofld_ctx_tx *tx_ctx = - chcr_get_ktls_tx_context(tls_ctx); - struct chcr_ktls_info *tx_info = tx_ctx->chcr_info; + struct chcr_ktls_info *tx_info = chcr_get_ktls_tx_info(tls_ctx); struct ch_ktls_port_stats_debug *port_stats; struct chcr_ktls_uld_ctx *u_ctx; @@ -396,7 +394,7 @@ static void chcr_ktls_dev_del(struct net_device *netdev, port_stats = &tx_info->adap->ch_ktls_stats.ktls_port[tx_info->port_id]; atomic64_inc(&port_stats->ktls_tx_connection_close); kvfree(tx_info); - tx_ctx->chcr_info = NULL; + chcr_set_ktls_tx_info(tls_ctx, NULL); /* release module refcount */ module_put(THIS_MODULE); } @@ -417,7 +415,6 @@ static int chcr_ktls_dev_add(struct net_device *netdev, struct sock *sk, { struct tls_context *tls_ctx = tls_get_ctx(sk); struct ch_ktls_port_stats_debug *port_stats; - struct chcr_ktls_ofld_ctx_tx *tx_ctx; struct chcr_ktls_uld_ctx *u_ctx; struct chcr_ktls_info *tx_info; struct dst_entry *dst; @@ -427,8 +424,6 @@ static int chcr_ktls_dev_add(struct net_device *netdev, struct sock *sk, u8 daaddr[16]; int ret = -1; - tx_ctx = chcr_get_ktls_tx_context(tls_ctx); - pi = netdev_priv(netdev); adap = pi->adapter; port_stats = &adap->ch_ktls_stats.ktls_port[pi->port_id]; @@ -440,7 +435,7 @@ static int chcr_ktls_dev_add(struct net_device *netdev, struct sock *sk, goto out; } - if (tx_ctx->chcr_info) + if (chcr_get_ktls_tx_info(tls_ctx)) goto out; if (u_ctx && u_ctx->detach) @@ -566,7 +561,7 @@ static int chcr_ktls_dev_add(struct net_device *netdev, struct sock *sk, goto free_tid; atomic64_inc(&port_stats->ktls_tx_ctx); - tx_ctx->chcr_info = tx_info; + chcr_set_ktls_tx_info(tls_ctx, tx_info); return 0; @@ -647,7 +642,7 @@ static int chcr_ktls_cpl_act_open_rpl(struct adapter *adap, { const struct cpl_act_open_rpl *p = (void *)input; struct chcr_ktls_info *tx_info = NULL; - struct chcr_ktls_ofld_ctx_tx *tx_ctx; + struct tls_offload_context_tx *tx_ctx; struct chcr_ktls_uld_ctx *u_ctx; unsigned int atid, tid, status; struct tls_context *tls_ctx; @@ -686,7 +681,7 @@ static int chcr_ktls_cpl_act_open_rpl(struct adapter *adap, cxgb4_insert_tid(t, tx_info, tx_info->tid, tx_info->ip_family); /* Adding tid */ tls_ctx = tls_get_ctx(tx_info->sk); - tx_ctx = chcr_get_ktls_tx_context(tls_ctx); + tx_ctx = tls_offload_ctx_tx(tls_ctx); u_ctx = adap->uld[CXGB4_ULD_KTLS].handle; if (u_ctx) { ret = xa_insert_bh(&u_ctx->tid_list, tid, tx_ctx, @@ -1924,7 +1919,7 @@ static int chcr_ktls_xmit(struct sk_buff *skb, struct net_device *dev) { u32 tls_end_offset, tcp_seq, skb_data_len, skb_offset; struct ch_ktls_port_stats_debug *port_stats; - struct chcr_ktls_ofld_ctx_tx *tx_ctx; + struct tls_offload_context_tx *tx_ctx; struct ch_ktls_stats_debug *stats; struct tcphdr *th = tcp_hdr(skb); int data_len, qidx, ret = 0, mss; @@ -1944,6 +1939,7 @@ static int chcr_ktls_xmit(struct sk_buff *skb, struct net_device *dev) mss = skb_is_gso(skb) ? skb_shinfo(skb)->gso_size : data_len; tls_ctx = tls_get_ctx(skb->sk); + tx_ctx = tls_offload_ctx_tx(tls_ctx); tls_netdev = rcu_dereference_bh(tls_ctx->netdev); /* Don't quit on NULL: if tls_device_down is running in parallel, * netdev might become NULL, even if tls_is_skb_tx_device_offloaded was @@ -1952,8 +1948,7 @@ static int chcr_ktls_xmit(struct sk_buff *skb, struct net_device *dev) if (unlikely(tls_netdev && tls_netdev != dev)) goto out; - tx_ctx = chcr_get_ktls_tx_context(tls_ctx); - tx_info = tx_ctx->chcr_info; + tx_info = chcr_get_ktls_tx_info(tls_ctx); if (unlikely(!tx_info)) goto out; @@ -1979,19 +1974,19 @@ static int chcr_ktls_xmit(struct sk_buff *skb, struct net_device *dev) * we will send the complete record again. */ - spin_lock_irqsave(&tx_ctx->base.lock, flags); + spin_lock_irqsave(&tx_ctx->lock, flags); do { cxgb4_reclaim_completed_tx(adap, &q->q, true); /* fetch the tls record */ - record = tls_get_record(&tx_ctx->base, tcp_seq, + record = tls_get_record(tx_ctx, tcp_seq, &tx_info->record_no); /* By the time packet reached to us, ACK is received, and record * won't be found in that case, handle it gracefully. */ if (unlikely(!record)) { - spin_unlock_irqrestore(&tx_ctx->base.lock, flags); + spin_unlock_irqrestore(&tx_ctx->lock, flags); atomic64_inc(&port_stats->ktls_tx_drop_no_sync_data); goto out; } @@ -2015,7 +2010,7 @@ static int chcr_ktls_xmit(struct sk_buff *skb, struct net_device *dev) tls_end_offset != record->len); if (ret) { - spin_unlock_irqrestore(&tx_ctx->base.lock, + spin_unlock_irqrestore(&tx_ctx->lock, flags); goto out; } @@ -2046,7 +2041,7 @@ static int chcr_ktls_xmit(struct sk_buff *skb, struct net_device *dev) /* free the refcount taken earlier */ if (tls_end_offset < data_len) dev_kfree_skb_any(skb); - spin_unlock_irqrestore(&tx_ctx->base.lock, flags); + spin_unlock_irqrestore(&tx_ctx->lock, flags); goto out; } @@ -2082,7 +2077,7 @@ static int chcr_ktls_xmit(struct sk_buff *skb, struct net_device *dev) /* if any failure, come out from the loop. */ if (ret) { - spin_unlock_irqrestore(&tx_ctx->base.lock, flags); + spin_unlock_irqrestore(&tx_ctx->lock, flags); if (th->fin) dev_kfree_skb_any(skb); @@ -2097,7 +2092,7 @@ static int chcr_ktls_xmit(struct sk_buff *skb, struct net_device *dev) } while (data_len > 0); - spin_unlock_irqrestore(&tx_ctx->base.lock, flags); + spin_unlock_irqrestore(&tx_ctx->lock, flags); atomic64_inc(&port_stats->ktls_tx_encrypted_packets); atomic64_add(skb_data_len, &port_stats->ktls_tx_encrypted_bytes); @@ -2185,17 +2180,17 @@ static void clear_conn_resources(struct chcr_ktls_info *tx_info) static void ch_ktls_reset_all_conn(struct chcr_ktls_uld_ctx *u_ctx) { struct ch_ktls_port_stats_debug *port_stats; - struct chcr_ktls_ofld_ctx_tx *tx_ctx; + struct tls_offload_context_tx *tx_ctx; struct chcr_ktls_info *tx_info; unsigned long index; xa_for_each(&u_ctx->tid_list, index, tx_ctx) { - tx_info = tx_ctx->chcr_info; + tx_info = __chcr_get_ktls_tx_info(tx_ctx); clear_conn_resources(tx_info); port_stats = &tx_info->adap->ch_ktls_stats.ktls_port[tx_info->port_id]; atomic64_inc(&port_stats->ktls_tx_connection_close); kvfree(tx_info); - tx_ctx->chcr_info = NULL; + memset(tx_ctx->driver_state, 0, TLS_DRIVER_STATE_SIZE_TX); /* release module refcount */ module_put(THIS_MODULE); } diff --git a/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.h b/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.h index 10572dc55365..dbbba92bf540 100644 --- a/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.h +++ b/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.h @@ -67,8 +67,7 @@ struct chcr_ktls_info { bool pending_close; }; -struct chcr_ktls_ofld_ctx_tx { - struct tls_offload_context_tx base; +struct chcr_ktls_ctx_tx { struct chcr_ktls_info *chcr_info; }; @@ -79,14 +78,33 @@ struct chcr_ktls_uld_ctx { bool detach; }; -static inline struct chcr_ktls_ofld_ctx_tx * -chcr_get_ktls_tx_context(struct tls_context *tls_ctx) +static inline struct chcr_ktls_info * +__chcr_get_ktls_tx_info(struct tls_offload_context_tx *octx) { - BUILD_BUG_ON(sizeof(struct chcr_ktls_ofld_ctx_tx) > - TLS_OFFLOAD_CONTEXT_SIZE_TX); - return container_of(tls_offload_ctx_tx(tls_ctx), - struct chcr_ktls_ofld_ctx_tx, - base); + struct chcr_ktls_ctx_tx *priv_ctx; + + BUILD_BUG_ON(sizeof(struct chcr_ktls_ctx_tx) > TLS_DRIVER_STATE_SIZE_TX); + priv_ctx = (struct chcr_ktls_ctx_tx *)octx->driver_state; + return priv_ctx->chcr_info; +} + +static inline struct chcr_ktls_info * +chcr_get_ktls_tx_info(struct tls_context *tls_ctx) +{ + struct chcr_ktls_ctx_tx *priv_ctx; + + BUILD_BUG_ON(sizeof(struct chcr_ktls_ctx_tx) > TLS_DRIVER_STATE_SIZE_TX); + priv_ctx = (struct chcr_ktls_ctx_tx *)__tls_driver_ctx(tls_ctx, TLS_OFFLOAD_CTX_DIR_TX); + return priv_ctx->chcr_info; +} + +static inline void +chcr_set_ktls_tx_info(struct tls_context *tls_ctx, struct chcr_ktls_info *chcr_info) +{ + struct chcr_ktls_ctx_tx *priv_ctx; + + priv_ctx = __tls_driver_ctx(tls_ctx, TLS_OFFLOAD_CTX_DIR_TX); + priv_ctx->chcr_info = chcr_info; } static inline int chcr_get_first_rx_qid(struct adapter *adap) diff --git a/drivers/net/ethernet/engleder/tsnep_hw.h b/drivers/net/ethernet/engleder/tsnep_hw.h index 55e1caf193a6..64c97eb66f67 100644 --- a/drivers/net/ethernet/engleder/tsnep_hw.h +++ b/drivers/net/ethernet/engleder/tsnep_hw.h @@ -181,6 +181,8 @@ struct tsnep_gcl_operation { #define TSNEP_DESC_SIZE 256 #define TSNEP_DESC_SIZE_DATA_AFTER 2048 #define TSNEP_DESC_OFFSET 128 +#define TSNEP_DESC_SIZE_DATA_AFTER_INLINE (64 - sizeof(struct tsnep_tx_desc) + \ + sizeof_field(struct tsnep_tx_desc, tx)) #define TSNEP_DESC_OWNER_COUNTER_MASK 0xC0000000 #define TSNEP_DESC_OWNER_COUNTER_SHIFT 30 #define TSNEP_DESC_LENGTH_MASK 0x00003FFF diff --git a/drivers/net/ethernet/engleder/tsnep_main.c b/drivers/net/ethernet/engleder/tsnep_main.c index 77a3fcb821c8..f16b6b3e21a6 100644 --- a/drivers/net/ethernet/engleder/tsnep_main.c +++ b/drivers/net/ethernet/engleder/tsnep_main.c @@ -51,12 +51,22 @@ #define TSNEP_COALESCE_USECS_MAX ((ECM_INT_DELAY_MASK >> ECM_INT_DELAY_SHIFT) * \ ECM_INT_DELAY_BASE_US + ECM_INT_DELAY_BASE_US - 1) -#define TSNEP_TX_TYPE_SKB BIT(0) -#define TSNEP_TX_TYPE_SKB_FRAG BIT(1) -#define TSNEP_TX_TYPE_XDP_TX BIT(2) -#define TSNEP_TX_TYPE_XDP_NDO BIT(3) -#define TSNEP_TX_TYPE_XDP (TSNEP_TX_TYPE_XDP_TX | TSNEP_TX_TYPE_XDP_NDO) -#define TSNEP_TX_TYPE_XSK BIT(4) +/* mapping type */ +#define TSNEP_TX_TYPE_MAP BIT(0) +#define TSNEP_TX_TYPE_MAP_PAGE BIT(1) +#define TSNEP_TX_TYPE_INLINE BIT(2) +/* buffer type */ +#define TSNEP_TX_TYPE_SKB BIT(8) +#define TSNEP_TX_TYPE_SKB_MAP (TSNEP_TX_TYPE_SKB | TSNEP_TX_TYPE_MAP) +#define TSNEP_TX_TYPE_SKB_INLINE (TSNEP_TX_TYPE_SKB | TSNEP_TX_TYPE_INLINE) +#define TSNEP_TX_TYPE_SKB_FRAG BIT(9) +#define TSNEP_TX_TYPE_SKB_FRAG_MAP_PAGE (TSNEP_TX_TYPE_SKB_FRAG | TSNEP_TX_TYPE_MAP_PAGE) +#define TSNEP_TX_TYPE_SKB_FRAG_INLINE (TSNEP_TX_TYPE_SKB_FRAG | TSNEP_TX_TYPE_INLINE) +#define TSNEP_TX_TYPE_XDP_TX BIT(10) +#define TSNEP_TX_TYPE_XDP_NDO BIT(11) +#define TSNEP_TX_TYPE_XDP_NDO_MAP_PAGE (TSNEP_TX_TYPE_XDP_NDO | TSNEP_TX_TYPE_MAP_PAGE) +#define TSNEP_TX_TYPE_XDP (TSNEP_TX_TYPE_XDP_TX | TSNEP_TX_TYPE_XDP_NDO) +#define TSNEP_TX_TYPE_XSK BIT(12) #define TSNEP_XDP_TX BIT(0) #define TSNEP_XDP_REDIRECT BIT(1) @@ -416,6 +426,8 @@ static void tsnep_tx_activate(struct tsnep_tx *tx, int index, int length, entry->properties |= TSNEP_TX_DESC_OWNER_USER_FLAG; entry->desc->more_properties = __cpu_to_le32(entry->len & TSNEP_DESC_LENGTH_MASK); + if (entry->type & TSNEP_TX_TYPE_INLINE) + entry->properties |= TSNEP_TX_DESC_DATA_AFTER_DESC_FLAG; /* descriptor properties shall be written last, because valid data is * signaled there @@ -433,39 +445,79 @@ static int tsnep_tx_desc_available(struct tsnep_tx *tx) return tx->read - tx->write - 1; } +static int tsnep_tx_map_frag(skb_frag_t *frag, struct tsnep_tx_entry *entry, + struct device *dmadev, dma_addr_t *dma) +{ + unsigned int len; + int mapped; + + len = skb_frag_size(frag); + if (likely(len > TSNEP_DESC_SIZE_DATA_AFTER_INLINE)) { + *dma = skb_frag_dma_map(dmadev, frag, 0, len, DMA_TO_DEVICE); + if (dma_mapping_error(dmadev, *dma)) + return -ENOMEM; + entry->type = TSNEP_TX_TYPE_SKB_FRAG_MAP_PAGE; + mapped = 1; + } else { + void *fragdata = skb_frag_address_safe(frag); + + if (likely(fragdata)) { + memcpy(&entry->desc->tx, fragdata, len); + } else { + struct page *page = skb_frag_page(frag); + + fragdata = kmap_local_page(page); + memcpy(&entry->desc->tx, fragdata + skb_frag_off(frag), + len); + kunmap_local(fragdata); + } + entry->type = TSNEP_TX_TYPE_SKB_FRAG_INLINE; + mapped = 0; + } + + return mapped; +} + static int tsnep_tx_map(struct sk_buff *skb, struct tsnep_tx *tx, int count) { struct device *dmadev = tx->adapter->dmadev; struct tsnep_tx_entry *entry; unsigned int len; - dma_addr_t dma; int map_len = 0; - int i; + dma_addr_t dma; + int i, mapped; for (i = 0; i < count; i++) { entry = &tx->entry[(tx->write + i) & TSNEP_RING_MASK]; if (!i) { len = skb_headlen(skb); - dma = dma_map_single(dmadev, skb->data, len, - DMA_TO_DEVICE); - - entry->type = TSNEP_TX_TYPE_SKB; + if (likely(len > TSNEP_DESC_SIZE_DATA_AFTER_INLINE)) { + dma = dma_map_single(dmadev, skb->data, len, + DMA_TO_DEVICE); + if (dma_mapping_error(dmadev, dma)) + return -ENOMEM; + entry->type = TSNEP_TX_TYPE_SKB_MAP; + mapped = 1; + } else { + memcpy(&entry->desc->tx, skb->data, len); + entry->type = TSNEP_TX_TYPE_SKB_INLINE; + mapped = 0; + } } else { - len = skb_frag_size(&skb_shinfo(skb)->frags[i - 1]); - dma = skb_frag_dma_map(dmadev, - &skb_shinfo(skb)->frags[i - 1], - 0, len, DMA_TO_DEVICE); + skb_frag_t *frag = &skb_shinfo(skb)->frags[i - 1]; - entry->type = TSNEP_TX_TYPE_SKB_FRAG; + len = skb_frag_size(frag); + mapped = tsnep_tx_map_frag(frag, entry, dmadev, &dma); + if (mapped < 0) + return mapped; } - if (dma_mapping_error(dmadev, dma)) - return -ENOMEM; entry->len = len; - dma_unmap_addr_set(entry, dma, dma); - - entry->desc->tx = __cpu_to_le64(dma); + if (likely(mapped)) { + dma_unmap_addr_set(entry, dma, dma); + entry->desc->tx = __cpu_to_le64(dma); + } map_len += len; } @@ -484,13 +536,12 @@ static int tsnep_tx_unmap(struct tsnep_tx *tx, int index, int count) entry = &tx->entry[(index + i) & TSNEP_RING_MASK]; if (entry->len) { - if (entry->type & TSNEP_TX_TYPE_SKB) + if (entry->type & TSNEP_TX_TYPE_MAP) dma_unmap_single(dmadev, dma_unmap_addr(entry, dma), dma_unmap_len(entry, len), DMA_TO_DEVICE); - else if (entry->type & - (TSNEP_TX_TYPE_SKB_FRAG | TSNEP_TX_TYPE_XDP_NDO)) + else if (entry->type & TSNEP_TX_TYPE_MAP_PAGE) dma_unmap_page(dmadev, dma_unmap_addr(entry, dma), dma_unmap_len(entry, len), @@ -586,7 +637,7 @@ static int tsnep_xdp_tx_map(struct xdp_frame *xdpf, struct tsnep_tx *tx, if (dma_mapping_error(dmadev, dma)) return -ENOMEM; - entry->type = TSNEP_TX_TYPE_XDP_NDO; + entry->type = TSNEP_TX_TYPE_XDP_NDO_MAP_PAGE; } else { page = unlikely(frag) ? skb_frag_page(frag) : virt_to_page(xdpf->data); diff --git a/drivers/net/ethernet/ezchip/nps_enet.c b/drivers/net/ethernet/ezchip/nps_enet.c index edf000e7bab4..4d7184d46824 100644 --- a/drivers/net/ethernet/ezchip/nps_enet.c +++ b/drivers/net/ethernet/ezchip/nps_enet.c @@ -198,7 +198,7 @@ static int nps_enet_poll(struct napi_struct *napi, int budget) */ if (nps_enet_is_tx_pending(priv)) { nps_enet_reg_set(priv, NPS_ENET_REG_BUF_INT_ENABLE, 0); - napi_reschedule(napi); + napi_schedule(napi); } } diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index b83346708881..5eb756871a96 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -52,11 +52,11 @@ #include <linux/clk.h> #include <linux/crc32.h> #include <linux/platform_device.h> +#include <linux/property.h> #include <linux/mdio.h> #include <linux/phy.h> #include <linux/fec.h> #include <linux/of.h> -#include <linux/of_device.h> #include <linux/of_mdio.h> #include <linux/of_net.h> #include <linux/regulator/consumer.h> @@ -2907,12 +2907,10 @@ static void fec_enet_get_strings(struct net_device *netdev, switch (stringset) { case ETH_SS_STATS: for (i = 0; i < ARRAY_SIZE(fec_stats); i++) { - memcpy(data, fec_stats[i].name, ETH_GSTRING_LEN); - data += ETH_GSTRING_LEN; + ethtool_sprintf(&data, "%s", fec_stats[i].name); } for (i = 0; i < ARRAY_SIZE(fec_xdp_stat_strs); i++) { - strncpy(data, fec_xdp_stat_strs[i], ETH_GSTRING_LEN); - data += ETH_GSTRING_LEN; + ethtool_sprintf(&data, "%s", fec_xdp_stat_strs[i]); } page_pool_ethtool_stats_get_strings(data); @@ -4292,14 +4290,13 @@ fec_probe(struct platform_device *pdev) phy_interface_t interface; struct net_device *ndev; int i, irq, ret = 0; - const struct of_device_id *of_id; static int dev_id; struct device_node *np = pdev->dev.of_node, *phy_node; int num_tx_qs; int num_rx_qs; char irq_name[8]; int irq_cnt; - struct fec_devinfo *dev_info; + const struct fec_devinfo *dev_info; fec_enet_get_queue_num(pdev, &num_tx_qs, &num_rx_qs); @@ -4314,10 +4311,9 @@ fec_probe(struct platform_device *pdev) /* setup board info structure */ fep = netdev_priv(ndev); - of_id = of_match_device(fec_dt_ids, &pdev->dev); - if (of_id) - pdev->id_entry = of_id->data; - dev_info = (struct fec_devinfo *)pdev->id_entry->driver_data; + dev_info = device_get_match_data(&pdev->dev); + if (!dev_info) + dev_info = (const struct fec_devinfo *)pdev->id_entry->driver_data; if (dev_info) fep->quirks = dev_info->quirks; diff --git a/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c b/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c index a6dfc8807d3d..cf392faa6105 100644 --- a/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c +++ b/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c @@ -35,10 +35,9 @@ #include <linux/fs.h> #include <linux/platform_device.h> #include <linux/phy.h> +#include <linux/property.h> #include <linux/of.h> #include <linux/of_mdio.h> -#include <linux/of_platform.h> -#include <linux/of_gpio.h> #include <linux/of_net.h> #include <linux/pgtable.h> @@ -884,9 +883,9 @@ static const struct ethtool_ops fs_ethtool_ops = { /**************************************************************************************/ #ifdef CONFIG_FS_ENET_HAS_FEC -#define IS_FEC(match) ((match)->data == &fs_fec_ops) +#define IS_FEC(ops) ((ops) == &fs_fec_ops) #else -#define IS_FEC(match) 0 +#define IS_FEC(ops) 0 #endif static const struct net_device_ops fs_enet_netdev_ops = { @@ -903,10 +902,9 @@ static const struct net_device_ops fs_enet_netdev_ops = { #endif }; -static const struct of_device_id fs_enet_match[]; static int fs_enet_probe(struct platform_device *ofdev) { - const struct of_device_id *match; + const struct fs_ops *ops; struct net_device *ndev; struct fs_enet_private *fep; struct fs_platform_info *fpi; @@ -916,15 +914,15 @@ static int fs_enet_probe(struct platform_device *ofdev) const char *phy_connection_type; int privsize, len, ret = -ENODEV; - match = of_match_device(fs_enet_match, &ofdev->dev); - if (!match) + ops = device_get_match_data(&ofdev->dev); + if (!ops) return -EINVAL; fpi = kzalloc(sizeof(*fpi), GFP_KERNEL); if (!fpi) return -ENOMEM; - if (!IS_FEC(match)) { + if (!IS_FEC(ops)) { data = of_get_property(ofdev->dev.of_node, "fsl,cpm-command", &len); if (!data || len != 4) goto out_free_fpi; @@ -986,7 +984,7 @@ static int fs_enet_probe(struct platform_device *ofdev) fep->dev = &ofdev->dev; fep->ndev = ndev; fep->fpi = fpi; - fep->ops = match->data; + fep->ops = ops; ret = fep->ops->setup_data(ndev); if (ret) diff --git a/drivers/net/ethernet/freescale/fs_enet/mii-fec.c b/drivers/net/ethernet/freescale/fs_enet/mii-fec.c index a1e777a4b75f..7bb69727952a 100644 --- a/drivers/net/ethernet/freescale/fs_enet/mii-fec.c +++ b/drivers/net/ethernet/freescale/fs_enet/mii-fec.c @@ -30,9 +30,10 @@ #include <linux/ethtool.h> #include <linux/bitops.h> #include <linux/platform_device.h> +#include <linux/property.h> +#include <linux/of.h> #include <linux/of_address.h> #include <linux/of_mdio.h> -#include <linux/of_platform.h> #include <linux/pgtable.h> #include <asm/irq.h> @@ -96,20 +97,15 @@ static int fs_enet_fec_mii_write(struct mii_bus *bus, int phy_id, int location, } -static const struct of_device_id fs_enet_mdio_fec_match[]; static int fs_enet_mdio_probe(struct platform_device *ofdev) { - const struct of_device_id *match; struct resource res; struct mii_bus *new_bus; struct fec_info *fec; int (*get_bus_freq)(struct device *); int ret = -ENOMEM, clock, speed; - match = of_match_device(fs_enet_mdio_fec_match, &ofdev->dev); - if (!match) - return -EINVAL; - get_bus_freq = match->data; + get_bus_freq = device_get_match_data(&ofdev->dev); new_bus = mdiobus_alloc(); if (!new_bus) diff --git a/drivers/net/ethernet/freescale/fsl_pq_mdio.c b/drivers/net/ethernet/freescale/fsl_pq_mdio.c index eee675a25b2c..70dd982a5edc 100644 --- a/drivers/net/ethernet/freescale/fsl_pq_mdio.c +++ b/drivers/net/ethernet/freescale/fsl_pq_mdio.c @@ -19,9 +19,10 @@ #include <linux/delay.h> #include <linux/module.h> #include <linux/mii.h> +#include <linux/of.h> #include <linux/of_address.h> #include <linux/of_mdio.h> -#include <linux/of_device.h> +#include <linux/property.h> #include <asm/io.h> #if IS_ENABLED(CONFIG_UCC_GETH) @@ -407,8 +408,6 @@ static void set_tbipa(const u32 tbipa_val, struct platform_device *pdev, static int fsl_pq_mdio_probe(struct platform_device *pdev) { - const struct of_device_id *id = - of_match_device(fsl_pq_mdio_match, &pdev->dev); const struct fsl_pq_mdio_data *data; struct device_node *np = pdev->dev.of_node; struct resource res; @@ -417,15 +416,12 @@ static int fsl_pq_mdio_probe(struct platform_device *pdev) struct mii_bus *new_bus; int err; - if (!id) { + data = device_get_match_data(&pdev->dev); + if (!data) { dev_err(&pdev->dev, "Failed to match device\n"); return -ENODEV; } - data = id->data; - - dev_dbg(&pdev->dev, "found %s compatible node\n", id->compatible); - new_bus = mdiobus_alloc_size(sizeof(*priv)); if (!new_bus) return -ENOMEM; diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c index 83b09dcfafc4..276f996f95dc 100644 --- a/drivers/net/ethernet/google/gve/gve_main.c +++ b/drivers/net/ethernet/google/gve/gve_main.c @@ -281,7 +281,7 @@ static int gve_napi_poll(struct napi_struct *napi, int budget) if (block->rx) reschedule |= gve_rx_work_pending(block->rx); - if (reschedule && napi_reschedule(napi)) + if (reschedule && napi_schedule(napi)) iowrite32be(GVE_IRQ_MASK, irq_doorbell); } return work_done; diff --git a/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c b/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c index 506fa3d8bbee..1a972b093a42 100644 --- a/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c +++ b/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c @@ -7,7 +7,8 @@ #include <linux/interrupt.h> #include <linux/etherdevice.h> #include <linux/platform_device.h> -#include <linux/of_device.h> +#include <linux/property.h> +#include <linux/of.h> #include <linux/of_net.h> #include <linux/of_mdio.h> #include <linux/reset.h> @@ -1094,7 +1095,6 @@ static int hix5hd2_dev_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; struct device_node *node = dev->of_node; - const struct of_device_id *of_id = NULL; struct net_device *ndev; struct hix5hd2_priv *priv; struct mii_bus *bus; @@ -1110,12 +1110,7 @@ static int hix5hd2_dev_probe(struct platform_device *pdev) priv->dev = dev; priv->netdev = ndev; - of_id = of_match_device(hix5hd2_of_match, dev); - if (!of_id) { - ret = -EINVAL; - goto out_free_netdev; - } - priv->hw_cap = (unsigned long)of_id->data; + priv->hw_cap = (unsigned long)device_get_match_data(dev); priv->base = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(priv->base)) { diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h index aaf1f42624a7..d7e175a9cb49 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h +++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h @@ -103,6 +103,7 @@ enum HNAE3_DEV_CAP_BITS { HNAE3_DEV_SUPPORT_LANE_NUM_B, HNAE3_DEV_SUPPORT_WOL_B, HNAE3_DEV_SUPPORT_TM_FLUSH_B, + HNAE3_DEV_SUPPORT_VF_FAULT_B, }; #define hnae3_ae_dev_fd_supported(ae_dev) \ @@ -177,6 +178,9 @@ enum HNAE3_DEV_CAP_BITS { #define hnae3_ae_dev_tm_flush_supported(hdev) \ test_bit(HNAE3_DEV_SUPPORT_TM_FLUSH_B, (hdev)->ae_dev->caps) +#define hnae3_ae_dev_vf_fault_supported(ae_dev) \ + test_bit(HNAE3_DEV_SUPPORT_VF_FAULT_B, (ae_dev)->caps) + enum HNAE3_PF_CAP_BITS { HNAE3_PF_SUPPORT_VLAN_FLTR_MDF_B = 0, }; @@ -271,6 +275,7 @@ enum hnae3_reset_type { HNAE3_GLOBAL_RESET, HNAE3_IMP_RESET, HNAE3_NONE_RESET, + HNAE3_VF_EXP_RESET, HNAE3_MAX_RESET, }; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_cmd.c b/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_cmd.c index dcecb23daac6..d92ad6082d8e 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_cmd.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_cmd.c @@ -157,6 +157,7 @@ static const struct hclge_comm_caps_bit_map hclge_pf_cmd_caps[] = { {HCLGE_COMM_CAP_LANE_NUM_B, HNAE3_DEV_SUPPORT_LANE_NUM_B}, {HCLGE_COMM_CAP_WOL_B, HNAE3_DEV_SUPPORT_WOL_B}, {HCLGE_COMM_CAP_TM_FLUSH_B, HNAE3_DEV_SUPPORT_TM_FLUSH_B}, + {HCLGE_COMM_CAP_VF_FAULT_B, HNAE3_DEV_SUPPORT_VF_FAULT_B}, }; static const struct hclge_comm_caps_bit_map hclge_vf_cmd_caps[] = { diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_cmd.h index 2b7197ce0ae8..533c19d25e4f 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_cmd.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_cmd.h @@ -93,6 +93,7 @@ enum hclge_opcode_type { HCLGE_OPC_DFX_SSU_REG_2 = 0x004F, HCLGE_OPC_QUERY_DEV_SPECS = 0x0050, + HCLGE_OPC_GET_QUEUE_ERR_VF = 0x0067, /* MAC command */ HCLGE_OPC_CONFIG_MAC_MODE = 0x0301, @@ -348,6 +349,7 @@ enum HCLGE_COMM_CAP_BITS { HCLGE_COMM_CAP_GRO_B = 20, HCLGE_COMM_CAP_FD_B = 21, HCLGE_COMM_CAP_FEC_STATS_B = 25, + HCLGE_COMM_CAP_VF_FAULT_B = 26, HCLGE_COMM_CAP_LANE_NUM_B = 27, HCLGE_COMM_CAP_WOL_B = 28, HCLGE_COMM_CAP_TM_FLUSH_B = 31, diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c index b8508533878b..0b138635bafa 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c @@ -414,6 +414,9 @@ static struct hns3_dbg_cap_info hns3_dbg_cap[] = { }, { .name = "support tm flush", .cap_bit = HNAE3_DEV_SUPPORT_TM_FLUSH_B, + }, { + .name = "support vf fault detect", + .cap_bit = HNAE3_DEV_SUPPORT_VF_FAULT_B, } }; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c index 3f35227ef1fa..d63e114f93d0 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c @@ -1301,10 +1301,12 @@ static const struct hclge_hw_type_id hclge_hw_type_id_st[] = { .msg = "tqp_int_ecc_error" }, { .type_id = PF_ABNORMAL_INT_ERROR, - .msg = "pf_abnormal_int_error" + .msg = "pf_abnormal_int_error", + .cause_by_vf = true }, { .type_id = MPF_ABNORMAL_INT_ERROR, - .msg = "mpf_abnormal_int_error" + .msg = "mpf_abnormal_int_error", + .cause_by_vf = true }, { .type_id = COMMON_ERROR, .msg = "common_error" @@ -2759,7 +2761,7 @@ void hclge_handle_occurred_error(struct hclge_dev *hdev) hclge_handle_error_info_log(ae_dev); } -static void +static bool hclge_handle_error_type_reg_log(struct device *dev, struct hclge_mod_err_info *mod_info, struct hclge_type_reg_err_info *type_reg_info) @@ -2770,6 +2772,7 @@ hclge_handle_error_type_reg_log(struct device *dev, u8 mod_id, total_module, type_id, total_type, i, is_ras; u8 index_module = MODULE_NONE; u8 index_type = NONE_ERROR; + bool cause_by_vf = false; mod_id = mod_info->mod_id; type_id = type_reg_info->type_id & HCLGE_ERR_TYPE_MASK; @@ -2788,6 +2791,7 @@ hclge_handle_error_type_reg_log(struct device *dev, for (i = 0; i < total_type; i++) { if (type_id == hclge_hw_type_id_st[i].type_id) { index_type = i; + cause_by_vf = hclge_hw_type_id_st[i].cause_by_vf; break; } } @@ -2805,6 +2809,8 @@ hclge_handle_error_type_reg_log(struct device *dev, dev_err(dev, "reg_value:\n"); for (i = 0; i < type_reg_info->reg_num; i++) dev_err(dev, "0x%08x\n", type_reg_info->hclge_reg[i]); + + return cause_by_vf; } static void hclge_handle_error_module_log(struct hnae3_ae_dev *ae_dev, @@ -2815,6 +2821,7 @@ static void hclge_handle_error_module_log(struct hnae3_ae_dev *ae_dev, struct device *dev = &hdev->pdev->dev; struct hclge_mod_err_info *mod_info; struct hclge_sum_err_info *sum_info; + bool cause_by_vf = false; u8 mod_num, err_num, i; u32 offset = 0; @@ -2843,12 +2850,16 @@ static void hclge_handle_error_module_log(struct hnae3_ae_dev *ae_dev, type_reg_info = (struct hclge_type_reg_err_info *) &buf[offset++]; - hclge_handle_error_type_reg_log(dev, mod_info, - type_reg_info); + if (hclge_handle_error_type_reg_log(dev, mod_info, + type_reg_info)) + cause_by_vf = true; offset += type_reg_info->reg_num; } } + + if (hnae3_ae_dev_vf_fault_supported(hdev->ae_dev) && cause_by_vf) + set_bit(HNAE3_VF_EXP_RESET, &ae_dev->hw_err_reset_req); } static int hclge_query_all_err_bd_num(struct hclge_dev *hdev, u32 *bd_num) @@ -2940,3 +2951,98 @@ err_desc: out: return ret; } + +static bool hclge_reset_vf_in_bitmap(struct hclge_dev *hdev, + unsigned long *bitmap) +{ + struct hclge_vport *vport; + bool exist_set = false; + int func_id; + int ret; + + func_id = find_first_bit(bitmap, HCLGE_VPORT_NUM); + if (func_id == PF_VPORT_ID) + return false; + + while (func_id != HCLGE_VPORT_NUM) { + vport = hclge_get_vf_vport(hdev, + func_id - HCLGE_VF_VPORT_START_NUM); + if (!vport) { + dev_err(&hdev->pdev->dev, "invalid func id(%d)\n", + func_id); + return false; + } + + dev_info(&hdev->pdev->dev, "do function %d recovery.", func_id); + + ret = hclge_reset_tqp(&vport->nic); + if (ret) { + dev_err(&hdev->pdev->dev, + "failed to reset tqp, ret = %d.", ret); + return false; + } + + ret = hclge_inform_vf_reset(vport, HNAE3_VF_FUNC_RESET); + if (ret) { + dev_err(&hdev->pdev->dev, + "failed to reset func %d, ret = %d.", + func_id, ret); + return false; + } + + exist_set = true; + clear_bit(func_id, bitmap); + func_id = find_first_bit(bitmap, HCLGE_VPORT_NUM); + } + + return exist_set; +} + +static void hclge_get_vf_fault_bitmap(struct hclge_desc *desc, + unsigned long *bitmap) +{ +#define HCLGE_FIR_FAULT_BYTES 24 +#define HCLGE_SEC_FAULT_BYTES 8 + + u8 *buff; + + BUILD_BUG_ON(HCLGE_FIR_FAULT_BYTES + HCLGE_SEC_FAULT_BYTES != + BITS_TO_BYTES(HCLGE_VPORT_NUM)); + + memcpy(bitmap, desc[0].data, HCLGE_FIR_FAULT_BYTES); + buff = (u8 *)bitmap + HCLGE_FIR_FAULT_BYTES; + memcpy(buff, desc[1].data, HCLGE_SEC_FAULT_BYTES); +} + +int hclge_handle_vf_queue_err_ras(struct hclge_dev *hdev) +{ + unsigned long vf_fault_bitmap[BITS_TO_LONGS(HCLGE_VPORT_NUM)]; + struct hclge_desc desc[2]; + bool cause_by_vf = false; + int ret; + + if (!test_and_clear_bit(HNAE3_VF_EXP_RESET, + &hdev->ae_dev->hw_err_reset_req) || + !hnae3_ae_dev_vf_fault_supported(hdev->ae_dev)) + return 0; + + hclge_comm_cmd_setup_basic_desc(&desc[0], HCLGE_OPC_GET_QUEUE_ERR_VF, + true); + desc[0].flag |= cpu_to_le16(HCLGE_COMM_CMD_FLAG_NEXT); + hclge_comm_cmd_setup_basic_desc(&desc[1], HCLGE_OPC_GET_QUEUE_ERR_VF, + true); + + ret = hclge_comm_cmd_send(&hdev->hw.hw, desc, 2); + if (ret) { + dev_err(&hdev->pdev->dev, + "failed to get vf bitmap, ret = %d.\n", ret); + return ret; + } + hclge_get_vf_fault_bitmap(desc, vf_fault_bitmap); + + cause_by_vf = hclge_reset_vf_in_bitmap(hdev, vf_fault_bitmap); + if (cause_by_vf) + hdev->ae_dev->hw_err_reset_req = 0; + + return 0; +} diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.h index 86be6fb32990..68b738affa66 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.h @@ -196,6 +196,7 @@ struct hclge_hw_module_id { struct hclge_hw_type_id { enum hclge_err_type_list type_id; const char *msg; + bool cause_by_vf; /* indicate the error may from vf exception */ }; struct hclge_sum_err_info { @@ -228,4 +229,5 @@ int hclge_handle_hw_msix_error(struct hclge_dev *hdev, unsigned long *reset_requests); int hclge_handle_error_info_log(struct hnae3_ae_dev *ae_dev); int hclge_handle_mac_tnl(struct hclge_dev *hdev); +int hclge_handle_vf_queue_err_ras(struct hclge_dev *hdev); #endif diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index c42574e29747..99c0576e6383 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -3424,7 +3424,7 @@ static int hclge_get_status(struct hnae3_handle *handle) return hdev->hw.mac.link; } -static struct hclge_vport *hclge_get_vf_vport(struct hclge_dev *hdev, int vf) +struct hclge_vport *hclge_get_vf_vport(struct hclge_dev *hdev, int vf) { if (!pci_num_vf(hdev->pdev)) { dev_err(&hdev->pdev->dev, @@ -4468,6 +4468,7 @@ static void hclge_handle_err_recovery(struct hclge_dev *hdev) if (hclge_find_error_source(hdev)) { hclge_handle_error_info_log(ae_dev); hclge_handle_mac_tnl(hdev); + hclge_handle_vf_queue_err_ras(hdev); } hclge_handle_err_reset_request(hdev); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h index 7bc2049b723d..02c7aab3546e 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h @@ -1146,4 +1146,6 @@ int hclge_dbg_dump_rst_info(struct hclge_dev *hdev, char *buf, int len); int hclge_push_vf_link_status(struct hclge_vport *vport); int hclge_enable_vport_vlan_filter(struct hclge_vport *vport, bool request_en); int hclge_mac_update_stats(struct hclge_dev *hdev); +struct hclge_vport *hclge_get_vf_vport(struct hclge_dev *hdev, int vf); +int hclge_inform_vf_reset(struct hclge_vport *vport, u16 reset_type); #endif diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c index 04ff9bf12185..4b0d07ca2505 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c @@ -124,7 +124,7 @@ static int hclge_send_mbx_msg(struct hclge_vport *vport, u8 *msg, u16 msg_len, return status; } -static int hclge_inform_vf_reset(struct hclge_vport *vport, u16 reset_type) +int hclge_inform_vf_reset(struct hclge_vport *vport, u16 reset_type) { __le16 msg_data; u8 dest_vfid; diff --git a/drivers/net/ethernet/ibm/ehea/ehea_main.c b/drivers/net/ethernet/ibm/ehea/ehea_main.c index 251dedd55cfb..1e29e5c9a2df 100644 --- a/drivers/net/ethernet/ibm/ehea/ehea_main.c +++ b/drivers/net/ethernet/ibm/ehea/ehea_main.c @@ -900,7 +900,7 @@ static int ehea_poll(struct napi_struct *napi, int budget) if (!cqe && !cqe_skb) return rx; - if (!napi_reschedule(napi)) + if (!napi_schedule(napi)) return rx; cqe_skb = ehea_proc_cqes(pr, EHEA_POLL_MAX_CQES); diff --git a/drivers/net/ethernet/ibm/emac/mal.c b/drivers/net/ethernet/ibm/emac/mal.c index 462646d1b817..2439f7e96e05 100644 --- a/drivers/net/ethernet/ibm/emac/mal.c +++ b/drivers/net/ethernet/ibm/emac/mal.c @@ -442,7 +442,7 @@ static int mal_poll(struct napi_struct *napi, int budget) if (unlikely(mc->ops->peek_rx(mc->dev) || test_bit(MAL_COMMAC_RX_STOPPED, &mc->flags))) { MAL_DBG2(mal, "rotting packet" NL); - if (!napi_reschedule(napi)) + if (!napi_schedule(napi)) goto more_work; spin_lock_irqsave(&mal->lock, flags); diff --git a/drivers/net/ethernet/ibm/ibmveth.c b/drivers/net/ethernet/ibm/ibmveth.c index a8d79ee350f8..b5aef0b29efe 100644 --- a/drivers/net/ethernet/ibm/ibmveth.c +++ b/drivers/net/ethernet/ibm/ibmveth.c @@ -1432,7 +1432,7 @@ static int ibmveth_poll(struct napi_struct *napi, int budget) BUG_ON(lpar_rc != H_SUCCESS); if (ibmveth_rxq_pending_buffer(adapter) && - napi_reschedule(napi)) { + napi_schedule(napi)) { lpar_rc = h_vio_signal(adapter->vdev->unit_address, VIO_IRQ_DISABLE); } diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index cdf5251e5679..30c47b8470ad 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -3519,7 +3519,7 @@ restart_poll: if (napi_complete_done(napi, frames_processed)) { enable_scrq_irq(adapter, rx_scrq); if (pending_scrq(adapter, rx_scrq)) { - if (napi_reschedule(napi)) { + if (napi_schedule(napi)) { disable_scrq_irq(adapter, rx_scrq); goto restart_poll; } @@ -5247,7 +5247,8 @@ static void handle_vpd_rsp(union ibmvnic_crq *crq, /* copy firmware version string from vpd into adapter */ if ((substr + 3 + fw_level_len) < (adapter->vpd->buff + adapter->vpd->len)) { - strncpy((char *)adapter->fw_version, substr + 3, fw_level_len); + strscpy(adapter->fw_version, substr + 3, + sizeof(adapter->fw_version)); } else { dev_info(dev, "FW substr extrapolated VPD buff\n"); } diff --git a/drivers/net/ethernet/intel/Kconfig b/drivers/net/ethernet/intel/Kconfig index e6684f3cc0ce..06ddd7147c7f 100644 --- a/drivers/net/ethernet/intel/Kconfig +++ b/drivers/net/ethernet/intel/Kconfig @@ -225,6 +225,7 @@ config I40E depends on PTP_1588_CLOCK_OPTIONAL depends on PCI select AUXILIARY_BUS + select NET_DEVLINK help This driver supports Intel(R) Ethernet Controller XL710 Family of devices. For more information on how to identify your adapter, go diff --git a/drivers/net/ethernet/intel/i40e/Makefile b/drivers/net/ethernet/intel/i40e/Makefile index 2f21b3e89fd0..cad93f323bd5 100644 --- a/drivers/net/ethernet/intel/i40e/Makefile +++ b/drivers/net/ethernet/intel/i40e/Makefile @@ -24,6 +24,7 @@ i40e-objs := i40e_main.o \ i40e_ddp.o \ i40e_client.o \ i40e_virtchnl_pf.o \ - i40e_xsk.o + i40e_xsk.o \ + i40e_devlink.o i40e-$(CONFIG_I40E_DCB) += i40e_dcb.o i40e_dcb_nl.o diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index 214744de120d..c4cd54aa4dd5 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h +++ b/drivers/net/ethernet/intel/i40e/i40e.h @@ -9,10 +9,12 @@ #include <linux/types.h> #include <linux/avf/virtchnl.h> #include <linux/net/intel/i40e_client.h> +#include <net/devlink.h> #include <net/pkt_cls.h> #include <net/udp_tunnel.h> #include "i40e_dcb.h" #include "i40e_debug.h" +#include "i40e_devlink.h" #include "i40e_io.h" #include "i40e_prototype.h" #include "i40e_register.h" @@ -47,23 +49,19 @@ #define I40E_QUEUE_WAIT_RETRY_LIMIT 10 #define I40E_INT_NAME_STR_LEN (IFNAMSIZ + 16) -#define I40E_NVM_VERSION_LO_SHIFT 0 -#define I40E_NVM_VERSION_LO_MASK (0xff << I40E_NVM_VERSION_LO_SHIFT) -#define I40E_NVM_VERSION_HI_SHIFT 12 -#define I40E_NVM_VERSION_HI_MASK (0xf << I40E_NVM_VERSION_HI_SHIFT) -#define I40E_OEM_VER_BUILD_MASK 0xffff -#define I40E_OEM_VER_PATCH_MASK 0xff -#define I40E_OEM_VER_BUILD_SHIFT 8 -#define I40E_OEM_VER_SHIFT 24 #define I40E_PHY_DEBUG_ALL \ (I40E_AQ_PHY_DEBUG_DISABLE_LINK_FW | \ I40E_AQ_PHY_DEBUG_DISABLE_ALL_LINK_FW) #define I40E_OEM_EETRACK_ID 0xffffffff -#define I40E_OEM_GEN_SHIFT 24 -#define I40E_OEM_SNAP_MASK 0x00ff0000 -#define I40E_OEM_SNAP_SHIFT 16 -#define I40E_OEM_RELEASE_MASK 0x0000ffff +#define I40E_NVM_VERSION_LO_MASK GENMASK(7, 0) +#define I40E_NVM_VERSION_HI_MASK GENMASK(15, 12) +#define I40E_OEM_VER_BUILD_MASK GENMASK(23, 8) +#define I40E_OEM_VER_PATCH_MASK GENMASK(7, 0) +#define I40E_OEM_VER_MASK GENMASK(31, 24) +#define I40E_OEM_GEN_MASK GENMASK(31, 24) +#define I40E_OEM_SNAP_MASK GENMASK(23, 16) +#define I40E_OEM_RELEASE_MASK GENMASK(15, 0) #define I40E_RX_DESC(R, i) \ (&(((union i40e_rx_desc *)((R)->desc))[i])) @@ -411,6 +409,7 @@ static inline const u8 *i40e_channel_mac(struct i40e_channel *ch) /* struct that defines the Ethernet device */ struct i40e_pf { struct pci_dev *pdev; + struct devlink_port devlink_port; struct i40e_hw hw; DECLARE_BITMAP(state, __I40E_STATE_SIZE__); struct msix_entry *msix_entries; @@ -951,43 +950,104 @@ struct i40e_device { }; /** - * i40e_nvm_version_str - format the NVM version strings + * i40e_info_nvm_ver - format the NVM version string * @hw: ptr to the hardware info + * @buf: string buffer to store + * @len: buffer size + * + * Formats NVM version string as: + * <gen>.<snap>.<release> when eetrackid == I40E_OEM_EETRACK_ID + * <nvm_major>.<nvm_minor> otherwise **/ -static inline char *i40e_nvm_version_str(struct i40e_hw *hw) +static inline void i40e_info_nvm_ver(struct i40e_hw *hw, char *buf, size_t len) { - static char buf[32]; - u32 full_ver; - - full_ver = hw->nvm.oem_ver; + struct i40e_nvm_info *nvm = &hw->nvm; - if (hw->nvm.eetrack == I40E_OEM_EETRACK_ID) { + if (nvm->eetrack == I40E_OEM_EETRACK_ID) { + u32 full_ver = nvm->oem_ver; u8 gen, snap; u16 release; - gen = (u8)(full_ver >> I40E_OEM_GEN_SHIFT); - snap = (u8)((full_ver & I40E_OEM_SNAP_MASK) >> - I40E_OEM_SNAP_SHIFT); - release = (u16)(full_ver & I40E_OEM_RELEASE_MASK); - - snprintf(buf, sizeof(buf), "%x.%x.%x", gen, snap, release); + gen = FIELD_GET(I40E_OEM_GEN_MASK, full_ver); + snap = FIELD_GET(I40E_OEM_SNAP_MASK, full_ver); + release = FIELD_GET(I40E_OEM_RELEASE_MASK, full_ver); + snprintf(buf, len, "%x.%x.%x", gen, snap, release); } else { - u8 ver, patch; - u16 build; + u8 major, minor; - ver = (u8)(full_ver >> I40E_OEM_VER_SHIFT); - build = (u16)((full_ver >> I40E_OEM_VER_BUILD_SHIFT) & - I40E_OEM_VER_BUILD_MASK); - patch = (u8)(full_ver & I40E_OEM_VER_PATCH_MASK); + major = FIELD_GET(I40E_NVM_VERSION_HI_MASK, nvm->version); + minor = FIELD_GET(I40E_NVM_VERSION_LO_MASK, nvm->version); + snprintf(buf, len, "%x.%02x", major, minor); + } +} - snprintf(buf, sizeof(buf), - "%x.%02x 0x%x %d.%d.%d", - (hw->nvm.version & I40E_NVM_VERSION_HI_MASK) >> - I40E_NVM_VERSION_HI_SHIFT, - (hw->nvm.version & I40E_NVM_VERSION_LO_MASK) >> - I40E_NVM_VERSION_LO_SHIFT, - hw->nvm.eetrack, ver, build, patch); +/** + * i40e_info_eetrack - format the EETrackID string + * @hw: ptr to the hardware info + * @buf: string buffer to store + * @len: buffer size + * + * Returns hexadecimally formated EETrackID if it is + * different from I40E_OEM_EETRACK_ID or empty string. + **/ +static inline void i40e_info_eetrack(struct i40e_hw *hw, char *buf, size_t len) +{ + struct i40e_nvm_info *nvm = &hw->nvm; + + buf[0] = '\0'; + if (nvm->eetrack != I40E_OEM_EETRACK_ID) + snprintf(buf, len, "0x%08x", nvm->eetrack); +} + +/** + * i40e_info_civd_ver - format the NVM version strings + * @hw: ptr to the hardware info + * @buf: string buffer to store + * @len: buffer size + * + * Returns formated combo image version if adapter's EETrackID is + * different from I40E_OEM_EETRACK_ID or empty string. + **/ +static inline void i40e_info_civd_ver(struct i40e_hw *hw, char *buf, size_t len) +{ + struct i40e_nvm_info *nvm = &hw->nvm; + + buf[0] = '\0'; + if (nvm->eetrack != I40E_OEM_EETRACK_ID) { + u32 full_ver = nvm->oem_ver; + u8 major, minor; + u16 build; + + major = FIELD_GET(I40E_OEM_VER_MASK, full_ver); + build = FIELD_GET(I40E_OEM_VER_BUILD_MASK, full_ver); + minor = FIELD_GET(I40E_OEM_VER_PATCH_MASK, full_ver); + snprintf(buf, len, "%d.%d.%d", major, build, minor); } +} + +/** + * i40e_nvm_version_str - format the NVM version strings + * @hw: ptr to the hardware info + * @buf: string buffer to store + * @len: buffer size + **/ +static inline char *i40e_nvm_version_str(struct i40e_hw *hw, char *buf, + size_t len) +{ + char ver[16] = " "; + + /* Get NVM version */ + i40e_info_nvm_ver(hw, buf, len); + + /* Append EETrackID if provided */ + i40e_info_eetrack(hw, &ver[1], sizeof(ver) - 1); + if (strlen(ver) > 1) + strlcat(buf, ver, len); + + /* Append combo image version if provided */ + i40e_info_civd_ver(hw, &ver[1], sizeof(ver) - 1); + if (strlen(ver) > 1) + strlcat(buf, ver, len); return buf; } diff --git a/drivers/net/ethernet/intel/i40e/i40e_common.c b/drivers/net/ethernet/intel/i40e/i40e_common.c index 6d1042ca0317..04db9cdc7d94 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_common.c +++ b/drivers/net/ethernet/intel/i40e/i40e_common.c @@ -821,62 +821,72 @@ void i40e_pre_tx_queue_cfg(struct i40e_hw *hw, u32 queue, bool enable) } /** - * i40e_read_pba_string - Reads part number string from EEPROM + * i40e_get_pba_string - Reads part number string from EEPROM * @hw: pointer to hardware structure - * @pba_num: stores the part number string from the EEPROM - * @pba_num_size: part number string buffer length * - * Reads the part number string from the EEPROM. + * Reads the part number string from the EEPROM and stores it + * into newly allocated buffer and saves resulting pointer + * to i40e_hw->pba_id field. **/ -int i40e_read_pba_string(struct i40e_hw *hw, u8 *pba_num, - u32 pba_num_size) +void i40e_get_pba_string(struct i40e_hw *hw) { +#define I40E_NVM_PBA_FLAGS_BLK_PRESENT 0xFAFA u16 pba_word = 0; u16 pba_size = 0; u16 pba_ptr = 0; - int status = 0; - u16 i = 0; + int status; + char *ptr; + u16 i; status = i40e_read_nvm_word(hw, I40E_SR_PBA_FLAGS, &pba_word); - if (status || (pba_word != 0xFAFA)) { - hw_dbg(hw, "Failed to read PBA flags or flag is invalid.\n"); - return status; + if (status) { + hw_dbg(hw, "Failed to read PBA flags.\n"); + return; + } + if (pba_word != I40E_NVM_PBA_FLAGS_BLK_PRESENT) { + hw_dbg(hw, "PBA block is not present.\n"); + return; } status = i40e_read_nvm_word(hw, I40E_SR_PBA_BLOCK_PTR, &pba_ptr); if (status) { hw_dbg(hw, "Failed to read PBA Block pointer.\n"); - return status; + return; } status = i40e_read_nvm_word(hw, pba_ptr, &pba_size); if (status) { hw_dbg(hw, "Failed to read PBA Block size.\n"); - return status; + return; } /* Subtract one to get PBA word count (PBA Size word is included in - * total size) + * total size) and advance pointer to first PBA word. */ pba_size--; - if (pba_num_size < (((u32)pba_size * 2) + 1)) { - hw_dbg(hw, "Buffer too small for PBA data.\n"); - return -EINVAL; + pba_ptr++; + if (!pba_size) { + hw_dbg(hw, "PBA ID is empty.\n"); + return; } + ptr = devm_kzalloc(i40e_hw_to_dev(hw), pba_size * 2 + 1, GFP_KERNEL); + if (!ptr) + return; + hw->pba_id = ptr; + for (i = 0; i < pba_size; i++) { - status = i40e_read_nvm_word(hw, (pba_ptr + 1) + i, &pba_word); + status = i40e_read_nvm_word(hw, pba_ptr + i, &pba_word); if (status) { hw_dbg(hw, "Failed to read PBA Block word %d.\n", i); - return status; + devm_kfree(i40e_hw_to_dev(hw), hw->pba_id); + hw->pba_id = NULL; + return; } - pba_num[(i * 2)] = (pba_word >> 8) & 0xFF; - pba_num[(i * 2) + 1] = pba_word & 0xFF; + *ptr++ = (pba_word >> 8) & 0xFF; + *ptr++ = pba_word & 0xFF; } - pba_num[(pba_size * 2)] = '\0'; - - return status; } /** diff --git a/drivers/net/ethernet/intel/i40e/i40e_devlink.c b/drivers/net/ethernet/intel/i40e/i40e_devlink.c new file mode 100644 index 000000000000..9168ade8da47 --- /dev/null +++ b/drivers/net/ethernet/intel/i40e/i40e_devlink.c @@ -0,0 +1,224 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright(c) 2023 Intel Corporation. */ + +#include <net/devlink.h> +#include "i40e.h" +#include "i40e_devlink.h" + +static void i40e_info_get_dsn(struct i40e_pf *pf, char *buf, size_t len) +{ + u8 dsn[8]; + + put_unaligned_be64(pci_get_dsn(pf->pdev), dsn); + + snprintf(buf, len, "%8phD", dsn); +} + +static void i40e_info_fw_mgmt(struct i40e_hw *hw, char *buf, size_t len) +{ + struct i40e_adminq_info *aq = &hw->aq; + + snprintf(buf, len, "%u.%u.%05d", + aq->fw_maj_ver, aq->fw_min_ver, aq->fw_build); +} + +static void i40e_info_fw_api(struct i40e_hw *hw, char *buf, size_t len) +{ + struct i40e_adminq_info *aq = &hw->aq; + + snprintf(buf, len, "%u.%u", aq->api_maj_ver, aq->api_min_ver); +} + +static void i40e_info_pba(struct i40e_hw *hw, char *buf, size_t len) +{ + buf[0] = '\0'; + if (hw->pba_id) + strscpy(buf, hw->pba_id, len); +} + +enum i40e_devlink_version_type { + I40E_DL_VERSION_FIXED, + I40E_DL_VERSION_RUNNING, +}; + +static int i40e_devlink_info_put(struct devlink_info_req *req, + enum i40e_devlink_version_type type, + const char *key, const char *value) +{ + if (!strlen(value)) + return 0; + + switch (type) { + case I40E_DL_VERSION_FIXED: + return devlink_info_version_fixed_put(req, key, value); + case I40E_DL_VERSION_RUNNING: + return devlink_info_version_running_put(req, key, value); + } + return 0; +} + +static int i40e_devlink_info_get(struct devlink *dl, + struct devlink_info_req *req, + struct netlink_ext_ack *extack) +{ + struct i40e_pf *pf = devlink_priv(dl); + struct i40e_hw *hw = &pf->hw; + char buf[32]; + int err; + + i40e_info_get_dsn(pf, buf, sizeof(buf)); + err = devlink_info_serial_number_put(req, buf); + if (err) + return err; + + i40e_info_fw_mgmt(hw, buf, sizeof(buf)); + err = i40e_devlink_info_put(req, I40E_DL_VERSION_RUNNING, + DEVLINK_INFO_VERSION_GENERIC_FW_MGMT, buf); + if (err) + return err; + + i40e_info_fw_api(hw, buf, sizeof(buf)); + err = i40e_devlink_info_put(req, I40E_DL_VERSION_RUNNING, + DEVLINK_INFO_VERSION_GENERIC_FW_MGMT_API, + buf); + if (err) + return err; + + i40e_info_nvm_ver(hw, buf, sizeof(buf)); + err = i40e_devlink_info_put(req, I40E_DL_VERSION_RUNNING, + DEVLINK_INFO_VERSION_GENERIC_FW_PSID, buf); + if (err) + return err; + + i40e_info_eetrack(hw, buf, sizeof(buf)); + err = i40e_devlink_info_put(req, I40E_DL_VERSION_RUNNING, + DEVLINK_INFO_VERSION_GENERIC_FW_BUNDLE_ID, + buf); + if (err) + return err; + + i40e_info_civd_ver(hw, buf, sizeof(buf)); + err = i40e_devlink_info_put(req, I40E_DL_VERSION_RUNNING, + DEVLINK_INFO_VERSION_GENERIC_FW_UNDI, buf); + if (err) + return err; + + i40e_info_pba(hw, buf, sizeof(buf)); + err = i40e_devlink_info_put(req, I40E_DL_VERSION_FIXED, + DEVLINK_INFO_VERSION_GENERIC_BOARD_ID, buf); + + return err; +} + +static const struct devlink_ops i40e_devlink_ops = { + .info_get = i40e_devlink_info_get, +}; + +/** + * i40e_alloc_pf - Allocate devlink and return i40e_pf structure pointer + * @dev: the device to allocate for + * + * Allocate a devlink instance for this device and return the private + * area as the i40e_pf structure. + **/ +struct i40e_pf *i40e_alloc_pf(struct device *dev) +{ + struct devlink *devlink; + + devlink = devlink_alloc(&i40e_devlink_ops, sizeof(struct i40e_pf), dev); + if (!devlink) + return NULL; + + return devlink_priv(devlink); +} + +/** + * i40e_free_pf - Free i40e_pf structure and associated devlink + * @pf: the PF structure + * + * Free i40e_pf structure and devlink allocated by devlink_alloc. + **/ +void i40e_free_pf(struct i40e_pf *pf) +{ + struct devlink *devlink = priv_to_devlink(pf); + + devlink_free(devlink); +} + +/** + * i40e_devlink_register - Register devlink interface for this PF + * @pf: the PF to register the devlink for. + * + * Register the devlink instance associated with this physical function. + **/ +void i40e_devlink_register(struct i40e_pf *pf) +{ + devlink_register(priv_to_devlink(pf)); +} + +/** + * i40e_devlink_unregister - Unregister devlink resources for this PF. + * @pf: the PF structure to cleanup + * + * Releases resources used by devlink and cleans up associated memory. + **/ +void i40e_devlink_unregister(struct i40e_pf *pf) +{ + devlink_unregister(priv_to_devlink(pf)); +} + +/** + * i40e_devlink_set_switch_id - Set unique switch id based on pci dsn + * @pf: the PF to create a devlink port for + * @ppid: struct with switch id information + */ +static void i40e_devlink_set_switch_id(struct i40e_pf *pf, + struct netdev_phys_item_id *ppid) +{ + u64 id = pci_get_dsn(pf->pdev); + + ppid->id_len = sizeof(id); + put_unaligned_be64(id, &ppid->id); +} + +/** + * i40e_devlink_create_port - Create a devlink port for this PF + * @pf: the PF to create a port for + * + * Create and register a devlink_port for this PF. Note that although each + * physical function is connected to a separate devlink instance, the port + * will still be numbered according to the physical function id. + * + * Return: zero on success or an error code on failure. + **/ +int i40e_devlink_create_port(struct i40e_pf *pf) +{ + struct devlink *devlink = priv_to_devlink(pf); + struct devlink_port_attrs attrs = {}; + struct device *dev = &pf->pdev->dev; + int err; + + attrs.flavour = DEVLINK_PORT_FLAVOUR_PHYSICAL; + attrs.phys.port_number = pf->hw.pf_id; + i40e_devlink_set_switch_id(pf, &attrs.switch_id); + devlink_port_attrs_set(&pf->devlink_port, &attrs); + err = devlink_port_register(devlink, &pf->devlink_port, pf->hw.pf_id); + if (err) { + dev_err(dev, "devlink_port_register failed: %d\n", err); + return err; + } + + return 0; +} + +/** + * i40e_devlink_destroy_port - Destroy the devlink_port for this PF + * @pf: the PF to cleanup + * + * Unregisters the devlink_port structure associated with this PF. + **/ +void i40e_devlink_destroy_port(struct i40e_pf *pf) +{ + devlink_port_type_clear(&pf->devlink_port); + devlink_port_unregister(&pf->devlink_port); +} diff --git a/drivers/net/ethernet/intel/i40e/i40e_devlink.h b/drivers/net/ethernet/intel/i40e/i40e_devlink.h new file mode 100644 index 000000000000..469fb3d2ee25 --- /dev/null +++ b/drivers/net/ethernet/intel/i40e/i40e_devlink.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2023, Intel Corporation. */ + +#ifndef _I40E_DEVLINK_H_ +#define _I40E_DEVLINK_H_ + +#include <linux/device.h> + +struct i40e_pf; + +struct i40e_pf *i40e_alloc_pf(struct device *dev); +void i40e_free_pf(struct i40e_pf *pf); +void i40e_devlink_register(struct i40e_pf *pf); +void i40e_devlink_unregister(struct i40e_pf *pf); +int i40e_devlink_create_port(struct i40e_pf *pf); +void i40e_devlink_destroy_port(struct i40e_pf *pf); + +#endif /* _I40E_DEVLINK_H_ */ diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c index a89f7ca510fd..ebf36f76c0d7 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@ -2006,8 +2006,8 @@ static void i40e_get_drvinfo(struct net_device *netdev, struct i40e_pf *pf = vsi->back; strscpy(drvinfo->driver, i40e_driver_name, sizeof(drvinfo->driver)); - strscpy(drvinfo->fw_version, i40e_nvm_version_str(&pf->hw), - sizeof(drvinfo->fw_version)); + i40e_nvm_version_str(&pf->hw, drvinfo->fw_version, + sizeof(drvinfo->fw_version)); strscpy(drvinfo->bus_info, pci_name(pf->pdev), sizeof(drvinfo->bus_info)); drvinfo->n_priv_flags = I40E_PRIV_FLAGS_STR_LEN; diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 1e52e1debf7c..3157d14d9b12 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -10798,7 +10798,9 @@ static void i40e_get_oem_version(struct i40e_hw *hw) &gen_snap); i40e_read_nvm_word(hw, block_offset + I40E_NVM_OEM_RELEASE_OFFSET, &release); - hw->nvm.oem_ver = (gen_snap << I40E_OEM_SNAP_SHIFT) | release; + hw->nvm.oem_ver = + FIELD_PREP(I40E_OEM_GEN_MASK | I40E_OEM_SNAP_MASK, gen_snap) | + FIELD_PREP(I40E_OEM_RELEASE_MASK, release); hw->nvm.eetrack = I40E_OEM_EETRACK_ID; } @@ -14211,6 +14213,8 @@ int i40e_vsi_release(struct i40e_vsi *vsi) } set_bit(__I40E_VSI_RELEASING, vsi->state); uplink_seid = vsi->uplink_seid; + if (vsi->type == I40E_VSI_MAIN) + i40e_devlink_destroy_port(pf); if (vsi->type != I40E_VSI_SRIOV) { if (vsi->netdev_registered) { vsi->netdev_registered = false; @@ -14398,6 +14402,8 @@ static struct i40e_vsi *i40e_vsi_reinit_setup(struct i40e_vsi *vsi) err_rings: i40e_vsi_free_q_vectors(vsi); + if (vsi->type == I40E_VSI_MAIN) + i40e_devlink_destroy_port(pf); if (vsi->netdev_registered) { vsi->netdev_registered = false; unregister_netdev(vsi->netdev); @@ -14544,9 +14550,15 @@ struct i40e_vsi *i40e_vsi_setup(struct i40e_pf *pf, u8 type, ret = i40e_netif_set_realnum_tx_rx_queues(vsi); if (ret) goto err_netdev; + if (vsi->type == I40E_VSI_MAIN) { + ret = i40e_devlink_create_port(pf); + if (ret) + goto err_netdev; + SET_NETDEV_DEVLINK_PORT(vsi->netdev, &pf->devlink_port); + } ret = register_netdev(vsi->netdev); if (ret) - goto err_netdev; + goto err_dl_port; vsi->netdev_registered = true; netif_carrier_off(vsi->netdev); #ifdef CONFIG_I40E_DCB @@ -14589,6 +14601,9 @@ err_msix: free_netdev(vsi->netdev); vsi->netdev = NULL; } +err_dl_port: + if (vsi->type == I40E_VSI_MAIN) + i40e_devlink_destroy_port(pf); err_netdev: i40e_aq_delete_element(&pf->hw, vsi->seid, NULL); err_vsi: @@ -15619,7 +15634,7 @@ err_switch_setup: iounmap(hw->hw_addr); pci_release_mem_regions(pf->pdev); pci_disable_device(pf->pdev); - kfree(pf); + i40e_free_pf(pf); return err; } @@ -15661,6 +15676,7 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) struct i40e_hw *hw; static u16 pfs_found; u16 wol_nvm_bits; + char nvm_ver[32]; u16 link_status; #ifdef CONFIG_I40E_DCB int status; @@ -15696,7 +15712,7 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) * the Admin Queue structures and then querying for the * device's current profile information. */ - pf = kzalloc(sizeof(*pf), GFP_KERNEL); + pf = i40e_alloc_pf(&pdev->dev); if (!pf) { err = -ENOMEM; goto err_pf_alloc; @@ -15830,13 +15846,15 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) goto err_pf_reset; } i40e_get_oem_version(hw); + i40e_get_pba_string(hw); /* provide nvm, fw, api versions, vendor:device id, subsys vendor:device id */ + i40e_nvm_version_str(hw, nvm_ver, sizeof(nvm_ver)); dev_info(&pdev->dev, "fw %d.%d.%05d api %d.%d nvm %s [%04x:%04x] [%04x:%04x]\n", hw->aq.fw_maj_ver, hw->aq.fw_min_ver, hw->aq.fw_build, - hw->aq.api_maj_ver, hw->aq.api_min_ver, - i40e_nvm_version_str(hw), hw->vendor_id, hw->device_id, - hw->subsystem_vendor_id, hw->subsystem_device_id); + hw->aq.api_maj_ver, hw->aq.api_min_ver, nvm_ver, + hw->vendor_id, hw->device_id, hw->subsystem_vendor_id, + hw->subsystem_device_id); if (hw->aq.api_maj_ver == I40E_FW_API_VERSION_MAJOR && hw->aq.api_min_ver > I40E_FW_MINOR_VERSION(hw)) @@ -16223,6 +16241,8 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) /* print a string summarizing features */ i40e_print_features(pf); + i40e_devlink_register(pf); + return 0; /* Unwind what we've done if something failed in the setup */ @@ -16243,7 +16263,7 @@ err_adminq_setup: err_pf_reset: iounmap(hw->hw_addr); err_ioremap: - kfree(pf); + i40e_free_pf(pf); err_pf_alloc: pci_release_mem_regions(pdev); err_pci_reg: @@ -16268,6 +16288,8 @@ static void i40e_remove(struct pci_dev *pdev) int ret_code; int i; + i40e_devlink_unregister(pf); + i40e_dbg_pf_exit(pf); i40e_ptp_stop(pf); @@ -16393,7 +16415,7 @@ unmap: kfree(pf->vsi); iounmap(hw->hw_addr); - kfree(pf); + i40e_free_pf(pf); pci_release_mem_regions(pdev); pci_disable_device(pdev); diff --git a/drivers/net/ethernet/intel/i40e/i40e_prototype.h b/drivers/net/ethernet/intel/i40e/i40e_prototype.h index 46b9a05ceb91..001162042050 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_prototype.h +++ b/drivers/net/ethernet/intel/i40e/i40e_prototype.h @@ -341,8 +341,7 @@ i40e_aq_configure_partition_bw(struct i40e_hw *hw, struct i40e_aqc_configure_partition_bw_data *bw_data, struct i40e_asq_cmd_details *cmd_details); int i40e_get_port_mac_addr(struct i40e_hw *hw, u8 *mac_addr); -int i40e_read_pba_string(struct i40e_hw *hw, u8 *pba_num, - u32 pba_num_size); +void i40e_get_pba_string(struct i40e_hw *hw); void i40e_pre_tx_queue_cfg(struct i40e_hw *hw, u32 queue, bool enable); /* prototype for functions used for NVM access */ int i40e_init_nvm(struct i40e_hw *hw); diff --git a/drivers/net/ethernet/intel/i40e/i40e_type.h b/drivers/net/ethernet/intel/i40e/i40e_type.h index dc7cd16ad8fb..aff6dc6afbe2 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_type.h +++ b/drivers/net/ethernet/intel/i40e/i40e_type.h @@ -493,6 +493,9 @@ struct i40e_hw { struct i40e_nvm_info nvm; struct i40e_fc_info fc; + /* PBA ID */ + const char *pba_id; + /* pci info */ u16 device_id; u16 vendor_id; diff --git a/drivers/net/ethernet/intel/ice/ice_dpll.c b/drivers/net/ethernet/intel/ice/ice_dpll.c index 1faee9cb944d..835c419ccc74 100644 --- a/drivers/net/ethernet/intel/ice/ice_dpll.c +++ b/drivers/net/ethernet/intel/ice/ice_dpll.c @@ -879,6 +879,199 @@ ice_dpll_output_direction(const struct dpll_pin *pin, void *pin_priv, } /** + * ice_dpll_pin_phase_adjust_get - callback for get pin phase adjust value + * @pin: pointer to a pin + * @pin_priv: private data pointer passed on pin registration + * @dpll: registered dpll pointer + * @dpll_priv: private data pointer passed on dpll registration + * @phase_adjust: on success holds pin phase_adjust value + * @extack: error reporting + * + * Dpll subsystem callback. Handler for getting phase adjust value of a pin. + * + * Context: Acquires pf->dplls.lock + * Return: + * * 0 - success + * * negative - error + */ +static int +ice_dpll_pin_phase_adjust_get(const struct dpll_pin *pin, void *pin_priv, + const struct dpll_device *dpll, void *dpll_priv, + s32 *phase_adjust, + struct netlink_ext_ack *extack) +{ + struct ice_dpll_pin *p = pin_priv; + struct ice_pf *pf = p->pf; + + mutex_lock(&pf->dplls.lock); + *phase_adjust = p->phase_adjust; + mutex_unlock(&pf->dplls.lock); + + return 0; +} + +/** + * ice_dpll_pin_phase_adjust_set - helper for setting a pin phase adjust value + * @pin: pointer to a pin + * @pin_priv: private data pointer passed on pin registration + * @dpll: registered dpll pointer + * @dpll_priv: private data pointer passed on dpll registration + * @phase_adjust: phase_adjust to be set + * @extack: error reporting + * @type: type of a pin + * + * Helper for dpll subsystem callback. Handler for setting phase adjust value + * of a pin. + * + * Context: Acquires pf->dplls.lock + * Return: + * * 0 - success + * * negative - error + */ +static int +ice_dpll_pin_phase_adjust_set(const struct dpll_pin *pin, void *pin_priv, + const struct dpll_device *dpll, void *dpll_priv, + s32 phase_adjust, + struct netlink_ext_ack *extack, + enum ice_dpll_pin_type type) +{ + struct ice_dpll_pin *p = pin_priv; + struct ice_dpll *d = dpll_priv; + struct ice_pf *pf = d->pf; + u8 flag, flags_en = 0; + int ret; + + mutex_lock(&pf->dplls.lock); + switch (type) { + case ICE_DPLL_PIN_TYPE_INPUT: + flag = ICE_AQC_SET_CGU_IN_CFG_FLG1_UPDATE_DELAY; + if (p->flags[0] & ICE_AQC_GET_CGU_IN_CFG_FLG2_ESYNC_EN) + flags_en |= ICE_AQC_SET_CGU_IN_CFG_FLG2_ESYNC_EN; + if (p->flags[0] & ICE_AQC_GET_CGU_IN_CFG_FLG2_INPUT_EN) + flags_en |= ICE_AQC_SET_CGU_IN_CFG_FLG2_INPUT_EN; + ret = ice_aq_set_input_pin_cfg(&pf->hw, p->idx, flag, flags_en, + 0, phase_adjust); + break; + case ICE_DPLL_PIN_TYPE_OUTPUT: + flag = ICE_AQC_SET_CGU_OUT_CFG_UPDATE_PHASE; + if (p->flags[0] & ICE_AQC_GET_CGU_OUT_CFG_OUT_EN) + flag |= ICE_AQC_SET_CGU_OUT_CFG_OUT_EN; + if (p->flags[0] & ICE_AQC_GET_CGU_OUT_CFG_ESYNC_EN) + flag |= ICE_AQC_SET_CGU_OUT_CFG_ESYNC_EN; + ret = ice_aq_set_output_pin_cfg(&pf->hw, p->idx, flag, 0, 0, + phase_adjust); + break; + default: + ret = -EINVAL; + } + if (!ret) + p->phase_adjust = phase_adjust; + mutex_unlock(&pf->dplls.lock); + if (ret) + NL_SET_ERR_MSG_FMT(extack, + "err:%d %s failed to set pin phase_adjust:%d for pin:%u on dpll:%u\n", + ret, + ice_aq_str(pf->hw.adminq.sq_last_status), + phase_adjust, p->idx, d->dpll_idx); + + return ret; +} + +/** + * ice_dpll_input_phase_adjust_set - callback for set input pin phase adjust + * @pin: pointer to a pin + * @pin_priv: private data pointer passed on pin registration + * @dpll: registered dpll pointer + * @dpll_priv: private data pointer passed on dpll registration + * @phase_adjust: phase_adjust to be set + * @extack: error reporting + * + * Dpll subsystem callback. Wraps a handler for setting phase adjust on input + * pin. + * + * Context: Calls a function which acquires pf->dplls.lock + * Return: + * * 0 - success + * * negative - error + */ +static int +ice_dpll_input_phase_adjust_set(const struct dpll_pin *pin, void *pin_priv, + const struct dpll_device *dpll, void *dpll_priv, + s32 phase_adjust, + struct netlink_ext_ack *extack) +{ + return ice_dpll_pin_phase_adjust_set(pin, pin_priv, dpll, dpll_priv, + phase_adjust, extack, + ICE_DPLL_PIN_TYPE_INPUT); +} + +/** + * ice_dpll_output_phase_adjust_set - callback for set output pin phase adjust + * @pin: pointer to a pin + * @pin_priv: private data pointer passed on pin registration + * @dpll: registered dpll pointer + * @dpll_priv: private data pointer passed on dpll registration + * @phase_adjust: phase_adjust to be set + * @extack: error reporting + * + * Dpll subsystem callback. Wraps a handler for setting phase adjust on output + * pin. + * + * Context: Calls a function which acquires pf->dplls.lock + * Return: + * * 0 - success + * * negative - error + */ +static int +ice_dpll_output_phase_adjust_set(const struct dpll_pin *pin, void *pin_priv, + const struct dpll_device *dpll, void *dpll_priv, + s32 phase_adjust, + struct netlink_ext_ack *extack) +{ + return ice_dpll_pin_phase_adjust_set(pin, pin_priv, dpll, dpll_priv, + phase_adjust, extack, + ICE_DPLL_PIN_TYPE_OUTPUT); +} + +#define ICE_DPLL_PHASE_OFFSET_DIVIDER 100 +#define ICE_DPLL_PHASE_OFFSET_FACTOR \ + (DPLL_PHASE_OFFSET_DIVIDER / ICE_DPLL_PHASE_OFFSET_DIVIDER) +/** + * ice_dpll_phase_offset_get - callback for get dpll phase shift value + * @pin: pointer to a pin + * @pin_priv: private data pointer passed on pin registration + * @dpll: registered dpll pointer + * @dpll_priv: private data pointer passed on dpll registration + * @phase_offset: on success holds pin phase_offset value + * @extack: error reporting + * + * Dpll subsystem callback. Handler for getting phase shift value between + * dpll's input and output. + * + * Context: Acquires pf->dplls.lock + * Return: + * * 0 - success + * * negative - error + */ +static int +ice_dpll_phase_offset_get(const struct dpll_pin *pin, void *pin_priv, + const struct dpll_device *dpll, void *dpll_priv, + s64 *phase_offset, struct netlink_ext_ack *extack) +{ + struct ice_dpll *d = dpll_priv; + struct ice_pf *pf = d->pf; + + mutex_lock(&pf->dplls.lock); + if (d->active_input == pin) + *phase_offset = d->phase_offset * ICE_DPLL_PHASE_OFFSET_FACTOR; + else + *phase_offset = 0; + mutex_unlock(&pf->dplls.lock); + + return 0; +} + +/** * ice_dpll_rclk_state_on_pin_set - set a state on rclk pin * @pin: pointer to a pin * @pin_priv: private data pointer passed on pin registration @@ -993,6 +1186,9 @@ static const struct dpll_pin_ops ice_dpll_input_ops = { .prio_get = ice_dpll_input_prio_get, .prio_set = ice_dpll_input_prio_set, .direction_get = ice_dpll_input_direction, + .phase_adjust_get = ice_dpll_pin_phase_adjust_get, + .phase_adjust_set = ice_dpll_input_phase_adjust_set, + .phase_offset_get = ice_dpll_phase_offset_get, }; static const struct dpll_pin_ops ice_dpll_output_ops = { @@ -1001,6 +1197,8 @@ static const struct dpll_pin_ops ice_dpll_output_ops = { .state_on_dpll_get = ice_dpll_output_state_get, .state_on_dpll_set = ice_dpll_output_state_set, .direction_get = ice_dpll_output_direction, + .phase_adjust_get = ice_dpll_pin_phase_adjust_get, + .phase_adjust_set = ice_dpll_output_phase_adjust_set, }; static const struct dpll_device_ops ice_dpll_ops = { @@ -1031,6 +1229,8 @@ static u64 ice_generate_clock_id(struct ice_pf *pf) */ static void ice_dpll_notify_changes(struct ice_dpll *d) { + bool pin_notified = false; + if (d->prev_dpll_state != d->dpll_state) { d->prev_dpll_state = d->dpll_state; dpll_device_change_ntf(d->dpll); @@ -1039,7 +1239,14 @@ static void ice_dpll_notify_changes(struct ice_dpll *d) if (d->prev_input) dpll_pin_change_ntf(d->prev_input); d->prev_input = d->active_input; - if (d->active_input) + if (d->active_input) { + dpll_pin_change_ntf(d->active_input); + pin_notified = true; + } + } + if (d->prev_phase_offset != d->phase_offset) { + d->prev_phase_offset = d->phase_offset; + if (!pin_notified && d->active_input) dpll_pin_change_ntf(d->active_input); } } @@ -1065,7 +1272,7 @@ ice_dpll_update_state(struct ice_pf *pf, struct ice_dpll *d, bool init) ret = ice_get_cgu_state(&pf->hw, d->dpll_idx, d->prev_dpll_state, &d->input_idx, &d->ref_state, &d->eec_mode, - &d->phase_shift, &d->dpll_state); + &d->phase_offset, &d->dpll_state); dev_dbg(ice_pf_to_dev(pf), "update dpll=%d, prev_src_idx:%u, src_idx:%u, state:%d, prev:%d mode:%d\n", @@ -1656,6 +1863,15 @@ ice_dpll_init_info_direct_pins(struct ice_pf *pf, return ret; pins[i].prop.capabilities |= DPLL_PIN_CAPABILITIES_PRIORITY_CAN_CHANGE; + pins[i].prop.phase_range.min = + pf->dplls.input_phase_adj_max; + pins[i].prop.phase_range.max = + -pf->dplls.input_phase_adj_max; + } else { + pins[i].prop.phase_range.min = + pf->dplls.output_phase_adj_max; + pins[i].prop.phase_range.max = + -pf->dplls.output_phase_adj_max; } pins[i].prop.capabilities |= DPLL_PIN_CAPABILITIES_STATE_CAN_CHANGE; diff --git a/drivers/net/ethernet/intel/ice/ice_dpll.h b/drivers/net/ethernet/intel/ice/ice_dpll.h index 2dfe764b81e1..bb32b6d88373 100644 --- a/drivers/net/ethernet/intel/ice/ice_dpll.h +++ b/drivers/net/ethernet/intel/ice/ice_dpll.h @@ -19,6 +19,7 @@ * @state: state of a pin * @prop: pin properties * @freq: current frequency of a pin + * @phase_adjust: current phase adjust value */ struct ice_dpll_pin { struct dpll_pin *pin; @@ -30,6 +31,7 @@ struct ice_dpll_pin { u8 state[ICE_DPLL_RCLK_NUM_MAX]; struct dpll_pin_properties prop; u32 freq; + s32 phase_adjust; }; /** ice_dpll - store info required for DPLL control @@ -40,7 +42,8 @@ struct ice_dpll_pin { * @prev_input_idx: previously selected input index * @ref_state: state of dpll reference signals * @eec_mode: eec_mode dpll is configured for - * @phase_shift: phase shift delay of a dpll + * @phase_offset: phase offset of active pin vs dpll signal + * @prev_phase_offset: previous phase offset of active pin vs dpll signal * @input_prio: priorities of each input * @dpll_state: current dpll sync state * @prev_dpll_state: last dpll sync state @@ -55,7 +58,8 @@ struct ice_dpll { u8 prev_input_idx; u8 ref_state; u8 eec_mode; - s64 phase_shift; + s64 phase_offset; + s64 prev_phase_offset; u8 *input_prio; enum dpll_lock_status dpll_state; enum dpll_lock_status prev_dpll_state; @@ -78,6 +82,8 @@ struct ice_dpll { * @cgu_state_acq_err_num: number of errors returned during periodic work * @base_rclk_idx: idx of first pin used for clock revocery pins * @clock_id: clock_id of dplls + * @input_phase_adj_max: max phase adjust value for an input pins + * @output_phase_adj_max: max phase adjust value for an output pins */ struct ice_dplls { struct kthread_worker *kworker; diff --git a/drivers/net/ethernet/intel/ice/ice_lag.c b/drivers/net/ethernet/intel/ice/ice_lag.c index 2c96d1883e19..165a9d512ce2 100644 --- a/drivers/net/ethernet/intel/ice/ice_lag.c +++ b/drivers/net/ethernet/intel/ice/ice_lag.c @@ -2069,3 +2069,35 @@ lag_rebuild_out: } mutex_unlock(&pf->lag_mutex); } + +/** + * ice_lag_is_switchdev_running + * @pf: pointer to PF structure + * + * Check if switchdev is running on any of the interfaces connected to lag. + */ +bool ice_lag_is_switchdev_running(struct ice_pf *pf) +{ + struct ice_lag *lag = pf->lag; + struct net_device *tmp_nd; + + if (!ice_is_feature_supported(pf, ICE_F_SRIOV_LAG) || !lag) + return false; + + rcu_read_lock(); + for_each_netdev_in_bond_rcu(lag->upper_netdev, tmp_nd) { + struct ice_netdev_priv *priv = netdev_priv(tmp_nd); + + if (!netif_is_ice(tmp_nd) || !priv || !priv->vsi || + !priv->vsi->back) + continue; + + if (ice_is_switchdev_running(priv->vsi->back)) { + rcu_read_unlock(); + return true; + } + } + rcu_read_unlock(); + + return false; +} diff --git a/drivers/net/ethernet/intel/ice/ice_lag.h b/drivers/net/ethernet/intel/ice/ice_lag.h index 18075b82485a..facb6c894b6d 100644 --- a/drivers/net/ethernet/intel/ice/ice_lag.h +++ b/drivers/net/ethernet/intel/ice/ice_lag.h @@ -62,4 +62,5 @@ void ice_lag_move_new_vf_nodes(struct ice_vf *vf); int ice_init_lag(struct ice_pf *pf); void ice_deinit_lag(struct ice_pf *pf); void ice_lag_rebuild(struct ice_pf *pf); +bool ice_lag_is_switchdev_running(struct ice_pf *pf); #endif /* _ICE_LAG_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index acc3ffc940e7..1f45f0c3963d 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -3562,6 +3562,12 @@ int ice_set_dflt_vsi(struct ice_vsi *vsi) dev = ice_pf_to_dev(vsi->back); + if (ice_lag_is_switchdev_running(vsi->back)) { + dev_dbg(dev, "VSI %d passed is a part of LAG containing interfaces in switchdev mode, nothing to do\n", + vsi->vsi_num); + return 0; + } + /* the VSI passed in is already the default VSI */ if (ice_is_vsi_dflt_vsi(vsi)) { dev_dbg(dev, "VSI %d passed in is already the default forwarding VSI, nothing to do\n", diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c index 4c6e2a485d8e..cd593f5719e1 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c @@ -28,6 +28,9 @@ static inline void ixgbe_alloc_vf_macvlans(struct ixgbe_adapter *adapter, struct vf_macvlans *mv_list; int num_vf_macvlans, i; + /* Initialize list of VF macvlans */ + INIT_LIST_HEAD(&adapter->vf_mvs.l); + num_vf_macvlans = hw->mac.num_rar_entries - (IXGBE_MAX_PF_MACVLANS + 1 + num_vfs); if (!num_vf_macvlans) @@ -36,8 +39,6 @@ static inline void ixgbe_alloc_vf_macvlans(struct ixgbe_adapter *adapter, mv_list = kcalloc(num_vf_macvlans, sizeof(struct vf_macvlans), GFP_KERNEL); if (mv_list) { - /* Initialize list of VF macvlans */ - INIT_LIST_HEAD(&adapter->vf_mvs.l); for (i = 0; i < num_vf_macvlans; i++) { mv_list[i].vf = -1; mv_list[i].free = true; diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c index f11a3f6540b0..93137606869e 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c @@ -5831,7 +5831,7 @@ static int mvpp2_multi_queue_vectors_init(struct mvpp2_port *port, v->type = MVPP2_QUEUE_VECTOR_SHARED; if (port->flags & MVPP2_F_DT_COMPAT) - strncpy(irqname, "rx-shared", sizeof(irqname)); + strscpy(irqname, "rx-shared", sizeof(irqname)); } if (port_node) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c index e06f77ad6106..6c70c8498690 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c @@ -1218,8 +1218,6 @@ static inline void link_status_user_format(u64 lstat, struct cgx_link_user_info *linfo, struct cgx *cgx, u8 lmac_id) { - const char *lmac_string; - linfo->link_up = FIELD_GET(RESP_LINKSTAT_UP, lstat); linfo->full_duplex = FIELD_GET(RESP_LINKSTAT_FDUPLEX, lstat); linfo->speed = cgx_speed_mbps[FIELD_GET(RESP_LINKSTAT_SPEED, lstat)]; @@ -1230,12 +1228,12 @@ static inline void link_status_user_format(u64 lstat, if (linfo->lmac_type_id >= LMAC_MODE_MAX) { dev_err(&cgx->pdev->dev, "Unknown lmac_type_id %d reported by firmware on cgx port%d:%d", linfo->lmac_type_id, cgx->cgx_id, lmac_id); - strncpy(linfo->lmac_type, "Unknown", LMACTYPE_STR_LEN - 1); + strscpy(linfo->lmac_type, "Unknown", sizeof(linfo->lmac_type)); return; } - lmac_string = cgx_lmactype_string[linfo->lmac_type_id]; - strncpy(linfo->lmac_type, lmac_string, LMACTYPE_STR_LEN - 1); + strscpy(linfo->lmac_type, cgx_lmactype_string[linfo->lmac_type_id], + sizeof(linfo->lmac_type)); } /* Hardware event handlers */ diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c index f2b1edf1bb43..15a319684ed3 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c @@ -756,12 +756,11 @@ static int rvu_cgx_ptp_rx_cfg(struct rvu *rvu, u16 pcifunc, bool enable) if (!is_mac_feature_supported(rvu, pf, RVU_LMAC_FEAT_PTP)) return 0; - /* This msg is expected only from PFs that are mapped to CGX LMACs, + /* This msg is expected only from PF/VFs that are mapped to CGX/RPM LMACs, * if received from other PF/VF simply ACK, nothing to do. */ - if ((pcifunc & RVU_PFVF_FUNC_MASK) || - !is_pf_cgxmapped(rvu, pf)) - return -ENODEV; + if (!is_pf_cgxmapped(rvu, pf)) + return -EPERM; rvu_get_cgx_lmac_id(rvu->pf2cgxlmac_map[pf], &cgx_id, &lmac_id); cgxd = rvu_cgx_pdata(cgx_id, rvu); diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/cn10k_macsec.c b/drivers/net/ethernet/marvell/octeontx2/nic/cn10k_macsec.c index 59b138214af2..6cc7a78968fc 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/cn10k_macsec.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/cn10k_macsec.c @@ -1357,10 +1357,12 @@ static int cn10k_mdo_upd_txsa(struct macsec_context *ctx) if (netif_running(secy->netdev)) { /* Keys cannot be changed after creation */ - err = cn10k_write_tx_sa_pn(pfvf, txsc, sa_num, - sw_tx_sa->next_pn); - if (err) - return err; + if (ctx->sa.update_pn) { + err = cn10k_write_tx_sa_pn(pfvf, txsc, sa_num, + sw_tx_sa->next_pn); + if (err) + return err; + } err = cn10k_mcs_link_tx_sa2sc(pfvf, secy, txsc, sa_num, sw_tx_sa->active); @@ -1529,6 +1531,9 @@ static int cn10k_mdo_upd_rxsa(struct macsec_context *ctx) if (err) return err; + if (!ctx->sa.update_pn) + return 0; + err = cn10k_mcs_write_rx_sa_pn(pfvf, rxsc, sa_num, rx_sa->next_pn); if (err) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c index 997fedac3a98..818ce76185b2 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c @@ -1403,6 +1403,7 @@ int otx2_pool_init(struct otx2_nic *pfvf, u16 pool_id, return 0; } + pp_params.order = get_order(buf_size); pp_params.flags = PP_FLAG_PAGE_FRAG | PP_FLAG_DMA_MAP; pp_params.pool_size = min(OTX2_PAGE_POOL_SZ, numptrs); pp_params.nid = NUMA_NO_NODE; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index 332472fe4990..a09b6e05337d 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -400,7 +400,7 @@ void mlx4_en_recover_from_oom(struct mlx4_en_priv *priv) for (ring = 0; ring < priv->rx_ring_num; ring++) { if (mlx4_en_is_ring_empty(priv->rx_ring[ring])) { local_bh_disable(); - napi_reschedule(&priv->rx_cq[ring]->napi); + napi_schedule(&priv->rx_cq[ring]->napi); local_bh_enable(); } } diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c index fe48d20d6118..0005d9e2c2d6 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -1967,7 +1967,7 @@ int mlx4_INIT_HCA(struct mlx4_dev *dev, struct mlx4_init_hca_param *param) if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_DRIVER_VERSION_TO_FW) { u8 *dst = (u8 *)(inbox + INIT_HCA_DRIVER_VERSION_OFFSET / 4); - strncpy(dst, DRV_NAME_FOR_FW, INIT_HCA_DRIVER_VERSION_SZ - 1); + strscpy(dst, DRV_NAME_FOR_FW, INIT_HCA_DRIVER_VERSION_SZ); mlx4_dbg(dev, "Reporting Driver Version to FW: %s\n", dst); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index afb348579577..7671c5727ed1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -525,6 +525,7 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op, case MLX5_CMD_OP_SAVE_VHCA_STATE: case MLX5_CMD_OP_LOAD_VHCA_STATE: case MLX5_CMD_OP_SYNC_CRYPTO: + case MLX5_CMD_OP_ALLOW_OTHER_VHCA_ACCESS: *status = MLX5_DRIVER_STATUS_ABORTED; *synd = MLX5_DRIVER_SYND; return -ENOLINK; @@ -728,6 +729,7 @@ const char *mlx5_command_str(int command) MLX5_COMMAND_STR_CASE(SAVE_VHCA_STATE); MLX5_COMMAND_STR_CASE(LOAD_VHCA_STATE); MLX5_COMMAND_STR_CASE(SYNC_CRYPTO); + MLX5_COMMAND_STR_CASE(ALLOW_OTHER_VHCA_ACCESS); default: return "unknown command opcode"; } } @@ -2090,6 +2092,74 @@ int mlx5_cmd_exec_cb(struct mlx5_async_ctx *ctx, void *in, int in_size, } EXPORT_SYMBOL(mlx5_cmd_exec_cb); +int mlx5_cmd_allow_other_vhca_access(struct mlx5_core_dev *dev, + struct mlx5_cmd_allow_other_vhca_access_attr *attr) +{ + u32 out[MLX5_ST_SZ_DW(allow_other_vhca_access_out)] = {}; + u32 in[MLX5_ST_SZ_DW(allow_other_vhca_access_in)] = {}; + void *key; + + MLX5_SET(allow_other_vhca_access_in, + in, opcode, MLX5_CMD_OP_ALLOW_OTHER_VHCA_ACCESS); + MLX5_SET(allow_other_vhca_access_in, + in, object_type_to_be_accessed, attr->obj_type); + MLX5_SET(allow_other_vhca_access_in, + in, object_id_to_be_accessed, attr->obj_id); + + key = MLX5_ADDR_OF(allow_other_vhca_access_in, in, access_key); + memcpy(key, attr->access_key, sizeof(attr->access_key)); + + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); +} + +int mlx5_cmd_alias_obj_create(struct mlx5_core_dev *dev, + struct mlx5_cmd_alias_obj_create_attr *alias_attr, + u32 *obj_id) +{ + u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {}; + u32 in[MLX5_ST_SZ_DW(create_alias_obj_in)] = {}; + void *param; + void *attr; + void *key; + int ret; + + attr = MLX5_ADDR_OF(create_alias_obj_in, in, hdr); + MLX5_SET(general_obj_in_cmd_hdr, + attr, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT); + MLX5_SET(general_obj_in_cmd_hdr, + attr, obj_type, alias_attr->obj_type); + param = MLX5_ADDR_OF(general_obj_in_cmd_hdr, in, op_param); + MLX5_SET(general_obj_create_param, param, alias_object, 1); + + attr = MLX5_ADDR_OF(create_alias_obj_in, in, alias_ctx); + MLX5_SET(alias_context, attr, vhca_id_to_be_accessed, alias_attr->vhca_id); + MLX5_SET(alias_context, attr, object_id_to_be_accessed, alias_attr->obj_id); + + key = MLX5_ADDR_OF(alias_context, attr, access_key); + memcpy(key, alias_attr->access_key, sizeof(alias_attr->access_key)); + + ret = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); + if (ret) + return ret; + + *obj_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id); + + return 0; +} + +int mlx5_cmd_alias_obj_destroy(struct mlx5_core_dev *dev, u32 obj_id, + u16 obj_type) +{ + u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {}; + u32 in[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)] = {}; + + MLX5_SET(general_obj_in_cmd_hdr, in, opcode, MLX5_CMD_OP_DESTROY_GENERAL_OBJECT); + MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, obj_type); + MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, obj_id); + + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); +} + static void destroy_msg_cache(struct mlx5_core_dev *dev) { struct cmd_msg_cache *ch; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 86f2690c5e01..2b3254e33fe5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -168,6 +168,13 @@ struct page_pool; #define mlx5e_state_dereference(priv, p) \ rcu_dereference_protected((p), lockdep_is_held(&(priv)->state_lock)) +enum mlx5e_devcom_events { + MPV_DEVCOM_MASTER_UP, + MPV_DEVCOM_MASTER_DOWN, + MPV_DEVCOM_IPSEC_MASTER_UP, + MPV_DEVCOM_IPSEC_MASTER_DOWN, +}; + static inline u8 mlx5e_get_num_lag_ports(struct mlx5_core_dev *mdev) { if (mlx5_lag_is_lacp_owner(mdev)) @@ -936,6 +943,7 @@ struct mlx5e_priv { struct mlx5e_htb *htb; struct mlx5e_mqprio_rl *mqprio_rl; struct dentry *dfs_root; + struct mlx5_devcom_comp_dev *devcom; }; struct mlx5e_dev { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c index 7d4ceb9b9c16..62f9c19f1028 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c @@ -850,6 +850,7 @@ void mlx5e_ipsec_init(struct mlx5e_priv *priv) xa_init_flags(&ipsec->sadb, XA_FLAGS_ALLOC); ipsec->mdev = priv->mdev; + init_completion(&ipsec->comp); ipsec->wq = alloc_workqueue("mlx5e_ipsec: %s", WQ_UNBOUND, 0, priv->netdev->name); if (!ipsec->wq) @@ -870,7 +871,7 @@ void mlx5e_ipsec_init(struct mlx5e_priv *priv) } ipsec->is_uplink_rep = mlx5e_is_uplink_rep(priv); - ret = mlx5e_accel_ipsec_fs_init(ipsec); + ret = mlx5e_accel_ipsec_fs_init(ipsec, &priv->devcom); if (ret) goto err_fs_init; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h index 9e7c42c2f77b..8f4a37bceaf4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h @@ -38,6 +38,7 @@ #include <net/xfrm.h> #include <linux/idr.h> #include "lib/aso.h" +#include "lib/devcom.h" #define MLX5E_IPSEC_SADB_RX_BITS 10 #define MLX5E_IPSEC_ESN_SCOPE_MID 0x80000000L @@ -221,12 +222,20 @@ struct mlx5e_ipsec_tx_create_attr { enum mlx5_flow_namespace_type chains_ns; }; +struct mlx5e_ipsec_mpv_work { + int event; + struct work_struct work; + struct mlx5e_priv *slave_priv; + struct mlx5e_priv *master_priv; +}; + struct mlx5e_ipsec { struct mlx5_core_dev *mdev; struct xarray sadb; struct mlx5e_ipsec_sw_stats sw_stats; struct mlx5e_ipsec_hw_stats hw_stats; struct workqueue_struct *wq; + struct completion comp; struct mlx5e_flow_steering *fs; struct mlx5e_ipsec_rx *rx_ipv4; struct mlx5e_ipsec_rx *rx_ipv6; @@ -238,6 +247,7 @@ struct mlx5e_ipsec { struct notifier_block netevent_nb; struct mlx5_ipsec_fs *roce; u8 is_uplink_rep: 1; + struct mlx5e_ipsec_mpv_work mpv_work; }; struct mlx5e_ipsec_esn_state { @@ -302,7 +312,7 @@ void mlx5e_ipsec_cleanup(struct mlx5e_priv *priv); void mlx5e_ipsec_build_netdev(struct mlx5e_priv *priv); void mlx5e_accel_ipsec_fs_cleanup(struct mlx5e_ipsec *ipsec); -int mlx5e_accel_ipsec_fs_init(struct mlx5e_ipsec *ipsec); +int mlx5e_accel_ipsec_fs_init(struct mlx5e_ipsec *ipsec, struct mlx5_devcom_comp_dev **devcom); int mlx5e_accel_ipsec_fs_add_rule(struct mlx5e_ipsec_sa_entry *sa_entry); void mlx5e_accel_ipsec_fs_del_rule(struct mlx5e_ipsec_sa_entry *sa_entry); int mlx5e_accel_ipsec_fs_add_pol(struct mlx5e_ipsec_pol_entry *pol_entry); @@ -328,6 +338,10 @@ void mlx5e_accel_ipsec_fs_read_stats(struct mlx5e_priv *priv, void mlx5e_ipsec_build_accel_xfrm_attrs(struct mlx5e_ipsec_sa_entry *sa_entry, struct mlx5_accel_esp_xfrm_attrs *attrs); +void mlx5e_ipsec_handle_mpv_event(int event, struct mlx5e_priv *slave_priv, + struct mlx5e_priv *master_priv); +void mlx5e_ipsec_send_event(struct mlx5e_priv *priv, int event); + static inline struct mlx5_core_dev * mlx5e_ipsec_sa2dev(struct mlx5e_ipsec_sa_entry *sa_entry) { @@ -363,6 +377,15 @@ static inline u32 mlx5_ipsec_device_caps(struct mlx5_core_dev *mdev) { return 0; } + +static inline void mlx5e_ipsec_handle_mpv_event(int event, struct mlx5e_priv *slave_priv, + struct mlx5e_priv *master_priv) +{ +} + +static inline void mlx5e_ipsec_send_event(struct mlx5e_priv *priv, int event) +{ +} #endif #endif /* __MLX5E_IPSEC_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c index 7dba4221993f..ef4dfc8442a9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c @@ -229,6 +229,83 @@ out: return err; } +static void handle_ipsec_rx_bringup(struct mlx5e_ipsec *ipsec, u32 family) +{ + struct mlx5e_ipsec_rx *rx = ipsec_rx(ipsec, family, XFRM_DEV_OFFLOAD_PACKET); + struct mlx5_flow_namespace *ns = mlx5e_fs_get_ns(ipsec->fs, false); + struct mlx5_flow_destination old_dest, new_dest; + + old_dest = mlx5_ttc_get_default_dest(mlx5e_fs_get_ttc(ipsec->fs, false), + family2tt(family)); + + mlx5_ipsec_fs_roce_rx_create(ipsec->mdev, ipsec->roce, ns, &old_dest, family, + MLX5E_ACCEL_FS_ESP_FT_ROCE_LEVEL, MLX5E_NIC_PRIO); + + new_dest.ft = mlx5_ipsec_fs_roce_ft_get(ipsec->roce, family); + new_dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + mlx5_modify_rule_destination(rx->status.rule, &new_dest, &old_dest); + mlx5_modify_rule_destination(rx->sa.rule, &new_dest, &old_dest); +} + +static void handle_ipsec_rx_cleanup(struct mlx5e_ipsec *ipsec, u32 family) +{ + struct mlx5e_ipsec_rx *rx = ipsec_rx(ipsec, family, XFRM_DEV_OFFLOAD_PACKET); + struct mlx5_flow_destination old_dest, new_dest; + + old_dest.ft = mlx5_ipsec_fs_roce_ft_get(ipsec->roce, family); + old_dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + new_dest = mlx5_ttc_get_default_dest(mlx5e_fs_get_ttc(ipsec->fs, false), + family2tt(family)); + mlx5_modify_rule_destination(rx->sa.rule, &new_dest, &old_dest); + mlx5_modify_rule_destination(rx->status.rule, &new_dest, &old_dest); + + mlx5_ipsec_fs_roce_rx_destroy(ipsec->roce, family, ipsec->mdev); +} + +static void ipsec_mpv_work_handler(struct work_struct *_work) +{ + struct mlx5e_ipsec_mpv_work *work = container_of(_work, struct mlx5e_ipsec_mpv_work, work); + struct mlx5e_ipsec *ipsec = work->slave_priv->ipsec; + + switch (work->event) { + case MPV_DEVCOM_IPSEC_MASTER_UP: + mutex_lock(&ipsec->tx->ft.mutex); + if (ipsec->tx->ft.refcnt) + mlx5_ipsec_fs_roce_tx_create(ipsec->mdev, ipsec->roce, ipsec->tx->ft.pol, + true); + mutex_unlock(&ipsec->tx->ft.mutex); + + mutex_lock(&ipsec->rx_ipv4->ft.mutex); + if (ipsec->rx_ipv4->ft.refcnt) + handle_ipsec_rx_bringup(ipsec, AF_INET); + mutex_unlock(&ipsec->rx_ipv4->ft.mutex); + + mutex_lock(&ipsec->rx_ipv6->ft.mutex); + if (ipsec->rx_ipv6->ft.refcnt) + handle_ipsec_rx_bringup(ipsec, AF_INET6); + mutex_unlock(&ipsec->rx_ipv6->ft.mutex); + break; + case MPV_DEVCOM_IPSEC_MASTER_DOWN: + mutex_lock(&ipsec->tx->ft.mutex); + if (ipsec->tx->ft.refcnt) + mlx5_ipsec_fs_roce_tx_destroy(ipsec->roce, ipsec->mdev); + mutex_unlock(&ipsec->tx->ft.mutex); + + mutex_lock(&ipsec->rx_ipv4->ft.mutex); + if (ipsec->rx_ipv4->ft.refcnt) + handle_ipsec_rx_cleanup(ipsec, AF_INET); + mutex_unlock(&ipsec->rx_ipv4->ft.mutex); + + mutex_lock(&ipsec->rx_ipv6->ft.mutex); + if (ipsec->rx_ipv6->ft.refcnt) + handle_ipsec_rx_cleanup(ipsec, AF_INET6); + mutex_unlock(&ipsec->rx_ipv6->ft.mutex); + break; + } + + complete(&work->master_priv->ipsec->comp); +} + static void ipsec_rx_ft_disconnect(struct mlx5e_ipsec *ipsec, u32 family) { struct mlx5_ttc_table *ttc = mlx5e_fs_get_ttc(ipsec->fs, false); @@ -264,7 +341,7 @@ static void rx_destroy(struct mlx5_core_dev *mdev, struct mlx5e_ipsec *ipsec, } mlx5_destroy_flow_table(rx->ft.status); - mlx5_ipsec_fs_roce_rx_destroy(ipsec->roce, family); + mlx5_ipsec_fs_roce_rx_destroy(ipsec->roce, family, mdev); } static void ipsec_rx_create_attr_set(struct mlx5e_ipsec *ipsec, @@ -422,7 +499,7 @@ err_fs_ft: err_add: mlx5_destroy_flow_table(rx->ft.status); err_fs_ft_status: - mlx5_ipsec_fs_roce_rx_destroy(ipsec->roce, family); + mlx5_ipsec_fs_roce_rx_destroy(ipsec->roce, family, mdev); return err; } @@ -562,7 +639,7 @@ err_rule: static void tx_destroy(struct mlx5e_ipsec *ipsec, struct mlx5e_ipsec_tx *tx, struct mlx5_ipsec_fs *roce) { - mlx5_ipsec_fs_roce_tx_destroy(roce); + mlx5_ipsec_fs_roce_tx_destroy(roce, ipsec->mdev); if (tx->chains) { ipsec_chains_destroy(tx->chains); } else { @@ -665,7 +742,7 @@ static int tx_create(struct mlx5e_ipsec *ipsec, struct mlx5e_ipsec_tx *tx, } connect_roce: - err = mlx5_ipsec_fs_roce_tx_create(mdev, roce, tx->ft.pol); + err = mlx5_ipsec_fs_roce_tx_create(mdev, roce, tx->ft.pol, false); if (err) goto err_roce; return 0; @@ -1888,7 +1965,8 @@ void mlx5e_accel_ipsec_fs_cleanup(struct mlx5e_ipsec *ipsec) } } -int mlx5e_accel_ipsec_fs_init(struct mlx5e_ipsec *ipsec) +int mlx5e_accel_ipsec_fs_init(struct mlx5e_ipsec *ipsec, + struct mlx5_devcom_comp_dev **devcom) { struct mlx5_core_dev *mdev = ipsec->mdev; struct mlx5_flow_namespace *ns, *ns_esw; @@ -1940,7 +2018,9 @@ int mlx5e_accel_ipsec_fs_init(struct mlx5e_ipsec *ipsec) ipsec->tx_esw->ns = ns_esw; xa_init_flags(&ipsec->rx_esw->ipsec_obj_id_map, XA_FLAGS_ALLOC1); } else if (mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_ROCE) { - ipsec->roce = mlx5_ipsec_fs_roce_init(mdev); + ipsec->roce = mlx5_ipsec_fs_roce_init(mdev, devcom); + } else { + mlx5_core_warn(mdev, "IPsec was initialized without RoCE support\n"); } return 0; @@ -1987,3 +2067,33 @@ bool mlx5e_ipsec_fs_tunnel_enabled(struct mlx5e_ipsec_sa_entry *sa_entry) return rx->allow_tunnel_mode; } + +void mlx5e_ipsec_handle_mpv_event(int event, struct mlx5e_priv *slave_priv, + struct mlx5e_priv *master_priv) +{ + struct mlx5e_ipsec_mpv_work *work; + + reinit_completion(&master_priv->ipsec->comp); + + if (!slave_priv->ipsec) { + complete(&master_priv->ipsec->comp); + return; + } + + work = &slave_priv->ipsec->mpv_work; + + INIT_WORK(&work->work, ipsec_mpv_work_handler); + work->event = event; + work->slave_priv = slave_priv; + work->master_priv = master_priv; + queue_work(slave_priv->ipsec->wq, &work->work); +} + +void mlx5e_ipsec_send_event(struct mlx5e_priv *priv, int event) +{ + if (!priv->ipsec) + return; /* IPsec not supported */ + + mlx5_devcom_send_event(priv->devcom, event, event, priv); + wait_for_completion(&priv->ipsec->comp); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c index 3245d1c9d539..a91f772dc981 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c @@ -5,6 +5,7 @@ #include "en.h" #include "ipsec.h" #include "lib/crypto.h" +#include "lib/ipsec_fs_roce.h" enum { MLX5_IPSEC_ASO_REMOVE_FLOW_PKT_CNT_OFFSET, @@ -63,7 +64,7 @@ u32 mlx5_ipsec_device_caps(struct mlx5_core_dev *mdev) caps |= MLX5_IPSEC_CAP_ESPINUDP; } - if (mlx5_get_roce_state(mdev) && + if (mlx5_get_roce_state(mdev) && mlx5_ipsec_fs_is_mpv_roce_supported(mdev) && MLX5_CAP_GEN_2(mdev, flow_table_type_2_type) & MLX5_FT_NIC_RX_2_NIC_RX_RDMA && MLX5_CAP_GEN_2(mdev, flow_table_type_2_type) & MLX5_FT_NIC_TX_RDMA_2_NIC_TX) caps |= MLX5_IPSEC_CAP_ROCE; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c index c9c1db971652..d4ebd8743114 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c @@ -580,7 +580,7 @@ static int mlx5e_macsec_upd_txsa(struct macsec_context *ctx) goto out; } - if (tx_sa->next_pn != ctx_tx_sa->next_pn_halves.lower) { + if (ctx->sa.update_pn) { netdev_err(netdev, "MACsec offload: update TX sa %d PN isn't supported\n", assoc_num); err = -EINVAL; @@ -973,7 +973,7 @@ static int mlx5e_macsec_upd_rxsa(struct macsec_context *ctx) goto out; } - if (rx_sa->next_pn != ctx_rx_sa->next_pn_halves.lower) { + if (ctx->sa.update_pn) { netdev_err(ctx->netdev, "MACsec offload update RX sa %d PN isn't supported\n", assoc_num); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index a2ae791538ed..afc3d488b8f6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -69,6 +69,7 @@ #include "en/htb.h" #include "qos.h" #include "en/trap.h" +#include "lib/devcom.h" bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev, u8 page_shift, enum mlx5e_mpwrq_umr_mode umr_mode) @@ -178,6 +179,61 @@ static void mlx5e_disable_async_events(struct mlx5e_priv *priv) mlx5_notifier_unregister(priv->mdev, &priv->events_nb); } +static int mlx5e_devcom_event_mpv(int event, void *my_data, void *event_data) +{ + struct mlx5e_priv *slave_priv = my_data; + + switch (event) { + case MPV_DEVCOM_MASTER_UP: + mlx5_devcom_comp_set_ready(slave_priv->devcom, true); + break; + case MPV_DEVCOM_MASTER_DOWN: + /* no need for comp set ready false since we unregister after + * and it hurts cleanup flow. + */ + break; + case MPV_DEVCOM_IPSEC_MASTER_UP: + case MPV_DEVCOM_IPSEC_MASTER_DOWN: + mlx5e_ipsec_handle_mpv_event(event, my_data, event_data); + break; + } + + return 0; +} + +static int mlx5e_devcom_init_mpv(struct mlx5e_priv *priv, u64 *data) +{ + priv->devcom = mlx5_devcom_register_component(priv->mdev->priv.devc, + MLX5_DEVCOM_MPV, + *data, + mlx5e_devcom_event_mpv, + priv); + if (IS_ERR_OR_NULL(priv->devcom)) + return -EOPNOTSUPP; + + if (mlx5_core_is_mp_master(priv->mdev)) { + mlx5_devcom_send_event(priv->devcom, MPV_DEVCOM_MASTER_UP, + MPV_DEVCOM_MASTER_UP, priv); + mlx5e_ipsec_send_event(priv, MPV_DEVCOM_IPSEC_MASTER_UP); + } + + return 0; +} + +static void mlx5e_devcom_cleanup_mpv(struct mlx5e_priv *priv) +{ + if (IS_ERR_OR_NULL(priv->devcom)) + return; + + if (mlx5_core_is_mp_master(priv->mdev)) { + mlx5_devcom_send_event(priv->devcom, MPV_DEVCOM_MASTER_DOWN, + MPV_DEVCOM_MASTER_DOWN, priv); + mlx5e_ipsec_send_event(priv, MPV_DEVCOM_IPSEC_MASTER_DOWN); + } + + mlx5_devcom_unregister_component(priv->devcom); +} + static int blocking_event(struct notifier_block *nb, unsigned long event, void *data) { struct mlx5e_priv *priv = container_of(nb, struct mlx5e_priv, blocking_events_nb); @@ -192,6 +248,13 @@ static int blocking_event(struct notifier_block *nb, unsigned long event, void * return NOTIFY_BAD; } break; + case MLX5_DRIVER_EVENT_AFFILIATION_DONE: + if (mlx5e_devcom_init_mpv(priv, data)) + return NOTIFY_BAD; + break; + case MLX5_DRIVER_EVENT_AFFILIATION_REMOVED: + mlx5e_devcom_cleanup_mpv(priv); + break; default: return NOTIFY_DONE; } @@ -3952,13 +4015,14 @@ static int set_feature_rx_fcs(struct net_device *netdev, bool enable) struct mlx5e_channels *chs = &priv->channels; struct mlx5e_params new_params; int err; + bool rx_ts_over_crc = !enable; mutex_lock(&priv->state_lock); new_params = chs->params; new_params.scatter_fcs_en = enable; err = mlx5e_safe_switch_params(priv, &new_params, mlx5e_set_rx_port_ts_wrap, - &new_params.scatter_fcs_en, true); + &rx_ts_over_crc, true); mutex_unlock(&priv->state_lock); return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index a13b9c2bd144..e6bfa7e4f146 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -114,9 +114,9 @@ #define ETHTOOL_NUM_PRIOS 11 #define ETHTOOL_MIN_LEVEL (KERNEL_MIN_LEVEL + ETHTOOL_NUM_PRIOS) /* Promiscuous, Vlan, mac, ttc, inner ttc, {UDP/ANY/aRFS/accel/{esp, esp_err}}, IPsec policy, - * IPsec RoCE policy + * {IPsec RoCE MPV,Alias table},IPsec RoCE policy */ -#define KERNEL_NIC_PRIO_NUM_LEVELS 9 +#define KERNEL_NIC_PRIO_NUM_LEVELS 11 #define KERNEL_NIC_NUM_PRIOS 1 /* One more level for tc */ #define KERNEL_MIN_LEVEL (KERNEL_NIC_PRIO_NUM_LEVELS + 1) @@ -137,7 +137,7 @@ #define LAG_MIN_LEVEL (OFFLOADS_MIN_LEVEL + KERNEL_RX_MACSEC_MIN_LEVEL + 1) #define KERNEL_TX_IPSEC_NUM_PRIOS 1 -#define KERNEL_TX_IPSEC_NUM_LEVELS 3 +#define KERNEL_TX_IPSEC_NUM_LEVELS 4 #define KERNEL_TX_IPSEC_MIN_LEVEL (KERNEL_TX_IPSEC_NUM_LEVELS) #define KERNEL_TX_MACSEC_NUM_PRIOS 1 @@ -231,7 +231,7 @@ enum { }; #define RDMA_RX_IPSEC_NUM_PRIOS 1 -#define RDMA_RX_IPSEC_NUM_LEVELS 2 +#define RDMA_RX_IPSEC_NUM_LEVELS 4 #define RDMA_RX_IPSEC_MIN_LEVEL (RDMA_RX_IPSEC_NUM_LEVELS) #define RDMA_RX_BYPASS_MIN_LEVEL MLX5_BY_PASS_NUM_REGULAR_PRIOS @@ -288,7 +288,7 @@ enum { #define RDMA_TX_BYPASS_MIN_LEVEL MLX5_BY_PASS_NUM_PRIOS #define RDMA_TX_COUNTERS_MIN_LEVEL (RDMA_TX_BYPASS_MIN_LEVEL + 1) -#define RDMA_TX_IPSEC_NUM_PRIOS 1 +#define RDMA_TX_IPSEC_NUM_PRIOS 2 #define RDMA_TX_IPSEC_PRIO_NUM_LEVELS 1 #define RDMA_TX_IPSEC_MIN_LEVEL (RDMA_TX_COUNTERS_MIN_LEVEL + RDMA_TX_IPSEC_NUM_PRIOS) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h index 8389ac0af708..8220d180e33c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h @@ -8,6 +8,7 @@ enum mlx5_devcom_component { MLX5_DEVCOM_ESW_OFFLOADS, + MLX5_DEVCOM_MPV, MLX5_DEVCOM_NUM_COMPONENTS, }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/ipsec_fs_roce.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/ipsec_fs_roce.c index 6e3f178d6f84..234cd00f71a1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/ipsec_fs_roce.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/ipsec_fs_roce.c @@ -2,8 +2,11 @@ /* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ #include "fs_core.h" +#include "fs_cmd.h" +#include "en.h" #include "lib/ipsec_fs_roce.h" #include "mlx5_core.h" +#include <linux/random.h> struct mlx5_ipsec_miss { struct mlx5_flow_group *group; @@ -15,6 +18,12 @@ struct mlx5_ipsec_rx_roce { struct mlx5_flow_table *ft; struct mlx5_flow_handle *rule; struct mlx5_ipsec_miss roce_miss; + struct mlx5_flow_table *nic_master_ft; + struct mlx5_flow_group *nic_master_group; + struct mlx5_flow_handle *nic_master_rule; + struct mlx5_flow_table *goto_alias_ft; + u32 alias_id; + char key[ACCESS_KEY_LEN]; struct mlx5_flow_table *ft_rdma; struct mlx5_flow_namespace *ns_rdma; @@ -24,6 +33,9 @@ struct mlx5_ipsec_tx_roce { struct mlx5_flow_group *g; struct mlx5_flow_table *ft; struct mlx5_flow_handle *rule; + struct mlx5_flow_table *goto_alias_ft; + u32 alias_id; + char key[ACCESS_KEY_LEN]; struct mlx5_flow_namespace *ns; }; @@ -31,6 +43,7 @@ struct mlx5_ipsec_fs { struct mlx5_ipsec_rx_roce ipv4_rx; struct mlx5_ipsec_rx_roce ipv6_rx; struct mlx5_ipsec_tx_roce tx; + struct mlx5_devcom_comp_dev **devcom; }; static void ipsec_fs_roce_setup_udp_dport(struct mlx5_flow_spec *spec, @@ -43,11 +56,83 @@ static void ipsec_fs_roce_setup_udp_dport(struct mlx5_flow_spec *spec, MLX5_SET(fte_match_param, spec->match_value, outer_headers.udp_dport, dport); } +static bool ipsec_fs_create_alias_supported_one(struct mlx5_core_dev *mdev) +{ + u64 obj_allowed = MLX5_CAP_GEN_2_64(mdev, allowed_object_for_other_vhca_access); + u32 obj_supp = MLX5_CAP_GEN_2(mdev, cross_vhca_object_to_object_supported); + + if (!(obj_supp & + MLX5_CROSS_VHCA_OBJ_TO_OBJ_SUPPORTED_LOCAL_FLOW_TABLE_TO_REMOTE_FLOW_TABLE_MISS)) + return false; + + if (!(obj_allowed & MLX5_ALLOWED_OBJ_FOR_OTHER_VHCA_ACCESS_FLOW_TABLE)) + return false; + + return true; +} + +static bool ipsec_fs_create_alias_supported(struct mlx5_core_dev *mdev, + struct mlx5_core_dev *master_mdev) +{ + if (ipsec_fs_create_alias_supported_one(mdev) && + ipsec_fs_create_alias_supported_one(master_mdev)) + return true; + + return false; +} + +static int ipsec_fs_create_aliased_ft(struct mlx5_core_dev *ibv_owner, + struct mlx5_core_dev *ibv_allowed, + struct mlx5_flow_table *ft, + u32 *obj_id, char *alias_key, bool from_event) +{ + u32 aliased_object_id = (ft->type << FT_ID_FT_TYPE_OFFSET) | ft->id; + u16 vhca_id_to_be_accessed = MLX5_CAP_GEN(ibv_owner, vhca_id); + struct mlx5_cmd_allow_other_vhca_access_attr allow_attr = {}; + struct mlx5_cmd_alias_obj_create_attr alias_attr = {}; + int ret; + int i; + + if (!ipsec_fs_create_alias_supported(ibv_owner, ibv_allowed)) + return -EOPNOTSUPP; + + for (i = 0; i < ACCESS_KEY_LEN; i++) + if (!from_event) + alias_key[i] = get_random_u64() & 0xFF; + + memcpy(allow_attr.access_key, alias_key, ACCESS_KEY_LEN); + allow_attr.obj_type = MLX5_GENERAL_OBJECT_TYPES_FLOW_TABLE_ALIAS; + allow_attr.obj_id = aliased_object_id; + + if (!from_event) { + ret = mlx5_cmd_allow_other_vhca_access(ibv_owner, &allow_attr); + if (ret) { + mlx5_core_err(ibv_owner, "Failed to allow other vhca access err=%d\n", + ret); + return ret; + } + } + + memcpy(alias_attr.access_key, alias_key, ACCESS_KEY_LEN); + alias_attr.obj_id = aliased_object_id; + alias_attr.obj_type = MLX5_GENERAL_OBJECT_TYPES_FLOW_TABLE_ALIAS; + alias_attr.vhca_id = vhca_id_to_be_accessed; + ret = mlx5_cmd_alias_obj_create(ibv_allowed, &alias_attr, obj_id); + if (ret) { + mlx5_core_err(ibv_allowed, "Failed to create alias object err=%d\n", + ret); + return ret; + } + + return 0; +} + static int ipsec_fs_roce_rx_rule_setup(struct mlx5_core_dev *mdev, struct mlx5_flow_destination *default_dst, struct mlx5_ipsec_rx_roce *roce) { + bool is_mpv_slave = mlx5_core_is_mp_slave(mdev); struct mlx5_flow_destination dst = {}; MLX5_DECLARE_FLOW_ACT(flow_act); struct mlx5_flow_handle *rule; @@ -61,14 +146,19 @@ ipsec_fs_roce_rx_rule_setup(struct mlx5_core_dev *mdev, ipsec_fs_roce_setup_udp_dport(spec, ROCE_V2_UDP_DPORT); flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; - dst.type = MLX5_FLOW_DESTINATION_TYPE_TABLE_TYPE; - dst.ft = roce->ft_rdma; + if (is_mpv_slave) { + dst.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dst.ft = roce->goto_alias_ft; + } else { + dst.type = MLX5_FLOW_DESTINATION_TYPE_TABLE_TYPE; + dst.ft = roce->ft_rdma; + } rule = mlx5_add_flow_rules(roce->ft, spec, &flow_act, &dst, 1); if (IS_ERR(rule)) { err = PTR_ERR(rule); mlx5_core_err(mdev, "Fail to add RX RoCE IPsec rule err=%d\n", err); - goto fail_add_rule; + goto out; } roce->rule = rule; @@ -84,12 +174,30 @@ ipsec_fs_roce_rx_rule_setup(struct mlx5_core_dev *mdev, roce->roce_miss.rule = rule; + if (!is_mpv_slave) + goto out; + + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + dst.type = MLX5_FLOW_DESTINATION_TYPE_TABLE_TYPE; + dst.ft = roce->ft_rdma; + rule = mlx5_add_flow_rules(roce->nic_master_ft, NULL, &flow_act, &dst, + 1); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + mlx5_core_err(mdev, "Fail to add RX RoCE IPsec rule for alias err=%d\n", + err); + goto fail_add_nic_master_rule; + } + roce->nic_master_rule = rule; + kvfree(spec); return 0; +fail_add_nic_master_rule: + mlx5_del_flow_rules(roce->roce_miss.rule); fail_add_default_rule: mlx5_del_flow_rules(roce->rule); -fail_add_rule: +out: kvfree(spec); return err; } @@ -120,25 +228,373 @@ out: return err; } -void mlx5_ipsec_fs_roce_tx_destroy(struct mlx5_ipsec_fs *ipsec_roce) +static int ipsec_fs_roce_tx_mpv_rule_setup(struct mlx5_core_dev *mdev, + struct mlx5_ipsec_tx_roce *roce, + struct mlx5_flow_table *pol_ft) +{ + struct mlx5_flow_destination dst = {}; + MLX5_DECLARE_FLOW_ACT(flow_act); + struct mlx5_flow_handle *rule; + struct mlx5_flow_spec *spec; + int err = 0; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return -ENOMEM; + + spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS; + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, misc_parameters.source_vhca_port); + MLX5_SET(fte_match_param, spec->match_value, misc_parameters.source_vhca_port, + MLX5_CAP_GEN(mdev, native_port_num)); + + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + dst.type = MLX5_FLOW_DESTINATION_TYPE_TABLE_TYPE; + dst.ft = roce->goto_alias_ft; + rule = mlx5_add_flow_rules(roce->ft, spec, &flow_act, &dst, 1); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + mlx5_core_err(mdev, "Fail to add TX RoCE IPsec rule err=%d\n", + err); + goto out; + } + roce->rule = rule; + + /* No need for miss rule, since on miss we go to next PRIO, in which + * if master is configured, he will catch the traffic to go to his + * encryption table. + */ + +out: + kvfree(spec); + return err; +} + +#define MLX5_TX_ROCE_GROUP_SIZE BIT(0) +#define MLX5_IPSEC_RDMA_TX_FT_LEVEL 0 +#define MLX5_IPSEC_NIC_GOTO_ALIAS_FT_LEVEL 3 /* Since last used level in NIC ipsec is 2 */ + +static int ipsec_fs_roce_tx_mpv_create_ft(struct mlx5_core_dev *mdev, + struct mlx5_ipsec_tx_roce *roce, + struct mlx5_flow_table *pol_ft, + struct mlx5e_priv *peer_priv, + bool from_event) +{ + struct mlx5_flow_namespace *roce_ns, *nic_ns; + struct mlx5_flow_table_attr ft_attr = {}; + struct mlx5_flow_table next_ft; + struct mlx5_flow_table *ft; + int err; + + roce_ns = mlx5_get_flow_namespace(peer_priv->mdev, MLX5_FLOW_NAMESPACE_RDMA_TX_IPSEC); + if (!roce_ns) + return -EOPNOTSUPP; + + nic_ns = mlx5_get_flow_namespace(peer_priv->mdev, MLX5_FLOW_NAMESPACE_EGRESS_IPSEC); + if (!nic_ns) + return -EOPNOTSUPP; + + err = ipsec_fs_create_aliased_ft(mdev, peer_priv->mdev, pol_ft, &roce->alias_id, roce->key, + from_event); + if (err) + return err; + + next_ft.id = roce->alias_id; + ft_attr.max_fte = 1; + ft_attr.next_ft = &next_ft; + ft_attr.level = MLX5_IPSEC_NIC_GOTO_ALIAS_FT_LEVEL; + ft_attr.flags = MLX5_FLOW_TABLE_UNMANAGED; + ft = mlx5_create_flow_table(nic_ns, &ft_attr); + if (IS_ERR(ft)) { + err = PTR_ERR(ft); + mlx5_core_err(mdev, "Fail to create RoCE IPsec goto alias ft err=%d\n", err); + goto destroy_alias; + } + + roce->goto_alias_ft = ft; + + memset(&ft_attr, 0, sizeof(ft_attr)); + ft_attr.max_fte = 1; + ft_attr.level = MLX5_IPSEC_RDMA_TX_FT_LEVEL; + ft = mlx5_create_flow_table(roce_ns, &ft_attr); + if (IS_ERR(ft)) { + err = PTR_ERR(ft); + mlx5_core_err(mdev, "Fail to create RoCE IPsec tx ft err=%d\n", err); + goto destroy_alias_ft; + } + + roce->ft = ft; + + return 0; + +destroy_alias_ft: + mlx5_destroy_flow_table(roce->goto_alias_ft); +destroy_alias: + mlx5_cmd_alias_obj_destroy(peer_priv->mdev, roce->alias_id, + MLX5_GENERAL_OBJECT_TYPES_FLOW_TABLE_ALIAS); + return err; +} + +static int ipsec_fs_roce_tx_mpv_create_group_rules(struct mlx5_core_dev *mdev, + struct mlx5_ipsec_tx_roce *roce, + struct mlx5_flow_table *pol_ft, + u32 *in) { + struct mlx5_flow_group *g; + int ix = 0; + int err; + u8 *mc; + + mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria); + MLX5_SET_TO_ONES(fte_match_param, mc, misc_parameters.source_vhca_port); + MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_MISC_PARAMETERS); + + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5_TX_ROCE_GROUP_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + g = mlx5_create_flow_group(roce->ft, in); + if (IS_ERR(g)) { + err = PTR_ERR(g); + mlx5_core_err(mdev, "Fail to create RoCE IPsec tx group err=%d\n", err); + return err; + } + roce->g = g; + + err = ipsec_fs_roce_tx_mpv_rule_setup(mdev, roce, pol_ft); + if (err) { + mlx5_core_err(mdev, "Fail to create RoCE IPsec tx rules err=%d\n", err); + goto destroy_group; + } + + return 0; + +destroy_group: + mlx5_destroy_flow_group(roce->g); + return err; +} + +static int ipsec_fs_roce_tx_mpv_create(struct mlx5_core_dev *mdev, + struct mlx5_ipsec_fs *ipsec_roce, + struct mlx5_flow_table *pol_ft, + u32 *in, bool from_event) +{ + struct mlx5_devcom_comp_dev *tmp = NULL; + struct mlx5_ipsec_tx_roce *roce; + struct mlx5e_priv *peer_priv; + int err; + + if (!mlx5_devcom_for_each_peer_begin(*ipsec_roce->devcom)) + return -EOPNOTSUPP; + + peer_priv = mlx5_devcom_get_next_peer_data(*ipsec_roce->devcom, &tmp); + if (!peer_priv) { + err = -EOPNOTSUPP; + goto release_peer; + } + + roce = &ipsec_roce->tx; + + err = ipsec_fs_roce_tx_mpv_create_ft(mdev, roce, pol_ft, peer_priv, from_event); + if (err) { + mlx5_core_err(mdev, "Fail to create RoCE IPsec tables err=%d\n", err); + goto release_peer; + } + + err = ipsec_fs_roce_tx_mpv_create_group_rules(mdev, roce, pol_ft, in); + if (err) { + mlx5_core_err(mdev, "Fail to create RoCE IPsec tx group/rule err=%d\n", err); + goto destroy_tables; + } + + mlx5_devcom_for_each_peer_end(*ipsec_roce->devcom); + return 0; + +destroy_tables: + mlx5_destroy_flow_table(roce->ft); + mlx5_destroy_flow_table(roce->goto_alias_ft); + mlx5_cmd_alias_obj_destroy(peer_priv->mdev, roce->alias_id, + MLX5_GENERAL_OBJECT_TYPES_FLOW_TABLE_ALIAS); +release_peer: + mlx5_devcom_for_each_peer_end(*ipsec_roce->devcom); + return err; +} + +static void roce_rx_mpv_destroy_tables(struct mlx5_core_dev *mdev, struct mlx5_ipsec_rx_roce *roce) +{ + mlx5_destroy_flow_table(roce->goto_alias_ft); + mlx5_cmd_alias_obj_destroy(mdev, roce->alias_id, + MLX5_GENERAL_OBJECT_TYPES_FLOW_TABLE_ALIAS); + mlx5_destroy_flow_group(roce->nic_master_group); + mlx5_destroy_flow_table(roce->nic_master_ft); +} + +#define MLX5_RX_ROCE_GROUP_SIZE BIT(0) +#define MLX5_IPSEC_RX_IPV4_FT_LEVEL 3 +#define MLX5_IPSEC_RX_IPV6_FT_LEVEL 2 + +static int ipsec_fs_roce_rx_mpv_create(struct mlx5_core_dev *mdev, + struct mlx5_ipsec_fs *ipsec_roce, + struct mlx5_flow_namespace *ns, + u32 family, u32 level, u32 prio) +{ + struct mlx5_flow_namespace *roce_ns, *nic_ns; + struct mlx5_flow_table_attr ft_attr = {}; + struct mlx5_devcom_comp_dev *tmp = NULL; + struct mlx5_ipsec_rx_roce *roce; + struct mlx5_flow_table next_ft; + struct mlx5_flow_table *ft; + struct mlx5_flow_group *g; + struct mlx5e_priv *peer_priv; + int ix = 0; + u32 *in; + int err; + + roce = (family == AF_INET) ? &ipsec_roce->ipv4_rx : + &ipsec_roce->ipv6_rx; + + if (!mlx5_devcom_for_each_peer_begin(*ipsec_roce->devcom)) + return -EOPNOTSUPP; + + peer_priv = mlx5_devcom_get_next_peer_data(*ipsec_roce->devcom, &tmp); + if (!peer_priv) { + err = -EOPNOTSUPP; + goto release_peer; + } + + roce_ns = mlx5_get_flow_namespace(peer_priv->mdev, MLX5_FLOW_NAMESPACE_RDMA_RX_IPSEC); + if (!roce_ns) { + err = -EOPNOTSUPP; + goto release_peer; + } + + nic_ns = mlx5_get_flow_namespace(peer_priv->mdev, MLX5_FLOW_NAMESPACE_KERNEL); + if (!nic_ns) { + err = -EOPNOTSUPP; + goto release_peer; + } + + in = kvzalloc(MLX5_ST_SZ_BYTES(create_flow_group_in), GFP_KERNEL); + if (!in) { + err = -ENOMEM; + goto release_peer; + } + + ft_attr.level = (family == AF_INET) ? MLX5_IPSEC_RX_IPV4_FT_LEVEL : + MLX5_IPSEC_RX_IPV6_FT_LEVEL; + ft_attr.max_fte = 1; + ft = mlx5_create_flow_table(roce_ns, &ft_attr); + if (IS_ERR(ft)) { + err = PTR_ERR(ft); + mlx5_core_err(mdev, "Fail to create RoCE IPsec rx ft at rdma master err=%d\n", err); + goto free_in; + } + + roce->ft_rdma = ft; + + ft_attr.max_fte = 1; + ft_attr.prio = prio; + ft_attr.level = level + 2; + ft = mlx5_create_flow_table(nic_ns, &ft_attr); + if (IS_ERR(ft)) { + err = PTR_ERR(ft); + mlx5_core_err(mdev, "Fail to create RoCE IPsec rx ft at NIC master err=%d\n", err); + goto destroy_ft_rdma; + } + roce->nic_master_ft = ft; + + MLX5_SET_CFG(in, start_flow_index, ix); + ix += 1; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + g = mlx5_create_flow_group(roce->nic_master_ft, in); + if (IS_ERR(g)) { + err = PTR_ERR(g); + mlx5_core_err(mdev, "Fail to create RoCE IPsec rx group aliased err=%d\n", err); + goto destroy_nic_master_ft; + } + roce->nic_master_group = g; + + err = ipsec_fs_create_aliased_ft(peer_priv->mdev, mdev, roce->nic_master_ft, + &roce->alias_id, roce->key, false); + if (err) { + mlx5_core_err(mdev, "Fail to create RoCE IPsec rx alias FT err=%d\n", err); + goto destroy_group; + } + + next_ft.id = roce->alias_id; + ft_attr.max_fte = 1; + ft_attr.prio = prio; + ft_attr.level = roce->ft->level + 1; + ft_attr.flags = MLX5_FLOW_TABLE_UNMANAGED; + ft_attr.next_ft = &next_ft; + ft = mlx5_create_flow_table(ns, &ft_attr); + if (IS_ERR(ft)) { + err = PTR_ERR(ft); + mlx5_core_err(mdev, "Fail to create RoCE IPsec rx ft at NIC slave err=%d\n", err); + goto destroy_alias; + } + roce->goto_alias_ft = ft; + + kvfree(in); + mlx5_devcom_for_each_peer_end(*ipsec_roce->devcom); + return 0; + +destroy_alias: + mlx5_cmd_alias_obj_destroy(mdev, roce->alias_id, + MLX5_GENERAL_OBJECT_TYPES_FLOW_TABLE_ALIAS); +destroy_group: + mlx5_destroy_flow_group(roce->nic_master_group); +destroy_nic_master_ft: + mlx5_destroy_flow_table(roce->nic_master_ft); +destroy_ft_rdma: + mlx5_destroy_flow_table(roce->ft_rdma); +free_in: + kvfree(in); +release_peer: + mlx5_devcom_for_each_peer_end(*ipsec_roce->devcom); + return err; +} + +void mlx5_ipsec_fs_roce_tx_destroy(struct mlx5_ipsec_fs *ipsec_roce, + struct mlx5_core_dev *mdev) +{ + struct mlx5_devcom_comp_dev *tmp = NULL; struct mlx5_ipsec_tx_roce *tx_roce; + struct mlx5e_priv *peer_priv; if (!ipsec_roce) return; tx_roce = &ipsec_roce->tx; + if (!tx_roce->ft) + return; /* Incase RoCE was cleaned from MPV event flow */ + mlx5_del_flow_rules(tx_roce->rule); mlx5_destroy_flow_group(tx_roce->g); mlx5_destroy_flow_table(tx_roce->ft); -} -#define MLX5_TX_ROCE_GROUP_SIZE BIT(0) + if (!mlx5_core_is_mp_slave(mdev)) + return; + + if (!mlx5_devcom_for_each_peer_begin(*ipsec_roce->devcom)) + return; + + peer_priv = mlx5_devcom_get_next_peer_data(*ipsec_roce->devcom, &tmp); + if (!peer_priv) { + mlx5_devcom_for_each_peer_end(*ipsec_roce->devcom); + return; + } + + mlx5_destroy_flow_table(tx_roce->goto_alias_ft); + mlx5_cmd_alias_obj_destroy(peer_priv->mdev, tx_roce->alias_id, + MLX5_GENERAL_OBJECT_TYPES_FLOW_TABLE_ALIAS); + mlx5_devcom_for_each_peer_end(*ipsec_roce->devcom); + tx_roce->ft = NULL; +} int mlx5_ipsec_fs_roce_tx_create(struct mlx5_core_dev *mdev, struct mlx5_ipsec_fs *ipsec_roce, - struct mlx5_flow_table *pol_ft) + struct mlx5_flow_table *pol_ft, + bool from_event) { struct mlx5_flow_table_attr ft_attr = {}; struct mlx5_ipsec_tx_roce *roce; @@ -157,7 +613,14 @@ int mlx5_ipsec_fs_roce_tx_create(struct mlx5_core_dev *mdev, if (!in) return -ENOMEM; + if (mlx5_core_is_mp_slave(mdev)) { + err = ipsec_fs_roce_tx_mpv_create(mdev, ipsec_roce, pol_ft, in, from_event); + goto free_in; + } + ft_attr.max_fte = 1; + ft_attr.prio = 1; + ft_attr.level = MLX5_IPSEC_RDMA_TX_FT_LEVEL; ft = mlx5_create_flow_table(roce->ns, &ft_attr); if (IS_ERR(ft)) { err = PTR_ERR(ft); @@ -209,8 +672,10 @@ struct mlx5_flow_table *mlx5_ipsec_fs_roce_ft_get(struct mlx5_ipsec_fs *ipsec_ro return rx_roce->ft; } -void mlx5_ipsec_fs_roce_rx_destroy(struct mlx5_ipsec_fs *ipsec_roce, u32 family) +void mlx5_ipsec_fs_roce_rx_destroy(struct mlx5_ipsec_fs *ipsec_roce, u32 family, + struct mlx5_core_dev *mdev) { + bool is_mpv_slave = mlx5_core_is_mp_slave(mdev); struct mlx5_ipsec_rx_roce *rx_roce; if (!ipsec_roce) @@ -218,23 +683,29 @@ void mlx5_ipsec_fs_roce_rx_destroy(struct mlx5_ipsec_fs *ipsec_roce, u32 family) rx_roce = (family == AF_INET) ? &ipsec_roce->ipv4_rx : &ipsec_roce->ipv6_rx; + if (!rx_roce->ft) + return; /* Incase RoCE was cleaned from MPV event flow */ + if (is_mpv_slave) + mlx5_del_flow_rules(rx_roce->nic_master_rule); mlx5_del_flow_rules(rx_roce->roce_miss.rule); mlx5_del_flow_rules(rx_roce->rule); + if (is_mpv_slave) + roce_rx_mpv_destroy_tables(mdev, rx_roce); mlx5_destroy_flow_table(rx_roce->ft_rdma); mlx5_destroy_flow_group(rx_roce->roce_miss.group); mlx5_destroy_flow_group(rx_roce->g); mlx5_destroy_flow_table(rx_roce->ft); + rx_roce->ft = NULL; } -#define MLX5_RX_ROCE_GROUP_SIZE BIT(0) - int mlx5_ipsec_fs_roce_rx_create(struct mlx5_core_dev *mdev, struct mlx5_ipsec_fs *ipsec_roce, struct mlx5_flow_namespace *ns, struct mlx5_flow_destination *default_dst, u32 family, u32 level, u32 prio) { + bool is_mpv_slave = mlx5_core_is_mp_slave(mdev); struct mlx5_flow_table_attr ft_attr = {}; struct mlx5_ipsec_rx_roce *roce; struct mlx5_flow_table *ft; @@ -298,17 +769,27 @@ int mlx5_ipsec_fs_roce_rx_create(struct mlx5_core_dev *mdev, } roce->roce_miss.group = g; - memset(&ft_attr, 0, sizeof(ft_attr)); - if (family == AF_INET) - ft_attr.level = 1; - ft = mlx5_create_flow_table(roce->ns_rdma, &ft_attr); - if (IS_ERR(ft)) { - err = PTR_ERR(ft); - mlx5_core_err(mdev, "Fail to create RoCE IPsec rx ft at rdma err=%d\n", err); - goto fail_rdma_table; - } + if (is_mpv_slave) { + err = ipsec_fs_roce_rx_mpv_create(mdev, ipsec_roce, ns, family, level, prio); + if (err) { + mlx5_core_err(mdev, "Fail to create RoCE IPsec rx alias err=%d\n", err); + goto fail_mpv_create; + } + } else { + memset(&ft_attr, 0, sizeof(ft_attr)); + if (family == AF_INET) + ft_attr.level = 1; + ft_attr.max_fte = 1; + ft = mlx5_create_flow_table(roce->ns_rdma, &ft_attr); + if (IS_ERR(ft)) { + err = PTR_ERR(ft); + mlx5_core_err(mdev, + "Fail to create RoCE IPsec rx ft at rdma err=%d\n", err); + goto fail_rdma_table; + } - roce->ft_rdma = ft; + roce->ft_rdma = ft; + } err = ipsec_fs_roce_rx_rule_setup(mdev, default_dst, roce); if (err) { @@ -320,7 +801,10 @@ int mlx5_ipsec_fs_roce_rx_create(struct mlx5_core_dev *mdev, return 0; fail_setup_rule: + if (is_mpv_slave) + roce_rx_mpv_destroy_tables(mdev, roce); mlx5_destroy_flow_table(roce->ft_rdma); +fail_mpv_create: fail_rdma_table: mlx5_destroy_flow_group(roce->roce_miss.group); fail_mgroup: @@ -332,12 +816,24 @@ fail_nomem: return err; } +bool mlx5_ipsec_fs_is_mpv_roce_supported(struct mlx5_core_dev *mdev) +{ + if (!mlx5_core_mp_enabled(mdev)) + return true; + + if (ipsec_fs_create_alias_supported_one(mdev)) + return true; + + return false; +} + void mlx5_ipsec_fs_roce_cleanup(struct mlx5_ipsec_fs *ipsec_roce) { kfree(ipsec_roce); } -struct mlx5_ipsec_fs *mlx5_ipsec_fs_roce_init(struct mlx5_core_dev *mdev) +struct mlx5_ipsec_fs *mlx5_ipsec_fs_roce_init(struct mlx5_core_dev *mdev, + struct mlx5_devcom_comp_dev **devcom) { struct mlx5_ipsec_fs *roce_ipsec; struct mlx5_flow_namespace *ns; @@ -363,6 +859,8 @@ struct mlx5_ipsec_fs *mlx5_ipsec_fs_roce_init(struct mlx5_core_dev *mdev) roce_ipsec->tx.ns = ns; + roce_ipsec->devcom = devcom; + return roce_ipsec; err_tx: diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/ipsec_fs_roce.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/ipsec_fs_roce.h index 9712d705fe48..2a1af78309fe 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/ipsec_fs_roce.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/ipsec_fs_roce.h @@ -4,22 +4,28 @@ #ifndef __MLX5_LIB_IPSEC_H__ #define __MLX5_LIB_IPSEC_H__ +#include "lib/devcom.h" + struct mlx5_ipsec_fs; struct mlx5_flow_table * mlx5_ipsec_fs_roce_ft_get(struct mlx5_ipsec_fs *ipsec_roce, u32 family); void mlx5_ipsec_fs_roce_rx_destroy(struct mlx5_ipsec_fs *ipsec_roce, - u32 family); + u32 family, struct mlx5_core_dev *mdev); int mlx5_ipsec_fs_roce_rx_create(struct mlx5_core_dev *mdev, struct mlx5_ipsec_fs *ipsec_roce, struct mlx5_flow_namespace *ns, struct mlx5_flow_destination *default_dst, u32 family, u32 level, u32 prio); -void mlx5_ipsec_fs_roce_tx_destroy(struct mlx5_ipsec_fs *ipsec_roce); +void mlx5_ipsec_fs_roce_tx_destroy(struct mlx5_ipsec_fs *ipsec_roce, + struct mlx5_core_dev *mdev); int mlx5_ipsec_fs_roce_tx_create(struct mlx5_core_dev *mdev, struct mlx5_ipsec_fs *ipsec_roce, - struct mlx5_flow_table *pol_ft); + struct mlx5_flow_table *pol_ft, + bool from_event); void mlx5_ipsec_fs_roce_cleanup(struct mlx5_ipsec_fs *ipsec_roce); -struct mlx5_ipsec_fs *mlx5_ipsec_fs_roce_init(struct mlx5_core_dev *mdev); +struct mlx5_ipsec_fs *mlx5_ipsec_fs_roce_init(struct mlx5_core_dev *mdev, + struct mlx5_devcom_comp_dev **devcom); +bool mlx5_ipsec_fs_is_mpv_roce_supported(struct mlx5_core_dev *mdev); #endif /* __MLX5_LIB_IPSEC_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index d17c9c31b165..307ffe6300f8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -361,6 +361,12 @@ void mlx5_core_uplink_netdev_event_replay(struct mlx5_core_dev *dev) } EXPORT_SYMBOL(mlx5_core_uplink_netdev_event_replay); +void mlx5_core_mp_event_replay(struct mlx5_core_dev *dev, u32 event, void *data) +{ + mlx5_blocking_notifier_call_chain(dev, event, data); +} +EXPORT_SYMBOL(mlx5_core_mp_event_replay); + int mlx5_core_get_caps_mode(struct mlx5_core_dev *dev, enum mlx5_cap_type cap_type, enum mlx5_cap_mode cap_mode) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index 124352459c23..19ffd1816474 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -97,6 +97,22 @@ do { \ __func__, __LINE__, current->pid, \ ##__VA_ARGS__) +#define ACCESS_KEY_LEN 32 +#define FT_ID_FT_TYPE_OFFSET 24 + +struct mlx5_cmd_allow_other_vhca_access_attr { + u16 obj_type; + u32 obj_id; + u8 access_key[ACCESS_KEY_LEN]; +}; + +struct mlx5_cmd_alias_obj_create_attr { + u32 obj_id; + u16 vhca_id; + u16 obj_type; + u8 access_key[ACCESS_KEY_LEN]; +}; + static inline void mlx5_printk(struct mlx5_core_dev *dev, int level, const char *format, ...) { struct device *device = dev->device; @@ -343,6 +359,12 @@ bool mlx5_eth_supported(struct mlx5_core_dev *dev); bool mlx5_rdma_supported(struct mlx5_core_dev *dev); bool mlx5_vnet_supported(struct mlx5_core_dev *dev); bool mlx5_same_hw_devs(struct mlx5_core_dev *dev, struct mlx5_core_dev *peer_dev); +int mlx5_cmd_allow_other_vhca_access(struct mlx5_core_dev *dev, + struct mlx5_cmd_allow_other_vhca_access_attr *attr); +int mlx5_cmd_alias_obj_create(struct mlx5_core_dev *dev, + struct mlx5_cmd_alias_obj_create_attr *alias_attr, + u32 *obj_id); +int mlx5_cmd_alias_obj_destroy(struct mlx5_core_dev *dev, u32 obj_id, u16 obj_type); static inline u16 mlx5_core_ec_vf_vport_base(const struct mlx5_core_dev *dev) { diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c index f709e44c76a8..f1b48d6615f6 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c @@ -536,7 +536,7 @@ mlxsw_thermal_modules_fini(struct mlxsw_thermal *thermal, static int mlxsw_thermal_gearbox_tz_init(struct mlxsw_thermal_module *gearbox_tz) { - char tz_name[THERMAL_NAME_LENGTH]; + char tz_name[40]; int ret; if (gearbox_tz->slot_index) diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c index 51eea1f0529c..7fae963b2608 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci.c +++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c @@ -352,14 +352,15 @@ static void mlxsw_pci_wqe_frag_unmap(struct mlxsw_pci *mlxsw_pci, char *wqe, } static int mlxsw_pci_rdq_skb_alloc(struct mlxsw_pci *mlxsw_pci, - struct mlxsw_pci_queue_elem_info *elem_info) + struct mlxsw_pci_queue_elem_info *elem_info, + gfp_t gfp) { size_t buf_len = MLXSW_PORT_MAX_MTU; char *wqe = elem_info->elem; struct sk_buff *skb; int err; - skb = netdev_alloc_skb_ip_align(NULL, buf_len); + skb = __netdev_alloc_skb_ip_align(NULL, buf_len, gfp); if (!skb) return -ENOMEM; @@ -420,7 +421,7 @@ static int mlxsw_pci_rdq_init(struct mlxsw_pci *mlxsw_pci, char *mbox, for (i = 0; i < q->count; i++) { elem_info = mlxsw_pci_queue_elem_info_producer_get(q); BUG_ON(!elem_info); - err = mlxsw_pci_rdq_skb_alloc(mlxsw_pci, elem_info); + err = mlxsw_pci_rdq_skb_alloc(mlxsw_pci, elem_info, GFP_KERNEL); if (err) goto rollback; /* Everything is set up, ring doorbell to pass elem to HW */ @@ -640,7 +641,7 @@ static void mlxsw_pci_cqe_rdq_handle(struct mlxsw_pci *mlxsw_pci, if (q->consumer_counter++ != consumer_counter_limit) dev_dbg_ratelimited(&pdev->dev, "Consumer counter does not match limit in RDQ\n"); - err = mlxsw_pci_rdq_skb_alloc(mlxsw_pci, elem_info); + err = mlxsw_pci_rdq_skb_alloc(mlxsw_pci, elem_info, GFP_ATOMIC); if (err) { dev_err_ratelimited(&pdev->dev, "Failed to alloc skb for RDQ\n"); goto out; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ethtool.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ethtool.c index 472830d07ac1..0f29e9c19411 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ethtool.c @@ -619,7 +619,7 @@ static void mlxsw_sp_port_get_tc_strings(u8 **p, int tc) int i; for (i = 0; i < MLXSW_SP_PORT_HW_TC_STATS_LEN; i++) { - snprintf(*p, ETH_GSTRING_LEN, "%.29s_%.1d", + snprintf(*p, ETH_GSTRING_LEN, "%.28s_%d", mlxsw_sp_port_hw_tc_stats[i].str, tc); *p += ETH_GSTRING_LEN; } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve_vxlan.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve_vxlan.c index bb8eeb86edf7..52c2fe3644d4 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve_vxlan.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve_vxlan.c @@ -310,8 +310,8 @@ const struct mlxsw_sp_nve_ops mlxsw_sp1_nve_vxlan_ops = { .fdb_clear_offload = mlxsw_sp_nve_vxlan_clear_offload, }; -static bool mlxsw_sp2_nve_vxlan_learning_set(struct mlxsw_sp *mlxsw_sp, - bool learning_en) +static int mlxsw_sp2_nve_vxlan_learning_set(struct mlxsw_sp *mlxsw_sp, + bool learning_en) { char tnpc_pl[MLXSW_REG_TNPC_LEN]; diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_ethtool.c b/drivers/net/ethernet/microchip/sparx5/sparx5_ethtool.c index 01f3a3a41cdb..37d2584b48a7 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_ethtool.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_ethtool.c @@ -1012,8 +1012,7 @@ static void sparx5_get_sset_strings(struct net_device *ndev, u32 sset, u8 *data) return; for (idx = 0; idx < sparx5->num_ethtool_stats; idx++) - strncpy(data + idx * ETH_GSTRING_LEN, - sparx5->stats_layout[idx], ETH_GSTRING_LEN); + ethtool_sprintf(&data, "%s", sparx5->stats_layout[idx]); } static void sparx5_get_sset_data(struct net_device *ndev, diff --git a/drivers/net/ethernet/netronome/nfp/crypto/ipsec.c b/drivers/net/ethernet/netronome/nfp/crypto/ipsec.c index b1f026b81dea..cc54faca2283 100644 --- a/drivers/net/ethernet/netronome/nfp/crypto/ipsec.c +++ b/drivers/net/ethernet/netronome/nfp/crypto/ipsec.c @@ -378,6 +378,34 @@ static int nfp_net_xfrm_add_state(struct xfrm_state *x, /* Encryption */ switch (x->props.ealgo) { case SADB_EALG_NONE: + /* The xfrm descriptor for CHACAH20_POLY1305 does not set the algorithm id, which + * is the default value SADB_EALG_NONE. In the branch of SADB_EALG_NONE, driver + * uses algorithm name to identify CHACAH20_POLY1305's algorithm. + */ + if (x->aead && !strcmp(x->aead->alg_name, "rfc7539esp(chacha20,poly1305)")) { + if (nn->pdev->device != PCI_DEVICE_ID_NFP3800) { + NL_SET_ERR_MSG_MOD(extack, + "Unsupported encryption algorithm for offload"); + return -EINVAL; + } + if (x->aead->alg_icv_len != 128) { + NL_SET_ERR_MSG_MOD(extack, + "ICV must be 128bit with CHACHA20_POLY1305"); + return -EINVAL; + } + + /* Aead->alg_key_len includes 32-bit salt */ + if (x->aead->alg_key_len - 32 != 256) { + NL_SET_ERR_MSG_MOD(extack, "Unsupported CHACHA20 key length"); + return -EINVAL; + } + + /* The CHACHA20's mode is not configured */ + cfg->ctrl_word.hash = NFP_IPSEC_HASH_POLY1305_128; + cfg->ctrl_word.cipher = NFP_IPSEC_CIPHER_CHACHA20; + break; + } + fallthrough; case SADB_EALG_NULL: cfg->ctrl_word.cimode = NFP_IPSEC_CIMODE_CBC; cfg->ctrl_word.cipher = NFP_IPSEC_CIPHER_NULL; @@ -427,6 +455,7 @@ static int nfp_net_xfrm_add_state(struct xfrm_state *x, } if (x->aead) { + int key_offset = 0; int salt_len = 4; key_len = DIV_ROUND_UP(x->aead->alg_key_len, BITS_PER_BYTE); @@ -437,9 +466,19 @@ static int nfp_net_xfrm_add_state(struct xfrm_state *x, return -EINVAL; } - for (i = 0; i < key_len / sizeof(cfg->ciph_key[0]) ; i++) - cfg->ciph_key[i] = get_unaligned_be32(x->aead->alg_key + - sizeof(cfg->ciph_key[0]) * i); + /* The CHACHA20's key order needs to be adjusted based on hardware design. + * Other's key order: {K0, K1, K2, K3, K4, K5, K6, K7} + * CHACHA20's key order: {K4, K5, K6, K7, K0, K1, K2, K3} + */ + if (!strcmp(x->aead->alg_name, "rfc7539esp(chacha20,poly1305)")) + key_offset = key_len / sizeof(cfg->ciph_key[0]) >> 1; + + for (i = 0; i < key_len / sizeof(cfg->ciph_key[0]); i++) { + int index = (i + key_offset) % (key_len / sizeof(cfg->ciph_key[0])); + + cfg->ciph_key[index] = get_unaligned_be32(x->aead->alg_key + + sizeof(cfg->ciph_key[0]) * i); + } /* Load up the salt */ cfg->aesgcm_fields.salt = get_unaligned_be32(x->aead->alg_key + key_len); diff --git a/drivers/net/ethernet/netronome/nfp/flower/cmsg.c b/drivers/net/ethernet/netronome/nfp/flower/cmsg.c index f21cf1f40f98..153533cd8f08 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/cmsg.c +++ b/drivers/net/ethernet/netronome/nfp/flower/cmsg.c @@ -210,6 +210,7 @@ nfp_flower_cmsg_merge_hint_rx(struct nfp_app *app, struct sk_buff *skb) unsigned int msg_len = nfp_flower_cmsg_get_data_len(skb); struct nfp_flower_cmsg_merge_hint *msg; struct nfp_fl_payload *sub_flows[2]; + struct nfp_flower_priv *priv; int err, i, flow_cnt; msg = nfp_flower_cmsg_get_data(skb); @@ -228,14 +229,15 @@ nfp_flower_cmsg_merge_hint_rx(struct nfp_app *app, struct sk_buff *skb) return; } - rtnl_lock(); + priv = app->priv; + mutex_lock(&priv->nfp_fl_lock); for (i = 0; i < flow_cnt; i++) { u32 ctx = be32_to_cpu(msg->flow[i].host_ctx); sub_flows[i] = nfp_flower_get_fl_payload_from_ctx(app, ctx); if (!sub_flows[i]) { nfp_flower_cmsg_warn(app, "Invalid flow in merge hint\n"); - goto err_rtnl_unlock; + goto err_mutex_unlock; } } @@ -244,8 +246,8 @@ nfp_flower_cmsg_merge_hint_rx(struct nfp_app *app, struct sk_buff *skb) if (err == -ENOMEM) nfp_flower_cmsg_warn(app, "Flow merge memory fail.\n"); -err_rtnl_unlock: - rtnl_unlock(); +err_mutex_unlock: + mutex_unlock(&priv->nfp_fl_lock); } static void diff --git a/drivers/net/ethernet/netronome/nfp/flower/conntrack.c b/drivers/net/ethernet/netronome/nfp/flower/conntrack.c index 2643c4b3ff1f..2967bab72505 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/conntrack.c +++ b/drivers/net/ethernet/netronome/nfp/flower/conntrack.c @@ -2131,8 +2131,6 @@ nfp_fl_ct_offload_nft_flow(struct nfp_fl_ct_zone_entry *zt, struct flow_cls_offl struct nfp_fl_ct_flow_entry *ct_entry; struct netlink_ext_ack *extack = NULL; - ASSERT_RTNL(); - extack = flow->common.extack; switch (flow->command) { case FLOW_CLS_REPLACE: @@ -2178,9 +2176,13 @@ int nfp_fl_ct_handle_nft_flow(enum tc_setup_type type, void *type_data, void *cb switch (type) { case TC_SETUP_CLSFLOWER: - rtnl_lock(); + while (!mutex_trylock(&zt->priv->nfp_fl_lock)) { + if (!zt->nft) /* avoid deadlock */ + return err; + msleep(20); + } err = nfp_fl_ct_offload_nft_flow(zt, flow); - rtnl_unlock(); + mutex_unlock(&zt->priv->nfp_fl_lock); break; default: return -EOPNOTSUPP; @@ -2208,6 +2210,7 @@ int nfp_fl_ct_del_flow(struct nfp_fl_ct_map_entry *ct_map_ent) struct nfp_fl_ct_flow_entry *ct_entry; struct nfp_fl_ct_zone_entry *zt; struct rhashtable *m_table; + struct nf_flowtable *nft; if (!ct_map_ent) return -ENOENT; @@ -2226,8 +2229,12 @@ int nfp_fl_ct_del_flow(struct nfp_fl_ct_map_entry *ct_map_ent) if (ct_map_ent->cookie > 0) kfree(ct_map_ent); - if (!zt->pre_ct_count) { - zt->nft = NULL; + if (!zt->pre_ct_count && zt->nft) { + nft = zt->nft; + zt->nft = NULL; /* avoid deadlock */ + nf_flow_table_offload_del_cb(nft, + nfp_fl_ct_handle_nft_flow, + zt); nfp_fl_ct_clean_nft_entries(zt); } break; diff --git a/drivers/net/ethernet/netronome/nfp/flower/main.h b/drivers/net/ethernet/netronome/nfp/flower/main.h index 40372545148e..2b7c947ff4f2 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/main.h +++ b/drivers/net/ethernet/netronome/nfp/flower/main.h @@ -297,6 +297,7 @@ struct nfp_fl_internal_ports { * @predt_list: List to keep track of decap pretun flows * @neigh_table: Table to keep track of neighbor entries * @predt_lock: Lock to serialise predt/neigh table updates + * @nfp_fl_lock: Lock to protect the flow offload operation */ struct nfp_flower_priv { struct nfp_app *app; @@ -339,6 +340,7 @@ struct nfp_flower_priv { struct list_head predt_list; struct rhashtable neigh_table; spinlock_t predt_lock; /* Lock to serialise predt/neigh table updates */ + struct mutex nfp_fl_lock; /* Protect the flow operation */ }; /** diff --git a/drivers/net/ethernet/netronome/nfp/flower/metadata.c b/drivers/net/ethernet/netronome/nfp/flower/metadata.c index 0f06ef6e24bf..80e4675582bf 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/metadata.c +++ b/drivers/net/ethernet/netronome/nfp/flower/metadata.c @@ -528,6 +528,8 @@ int nfp_flower_metadata_init(struct nfp_app *app, u64 host_ctx_count, if (err) goto err_free_stats_ctx_table; + mutex_init(&priv->nfp_fl_lock); + err = rhashtable_init(&priv->ct_zone_table, &nfp_zone_table_params); if (err) goto err_free_merge_table; diff --git a/drivers/net/ethernet/netronome/nfp/flower/offload.c b/drivers/net/ethernet/netronome/nfp/flower/offload.c index c153f0575b92..0aceef9fe582 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/offload.c +++ b/drivers/net/ethernet/netronome/nfp/flower/offload.c @@ -1009,8 +1009,6 @@ int nfp_flower_merge_offloaded_flows(struct nfp_app *app, u64 parent_ctx = 0; int err; - ASSERT_RTNL(); - if (sub_flow1 == sub_flow2 || nfp_flower_is_merge_flow(sub_flow1) || nfp_flower_is_merge_flow(sub_flow2)) @@ -1727,19 +1725,30 @@ static int nfp_flower_repr_offload(struct nfp_app *app, struct net_device *netdev, struct flow_cls_offload *flower) { + struct nfp_flower_priv *priv = app->priv; + int ret; + if (!eth_proto_is_802_3(flower->common.protocol)) return -EOPNOTSUPP; + mutex_lock(&priv->nfp_fl_lock); switch (flower->command) { case FLOW_CLS_REPLACE: - return nfp_flower_add_offload(app, netdev, flower); + ret = nfp_flower_add_offload(app, netdev, flower); + break; case FLOW_CLS_DESTROY: - return nfp_flower_del_offload(app, netdev, flower); + ret = nfp_flower_del_offload(app, netdev, flower); + break; case FLOW_CLS_STATS: - return nfp_flower_get_stats(app, netdev, flower); + ret = nfp_flower_get_stats(app, netdev, flower); + break; default: - return -EOPNOTSUPP; + ret = -EOPNOTSUPP; + break; } + mutex_unlock(&priv->nfp_fl_lock); + + return ret; } static int nfp_flower_setup_tc_block_cb(enum tc_setup_type type, @@ -1778,6 +1787,7 @@ static int nfp_flower_setup_tc_block(struct net_device *netdev, repr_priv = repr->app_priv; repr_priv->block_shared = f->block_shared; f->driver_block_list = &nfp_block_cb_list; + f->unlocked_driver_cb = true; switch (f->command) { case FLOW_BLOCK_BIND: @@ -1876,6 +1886,8 @@ nfp_flower_setup_indr_tc_block(struct net_device *netdev, struct Qdisc *sch, str nfp_flower_internal_port_can_offload(app, netdev))) return -EOPNOTSUPP; + f->unlocked_driver_cb = true; + switch (f->command) { case FLOW_BLOCK_BIND: cb_priv = nfp_flower_indr_block_cb_priv_lookup(app, netdev); diff --git a/drivers/net/ethernet/netronome/nfp/flower/qos_conf.c b/drivers/net/ethernet/netronome/nfp/flower/qos_conf.c index 99052a925d9e..e7180b4793c7 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/qos_conf.c +++ b/drivers/net/ethernet/netronome/nfp/flower/qos_conf.c @@ -523,25 +523,31 @@ int nfp_flower_setup_qos_offload(struct nfp_app *app, struct net_device *netdev, { struct netlink_ext_ack *extack = flow->common.extack; struct nfp_flower_priv *fl_priv = app->priv; + int ret; if (!(fl_priv->flower_ext_feats & NFP_FL_FEATS_VF_RLIM)) { NL_SET_ERR_MSG_MOD(extack, "unsupported offload: loaded firmware does not support qos rate limit offload"); return -EOPNOTSUPP; } + mutex_lock(&fl_priv->nfp_fl_lock); switch (flow->command) { case TC_CLSMATCHALL_REPLACE: - return nfp_flower_install_rate_limiter(app, netdev, flow, - extack); + ret = nfp_flower_install_rate_limiter(app, netdev, flow, extack); + break; case TC_CLSMATCHALL_DESTROY: - return nfp_flower_remove_rate_limiter(app, netdev, flow, - extack); + ret = nfp_flower_remove_rate_limiter(app, netdev, flow, extack); + break; case TC_CLSMATCHALL_STATS: - return nfp_flower_stats_rate_limiter(app, netdev, flow, - extack); + ret = nfp_flower_stats_rate_limiter(app, netdev, flow, extack); + break; default: - return -EOPNOTSUPP; + ret = -EOPNOTSUPP; + break; } + mutex_unlock(&fl_priv->nfp_fl_lock); + + return ret; } /* Offload tc action, currently only for tc police */ diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_resource.c b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_resource.c index ce7492a6a98f..279ea0b56955 100644 --- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_resource.c +++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_resource.c @@ -159,7 +159,7 @@ nfp_resource_acquire(struct nfp_cpp *cpp, const char *name) if (!res) return ERR_PTR(-ENOMEM); - strncpy(res->name, name, NFP_RESOURCE_ENTRY_NAME_SZ); + strscpy(res->name, name, sizeof(res->name)); dev_mutex = nfp_cpp_mutex_alloc(cpp, NFP_RESOURCE_TBL_TARGET, NFP_RESOURCE_TBL_BASE, diff --git a/drivers/net/ethernet/ni/nixge.c b/drivers/net/ethernet/ni/nixge.c index 97f4798f4b42..fa1f78b03cb2 100644 --- a/drivers/net/ethernet/ni/nixge.c +++ b/drivers/net/ethernet/ni/nixge.c @@ -683,7 +683,7 @@ static int nixge_poll(struct napi_struct *napi, int budget) if (status & (XAXIDMA_IRQ_IOC_MASK | XAXIDMA_IRQ_DELAY_MASK)) { /* If there's more, reschedule, but clear */ nixge_dma_write_reg(priv, XAXIDMA_RX_SR_OFFSET, status); - napi_reschedule(napi); + napi_schedule(napi); } else { /* if not, turn on RX IRQs again ... */ cr = nixge_dma_read_reg(priv, XAXIDMA_RX_CR_OFFSET); @@ -755,8 +755,7 @@ static irqreturn_t nixge_rx_irq(int irq, void *_ndev) cr &= ~(XAXIDMA_IRQ_IOC_MASK | XAXIDMA_IRQ_DELAY_MASK); nixge_dma_write_reg(priv, XAXIDMA_RX_CR_OFFSET, cr); - if (napi_schedule_prep(&priv->napi)) - __napi_schedule(&priv->napi); + napi_schedule(&priv->napi); goto out; } if (!(status & XAXIDMA_IRQ_ALL_MASK)) { diff --git a/drivers/net/ethernet/pensando/ionic/ionic_main.c b/drivers/net/ethernet/pensando/ionic/ionic_main.c index 1dc79cecc5cc..835577392178 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_main.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_main.c @@ -554,8 +554,8 @@ int ionic_identify(struct ionic *ionic) memset(ident, 0, sizeof(*ident)); ident->drv.os_type = cpu_to_le32(IONIC_OS_TYPE_LINUX); - strncpy(ident->drv.driver_ver_str, UTS_RELEASE, - sizeof(ident->drv.driver_ver_str) - 1); + strscpy(ident->drv.driver_ver_str, UTS_RELEASE, + sizeof(ident->drv.driver_ver_str)); mutex_lock(&ionic->dev_cmd_lock); diff --git a/drivers/net/ethernet/qlogic/qed/qed_debug.c b/drivers/net/ethernet/qlogic/qed/qed_debug.c index cdcead614e9f..f67be4b8ad43 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_debug.c +++ b/drivers/net/ethernet/qlogic/qed/qed_debug.c @@ -3204,8 +3204,8 @@ static u32 qed_grc_dump_big_ram(struct qed_hwfn *p_hwfn, BIT(big_ram->is_256b_bit_offset[dev_data->chip_id]) ? 256 : 128; - strncpy(type_name, big_ram->instance_name, BIG_RAM_NAME_LEN); - strncpy(mem_name, big_ram->instance_name, BIG_RAM_NAME_LEN); + memcpy(type_name, big_ram->instance_name, BIG_RAM_NAME_LEN); + memcpy(mem_name, big_ram->instance_name, BIG_RAM_NAME_LEN); /* Dump memory header */ offset += qed_grc_dump_mem_hdr(p_hwfn, @@ -6359,8 +6359,7 @@ static void qed_read_str_from_buf(void *buf, u32 *offset, u32 size, char *dest) { const char *source_str = &((const char *)buf)[*offset]; - strncpy(dest, source_str, size); - dest[size - 1] = '\0'; + strscpy(dest, source_str, size); *offset += size; } diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index 6351a2dc13bc..df0df4d09f05 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -4596,7 +4596,11 @@ static void r8169_phylink_handler(struct net_device *ndev) if (netif_carrier_ok(ndev)) { rtl_link_chg_patch(tp); pm_request_resume(d); + netif_wake_queue(tp->dev); } else { + /* In few cases rx is broken after link-down otherwise */ + if (rtl_is_8125(tp)) + rtl_reset_work(tp); pm_runtime_idle(d); } diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index e62391180032..c70cff80cc99 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -2167,6 +2167,8 @@ static int ravb_close(struct net_device *ndev) of_phy_deregister_fixed_link(np); } + cancel_work_sync(&priv->work); + if (info->multi_irqs) { free_irq(priv->tx_irqs[RAVB_NC], ndev); free_irq(priv->rx_irqs[RAVB_NC], ndev); @@ -2891,8 +2893,6 @@ static void ravb_remove(struct platform_device *pdev) clk_disable_unprepare(priv->gptp_clk); clk_disable_unprepare(priv->refclk); - dma_free_coherent(ndev->dev.parent, priv->desc_bat_size, priv->desc_bat, - priv->desc_bat_dma); /* Set reset mode */ ravb_write(ndev, CCC_OPC_RESET, CCC); unregister_netdev(ndev); @@ -2900,6 +2900,8 @@ static void ravb_remove(struct platform_device *pdev) netif_napi_del(&priv->napi[RAVB_NC]); netif_napi_del(&priv->napi[RAVB_BE]); ravb_mdio_release(priv); + dma_free_coherent(ndev->dev.parent, priv->desc_bat_size, priv->desc_bat, + priv->desc_bat_dma); pm_runtime_put_sync(&pdev->dev); pm_runtime_disable(&pdev->dev); reset_control_assert(priv->rstc); diff --git a/drivers/net/ethernet/renesas/rswitch.c b/drivers/net/ethernet/renesas/rswitch.c index 0023ff254915..112e605f104a 100644 --- a/drivers/net/ethernet/renesas/rswitch.c +++ b/drivers/net/ethernet/renesas/rswitch.c @@ -1254,7 +1254,7 @@ static void rswitch_adjust_link(struct net_device *ndev) phy_print_status(phydev); if (phydev->link) phy_power_on(rdev->serdes); - else + else if (rdev->serdes->power_count) phy_power_off(rdev->serdes); rdev->etha->link = phydev->link; @@ -1964,15 +1964,17 @@ static void rswitch_deinit(struct rswitch_private *priv) rswitch_gwca_hw_deinit(priv); rcar_gen4_ptp_unregister(priv->ptp_priv); - for (i = 0; i < RSWITCH_NUM_PORTS; i++) { + rswitch_for_each_enabled_port(priv, i) { struct rswitch_device *rdev = priv->rdev[i]; - phy_exit(priv->rdev[i]->serdes); - rswitch_ether_port_deinit_one(rdev); unregister_netdev(rdev->ndev); - rswitch_device_free(priv, i); + rswitch_ether_port_deinit_one(rdev); + phy_exit(priv->rdev[i]->serdes); } + for (i = 0; i < RSWITCH_NUM_PORTS; i++) + rswitch_device_free(priv, i); + rswitch_gwca_ts_queue_free(priv); rswitch_gwca_linkfix_free(priv); diff --git a/drivers/net/ethernet/sfc/mae.c b/drivers/net/ethernet/sfc/mae.c index 021980a958b7..10709d828a63 100644 --- a/drivers/net/ethernet/sfc/mae.c +++ b/drivers/net/ethernet/sfc/mae.c @@ -1291,10 +1291,11 @@ int efx_mae_alloc_action_set(struct efx_nic *efx, struct efx_tc_action_set *act) size_t outlen; int rc; - MCDI_POPULATE_DWORD_4(inbuf, MAE_ACTION_SET_ALLOC_IN_FLAGS, + MCDI_POPULATE_DWORD_5(inbuf, MAE_ACTION_SET_ALLOC_IN_FLAGS, MAE_ACTION_SET_ALLOC_IN_VLAN_PUSH, act->vlan_push, MAE_ACTION_SET_ALLOC_IN_VLAN_POP, act->vlan_pop, MAE_ACTION_SET_ALLOC_IN_DECAP, act->decap, + MAE_ACTION_SET_ALLOC_IN_DO_NAT, act->do_nat, MAE_ACTION_SET_ALLOC_IN_DO_DECR_IP_TTL, act->do_ttl_dec); diff --git a/drivers/net/ethernet/sfc/mcdi.c b/drivers/net/ethernet/sfc/mcdi.c index d23da9627338..76578502226e 100644 --- a/drivers/net/ethernet/sfc/mcdi.c +++ b/drivers/net/ethernet/sfc/mcdi.c @@ -2205,10 +2205,9 @@ int efx_mcdi_nvram_metadata(struct efx_nic *efx, unsigned int type, goto out_free; } - strncpy(desc, + strscpy(desc, MCDI_PTR(outbuf, NVRAM_METADATA_OUT_DESCRIPTION), MC_CMD_NVRAM_METADATA_OUT_DESCRIPTION_NUM(outlen)); - desc[MC_CMD_NVRAM_METADATA_OUT_DESCRIPTION_NUM(outlen)] = '\0'; } else { desc[0] = '\0'; } diff --git a/drivers/net/ethernet/sfc/tc.c b/drivers/net/ethernet/sfc/tc.c index 3d76b7598631..6db3d7ed3a86 100644 --- a/drivers/net/ethernet/sfc/tc.c +++ b/drivers/net/ethernet/sfc/tc.c @@ -2457,6 +2457,14 @@ static int efx_tc_flower_replace(struct efx_nic *efx, NL_SET_ERR_MSG_MOD(extack, "Cannot offload tunnel decap action without tunnel device"); rc = -EOPNOTSUPP; goto release; + case FLOW_ACTION_CT: + if (fa->ct.action != TCA_CT_ACT_NAT) { + rc = -EOPNOTSUPP; + NL_SET_ERR_MSG_FMT_MOD(extack, "Can only offload CT 'nat' action in RHS rules, not %d", fa->ct.action); + goto release; + } + act->do_nat = 1; + break; default: NL_SET_ERR_MSG_FMT_MOD(extack, "Unhandled action %u", fa->id); diff --git a/drivers/net/ethernet/sfc/tc.h b/drivers/net/ethernet/sfc/tc.h index 86e38ea7988c..7b5190078bee 100644 --- a/drivers/net/ethernet/sfc/tc.h +++ b/drivers/net/ethernet/sfc/tc.h @@ -48,6 +48,7 @@ struct efx_tc_encap_action; /* see tc_encap_actions.h */ * @vlan_push: the number of vlan headers to push * @vlan_pop: the number of vlan headers to pop * @decap: used to indicate a tunnel header decapsulation should take place + * @do_nat: perform NAT/NPT with values returned by conntrack match * @do_ttl_dec: used to indicate IP TTL / Hop Limit should be decremented * @deliver: used to indicate a deliver action should take place * @vlan_tci: tci fields for vlan push actions @@ -68,6 +69,7 @@ struct efx_tc_action_set { u16 vlan_push:2; u16 vlan_pop:2; u16 decap:1; + u16 do_nat:1; u16 do_ttl_dec:1; u16 deliver:1; __be16 vlan_tci[2]; diff --git a/drivers/net/ethernet/sfc/tc_conntrack.c b/drivers/net/ethernet/sfc/tc_conntrack.c index 44bb57670340..d90206f27161 100644 --- a/drivers/net/ethernet/sfc/tc_conntrack.c +++ b/drivers/net/ethernet/sfc/tc_conntrack.c @@ -276,10 +276,84 @@ static int efx_tc_ct_parse_match(struct efx_nic *efx, struct flow_rule *fr, return 0; } +/** + * struct efx_tc_ct_mangler_state - tracks which fields have been pedited + * + * @ipv4: IP source or destination addr has been set + * @tcpudp: TCP/UDP source or destination port has been set + */ +struct efx_tc_ct_mangler_state { + u8 ipv4:1; + u8 tcpudp:1; +}; + +static int efx_tc_ct_mangle(struct efx_nic *efx, struct efx_tc_ct_entry *conn, + const struct flow_action_entry *fa, + struct efx_tc_ct_mangler_state *mung) +{ + /* Is this the first mangle we've processed for this rule? */ + bool first = !(mung->ipv4 || mung->tcpudp); + bool dnat = false; + + switch (fa->mangle.htype) { + case FLOW_ACT_MANGLE_HDR_TYPE_IP4: + switch (fa->mangle.offset) { + case offsetof(struct iphdr, daddr): + dnat = true; + fallthrough; + case offsetof(struct iphdr, saddr): + if (fa->mangle.mask) + return -EOPNOTSUPP; + conn->nat_ip = htonl(fa->mangle.val); + mung->ipv4 = 1; + break; + default: + return -EOPNOTSUPP; + } + break; + case FLOW_ACT_MANGLE_HDR_TYPE_TCP: + case FLOW_ACT_MANGLE_HDR_TYPE_UDP: + /* Both struct tcphdr and struct udphdr start with + * __be16 source; + * __be16 dest; + * so we can use the same code for both. + */ + switch (fa->mangle.offset) { + case offsetof(struct tcphdr, dest): + BUILD_BUG_ON(offsetof(struct tcphdr, dest) != + offsetof(struct udphdr, dest)); + dnat = true; + fallthrough; + case offsetof(struct tcphdr, source): + BUILD_BUG_ON(offsetof(struct tcphdr, source) != + offsetof(struct udphdr, source)); + if (~fa->mangle.mask != 0xffff) + return -EOPNOTSUPP; + conn->l4_natport = htons(fa->mangle.val); + mung->tcpudp = 1; + break; + default: + return -EOPNOTSUPP; + } + break; + default: + return -EOPNOTSUPP; + } + /* first mangle tells us whether this is SNAT or DNAT; + * subsequent mangles must match that + */ + if (first) + conn->dnat = dnat; + else if (conn->dnat != dnat) + return -EOPNOTSUPP; + return 0; +} + static int efx_tc_ct_replace(struct efx_tc_ct_zone *ct_zone, struct flow_cls_offload *tc) { struct flow_rule *fr = flow_cls_offload_flow_rule(tc); + struct efx_tc_ct_mangler_state mung = {}; struct efx_tc_ct_entry *conn, *old; struct efx_nic *efx = ct_zone->efx; const struct flow_action_entry *fa; @@ -326,6 +400,17 @@ static int efx_tc_ct_replace(struct efx_tc_ct_zone *ct_zone, goto release; } break; + case FLOW_ACTION_MANGLE: + if (conn->eth_proto != htons(ETH_P_IP)) { + netif_dbg(efx, drv, efx->net_dev, + "NAT only supported for IPv4\n"); + rc = -EOPNOTSUPP; + goto release; + } + rc = efx_tc_ct_mangle(efx, conn, fa, &mung); + if (rc) + goto release; + break; default: netif_dbg(efx, drv, efx->net_dev, "Unhandled action %u for conntrack\n", fa->id); @@ -335,8 +420,10 @@ static int efx_tc_ct_replace(struct efx_tc_ct_zone *ct_zone, } /* fill in defaults for unmangled values */ - conn->nat_ip = conn->dnat ? conn->dst_ip : conn->src_ip; - conn->l4_natport = conn->dnat ? conn->l4_dport : conn->l4_sport; + if (!mung.ipv4) + conn->nat_ip = conn->dnat ? conn->dst_ip : conn->src_ip; + if (!mung.tcpudp) + conn->l4_natport = conn->dnat ? conn->l4_dport : conn->l4_sport; cnt = efx_tc_flower_allocate_counter(efx, EFX_TC_COUNTER_TYPE_CT); if (IS_ERR(cnt)) { diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel-plat.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel-plat.c index 70edc5232379..d68f0c4e7835 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel-plat.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel-plat.c @@ -7,8 +7,8 @@ #include <linux/ethtool.h> #include <linux/module.h> #include <linux/of.h> -#include <linux/of_device.h> #include <linux/platform_device.h> +#include <linux/property.h> #include <linux/stmmac.h> #include "dwmac4.h" @@ -76,7 +76,6 @@ static int intel_eth_plat_probe(struct platform_device *pdev) { struct plat_stmmacenet_data *plat_dat; struct stmmac_resources stmmac_res; - const struct of_device_id *match; struct intel_dwmac *dwmac; unsigned long rate; int ret; @@ -98,10 +97,8 @@ static int intel_eth_plat_probe(struct platform_device *pdev) dwmac->dev = &pdev->dev; dwmac->tx_clk = NULL; - match = of_match_device(intel_eth_plat_match, &pdev->dev); - if (match && match->data) { - dwmac->data = (const struct intel_dwmac_data *)match->data; - + dwmac->data = device_get_match_data(&pdev->dev); + if (dwmac->data) { if (dwmac->data->fix_mac_speed) plat_dat->fix_mac_speed = dwmac->data->fix_mac_speed; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c index d8d3c729f219..c92dfc4ecf57 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c @@ -98,7 +98,6 @@ struct stm32_dwmac { struct stm32_ops { int (*set_mode)(struct plat_stmmacenet_data *plat_dat); - int (*clk_prepare)(struct stm32_dwmac *dwmac, bool prepare); int (*suspend)(struct stm32_dwmac *dwmac); void (*resume)(struct stm32_dwmac *dwmac); int (*parse_data)(struct stm32_dwmac *dwmac, @@ -107,62 +106,55 @@ struct stm32_ops { bool clk_rx_enable_in_suspend; }; -static int stm32_dwmac_init(struct plat_stmmacenet_data *plat_dat) +static int stm32_dwmac_clk_enable(struct stm32_dwmac *dwmac, bool resume) { - struct stm32_dwmac *dwmac = plat_dat->bsp_priv; int ret; - if (dwmac->ops->set_mode) { - ret = dwmac->ops->set_mode(plat_dat); - if (ret) - return ret; - } - ret = clk_prepare_enable(dwmac->clk_tx); if (ret) - return ret; + goto err_clk_tx; - if (!dwmac->ops->clk_rx_enable_in_suspend || - !dwmac->dev->power.is_suspended) { + if (!dwmac->ops->clk_rx_enable_in_suspend || !resume) { ret = clk_prepare_enable(dwmac->clk_rx); - if (ret) { - clk_disable_unprepare(dwmac->clk_tx); - return ret; - } + if (ret) + goto err_clk_rx; } - if (dwmac->ops->clk_prepare) { - ret = dwmac->ops->clk_prepare(dwmac, true); - if (ret) { - clk_disable_unprepare(dwmac->clk_rx); - clk_disable_unprepare(dwmac->clk_tx); - } + ret = clk_prepare_enable(dwmac->syscfg_clk); + if (ret) + goto err_syscfg_clk; + + if (dwmac->enable_eth_ck) { + ret = clk_prepare_enable(dwmac->clk_eth_ck); + if (ret) + goto err_clk_eth_ck; } return ret; + +err_clk_eth_ck: + clk_disable_unprepare(dwmac->syscfg_clk); +err_syscfg_clk: + if (!dwmac->ops->clk_rx_enable_in_suspend || !resume) + clk_disable_unprepare(dwmac->clk_rx); +err_clk_rx: + clk_disable_unprepare(dwmac->clk_tx); +err_clk_tx: + return ret; } -static int stm32mp1_clk_prepare(struct stm32_dwmac *dwmac, bool prepare) +static int stm32_dwmac_init(struct plat_stmmacenet_data *plat_dat, bool resume) { - int ret = 0; + struct stm32_dwmac *dwmac = plat_dat->bsp_priv; + int ret; - if (prepare) { - ret = clk_prepare_enable(dwmac->syscfg_clk); + if (dwmac->ops->set_mode) { + ret = dwmac->ops->set_mode(plat_dat); if (ret) return ret; - if (dwmac->enable_eth_ck) { - ret = clk_prepare_enable(dwmac->clk_eth_ck); - if (ret) { - clk_disable_unprepare(dwmac->syscfg_clk); - return ret; - } - } - } else { - clk_disable_unprepare(dwmac->syscfg_clk); - if (dwmac->enable_eth_ck) - clk_disable_unprepare(dwmac->clk_eth_ck); } - return ret; + + return stm32_dwmac_clk_enable(dwmac, resume); } static int stm32mp1_set_mode(struct plat_stmmacenet_data *plat_dat) @@ -252,13 +244,15 @@ static int stm32mcu_set_mode(struct plat_stmmacenet_data *plat_dat) dwmac->ops->syscfg_eth_mask, val << 23); } -static void stm32_dwmac_clk_disable(struct stm32_dwmac *dwmac) +static void stm32_dwmac_clk_disable(struct stm32_dwmac *dwmac, bool suspend) { clk_disable_unprepare(dwmac->clk_tx); - clk_disable_unprepare(dwmac->clk_rx); + if (!dwmac->ops->clk_rx_enable_in_suspend || !suspend) + clk_disable_unprepare(dwmac->clk_rx); - if (dwmac->ops->clk_prepare) - dwmac->ops->clk_prepare(dwmac, false); + clk_disable_unprepare(dwmac->syscfg_clk); + if (dwmac->enable_eth_ck) + clk_disable_unprepare(dwmac->clk_eth_ck); } static int stm32_dwmac_parse_data(struct stm32_dwmac *dwmac, @@ -397,7 +391,7 @@ static int stm32_dwmac_probe(struct platform_device *pdev) plat_dat->bsp_priv = dwmac; - ret = stm32_dwmac_init(plat_dat); + ret = stm32_dwmac_init(plat_dat, false); if (ret) return ret; @@ -408,7 +402,7 @@ static int stm32_dwmac_probe(struct platform_device *pdev) return 0; err_clk_disable: - stm32_dwmac_clk_disable(dwmac); + stm32_dwmac_clk_disable(dwmac, false); return ret; } @@ -421,7 +415,7 @@ static void stm32_dwmac_remove(struct platform_device *pdev) stmmac_dvr_remove(&pdev->dev); - stm32_dwmac_clk_disable(priv->plat->bsp_priv); + stm32_dwmac_clk_disable(dwmac, false); if (dwmac->irq_pwr_wakeup >= 0) { dev_pm_clear_wake_irq(&pdev->dev); @@ -431,18 +425,7 @@ static void stm32_dwmac_remove(struct platform_device *pdev) static int stm32mp1_suspend(struct stm32_dwmac *dwmac) { - int ret = 0; - - ret = clk_prepare_enable(dwmac->clk_ethstp); - if (ret) - return ret; - - clk_disable_unprepare(dwmac->clk_tx); - clk_disable_unprepare(dwmac->syscfg_clk); - if (dwmac->enable_eth_ck) - clk_disable_unprepare(dwmac->clk_eth_ck); - - return ret; + return clk_prepare_enable(dwmac->clk_ethstp); } static void stm32mp1_resume(struct stm32_dwmac *dwmac) @@ -450,14 +433,6 @@ static void stm32mp1_resume(struct stm32_dwmac *dwmac) clk_disable_unprepare(dwmac->clk_ethstp); } -static int stm32mcu_suspend(struct stm32_dwmac *dwmac) -{ - clk_disable_unprepare(dwmac->clk_tx); - clk_disable_unprepare(dwmac->clk_rx); - - return 0; -} - #ifdef CONFIG_PM_SLEEP static int stm32_dwmac_suspend(struct device *dev) { @@ -468,6 +443,10 @@ static int stm32_dwmac_suspend(struct device *dev) int ret; ret = stmmac_suspend(dev); + if (ret) + return ret; + + stm32_dwmac_clk_disable(dwmac, true); if (dwmac->ops->suspend) ret = dwmac->ops->suspend(dwmac); @@ -485,7 +464,7 @@ static int stm32_dwmac_resume(struct device *dev) if (dwmac->ops->resume) dwmac->ops->resume(dwmac); - ret = stm32_dwmac_init(priv->plat); + ret = stm32_dwmac_init(priv->plat, true); if (ret) return ret; @@ -500,13 +479,11 @@ static SIMPLE_DEV_PM_OPS(stm32_dwmac_pm_ops, static struct stm32_ops stm32mcu_dwmac_data = { .set_mode = stm32mcu_set_mode, - .suspend = stm32mcu_suspend, .syscfg_eth_mask = SYSCFG_MCU_ETH_MASK }; static struct stm32_ops stm32mp1_dwmac_data = { .set_mode = stm32mp1_set_mode, - .clk_prepare = stm32mp1_clk_prepare, .suspend = stm32mp1_suspend, .resume = stm32mp1_resume, .parse_data = stm32mp1_parse_data, diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c index 3d7825cb30bb..1be06b96c35f 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c @@ -81,7 +81,7 @@ static int stmmac_adjust_time(struct ptp_clock_info *ptp, s64 delta) stmmac_adjust_systime(priv, priv->ptpaddr, sec, nsec, neg_adj, xmac); write_unlock_irqrestore(&priv->ptp_lock, flags); - /* Caculate new basetime and re-configured EST after PTP time adjust. */ + /* Calculate new basetime and re-configured EST after PTP time adjust. */ if (est_rst) { struct timespec64 current_time, time; ktime_t current_time_ns, basetime; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c index f9e43fc32ee8..3ca1c2a816ff 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c @@ -802,7 +802,7 @@ static int stmmac_test_flowctrl(struct stmmac_priv *priv) stmmac_start_rx(priv, priv->ioaddr, i); local_bh_disable(); - napi_reschedule(&ch->rx_napi); + napi_schedule(&ch->rx_napi); local_bh_enable(); } diff --git a/drivers/net/ethernet/ti/davinci_emac.c b/drivers/net/ethernet/ti/davinci_emac.c index 5d756df133eb..23f8bc1cd20d 100644 --- a/drivers/net/ethernet/ti/davinci_emac.c +++ b/drivers/net/ethernet/ti/davinci_emac.c @@ -38,6 +38,7 @@ #include <linux/dma-mapping.h> #include <linux/clk.h> #include <linux/platform_device.h> +#include <linux/property.h> #include <linux/regmap.h> #include <linux/semaphore.h> #include <linux/phy.h> @@ -47,10 +48,7 @@ #include <linux/pm_runtime.h> #include <linux/davinci_emac.h> #include <linux/of.h> -#include <linux/of_address.h> -#include <linux/of_device.h> #include <linux/of_mdio.h> -#include <linux/of_irq.h> #include <linux/of_net.h> #include <linux/mfd/syscon.h> @@ -1726,13 +1724,10 @@ static const struct net_device_ops emac_netdev_ops = { #endif }; -static const struct of_device_id davinci_emac_of_match[]; - static struct emac_platform_data * davinci_emac_of_get_pdata(struct platform_device *pdev, struct emac_priv *priv) { struct device_node *np; - const struct of_device_id *match; const struct emac_platform_data *auxdata; struct emac_platform_data *pdata = NULL; @@ -1779,9 +1774,8 @@ davinci_emac_of_get_pdata(struct platform_device *pdev, struct emac_priv *priv) pdata->interrupt_disable = auxdata->interrupt_disable; } - match = of_match_device(davinci_emac_of_match, &pdev->dev); - if (match && match->data) { - auxdata = match->data; + auxdata = device_get_match_data(&pdev->dev); + if (auxdata) { pdata->version = auxdata->version; pdata->hw_ram_addr = auxdata->hw_ram_addr; } diff --git a/drivers/net/ethernet/ti/icssg/icssg_prueth.c b/drivers/net/ethernet/ti/icssg/icssg_prueth.c index 482ae82a99c1..de3bb9da3b13 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_prueth.c +++ b/drivers/net/ethernet/ti/icssg/icssg_prueth.c @@ -19,11 +19,11 @@ #include <linux/mfd/syscon.h> #include <linux/module.h> #include <linux/of.h> -#include <linux/of_irq.h> #include <linux/of_mdio.h> #include <linux/of_net.h> -#include <linux/of_platform.h> +#include <linux/platform_device.h> #include <linux/phy.h> +#include <linux/property.h> #include <linux/remoteproc/pruss.h> #include <linux/regmap.h> #include <linux/remoteproc.h> @@ -1934,8 +1934,6 @@ static void prueth_put_cores(struct prueth *prueth, int slice) pru_rproc_put(prueth->pru[slice]); } -static const struct of_device_id prueth_dt_match[]; - static int prueth_probe(struct platform_device *pdev) { struct device_node *eth_node, *eth_ports_node; @@ -1944,7 +1942,6 @@ static int prueth_probe(struct platform_device *pdev) struct genpool_data_align gp_data = { .align = SZ_64K, }; - const struct of_device_id *match; struct device *dev = &pdev->dev; struct device_node *np; struct prueth *prueth; @@ -1954,17 +1951,13 @@ static int prueth_probe(struct platform_device *pdev) np = dev->of_node; - match = of_match_device(prueth_dt_match, dev); - if (!match) - return -ENODEV; - prueth = devm_kzalloc(dev, sizeof(*prueth), GFP_KERNEL); if (!prueth) return -ENOMEM; dev_set_drvdata(dev, prueth); prueth->pdev = pdev; - prueth->pdata = *(const struct prueth_pdata *)match->data; + prueth->pdata = *(const struct prueth_pdata *)device_get_match_data(dev); prueth->dev = dev; eth_ports_node = of_get_child_by_name(np, "ethernet-ports"); diff --git a/drivers/net/ethernet/ti/netcp_ethss.c b/drivers/net/ethernet/ti/netcp_ethss.c index 2adf82a32bf6..02cb6474f6dc 100644 --- a/drivers/net/ethernet/ti/netcp_ethss.c +++ b/drivers/net/ethernet/ti/netcp_ethss.c @@ -1735,8 +1735,8 @@ static const struct netcp_ethtool_stat xgbe10_et_stats[] = { static void keystone_get_drvinfo(struct net_device *ndev, struct ethtool_drvinfo *info) { - strncpy(info->driver, NETCP_DRIVER_NAME, sizeof(info->driver)); - strncpy(info->version, NETCP_DRIVER_VERSION, sizeof(info->version)); + strscpy(info->driver, NETCP_DRIVER_NAME, sizeof(info->driver)); + strscpy(info->version, NETCP_DRIVER_VERSION, sizeof(info->version)); } static u32 keystone_get_msglevel(struct net_device *ndev) diff --git a/drivers/net/ethernet/toshiba/tc35815.c b/drivers/net/ethernet/toshiba/tc35815.c index 14cf6ecf6d0d..6e3758dfbdbd 100644 --- a/drivers/net/ethernet/toshiba/tc35815.c +++ b/drivers/net/ethernet/toshiba/tc35815.c @@ -1434,14 +1434,10 @@ static irqreturn_t tc35815_interrupt(int irq, void *dev_id) u32 dmactl = tc_readl(&tr->DMA_Ctl); if (!(dmactl & DMA_IntMask)) { - /* disable interrupts */ - tc_writel(dmactl | DMA_IntMask, &tr->DMA_Ctl); - if (napi_schedule_prep(&lp->napi)) + if (napi_schedule_prep(&lp->napi)) { + /* disable interrupts */ + tc_writel(dmactl | DMA_IntMask, &tr->DMA_Ctl); __napi_schedule(&lp->napi); - else { - printk(KERN_ERR "%s: interrupt taken in poll\n", - dev->name); - BUG(); } (void)tc_readl(&tr->Int_Src); /* flush */ return IRQ_HANDLED; diff --git a/drivers/net/ethernet/wangxun/libwx/wx_ethtool.c b/drivers/net/ethernet/wangxun/libwx/wx_ethtool.c index 93cb6f2294e7..ddc5f6d20b9c 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_ethtool.c +++ b/drivers/net/ethernet/wangxun/libwx/wx_ethtool.c @@ -3,9 +3,171 @@ #include <linux/pci.h> #include <linux/phy.h> +#include <linux/ethtool.h> #include "wx_type.h" #include "wx_ethtool.h" +#include "wx_hw.h" + +struct wx_stats { + char stat_string[ETH_GSTRING_LEN]; + size_t sizeof_stat; + off_t stat_offset; +}; + +#define WX_STAT(str, m) { \ + .stat_string = str, \ + .sizeof_stat = sizeof(((struct wx *)0)->m), \ + .stat_offset = offsetof(struct wx, m) } + +static const struct wx_stats wx_gstrings_stats[] = { + WX_STAT("rx_dma_pkts", stats.gprc), + WX_STAT("tx_dma_pkts", stats.gptc), + WX_STAT("rx_dma_bytes", stats.gorc), + WX_STAT("tx_dma_bytes", stats.gotc), + WX_STAT("rx_total_pkts", stats.tpr), + WX_STAT("tx_total_pkts", stats.tpt), + WX_STAT("rx_long_length_count", stats.roc), + WX_STAT("rx_short_length_count", stats.ruc), + WX_STAT("os2bmc_rx_by_bmc", stats.o2bgptc), + WX_STAT("os2bmc_tx_by_bmc", stats.b2ospc), + WX_STAT("os2bmc_tx_by_host", stats.o2bspc), + WX_STAT("os2bmc_rx_by_host", stats.b2ogprc), + WX_STAT("rx_no_dma_resources", stats.rdmdrop), + WX_STAT("tx_busy", tx_busy), + WX_STAT("non_eop_descs", non_eop_descs), + WX_STAT("tx_restart_queue", restart_queue), + WX_STAT("rx_csum_offload_good_count", hw_csum_rx_good), + WX_STAT("rx_csum_offload_errors", hw_csum_rx_error), + WX_STAT("alloc_rx_buff_failed", alloc_rx_buff_failed), +}; + +/* drivers allocates num_tx_queues and num_rx_queues symmetrically so + * we set the num_rx_queues to evaluate to num_tx_queues. This is + * used because we do not have a good way to get the max number of + * rx queues with CONFIG_RPS disabled. + */ +#define WX_NUM_RX_QUEUES netdev->num_tx_queues +#define WX_NUM_TX_QUEUES netdev->num_tx_queues + +#define WX_QUEUE_STATS_LEN ( \ + (WX_NUM_TX_QUEUES + WX_NUM_RX_QUEUES) * \ + (sizeof(struct wx_queue_stats) / sizeof(u64))) +#define WX_GLOBAL_STATS_LEN ARRAY_SIZE(wx_gstrings_stats) +#define WX_STATS_LEN (WX_GLOBAL_STATS_LEN + WX_QUEUE_STATS_LEN) + +int wx_get_sset_count(struct net_device *netdev, int sset) +{ + switch (sset) { + case ETH_SS_STATS: + return WX_STATS_LEN; + default: + return -EOPNOTSUPP; + } +} +EXPORT_SYMBOL(wx_get_sset_count); + +void wx_get_strings(struct net_device *netdev, u32 stringset, u8 *data) +{ + u8 *p = data; + int i; + + switch (stringset) { + case ETH_SS_STATS: + for (i = 0; i < WX_GLOBAL_STATS_LEN; i++) + ethtool_sprintf(&p, wx_gstrings_stats[i].stat_string); + for (i = 0; i < netdev->num_tx_queues; i++) { + ethtool_sprintf(&p, "tx_queue_%u_packets", i); + ethtool_sprintf(&p, "tx_queue_%u_bytes", i); + } + for (i = 0; i < WX_NUM_RX_QUEUES; i++) { + ethtool_sprintf(&p, "rx_queue_%u_packets", i); + ethtool_sprintf(&p, "rx_queue_%u_bytes", i); + } + break; + } +} +EXPORT_SYMBOL(wx_get_strings); + +void wx_get_ethtool_stats(struct net_device *netdev, + struct ethtool_stats *stats, u64 *data) +{ + struct wx *wx = netdev_priv(netdev); + struct wx_ring *ring; + unsigned int start; + int i, j; + char *p; + + wx_update_stats(wx); + + for (i = 0; i < WX_GLOBAL_STATS_LEN; i++) { + p = (char *)wx + wx_gstrings_stats[i].stat_offset; + data[i] = (wx_gstrings_stats[i].sizeof_stat == + sizeof(u64)) ? *(u64 *)p : *(u32 *)p; + } + + for (j = 0; j < netdev->num_tx_queues; j++) { + ring = wx->tx_ring[j]; + if (!ring) { + data[i++] = 0; + data[i++] = 0; + continue; + } + + do { + start = u64_stats_fetch_begin(&ring->syncp); + data[i] = ring->stats.packets; + data[i + 1] = ring->stats.bytes; + } while (u64_stats_fetch_retry(&ring->syncp, start)); + i += 2; + } + for (j = 0; j < WX_NUM_RX_QUEUES; j++) { + ring = wx->rx_ring[j]; + if (!ring) { + data[i++] = 0; + data[i++] = 0; + continue; + } + + do { + start = u64_stats_fetch_begin(&ring->syncp); + data[i] = ring->stats.packets; + data[i + 1] = ring->stats.bytes; + } while (u64_stats_fetch_retry(&ring->syncp, start)); + i += 2; + } +} +EXPORT_SYMBOL(wx_get_ethtool_stats); + +void wx_get_mac_stats(struct net_device *netdev, + struct ethtool_eth_mac_stats *mac_stats) +{ + struct wx *wx = netdev_priv(netdev); + struct wx_hw_stats *hwstats; + + wx_update_stats(wx); + + hwstats = &wx->stats; + mac_stats->MulticastFramesXmittedOK = hwstats->mptc; + mac_stats->BroadcastFramesXmittedOK = hwstats->bptc; + mac_stats->MulticastFramesReceivedOK = hwstats->mprc; + mac_stats->BroadcastFramesReceivedOK = hwstats->bprc; +} +EXPORT_SYMBOL(wx_get_mac_stats); + +void wx_get_pause_stats(struct net_device *netdev, + struct ethtool_pause_stats *stats) +{ + struct wx *wx = netdev_priv(netdev); + struct wx_hw_stats *hwstats; + + wx_update_stats(wx); + + hwstats = &wx->stats; + stats->tx_pause_frames = hwstats->lxontxc + hwstats->lxofftxc; + stats->rx_pause_frames = hwstats->lxonoffrxc; +} +EXPORT_SYMBOL(wx_get_pause_stats); void wx_get_drvinfo(struct net_device *netdev, struct ethtool_drvinfo *info) { @@ -14,5 +176,12 @@ void wx_get_drvinfo(struct net_device *netdev, struct ethtool_drvinfo *info) strscpy(info->driver, wx->driver_name, sizeof(info->driver)); strscpy(info->fw_version, wx->eeprom_id, sizeof(info->fw_version)); strscpy(info->bus_info, pci_name(wx->pdev), sizeof(info->bus_info)); + if (wx->num_tx_queues <= WX_NUM_TX_QUEUES) { + info->n_stats = WX_STATS_LEN - + (WX_NUM_TX_QUEUES - wx->num_tx_queues) * + (sizeof(struct wx_queue_stats) / sizeof(u64)) * 2; + } else { + info->n_stats = WX_STATS_LEN; + } } EXPORT_SYMBOL(wx_get_drvinfo); diff --git a/drivers/net/ethernet/wangxun/libwx/wx_ethtool.h b/drivers/net/ethernet/wangxun/libwx/wx_ethtool.h index e85538c69454..16d1a09369a6 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_ethtool.h +++ b/drivers/net/ethernet/wangxun/libwx/wx_ethtool.h @@ -4,5 +4,13 @@ #ifndef _WX_ETHTOOL_H_ #define _WX_ETHTOOL_H_ +int wx_get_sset_count(struct net_device *netdev, int sset); +void wx_get_strings(struct net_device *netdev, u32 stringset, u8 *data); +void wx_get_ethtool_stats(struct net_device *netdev, + struct ethtool_stats *stats, u64 *data); +void wx_get_mac_stats(struct net_device *netdev, + struct ethtool_eth_mac_stats *mac_stats); +void wx_get_pause_stats(struct net_device *netdev, + struct ethtool_pause_stats *stats); void wx_get_drvinfo(struct net_device *netdev, struct ethtool_drvinfo *info); #endif /* _WX_ETHTOOL_H_ */ diff --git a/drivers/net/ethernet/wangxun/libwx/wx_hw.c b/drivers/net/ethernet/wangxun/libwx/wx_hw.c index f0063d569c80..a3c5de9d547a 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_hw.c +++ b/drivers/net/ethernet/wangxun/libwx/wx_hw.c @@ -2002,6 +2002,105 @@ int wx_vlan_rx_kill_vid(struct net_device *netdev, __be16 proto, u16 vid) EXPORT_SYMBOL(wx_vlan_rx_kill_vid); /** + * wx_update_stats - Update the board statistics counters. + * @wx: board private structure + **/ +void wx_update_stats(struct wx *wx) +{ + struct wx_hw_stats *hwstats = &wx->stats; + + u64 non_eop_descs = 0, alloc_rx_buff_failed = 0; + u64 hw_csum_rx_good = 0, hw_csum_rx_error = 0; + u64 restart_queue = 0, tx_busy = 0; + u32 i; + + /* gather some stats to the wx struct that are per queue */ + for (i = 0; i < wx->num_rx_queues; i++) { + struct wx_ring *rx_ring = wx->rx_ring[i]; + + non_eop_descs += rx_ring->rx_stats.non_eop_descs; + alloc_rx_buff_failed += rx_ring->rx_stats.alloc_rx_buff_failed; + hw_csum_rx_good += rx_ring->rx_stats.csum_good_cnt; + hw_csum_rx_error += rx_ring->rx_stats.csum_err; + } + wx->non_eop_descs = non_eop_descs; + wx->alloc_rx_buff_failed = alloc_rx_buff_failed; + wx->hw_csum_rx_error = hw_csum_rx_error; + wx->hw_csum_rx_good = hw_csum_rx_good; + + for (i = 0; i < wx->num_tx_queues; i++) { + struct wx_ring *tx_ring = wx->tx_ring[i]; + + restart_queue += tx_ring->tx_stats.restart_queue; + tx_busy += tx_ring->tx_stats.tx_busy; + } + wx->restart_queue = restart_queue; + wx->tx_busy = tx_busy; + + hwstats->gprc += rd32(wx, WX_RDM_PKT_CNT); + hwstats->gptc += rd32(wx, WX_TDM_PKT_CNT); + hwstats->gorc += rd64(wx, WX_RDM_BYTE_CNT_LSB); + hwstats->gotc += rd64(wx, WX_TDM_BYTE_CNT_LSB); + hwstats->tpr += rd64(wx, WX_RX_FRAME_CNT_GOOD_BAD_L); + hwstats->tpt += rd64(wx, WX_TX_FRAME_CNT_GOOD_BAD_L); + hwstats->crcerrs += rd64(wx, WX_RX_CRC_ERROR_FRAMES_L); + hwstats->rlec += rd64(wx, WX_RX_LEN_ERROR_FRAMES_L); + hwstats->bprc += rd64(wx, WX_RX_BC_FRAMES_GOOD_L); + hwstats->bptc += rd64(wx, WX_TX_BC_FRAMES_GOOD_L); + hwstats->mprc += rd64(wx, WX_RX_MC_FRAMES_GOOD_L); + hwstats->mptc += rd64(wx, WX_TX_MC_FRAMES_GOOD_L); + hwstats->roc += rd32(wx, WX_RX_OVERSIZE_FRAMES_GOOD); + hwstats->ruc += rd32(wx, WX_RX_UNDERSIZE_FRAMES_GOOD); + hwstats->lxonoffrxc += rd32(wx, WX_MAC_LXONOFFRXC); + hwstats->lxontxc += rd32(wx, WX_RDB_LXONTXC); + hwstats->lxofftxc += rd32(wx, WX_RDB_LXOFFTXC); + hwstats->o2bgptc += rd32(wx, WX_TDM_OS2BMC_CNT); + hwstats->b2ospc += rd32(wx, WX_MNG_BMC2OS_CNT); + hwstats->o2bspc += rd32(wx, WX_MNG_OS2BMC_CNT); + hwstats->b2ogprc += rd32(wx, WX_RDM_BMC2OS_CNT); + hwstats->rdmdrop += rd32(wx, WX_RDM_DRP_PKT); + + for (i = 0; i < wx->mac.max_rx_queues; i++) + hwstats->qmprc += rd32(wx, WX_PX_MPRC(i)); +} +EXPORT_SYMBOL(wx_update_stats); + +/** + * wx_clear_hw_cntrs - Generic clear hardware counters + * @wx: board private structure + * + * Clears all hardware statistics counters by reading them from the hardware + * Statistics counters are clear on read. + **/ +void wx_clear_hw_cntrs(struct wx *wx) +{ + u16 i = 0; + + for (i = 0; i < wx->mac.max_rx_queues; i++) + wr32(wx, WX_PX_MPRC(i), 0); + + rd32(wx, WX_RDM_PKT_CNT); + rd32(wx, WX_TDM_PKT_CNT); + rd64(wx, WX_RDM_BYTE_CNT_LSB); + rd32(wx, WX_TDM_BYTE_CNT_LSB); + rd32(wx, WX_RDM_DRP_PKT); + rd32(wx, WX_RX_UNDERSIZE_FRAMES_GOOD); + rd32(wx, WX_RX_OVERSIZE_FRAMES_GOOD); + rd64(wx, WX_RX_FRAME_CNT_GOOD_BAD_L); + rd64(wx, WX_TX_FRAME_CNT_GOOD_BAD_L); + rd64(wx, WX_RX_MC_FRAMES_GOOD_L); + rd64(wx, WX_TX_MC_FRAMES_GOOD_L); + rd64(wx, WX_RX_BC_FRAMES_GOOD_L); + rd64(wx, WX_TX_BC_FRAMES_GOOD_L); + rd64(wx, WX_RX_CRC_ERROR_FRAMES_L); + rd64(wx, WX_RX_LEN_ERROR_FRAMES_L); + rd32(wx, WX_RDB_LXONTXC); + rd32(wx, WX_RDB_LXOFFTXC); + rd32(wx, WX_MAC_LXONOFFRXC); +} +EXPORT_SYMBOL(wx_clear_hw_cntrs); + +/** * wx_start_hw - Prepare hardware for Tx/Rx * @wx: pointer to hardware structure * diff --git a/drivers/net/ethernet/wangxun/libwx/wx_hw.h b/drivers/net/ethernet/wangxun/libwx/wx_hw.h index 48d3ccabc272..12c20a7c364d 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_hw.h +++ b/drivers/net/ethernet/wangxun/libwx/wx_hw.h @@ -41,5 +41,7 @@ int wx_get_pcie_msix_counts(struct wx *wx, u16 *msix_count, u16 max_msix_count); int wx_sw_init(struct wx *wx); int wx_vlan_rx_add_vid(struct net_device *netdev, __be16 proto, u16 vid); int wx_vlan_rx_kill_vid(struct net_device *netdev, __be16 proto, u16 vid); +void wx_update_stats(struct wx *wx); +void wx_clear_hw_cntrs(struct wx *wx); #endif /* _WX_HW_H_ */ diff --git a/drivers/net/ethernet/wangxun/libwx/wx_lib.c b/drivers/net/ethernet/wangxun/libwx/wx_lib.c index e04d4a5eed7b..2823861e5a92 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_lib.c +++ b/drivers/net/ethernet/wangxun/libwx/wx_lib.c @@ -488,6 +488,7 @@ static bool wx_is_non_eop(struct wx_ring *rx_ring, return false; rx_ring->rx_buffer_info[ntc].skb = skb; + rx_ring->rx_stats.non_eop_descs++; return true; } @@ -721,6 +722,7 @@ static int wx_clean_rx_irq(struct wx_q_vector *q_vector, /* exit if we failed to retrieve a buffer */ if (!skb) { + rx_ring->rx_stats.alloc_rx_buff_failed++; rx_buffer->pagecnt_bias++; break; } @@ -877,9 +879,11 @@ static bool wx_clean_tx_irq(struct wx_q_vector *q_vector, if (__netif_subqueue_stopped(tx_ring->netdev, tx_ring->queue_index) && - netif_running(tx_ring->netdev)) + netif_running(tx_ring->netdev)) { netif_wake_subqueue(tx_ring->netdev, tx_ring->queue_index); + ++tx_ring->tx_stats.restart_queue; + } } return !!budget; @@ -956,6 +960,7 @@ static int wx_maybe_stop_tx(struct wx_ring *tx_ring, u16 size) /* A reprieve! - use start_queue because it doesn't call schedule */ netif_start_subqueue(tx_ring->netdev, tx_ring->queue_index); + ++tx_ring->tx_stats.restart_queue; return 0; } @@ -1533,8 +1538,10 @@ static netdev_tx_t wx_xmit_frame_ring(struct sk_buff *skb, count += TXD_USE_COUNT(skb_frag_size(&skb_shinfo(skb)-> frags[f])); - if (wx_maybe_stop_tx(tx_ring, count + 3)) + if (wx_maybe_stop_tx(tx_ring, count + 3)) { + tx_ring->tx_stats.tx_busy++; return NETDEV_TX_BUSY; + } /* record the location of the first descriptor for this packet */ first = &tx_ring->tx_buffer_info[tx_ring->next_to_use]; @@ -2665,8 +2672,11 @@ void wx_get_stats64(struct net_device *netdev, struct rtnl_link_stats64 *stats) { struct wx *wx = netdev_priv(netdev); + struct wx_hw_stats *hwstats; int i; + wx_update_stats(wx); + rcu_read_lock(); for (i = 0; i < wx->num_rx_queues; i++) { struct wx_ring *ring = READ_ONCE(wx->rx_ring[i]); @@ -2702,6 +2712,12 @@ void wx_get_stats64(struct net_device *netdev, } rcu_read_unlock(); + + hwstats = &wx->stats; + stats->rx_errors = hwstats->crcerrs + hwstats->rlec; + stats->multicast = hwstats->qmprc; + stats->rx_length_errors = hwstats->rlec; + stats->rx_crc_errors = hwstats->crcerrs; } EXPORT_SYMBOL(wx_get_stats64); diff --git a/drivers/net/ethernet/wangxun/libwx/wx_type.h b/drivers/net/ethernet/wangxun/libwx/wx_type.h index e3fc49284219..165e82de772e 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_type.h +++ b/drivers/net/ethernet/wangxun/libwx/wx_type.h @@ -59,6 +59,25 @@ #define WX_TS_ALARM_ST_DALARM BIT(1) #define WX_TS_ALARM_ST_ALARM BIT(0) +/* statistic */ +#define WX_TX_FRAME_CNT_GOOD_BAD_L 0x1181C +#define WX_TX_BC_FRAMES_GOOD_L 0x11824 +#define WX_TX_MC_FRAMES_GOOD_L 0x1182C +#define WX_RX_FRAME_CNT_GOOD_BAD_L 0x11900 +#define WX_RX_BC_FRAMES_GOOD_L 0x11918 +#define WX_RX_MC_FRAMES_GOOD_L 0x11920 +#define WX_RX_CRC_ERROR_FRAMES_L 0x11928 +#define WX_RX_LEN_ERROR_FRAMES_L 0x11978 +#define WX_RX_UNDERSIZE_FRAMES_GOOD 0x11938 +#define WX_RX_OVERSIZE_FRAMES_GOOD 0x1193C +#define WX_MAC_LXONOFFRXC 0x11E0C + +/*********************** Receive DMA registers **************************/ +#define WX_RDM_DRP_PKT 0x12500 +#define WX_RDM_PKT_CNT 0x12504 +#define WX_RDM_BYTE_CNT_LSB 0x12508 +#define WX_RDM_BMC2OS_CNT 0x12510 + /************************* Port Registers ************************************/ /* port cfg Registers */ #define WX_CFG_PORT_CTL 0x14400 @@ -94,6 +113,9 @@ #define WX_TDM_CTL_TE BIT(0) /* Transmit Enable */ #define WX_TDM_PB_THRE(_i) (0x18020 + ((_i) * 4)) #define WX_TDM_RP_IDX 0x1820C +#define WX_TDM_PKT_CNT 0x18308 +#define WX_TDM_BYTE_CNT_LSB 0x1830C +#define WX_TDM_OS2BMC_CNT 0x18314 #define WX_TDM_RP_RATE 0x18404 /***************************** RDB registers *********************************/ @@ -106,6 +128,8 @@ /* statistic */ #define WX_RDB_PFCMACDAL 0x19210 #define WX_RDB_PFCMACDAH 0x19214 +#define WX_RDB_LXOFFTXC 0x19218 +#define WX_RDB_LXONTXC 0x1921C /* ring assignment */ #define WX_RDB_PL_CFG(_i) (0x19300 + ((_i) * 4)) #define WX_RDB_PL_CFG_L4HDR BIT(1) @@ -218,6 +242,8 @@ #define WX_MNG_MBOX_CTL 0x1E044 #define WX_MNG_MBOX_CTL_SWRDY BIT(0) #define WX_MNG_MBOX_CTL_FWRDY BIT(2) +#define WX_MNG_BMC2OS_CNT 0x1E090 +#define WX_MNG_OS2BMC_CNT 0x1E094 /************************************* ETH MAC *****************************/ #define WX_MAC_TX_CFG 0x11000 @@ -301,6 +327,7 @@ enum WX_MSCA_CMD_value { #define WX_PX_RR_WP(_i) (0x01008 + ((_i) * 0x40)) #define WX_PX_RR_RP(_i) (0x0100C + ((_i) * 0x40)) #define WX_PX_RR_CFG(_i) (0x01010 + ((_i) * 0x40)) +#define WX_PX_MPRC(_i) (0x01020 + ((_i) * 0x40)) /* PX_RR_CFG bit definitions */ #define WX_PX_RR_CFG_VLAN BIT(31) #define WX_PX_RR_CFG_SPLIT_MODE BIT(26) @@ -768,9 +795,16 @@ struct wx_queue_stats { u64 bytes; }; +struct wx_tx_queue_stats { + u64 restart_queue; + u64 tx_busy; +}; + struct wx_rx_queue_stats { + u64 non_eop_descs; u64 csum_good_cnt; u64 csum_err; + u64 alloc_rx_buff_failed; }; /* iterator for handling rings in ring container */ @@ -814,6 +848,7 @@ struct wx_ring { struct wx_queue_stats stats; struct u64_stats_sync syncp; union { + struct wx_tx_queue_stats tx_stats; struct wx_rx_queue_stats rx_stats; }; } ____cacheline_internodealigned_in_smp; @@ -845,6 +880,33 @@ enum wx_isb_idx { WX_ISB_MAX }; +/* Statistics counters collected by the MAC */ +struct wx_hw_stats { + u64 gprc; + u64 gptc; + u64 gorc; + u64 gotc; + u64 tpr; + u64 tpt; + u64 bprc; + u64 bptc; + u64 mprc; + u64 mptc; + u64 roc; + u64 ruc; + u64 lxonoffrxc; + u64 lxontxc; + u64 lxofftxc; + u64 o2bgptc; + u64 b2ospc; + u64 o2bspc; + u64 b2ogprc; + u64 rdmdrop; + u64 crcerrs; + u64 rlec; + u64 qmprc; +}; + struct wx { unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)]; @@ -920,6 +982,14 @@ struct wx { u32 wol; u16 bd_number; + + struct wx_hw_stats stats; + u64 tx_busy; + u64 non_eop_descs; + u64 restart_queue; + u64 hw_csum_rx_good; + u64 hw_csum_rx_error; + u64 alloc_rx_buff_failed; }; #define WX_INTR_ALL (~0ULL) @@ -953,6 +1023,17 @@ wr32m(struct wx *wx, u32 reg, u32 mask, u32 field) wr32(wx, reg, val); } +static inline u64 +rd64(struct wx *wx, u32 reg) +{ + u64 lsb, msb; + + lsb = rd32(wx, reg); + msb = rd32(wx, reg + 4); + + return (lsb | msb << 32); +} + /* On some domestic CPU platforms, sometimes IO is not synchronized with * flushing memory, here use readl() to flush PCI read and write. */ diff --git a/drivers/net/ethernet/wangxun/ngbe/ngbe_ethtool.c b/drivers/net/ethernet/wangxun/ngbe/ngbe_ethtool.c index ec0e869e9aac..afbdf6919071 100644 --- a/drivers/net/ethernet/wangxun/ngbe/ngbe_ethtool.c +++ b/drivers/net/ethernet/wangxun/ngbe/ngbe_ethtool.c @@ -49,6 +49,11 @@ static const struct ethtool_ops ngbe_ethtool_ops = { .nway_reset = phy_ethtool_nway_reset, .get_wol = ngbe_get_wol, .set_wol = ngbe_set_wol, + .get_sset_count = wx_get_sset_count, + .get_strings = wx_get_strings, + .get_ethtool_stats = wx_get_ethtool_stats, + .get_eth_mac_stats = wx_get_mac_stats, + .get_pause_stats = wx_get_pause_stats, }; void ngbe_set_ethtool_ops(struct net_device *netdev) diff --git a/drivers/net/ethernet/wangxun/ngbe/ngbe_hw.c b/drivers/net/ethernet/wangxun/ngbe/ngbe_hw.c index 6562a2de9527..6459bc1d7c22 100644 --- a/drivers/net/ethernet/wangxun/ngbe/ngbe_hw.c +++ b/drivers/net/ethernet/wangxun/ngbe/ngbe_hw.c @@ -85,6 +85,8 @@ int ngbe_reset_hw(struct wx *wx) } ngbe_reset_misc(wx); + wx_clear_hw_cntrs(wx); + /* Store the permanent mac address */ wx_get_mac_addr(wx, wx->mac.perm_addr); diff --git a/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c b/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c index 2b431db6085a..652e6576e36a 100644 --- a/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c +++ b/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c @@ -332,6 +332,8 @@ static void ngbe_disable_device(struct wx *wx) wr32(wx, WX_PX_TR_CFG(reg_idx), WX_PX_TR_CFG_SWFLSH); } + + wx_update_stats(wx); } static void ngbe_down(struct wx *wx) diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_ethtool.c b/drivers/net/ethernet/wangxun/txgbe/txgbe_ethtool.c index 859da112586a..3f336a088e43 100644 --- a/drivers/net/ethernet/wangxun/txgbe/txgbe_ethtool.c +++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_ethtool.c @@ -39,6 +39,11 @@ static const struct ethtool_ops txgbe_ethtool_ops = { .get_link = ethtool_op_get_link, .get_link_ksettings = txgbe_get_link_ksettings, .set_link_ksettings = txgbe_set_link_ksettings, + .get_sset_count = wx_get_sset_count, + .get_strings = wx_get_strings, + .get_ethtool_stats = wx_get_ethtool_stats, + .get_eth_mac_stats = wx_get_mac_stats, + .get_pause_stats = wx_get_pause_stats, }; void txgbe_set_ethtool_ops(struct net_device *netdev) diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_hw.c b/drivers/net/ethernet/wangxun/txgbe/txgbe_hw.c index 372745250270..474d55524e82 100644 --- a/drivers/net/ethernet/wangxun/txgbe/txgbe_hw.c +++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_hw.c @@ -306,6 +306,8 @@ int txgbe_reset_hw(struct wx *wx) txgbe_reset_misc(wx); + wx_clear_hw_cntrs(wx); + /* Store the permanent mac address */ wx_get_mac_addr(wx, wx->mac.perm_addr); diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c b/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c index 5c3aed516ac2..394f699c51da 100644 --- a/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c +++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c @@ -286,6 +286,8 @@ static void txgbe_disable_device(struct wx *wx) /* Disable the Tx DMA engine */ wr32m(wx, WX_TDM_CTL, WX_TDM_CTL_TE, 0); + + wx_update_stats(wx); } static void txgbe_down(struct wx *wx) diff --git a/drivers/net/ethernet/wiznet/w5100-spi.c b/drivers/net/ethernet/wiznet/w5100-spi.c index 7c52796273a4..990a3cce8c0f 100644 --- a/drivers/net/ethernet/wiznet/w5100-spi.c +++ b/drivers/net/ethernet/wiznet/w5100-spi.c @@ -14,8 +14,8 @@ #include <linux/module.h> #include <linux/delay.h> #include <linux/netdevice.h> +#include <linux/of.h> #include <linux/of_net.h> -#include <linux/of_device.h> #include <linux/spi/spi.h> #include "w5100.h" @@ -420,7 +420,6 @@ MODULE_DEVICE_TABLE(of, w5100_of_match); static int w5100_spi_probe(struct spi_device *spi) { - const struct of_device_id *of_id; const struct w5100_ops *ops; kernel_ulong_t driver_data; const void *mac = NULL; @@ -432,14 +431,7 @@ static int w5100_spi_probe(struct spi_device *spi) if (!ret) mac = tmpmac; - if (spi->dev.of_node) { - of_id = of_match_device(w5100_of_match, &spi->dev); - if (!of_id) - return -ENODEV; - driver_data = (kernel_ulong_t)of_id->data; - } else { - driver_data = spi_get_device_id(spi)->driver_data; - } + driver_data = (uintptr_t)spi_get_device_match_data(spi); switch (driver_data) { case W5100: diff --git a/drivers/net/ethernet/wiznet/w5100.c b/drivers/net/ethernet/wiznet/w5100.c index 341ee2f249fd..b26fd15c25ae 100644 --- a/drivers/net/ethernet/wiznet/w5100.c +++ b/drivers/net/ethernet/wiznet/w5100.c @@ -930,8 +930,8 @@ static irqreturn_t w5100_interrupt(int irq, void *ndev_instance) if (priv->ops->may_sleep) queue_work(priv->xfer_wq, &priv->rx_work); - else if (napi_schedule_prep(&priv->napi)) - __napi_schedule(&priv->napi); + else + napi_schedule(&priv->napi); } return IRQ_HANDLED; diff --git a/drivers/net/ethernet/xscale/ixp4xx_eth.c b/drivers/net/ethernet/xscale/ixp4xx_eth.c index c47108f2a0a0..531bf919aef5 100644 --- a/drivers/net/ethernet/xscale/ixp4xx_eth.c +++ b/drivers/net/ethernet/xscale/ixp4xx_eth.c @@ -723,9 +723,9 @@ static int eth_poll(struct napi_struct *napi, int budget) napi_complete(napi); qmgr_enable_irq(rxq); if (!qmgr_stat_below_low_watermark(rxq) && - napi_reschedule(napi)) { /* not empty again */ + napi_schedule(napi)) { /* not empty again */ #if DEBUG_RX - netdev_debug(dev, "eth_poll napi_reschedule succeeded\n"); + netdev_debug(dev, "eth_poll napi_schedule succeeded\n"); #endif qmgr_disable_irq(rxq); continue; diff --git a/drivers/net/fjes/fjes_main.c b/drivers/net/fjes/fjes_main.c index 2513be6d4e11..cd8cf08477ec 100644 --- a/drivers/net/fjes/fjes_main.c +++ b/drivers/net/fjes/fjes_main.c @@ -1030,7 +1030,7 @@ static int fjes_poll(struct napi_struct *napi, int budget) } if (((long)jiffies - (long)adapter->rx_last_jiffies) < 3) { - napi_reschedule(napi); + napi_schedule(napi); } else { spin_lock(&hw->rx_status_lock); for (epidx = 0; epidx < hw->max_epid; epidx++) { diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index 78f9d588f712..23041eeec121 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -784,60 +784,20 @@ free_dst: return err; } -static struct rtable *geneve_get_v4_rt(struct sk_buff *skb, - struct net_device *dev, - struct geneve_sock *gs4, - struct flowi4 *fl4, - const struct ip_tunnel_info *info, - __be16 dport, __be16 sport, - __u8 *full_tos) +static u8 geneve_get_dsfield(struct sk_buff *skb, struct net_device *dev, + const struct ip_tunnel_info *info, + bool *use_cache) { - bool use_cache = ip_tunnel_dst_cache_usable(skb, info); struct geneve_dev *geneve = netdev_priv(dev); - struct dst_cache *dst_cache; - struct rtable *rt = NULL; - __u8 tos; - - if (!gs4) - return ERR_PTR(-EIO); - - memset(fl4, 0, sizeof(*fl4)); - fl4->flowi4_mark = skb->mark; - fl4->flowi4_proto = IPPROTO_UDP; - fl4->daddr = info->key.u.ipv4.dst; - fl4->saddr = info->key.u.ipv4.src; - fl4->fl4_dport = dport; - fl4->fl4_sport = sport; - fl4->flowi4_flags = info->key.flow_flags; + u8 dsfield; - tos = info->key.tos; - if ((tos == 1) && !geneve->cfg.collect_md) { - tos = ip_tunnel_get_dsfield(ip_hdr(skb), skb); - use_cache = false; + dsfield = info->key.tos; + if (dsfield == 1 && !geneve->cfg.collect_md) { + dsfield = ip_tunnel_get_dsfield(ip_hdr(skb), skb); + *use_cache = false; } - fl4->flowi4_tos = RT_TOS(tos); - if (full_tos) - *full_tos = tos; - dst_cache = (struct dst_cache *)&info->dst_cache; - if (use_cache) { - rt = dst_cache_get_ip4(dst_cache, &fl4->saddr); - if (rt) - return rt; - } - rt = ip_route_output_key(geneve->net, fl4); - if (IS_ERR(rt)) { - netdev_dbg(dev, "no route to %pI4\n", &fl4->daddr); - return ERR_PTR(-ENETUNREACH); - } - if (rt->dst.dev == dev) { /* is this necessary? */ - netdev_dbg(dev, "circular route to %pI4\n", &fl4->daddr); - ip_rt_put(rt); - return ERR_PTR(-ELOOP); - } - if (use_cache) - dst_cache_set_ip4(dst_cache, &rt->dst, fl4->saddr); - return rt; + return dsfield; } #if IS_ENABLED(CONFIG_IPV6) @@ -865,12 +825,7 @@ static struct dst_entry *geneve_get_v6_dst(struct sk_buff *skb, fl6->fl6_dport = dport; fl6->fl6_sport = sport; - prio = info->key.tos; - if ((prio == 1) && !geneve->cfg.collect_md) { - prio = ip_tunnel_get_dsfield(ip_hdr(skb), skb); - use_cache = false; - } - + prio = geneve_get_dsfield(skb, dev, info, &use_cache); fl6->flowlabel = ip6_make_flowinfo(prio, info->key.label); dst_cache = (struct dst_cache *)&info->dst_cache; if (use_cache) { @@ -904,19 +859,28 @@ static int geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev, struct geneve_sock *gs4 = rcu_dereference(geneve->sock4); const struct ip_tunnel_key *key = &info->key; struct rtable *rt; - struct flowi4 fl4; - __u8 full_tos; + bool use_cache; __u8 tos, ttl; __be16 df = 0; + __be32 saddr; __be16 sport; int err; if (!pskb_inet_may_pull(skb)) return -EINVAL; + if (!gs4) + return -EIO; + + use_cache = ip_tunnel_dst_cache_usable(skb, info); + tos = geneve_get_dsfield(skb, dev, info, &use_cache); sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true); - rt = geneve_get_v4_rt(skb, dev, gs4, &fl4, info, - geneve->cfg.info.key.tp_dst, sport, &full_tos); + + rt = udp_tunnel_dst_lookup(skb, dev, geneve->net, 0, &saddr, + &info->key, + sport, geneve->cfg.info.key.tp_dst, tos, + use_cache ? + (struct dst_cache *)&info->dst_cache : NULL); if (IS_ERR(rt)) return PTR_ERR(rt); @@ -939,8 +903,8 @@ static int geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev, return -ENOMEM; } - unclone->key.u.ipv4.dst = fl4.saddr; - unclone->key.u.ipv4.src = fl4.daddr; + unclone->key.u.ipv4.dst = saddr; + unclone->key.u.ipv4.src = info->key.u.ipv4.dst; } if (!pskb_may_pull(skb, ETH_HLEN)) { @@ -954,13 +918,12 @@ static int geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev, return -EMSGSIZE; } + tos = ip_tunnel_ecn_encap(tos, ip_hdr(skb), skb); if (geneve->cfg.collect_md) { - tos = ip_tunnel_ecn_encap(key->tos, ip_hdr(skb), skb); ttl = key->ttl; df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0; } else { - tos = ip_tunnel_ecn_encap(full_tos, ip_hdr(skb), skb); if (geneve->cfg.ttl_inherit) ttl = ip_tunnel_get_ttl(ip_hdr(skb), skb); else @@ -988,7 +951,7 @@ static int geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev, if (unlikely(err)) return err; - udp_tunnel_xmit_skb(rt, gs4->sock->sk, skb, fl4.saddr, fl4.daddr, + udp_tunnel_xmit_skb(rt, gs4->sock->sk, skb, saddr, info->key.u.ipv4.dst, tos, ttl, df, sport, geneve->cfg.info.key.tp_dst, !net_eq(geneve->net, dev_net(geneve->dev)), !(info->key.tun_flags & TUNNEL_CSUM)); @@ -1137,19 +1100,29 @@ static int geneve_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb) if (ip_tunnel_info_af(info) == AF_INET) { struct rtable *rt; - struct flowi4 fl4; - struct geneve_sock *gs4 = rcu_dereference(geneve->sock4); + bool use_cache; + __be32 saddr; + u8 tos; + + if (!gs4) + return -EIO; + + use_cache = ip_tunnel_dst_cache_usable(skb, info); + tos = geneve_get_dsfield(skb, dev, info, &use_cache); sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true); - rt = geneve_get_v4_rt(skb, dev, gs4, &fl4, info, - geneve->cfg.info.key.tp_dst, sport, NULL); + rt = udp_tunnel_dst_lookup(skb, dev, geneve->net, 0, &saddr, + &info->key, + sport, geneve->cfg.info.key.tp_dst, + tos, + use_cache ? &info->dst_cache : NULL); if (IS_ERR(rt)) return PTR_ERR(rt); ip_rt_put(rt); - info->key.u.ipv4.src = fl4.saddr; + info->key.u.ipv4.src = saddr; #if IS_ENABLED(CONFIG_IPV6) } else if (ip_tunnel_info_af(info) == AF_INET6) { struct dst_entry *dst; diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index 82e9796c8f5e..1dafa44155d0 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -851,7 +851,7 @@ static void netvsc_send_completion(struct net_device *ndev, msglen); return; } - fallthrough; + break; case NVSP_MSG1_TYPE_SEND_RECV_BUF_COMPLETE: if (msglen < sizeof(struct nvsp_message_header) + @@ -860,7 +860,7 @@ static void netvsc_send_completion(struct net_device *ndev, msglen); return; } - fallthrough; + break; case NVSP_MSG1_TYPE_SEND_SEND_BUF_COMPLETE: if (msglen < sizeof(struct nvsp_message_header) + @@ -869,7 +869,7 @@ static void netvsc_send_completion(struct net_device *ndev, msglen); return; } - fallthrough; + break; case NVSP_MSG5_TYPE_SUBCHANNEL: if (msglen < sizeof(struct nvsp_message_header) + @@ -878,10 +878,6 @@ static void netvsc_send_completion(struct net_device *ndev, msglen); return; } - /* Copy the response back */ - memcpy(&net_device->channel_init_pkt, nvsp_packet, - sizeof(struct nvsp_message)); - complete(&net_device->channel_init_wait); break; case NVSP_MSG1_TYPE_SEND_RNDIS_PKT_COMPLETE: @@ -904,13 +900,19 @@ static void netvsc_send_completion(struct net_device *ndev, netvsc_send_tx_complete(ndev, net_device, incoming_channel, desc, budget); - break; + return; default: netdev_err(ndev, "Unknown send completion type %d received!!\n", nvsp_packet->hdr.msg_type); + return; } + + /* Copy the response back */ + memcpy(&net_device->channel_init_pkt, nvsp_packet, + sizeof(struct nvsp_message)); + complete(&net_device->channel_init_wait); } static u32 netvsc_get_next_send_section(struct netvsc_device *net_device) diff --git a/drivers/net/ieee802154/ca8210.c b/drivers/net/ieee802154/ca8210.c index aebb19f1b3a4..4ec0dab38872 100644 --- a/drivers/net/ieee802154/ca8210.c +++ b/drivers/net/ieee802154/ca8210.c @@ -2740,7 +2740,6 @@ static int ca8210_register_ext_clock(struct spi_device *spi) struct device_node *np = spi->dev.of_node; struct ca8210_priv *priv = spi_get_drvdata(spi); struct ca8210_platform_data *pdata = spi->dev.platform_data; - int ret = 0; if (!np) return -EFAULT; @@ -2757,18 +2756,8 @@ static int ca8210_register_ext_clock(struct spi_device *spi) dev_crit(&spi->dev, "Failed to register external clk\n"); return PTR_ERR(priv->clk); } - ret = of_clk_add_provider(np, of_clk_src_simple_get, priv->clk); - if (ret) { - clk_unregister(priv->clk); - dev_crit( - &spi->dev, - "Failed to register external clock as clock provider\n" - ); - } else { - dev_info(&spi->dev, "External clock set as clock provider\n"); - } - return ret; + return of_clk_add_provider(np, of_clk_src_simple_get, priv->clk); } /** @@ -2780,8 +2769,8 @@ static void ca8210_unregister_ext_clock(struct spi_device *spi) { struct ca8210_priv *priv = spi_get_drvdata(spi); - if (!priv->clk) - return + if (IS_ERR_OR_NULL(priv->clk)) + return; of_clk_del_provider(spi->dev.of_node); clk_unregister(priv->clk); diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index 7a44e1cbe305..9663050a852d 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -2383,6 +2383,7 @@ static int macsec_upd_txsa(struct sk_buff *skb, struct genl_info *info) ctx.sa.assoc_num = assoc_num; ctx.sa.tx_sa = tx_sa; + ctx.sa.update_pn = !!prev_pn.full64; ctx.secy = secy; ret = macsec_offload(ops->mdo_upd_txsa, &ctx); @@ -2476,6 +2477,7 @@ static int macsec_upd_rxsa(struct sk_buff *skb, struct genl_info *info) ctx.sa.assoc_num = assoc_num; ctx.sa.rx_sa = rx_sa; + ctx.sa.update_pn = !!prev_pn.full64; ctx.secy = secy; ret = macsec_offload(ops->mdo_upd_rxsa, &ctx); diff --git a/drivers/net/mctp/Kconfig b/drivers/net/mctp/Kconfig index dc71657d9184..ce9d2d2ccf3b 100644 --- a/drivers/net/mctp/Kconfig +++ b/drivers/net/mctp/Kconfig @@ -33,6 +33,15 @@ config MCTP_TRANSPORT_I2C from DMTF specification DSP0237. A MCTP protocol network device is created for each I2C bus that has been assigned a mctp-i2c device. +config MCTP_TRANSPORT_I3C + tristate "MCTP I3C transport" + depends on I3C + help + Provides a driver to access MCTP devices over I3C transport, + from DMTF specification DSP0233. + A MCTP protocol network device is created for each I3C bus + having a "mctp-controller" devicetree property. + endmenu endif diff --git a/drivers/net/mctp/Makefile b/drivers/net/mctp/Makefile index 1ca3e6028f77..e1cb99ced54a 100644 --- a/drivers/net/mctp/Makefile +++ b/drivers/net/mctp/Makefile @@ -1,2 +1,3 @@ obj-$(CONFIG_MCTP_SERIAL) += mctp-serial.o obj-$(CONFIG_MCTP_TRANSPORT_I2C) += mctp-i2c.o +obj-$(CONFIG_MCTP_TRANSPORT_I3C) += mctp-i3c.o diff --git a/drivers/net/mctp/mctp-i3c.c b/drivers/net/mctp/mctp-i3c.c new file mode 100644 index 000000000000..8e989c157caa --- /dev/null +++ b/drivers/net/mctp/mctp-i3c.c @@ -0,0 +1,755 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Implements DMTF specification + * "DSP0233 Management Component Transport Protocol (MCTP) I3C Transport + * Binding" + * https://www.dmtf.org/sites/default/files/standards/documents/DSP0233_1.0.0.pdf + * + * Copyright (c) 2023 Code Construct + */ + +#include <linux/module.h> +#include <linux/netdevice.h> +#include <linux/i3c/device.h> +#include <linux/i3c/master.h> +#include <linux/if_arp.h> +#include <asm/unaligned.h> +#include <net/mctp.h> +#include <net/mctpdevice.h> + +#define MCTP_I3C_MAXBUF 65536 +/* 48 bit Provisioned Id */ +#define PID_SIZE 6 + +/* 64 byte payload, 4 byte MCTP header */ +static const int MCTP_I3C_MINMTU = 64 + 4; +/* One byte less to allow for the PEC */ +static const int MCTP_I3C_MAXMTU = MCTP_I3C_MAXBUF - 1; +/* 4 byte MCTP header, no data, 1 byte PEC */ +static const int MCTP_I3C_MINLEN = 4 + 1; + +/* Sufficient for 64kB at min mtu */ +static const int MCTP_I3C_TX_QUEUE_LEN = 1100; + +/* Somewhat arbitrary */ +static const int MCTP_I3C_IBI_SLOTS = 8; + +/* Mandatory Data Byte in an IBI, from DSP0233 */ +#define I3C_MDB_MCTP 0xAE +/* From MIPI Device Characteristics Register (DCR) Assignments */ +#define I3C_DCR_MCTP 0xCC + +static const char *MCTP_I3C_OF_PROP = "mctp-controller"; + +/* List of mctp_i3c_busdev */ +static LIST_HEAD(busdevs); +/* Protects busdevs, as well as mctp_i3c_bus.devs lists */ +static DEFINE_MUTEX(busdevs_lock); + +struct mctp_i3c_bus { + struct net_device *ndev; + + struct task_struct *tx_thread; + wait_queue_head_t tx_wq; + /* tx_lock protects tx_skb and devs */ + spinlock_t tx_lock; + /* Next skb to transmit */ + struct sk_buff *tx_skb; + /* Scratch buffer for xmit */ + u8 tx_scratch[MCTP_I3C_MAXBUF]; + + /* Element of busdevs */ + struct list_head list; + + /* Provisioned ID of our controller */ + u64 pid; + + struct i3c_bus *bus; + /* Head of mctp_i3c_device.list. Protected by busdevs_lock */ + struct list_head devs; +}; + +struct mctp_i3c_device { + struct i3c_device *i3c; + struct mctp_i3c_bus *mbus; + struct list_head list; /* Element of mctp_i3c_bus.devs */ + + /* Held while tx_thread is using this device */ + struct mutex lock; + + /* Whether BCR indicates MDB is present in IBI */ + bool have_mdb; + /* I3C dynamic address */ + u8 addr; + /* Maximum read length */ + u16 mrl; + /* Maximum write length */ + u16 mwl; + /* Provisioned ID */ + u64 pid; +}; + +/* We synthesise a mac header using the Provisioned ID. + * Used to pass dest to mctp_i3c_start_xmit. + */ +struct mctp_i3c_internal_hdr { + u8 dest[PID_SIZE]; + u8 source[PID_SIZE]; +} __packed; + +static int mctp_i3c_read(struct mctp_i3c_device *mi) +{ + struct i3c_priv_xfer xfer = { .rnw = 1, .len = mi->mrl }; + struct net_device_stats *stats = &mi->mbus->ndev->stats; + struct mctp_i3c_internal_hdr *ihdr = NULL; + struct sk_buff *skb = NULL; + struct mctp_skb_cb *cb; + int net_status, rc; + u8 pec, addr; + + skb = netdev_alloc_skb(mi->mbus->ndev, + mi->mrl + sizeof(struct mctp_i3c_internal_hdr)); + if (!skb) { + stats->rx_dropped++; + rc = -ENOMEM; + goto err; + } + + skb->protocol = htons(ETH_P_MCTP); + /* Create a header for internal use */ + skb_reset_mac_header(skb); + ihdr = skb_put(skb, sizeof(struct mctp_i3c_internal_hdr)); + put_unaligned_be48(mi->pid, ihdr->source); + put_unaligned_be48(mi->mbus->pid, ihdr->dest); + skb_pull(skb, sizeof(struct mctp_i3c_internal_hdr)); + + xfer.data.in = skb_put(skb, mi->mrl); + + rc = i3c_device_do_priv_xfers(mi->i3c, &xfer, 1); + if (rc < 0) + goto err; + + if (WARN_ON_ONCE(xfer.len > mi->mrl)) { + /* Bad i3c bus driver */ + rc = -EIO; + goto err; + } + if (xfer.len < MCTP_I3C_MINLEN) { + stats->rx_length_errors++; + rc = -EIO; + goto err; + } + + /* check PEC, including address byte */ + addr = mi->addr << 1 | 1; + pec = i2c_smbus_pec(0, &addr, 1); + pec = i2c_smbus_pec(pec, xfer.data.in, xfer.len - 1); + if (pec != ((u8 *)xfer.data.in)[xfer.len - 1]) { + stats->rx_crc_errors++; + rc = -EINVAL; + goto err; + } + + /* Remove PEC */ + skb_trim(skb, xfer.len - 1); + + cb = __mctp_cb(skb); + cb->halen = PID_SIZE; + put_unaligned_be48(mi->pid, cb->haddr); + + net_status = netif_rx(skb); + + if (net_status == NET_RX_SUCCESS) { + stats->rx_packets++; + stats->rx_bytes += xfer.len - 1; + } else { + stats->rx_dropped++; + } + + return 0; +err: + kfree_skb(skb); + return rc; +} + +static void mctp_i3c_ibi_handler(struct i3c_device *i3c, + const struct i3c_ibi_payload *payload) +{ + struct mctp_i3c_device *mi = i3cdev_get_drvdata(i3c); + + if (WARN_ON_ONCE(!mi)) + return; + + if (mi->have_mdb) { + if (payload->len > 0) { + if (((u8 *)payload->data)[0] != I3C_MDB_MCTP) { + /* Not a mctp-i3c interrupt, ignore it */ + return; + } + } else { + /* The BCR advertised a Mandatory Data Byte but the + * device didn't send one. + */ + dev_warn_once(i3cdev_to_dev(i3c), "IBI with missing MDB"); + } + } + + mctp_i3c_read(mi); +} + +static int mctp_i3c_setup(struct mctp_i3c_device *mi) +{ + const struct i3c_ibi_setup ibi = { + .max_payload_len = 1, + .num_slots = MCTP_I3C_IBI_SLOTS, + .handler = mctp_i3c_ibi_handler, + }; + struct i3c_device_info info; + int rc; + + i3c_device_get_info(mi->i3c, &info); + mi->have_mdb = info.bcr & BIT(2); + mi->addr = info.dyn_addr; + mi->mwl = info.max_write_len; + mi->mrl = info.max_read_len; + mi->pid = info.pid; + + rc = i3c_device_request_ibi(mi->i3c, &ibi); + if (rc == -ENOTSUPP) { + /* This driver only supports In-Band Interrupt mode. + * Support for Polling Mode could be added if required. + * (ENOTSUPP is from the i3c layer, not EOPNOTSUPP). + */ + dev_warn(i3cdev_to_dev(mi->i3c), + "Failed, bus driver doesn't support In-Band Interrupts"); + goto err; + } else if (rc < 0) { + dev_err(i3cdev_to_dev(mi->i3c), + "Failed requesting IBI (%d)\n", rc); + goto err; + } + + rc = i3c_device_enable_ibi(mi->i3c); + if (rc < 0) { + /* Assume a driver supporting request_ibi also + * supports enable_ibi. + */ + dev_err(i3cdev_to_dev(mi->i3c), "Failed enabling IBI (%d)\n", rc); + goto err_free_ibi; + } + + return 0; + +err_free_ibi: + i3c_device_free_ibi(mi->i3c); + +err: + return rc; +} + +/* Adds a new MCTP i3c_device to a bus */ +static int mctp_i3c_add_device(struct mctp_i3c_bus *mbus, + struct i3c_device *i3c) +__must_hold(&busdevs_lock) +{ + struct mctp_i3c_device *mi = NULL; + int rc; + + mi = kzalloc(sizeof(*mi), GFP_KERNEL); + if (!mi) { + rc = -ENOMEM; + goto err; + } + mi->mbus = mbus; + mi->i3c = i3c; + mutex_init(&mi->lock); + list_add(&mi->list, &mbus->devs); + + i3cdev_set_drvdata(i3c, mi); + rc = mctp_i3c_setup(mi); + if (rc < 0) + goto err_free; + + return 0; + +err_free: + list_del(&mi->list); + kfree(mi); + +err: + dev_warn(i3cdev_to_dev(i3c), "Error adding mctp-i3c device, %d\n", rc); + return rc; +} + +static int mctp_i3c_probe(struct i3c_device *i3c) +{ + struct mctp_i3c_bus *b = NULL, *mbus = NULL; + + /* Look for a known bus */ + mutex_lock(&busdevs_lock); + list_for_each_entry(b, &busdevs, list) + if (b->bus == i3c->bus) { + mbus = b; + break; + } + mutex_unlock(&busdevs_lock); + + if (!mbus) { + /* probably no "mctp-controller" property on the i3c bus */ + return -ENODEV; + } + + return mctp_i3c_add_device(mbus, i3c); +} + +static void mctp_i3c_remove_device(struct mctp_i3c_device *mi) +__must_hold(&busdevs_lock) +{ + /* Ensure the tx thread isn't using the device */ + mutex_lock(&mi->lock); + + /* Counterpart of mctp_i3c_setup */ + i3c_device_disable_ibi(mi->i3c); + i3c_device_free_ibi(mi->i3c); + + /* Counterpart of mctp_i3c_add_device */ + i3cdev_set_drvdata(mi->i3c, NULL); + list_del(&mi->list); + + /* Safe to unlock after removing from the list */ + mutex_unlock(&mi->lock); + kfree(mi); +} + +static void mctp_i3c_remove(struct i3c_device *i3c) +{ + struct mctp_i3c_device *mi = i3cdev_get_drvdata(i3c); + + /* We my have received a Bus Remove notify prior to device remove, + * so mi will already be removed. + */ + if (!mi) + return; + + mutex_lock(&busdevs_lock); + mctp_i3c_remove_device(mi); + mutex_unlock(&busdevs_lock); +} + +/* Returns the device for an address, with mi->lock held */ +static struct mctp_i3c_device * +mctp_i3c_lookup(struct mctp_i3c_bus *mbus, u64 pid) +{ + struct mctp_i3c_device *mi = NULL, *ret = NULL; + + mutex_lock(&busdevs_lock); + list_for_each_entry(mi, &mbus->devs, list) + if (mi->pid == pid) { + ret = mi; + mutex_lock(&mi->lock); + break; + } + mutex_unlock(&busdevs_lock); + return ret; +} + +static void mctp_i3c_xmit(struct mctp_i3c_bus *mbus, struct sk_buff *skb) +{ + struct net_device_stats *stats = &mbus->ndev->stats; + struct i3c_priv_xfer xfer = { .rnw = false }; + struct mctp_i3c_internal_hdr *ihdr = NULL; + struct mctp_i3c_device *mi = NULL; + unsigned int data_len; + u8 *data = NULL; + u8 addr, pec; + int rc = 0; + u64 pid; + + skb_pull(skb, sizeof(struct mctp_i3c_internal_hdr)); + data_len = skb->len; + + ihdr = (void *)skb_mac_header(skb); + + pid = get_unaligned_be48(ihdr->dest); + mi = mctp_i3c_lookup(mbus, pid); + if (!mi) { + /* I3C endpoint went away after the packet was enqueued? */ + stats->tx_dropped++; + goto out; + } + + if (WARN_ON_ONCE(data_len + 1 > MCTP_I3C_MAXBUF)) + goto out; + + if (data_len + 1 > (unsigned int)mi->mwl) { + /* Route MTU was larger than supported by the endpoint */ + stats->tx_dropped++; + goto out; + } + + /* Need a linear buffer with space for the PEC */ + xfer.len = data_len + 1; + if (skb_tailroom(skb) >= 1) { + skb_put(skb, 1); + data = skb->data; + } else { + /* Otherwise need to copy the buffer */ + skb_copy_bits(skb, 0, mbus->tx_scratch, skb->len); + data = mbus->tx_scratch; + } + + /* PEC calculation */ + addr = mi->addr << 1; + pec = i2c_smbus_pec(0, &addr, 1); + pec = i2c_smbus_pec(pec, data, data_len); + data[data_len] = pec; + + xfer.data.out = data; + rc = i3c_device_do_priv_xfers(mi->i3c, &xfer, 1); + if (rc == 0) { + stats->tx_bytes += data_len; + stats->tx_packets++; + } else { + stats->tx_errors++; + } + +out: + if (mi) + mutex_unlock(&mi->lock); +} + +static int mctp_i3c_tx_thread(void *data) +{ + struct mctp_i3c_bus *mbus = data; + struct sk_buff *skb; + + for (;;) { + if (kthread_should_stop()) + break; + + spin_lock_bh(&mbus->tx_lock); + skb = mbus->tx_skb; + mbus->tx_skb = NULL; + spin_unlock_bh(&mbus->tx_lock); + + if (netif_queue_stopped(mbus->ndev)) + netif_wake_queue(mbus->ndev); + + if (skb) { + mctp_i3c_xmit(mbus, skb); + kfree_skb(skb); + } else { + wait_event_idle(mbus->tx_wq, + mbus->tx_skb || kthread_should_stop()); + } + } + + return 0; +} + +static netdev_tx_t mctp_i3c_start_xmit(struct sk_buff *skb, + struct net_device *ndev) +{ + struct mctp_i3c_bus *mbus = netdev_priv(ndev); + netdev_tx_t ret; + + spin_lock(&mbus->tx_lock); + netif_stop_queue(ndev); + if (mbus->tx_skb) { + dev_warn_ratelimited(&ndev->dev, "TX with queue stopped"); + ret = NETDEV_TX_BUSY; + } else { + mbus->tx_skb = skb; + ret = NETDEV_TX_OK; + } + spin_unlock(&mbus->tx_lock); + + if (ret == NETDEV_TX_OK) + wake_up(&mbus->tx_wq); + + return ret; +} + +static void mctp_i3c_bus_free(struct mctp_i3c_bus *mbus) +__must_hold(&busdevs_lock) +{ + struct mctp_i3c_device *mi = NULL, *tmp = NULL; + + if (mbus->tx_thread) { + kthread_stop(mbus->tx_thread); + mbus->tx_thread = NULL; + } + + /* Remove any child devices */ + list_for_each_entry_safe(mi, tmp, &mbus->devs, list) { + mctp_i3c_remove_device(mi); + } + + kfree_skb(mbus->tx_skb); + list_del(&mbus->list); +} + +static void mctp_i3c_ndo_uninit(struct net_device *ndev) +{ + struct mctp_i3c_bus *mbus = netdev_priv(ndev); + + /* Perform cleanup here to ensure there are no remaining references */ + mctp_i3c_bus_free(mbus); +} + +static int mctp_i3c_header_create(struct sk_buff *skb, struct net_device *dev, + unsigned short type, const void *daddr, + const void *saddr, unsigned int len) +{ + struct mctp_i3c_internal_hdr *ihdr; + + skb_push(skb, sizeof(struct mctp_i3c_internal_hdr)); + skb_reset_mac_header(skb); + ihdr = (void *)skb_mac_header(skb); + memcpy(ihdr->dest, daddr, PID_SIZE); + memcpy(ihdr->source, saddr, PID_SIZE); + return 0; +} + +static const struct net_device_ops mctp_i3c_ops = { + .ndo_start_xmit = mctp_i3c_start_xmit, + .ndo_uninit = mctp_i3c_ndo_uninit, +}; + +static const struct header_ops mctp_i3c_headops = { + .create = mctp_i3c_header_create, +}; + +static void mctp_i3c_net_setup(struct net_device *dev) +{ + dev->type = ARPHRD_MCTP; + + dev->mtu = MCTP_I3C_MAXMTU; + dev->min_mtu = MCTP_I3C_MINMTU; + dev->max_mtu = MCTP_I3C_MAXMTU; + dev->tx_queue_len = MCTP_I3C_TX_QUEUE_LEN; + + dev->hard_header_len = sizeof(struct mctp_i3c_internal_hdr); + dev->addr_len = PID_SIZE; + + dev->netdev_ops = &mctp_i3c_ops; + dev->header_ops = &mctp_i3c_headops; +} + +static bool mctp_i3c_is_mctp_controller(struct i3c_bus *bus) +{ + struct i3c_dev_desc *master = bus->cur_master; + + if (!master) + return false; + + return of_property_read_bool(master->common.master->dev.of_node, + MCTP_I3C_OF_PROP); +} + +/* Returns the Provisioned Id of a local bus master */ +static int mctp_i3c_bus_local_pid(struct i3c_bus *bus, u64 *ret_pid) +{ + struct i3c_dev_desc *master; + + master = bus->cur_master; + if (WARN_ON_ONCE(!master)) + return -ENOENT; + *ret_pid = master->info.pid; + + return 0; +} + +/* Returns an ERR_PTR on failure */ +static struct mctp_i3c_bus *mctp_i3c_bus_add(struct i3c_bus *bus) +__must_hold(&busdevs_lock) +{ + struct mctp_i3c_bus *mbus = NULL; + struct net_device *ndev = NULL; + char namebuf[IFNAMSIZ]; + u8 addr[PID_SIZE]; + int rc; + + if (!mctp_i3c_is_mctp_controller(bus)) + return ERR_PTR(-ENOENT); + + snprintf(namebuf, sizeof(namebuf), "mctpi3c%d", bus->id); + ndev = alloc_netdev(sizeof(*mbus), namebuf, NET_NAME_ENUM, + mctp_i3c_net_setup); + if (!ndev) { + rc = -ENOMEM; + goto err; + } + + mbus = netdev_priv(ndev); + mbus->ndev = ndev; + mbus->bus = bus; + INIT_LIST_HEAD(&mbus->devs); + list_add(&mbus->list, &busdevs); + + rc = mctp_i3c_bus_local_pid(bus, &mbus->pid); + if (rc < 0) { + dev_err(&ndev->dev, "No I3C PID available\n"); + goto err_free_uninit; + } + put_unaligned_be48(mbus->pid, addr); + dev_addr_set(ndev, addr); + + init_waitqueue_head(&mbus->tx_wq); + spin_lock_init(&mbus->tx_lock); + mbus->tx_thread = kthread_run(mctp_i3c_tx_thread, mbus, + "%s/tx", ndev->name); + if (IS_ERR(mbus->tx_thread)) { + dev_warn(&ndev->dev, "Error creating thread: %pe\n", + mbus->tx_thread); + rc = PTR_ERR(mbus->tx_thread); + mbus->tx_thread = NULL; + goto err_free_uninit; + } + + rc = mctp_register_netdev(ndev, NULL); + if (rc < 0) { + dev_warn(&ndev->dev, "netdev register failed: %d\n", rc); + goto err_free_netdev; + } + return mbus; + +err_free_uninit: + /* uninit will not get called if a netdev has not been registered, + * so we perform the same mbus cleanup manually. + */ + mctp_i3c_bus_free(mbus); + +err_free_netdev: + free_netdev(ndev); + +err: + return ERR_PTR(rc); +} + +static void mctp_i3c_bus_remove(struct mctp_i3c_bus *mbus) +__must_hold(&busdevs_lock) +{ + /* Unregister calls through to ndo_uninit -> mctp_i3c_bus_free() */ + mctp_unregister_netdev(mbus->ndev); + + free_netdev(mbus->ndev); + /* mbus is deallocated */ +} + +/* Removes all mctp-i3c busses */ +static void mctp_i3c_bus_remove_all(void) +{ + struct mctp_i3c_bus *mbus = NULL, *tmp = NULL; + + mutex_lock(&busdevs_lock); + list_for_each_entry_safe(mbus, tmp, &busdevs, list) { + mctp_i3c_bus_remove(mbus); + } + mutex_unlock(&busdevs_lock); +} + +/* Adds a i3c_bus if it isn't already in the busdevs list. + * Suitable as an i3c_for_each_bus_locked callback. + */ +static int mctp_i3c_bus_add_new(struct i3c_bus *bus, void *data) +{ + struct mctp_i3c_bus *mbus = NULL, *tmp = NULL; + bool exists = false; + + mutex_lock(&busdevs_lock); + list_for_each_entry_safe(mbus, tmp, &busdevs, list) + if (mbus->bus == bus) + exists = true; + + /* It is OK for a bus to already exist. That can occur due to + * the race in mod_init between notifier and for_each_bus + */ + if (!exists) + mctp_i3c_bus_add(bus); + mutex_unlock(&busdevs_lock); + return 0; +} + +static void mctp_i3c_notify_bus_remove(struct i3c_bus *bus) +{ + struct mctp_i3c_bus *mbus = NULL, *tmp; + + mutex_lock(&busdevs_lock); + list_for_each_entry_safe(mbus, tmp, &busdevs, list) + if (mbus->bus == bus) + mctp_i3c_bus_remove(mbus); + mutex_unlock(&busdevs_lock); +} + +static int mctp_i3c_notifier_call(struct notifier_block *nb, + unsigned long action, void *data) +{ + switch (action) { + case I3C_NOTIFY_BUS_ADD: + mctp_i3c_bus_add_new((struct i3c_bus *)data, NULL); + break; + case I3C_NOTIFY_BUS_REMOVE: + mctp_i3c_notify_bus_remove((struct i3c_bus *)data); + break; + } + return NOTIFY_DONE; +} + +static struct notifier_block mctp_i3c_notifier = { + .notifier_call = mctp_i3c_notifier_call, +}; + +static const struct i3c_device_id mctp_i3c_ids[] = { + I3C_CLASS(I3C_DCR_MCTP, NULL), + { 0 }, +}; + +static struct i3c_driver mctp_i3c_driver = { + .driver = { + .name = "mctp-i3c", + }, + .probe = mctp_i3c_probe, + .remove = mctp_i3c_remove, + .id_table = mctp_i3c_ids, +}; + +static __init int mctp_i3c_mod_init(void) +{ + int rc; + + rc = i3c_register_notifier(&mctp_i3c_notifier); + if (rc < 0) { + i3c_driver_unregister(&mctp_i3c_driver); + return rc; + } + + i3c_for_each_bus_locked(mctp_i3c_bus_add_new, NULL); + + rc = i3c_driver_register(&mctp_i3c_driver); + if (rc < 0) + return rc; + + return 0; +} + +static __exit void mctp_i3c_mod_exit(void) +{ + int rc; + + i3c_driver_unregister(&mctp_i3c_driver); + + rc = i3c_unregister_notifier(&mctp_i3c_notifier); + if (rc < 0) + pr_warn("MCTP I3C could not unregister notifier, %d\n", rc); + + mctp_i3c_bus_remove_all(); +} + +module_init(mctp_i3c_mod_init); +module_exit(mctp_i3c_mod_exit); + +MODULE_DEVICE_TABLE(i3c, mctp_i3c_ids); +MODULE_DESCRIPTION("MCTP I3C device"); +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Matt Johnston <matt@codeconstruct.com.au>"); diff --git a/drivers/net/mdio/mdio-xgene.c b/drivers/net/mdio/mdio-xgene.c index 7909d7caf45c..495fbe35b6ce 100644 --- a/drivers/net/mdio/mdio-xgene.c +++ b/drivers/net/mdio/mdio-xgene.c @@ -13,11 +13,13 @@ #include <linux/io.h> #include <linux/mdio/mdio-xgene.h> #include <linux/module.h> +#include <linux/of.h> #include <linux/of_mdio.h> #include <linux/of_net.h> -#include <linux/of_platform.h> #include <linux/phy.h> +#include <linux/platform_device.h> #include <linux/prefetch.h> +#include <linux/property.h> #include <net/ip.h> u32 xgene_mdio_rd_mac(struct xgene_mdio_pdata *pdata, u32 rd_addr) @@ -326,24 +328,11 @@ static int xgene_mdio_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; struct mii_bus *mdio_bus; - const struct of_device_id *of_id; struct xgene_mdio_pdata *pdata; void __iomem *csr_base; int mdio_id = 0, ret = 0; - of_id = of_match_device(xgene_mdio_of_match, &pdev->dev); - if (of_id) { - mdio_id = (uintptr_t)of_id->data; - } else { -#ifdef CONFIG_ACPI - const struct acpi_device_id *acpi_id; - - acpi_id = acpi_match_device(xgene_mdio_acpi_match, &pdev->dev); - if (acpi_id) - mdio_id = (enum xgene_mdio_id)acpi_id->driver_data; -#endif - } - + mdio_id = (uintptr_t)device_get_match_data(&pdev->dev); if (!mdio_id) return -ENODEV; diff --git a/drivers/net/netconsole.c b/drivers/net/netconsole.c index 3111e1648592..6e14ba5e06c8 100644 --- a/drivers/net/netconsole.c +++ b/drivers/net/netconsole.c @@ -53,6 +53,8 @@ static bool oops_only = false; module_param(oops_only, bool, 0600); MODULE_PARM_DESC(oops_only, "Only log oops messages"); +#define NETCONSOLE_PARAM_TARGET_PREFIX "cmdline" + #ifndef MODULE static int __init option_setup(char *opt) { @@ -165,6 +167,10 @@ static void netconsole_target_put(struct netconsole_target *nt) { } +static void populate_configfs_item(struct netconsole_target *nt, + int cmdline_count) +{ +} #endif /* CONFIG_NETCONSOLE_DYNAMIC */ /* Allocate and initialize with defaults. @@ -192,58 +198,6 @@ static struct netconsole_target *alloc_and_init(void) return nt; } -/* Allocate new target (from boot/module param) and setup netpoll for it */ -static struct netconsole_target *alloc_param_target(char *target_config) -{ - struct netconsole_target *nt; - int err; - - nt = alloc_and_init(); - if (!nt) { - err = -ENOMEM; - goto fail; - } - - if (*target_config == '+') { - nt->extended = true; - target_config++; - } - - if (*target_config == 'r') { - if (!nt->extended) { - pr_err("Netconsole configuration error. Release feature requires extended log message"); - err = -EINVAL; - goto fail; - } - nt->release = true; - target_config++; - } - - /* Parse parameters and setup netpoll */ - err = netpoll_parse_options(&nt->np, target_config); - if (err) - goto fail; - - err = netpoll_setup(&nt->np); - if (err) - goto fail; - - nt->enabled = true; - - return nt; - -fail: - kfree(nt); - return ERR_PTR(err); -} - -/* Cleanup netpoll for given target (from boot/module param) and free it */ -static void free_param_target(struct netconsole_target *nt) -{ - netpoll_cleanup(&nt->np); - kfree(nt); -} - #ifdef CONFIG_NETCONSOLE_DYNAMIC /* @@ -675,6 +629,23 @@ static const struct config_item_type netconsole_target_type = { .ct_owner = THIS_MODULE, }; +static struct netconsole_target *find_cmdline_target(const char *name) +{ + struct netconsole_target *nt, *ret = NULL; + unsigned long flags; + + spin_lock_irqsave(&target_list_lock, flags); + list_for_each_entry(nt, &target_list, list) { + if (!strcmp(nt->item.ci_name, name)) { + ret = nt; + break; + } + } + spin_unlock_irqrestore(&target_list_lock, flags); + + return ret; +} + /* * Group operations and type for netconsole_subsys. */ @@ -685,6 +656,17 @@ static struct config_item *make_netconsole_target(struct config_group *group, struct netconsole_target *nt; unsigned long flags; + /* Checking if a target by this name was created at boot time. If so, + * attach a configfs entry to that target. This enables dynamic + * control. + */ + if (!strncmp(name, NETCONSOLE_PARAM_TARGET_PREFIX, + strlen(NETCONSOLE_PARAM_TARGET_PREFIX))) { + nt = find_cmdline_target(name); + if (nt) + return &nt->item; + } + nt = alloc_and_init(); if (!nt) return ERR_PTR(-ENOMEM); @@ -740,6 +722,17 @@ static struct configfs_subsystem netconsole_subsys = { }, }; +static void populate_configfs_item(struct netconsole_target *nt, + int cmdline_count) +{ + char target_name[16]; + + snprintf(target_name, sizeof(target_name), "%s%d", + NETCONSOLE_PARAM_TARGET_PREFIX, cmdline_count); + config_item_init_type_name(&nt->item, target_name, + &netconsole_target_type); +} + #endif /* CONFIG_NETCONSOLE_DYNAMIC */ /* Handle network interface device notifications */ @@ -938,6 +931,60 @@ static void write_msg(struct console *con, const char *msg, unsigned int len) spin_unlock_irqrestore(&target_list_lock, flags); } +/* Allocate new target (from boot/module param) and setup netpoll for it */ +static struct netconsole_target *alloc_param_target(char *target_config, + int cmdline_count) +{ + struct netconsole_target *nt; + int err; + + nt = alloc_and_init(); + if (!nt) { + err = -ENOMEM; + goto fail; + } + + if (*target_config == '+') { + nt->extended = true; + target_config++; + } + + if (*target_config == 'r') { + if (!nt->extended) { + pr_err("Netconsole configuration error. Release feature requires extended log message"); + err = -EINVAL; + goto fail; + } + nt->release = true; + target_config++; + } + + /* Parse parameters and setup netpoll */ + err = netpoll_parse_options(&nt->np, target_config); + if (err) + goto fail; + + err = netpoll_setup(&nt->np); + if (err) + goto fail; + + populate_configfs_item(nt, cmdline_count); + nt->enabled = true; + + return nt; + +fail: + kfree(nt); + return ERR_PTR(err); +} + +/* Cleanup netpoll for given target (from boot/module param) and free it */ +static void free_param_target(struct netconsole_target *nt) +{ + netpoll_cleanup(&nt->np); + kfree(nt); +} + static struct console netconsole_ext = { .name = "netcon_ext", .flags = CON_ENABLED | CON_EXTENDED, @@ -954,6 +1001,7 @@ static int __init init_netconsole(void) { int err; struct netconsole_target *nt, *tmp; + unsigned int count = 0; bool extended = false; unsigned long flags; char *target_config; @@ -961,7 +1009,7 @@ static int __init init_netconsole(void) if (strnlen(input, MAX_PARAM_LENGTH)) { while ((target_config = strsep(&input, ";"))) { - nt = alloc_param_target(target_config); + nt = alloc_param_target(target_config, count); if (IS_ERR(nt)) { err = PTR_ERR(nt); goto fail; @@ -977,6 +1025,7 @@ static int __init init_netconsole(void) spin_lock_irqsave(&target_list_lock, flags); list_add(&nt->list, &target_list); spin_unlock_irqrestore(&target_list_lock, flags); + count++; } } diff --git a/drivers/net/phy/mscc/mscc_macsec.c b/drivers/net/phy/mscc/mscc_macsec.c index 018253a573b8..4f39ba63a9a9 100644 --- a/drivers/net/phy/mscc/mscc_macsec.c +++ b/drivers/net/phy/mscc/mscc_macsec.c @@ -849,6 +849,9 @@ static int vsc8584_macsec_upd_rxsa(struct macsec_context *ctx) struct macsec_flow *flow; int ret; + if (ctx->sa.update_pn) + return -EINVAL; + flow = vsc8584_macsec_find_flow(ctx, MACSEC_INGR); if (IS_ERR(flow)) return PTR_ERR(flow); @@ -900,6 +903,9 @@ static int vsc8584_macsec_upd_txsa(struct macsec_context *ctx) struct macsec_flow *flow; int ret; + if (ctx->sa.update_pn) + return -EINVAL; + flow = vsc8584_macsec_find_flow(ctx, MACSEC_EGR); if (IS_ERR(flow)) return PTR_ERR(flow); diff --git a/drivers/net/phy/nxp-tja11xx.c b/drivers/net/phy/nxp-tja11xx.c index b13e15310feb..a71399965142 100644 --- a/drivers/net/phy/nxp-tja11xx.c +++ b/drivers/net/phy/nxp-tja11xx.c @@ -414,10 +414,8 @@ static void tja11xx_get_strings(struct phy_device *phydev, u8 *data) { int i; - for (i = 0; i < ARRAY_SIZE(tja11xx_hw_stats); i++) { - strncpy(data + i * ETH_GSTRING_LEN, - tja11xx_hw_stats[i].string, ETH_GSTRING_LEN); - } + for (i = 0; i < ARRAY_SIZE(tja11xx_hw_stats); i++) + ethtool_sprintf(&data, "%s", tja11xx_hw_stats[i].string); } static void tja11xx_get_stats(struct phy_device *phydev, diff --git a/drivers/net/phy/smsc.c b/drivers/net/phy/smsc.c index c88edb19d2e7..1c7306a1af13 100644 --- a/drivers/net/phy/smsc.c +++ b/drivers/net/phy/smsc.c @@ -507,10 +507,8 @@ static void smsc_get_strings(struct phy_device *phydev, u8 *data) { int i; - for (i = 0; i < ARRAY_SIZE(smsc_hw_stats); i++) { - strncpy(data + i * ETH_GSTRING_LEN, - smsc_hw_stats[i].string, ETH_GSTRING_LEN); - } + for (i = 0; i < ARRAY_SIZE(smsc_hw_stats); i++) + ethtool_sprintf(&data, "%s", smsc_hw_stats[i].string); } static u64 smsc_get_stat(struct phy_device *phydev, int i) diff --git a/drivers/net/usb/dm9601.c b/drivers/net/usb/dm9601.c index 48d7d278631e..99ec1d4a972d 100644 --- a/drivers/net/usb/dm9601.c +++ b/drivers/net/usb/dm9601.c @@ -222,13 +222,18 @@ static int dm9601_mdio_read(struct net_device *netdev, int phy_id, int loc) struct usbnet *dev = netdev_priv(netdev); __le16 res; + int err; if (phy_id) { netdev_dbg(dev->net, "Only internal phy supported\n"); return 0; } - dm_read_shared_word(dev, 1, loc, &res); + err = dm_read_shared_word(dev, 1, loc, &res); + if (err < 0) { + netdev_err(dev->net, "MDIO read error: %d\n", err); + return err; + } netdev_dbg(dev->net, "dm9601_mdio_read() phy_id=0x%02x, loc=0x%02x, returns=0x%04x\n", diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index 59cde06aa7f6..5add4145d9fc 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -1758,7 +1758,7 @@ static void lan78xx_get_drvinfo(struct net_device *net, { struct lan78xx_net *dev = netdev_priv(net); - strncpy(info->driver, DRIVER_NAME, sizeof(info->driver)); + strscpy(info->driver, DRIVER_NAME, sizeof(info->driver)); usb_make_path(dev->udev, info->bus_info, sizeof(info->bus_info)); } diff --git a/drivers/net/usb/sr9800.c b/drivers/net/usb/sr9800.c index f5e19f3ef6cd..143bd4ab160d 100644 --- a/drivers/net/usb/sr9800.c +++ b/drivers/net/usb/sr9800.c @@ -474,8 +474,8 @@ static void sr_get_drvinfo(struct net_device *net, { /* Inherit standard device info */ usbnet_get_drvinfo(net, info); - strncpy(info->driver, DRIVER_NAME, sizeof(info->driver)); - strncpy(info->version, DRIVER_VERSION, sizeof(info->version)); + strscpy(info->driver, DRIVER_NAME, sizeof(info->driver)); + strscpy(info->version, DRIVER_VERSION, sizeof(info->version)); } static u32 sr_get_link(struct net_device *net) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 1ed6bcc4cbb1..a52fd743504a 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -2855,6 +2855,9 @@ static void virtnet_get_ringparam(struct net_device *dev, ring->tx_pending = virtqueue_get_vring_size(vi->sq[0].vq); } +static int virtnet_send_ctrl_coal_vq_cmd(struct virtnet_info *vi, + u16 vqn, u32 max_usecs, u32 max_packets); + static int virtnet_set_ringparam(struct net_device *dev, struct ethtool_ringparam *ring, struct kernel_ethtool_ringparam *kernel_ring, @@ -2890,12 +2893,36 @@ static int virtnet_set_ringparam(struct net_device *dev, err = virtnet_tx_resize(vi, sq, ring->tx_pending); if (err) return err; + + /* Upon disabling and re-enabling a transmit virtqueue, the device must + * set the coalescing parameters of the virtqueue to those configured + * through the VIRTIO_NET_CTRL_NOTF_COAL_TX_SET command, or, if the driver + * did not set any TX coalescing parameters, to 0. + */ + err = virtnet_send_ctrl_coal_vq_cmd(vi, txq2vq(i), + vi->intr_coal_tx.max_usecs, + vi->intr_coal_tx.max_packets); + if (err) + return err; + + vi->sq[i].intr_coal.max_usecs = vi->intr_coal_tx.max_usecs; + vi->sq[i].intr_coal.max_packets = vi->intr_coal_tx.max_packets; } if (ring->rx_pending != rx_pending) { err = virtnet_rx_resize(vi, rq, ring->rx_pending); if (err) return err; + + /* The reason is same as the transmit virtqueue reset */ + err = virtnet_send_ctrl_coal_vq_cmd(vi, rxq2vq(i), + vi->intr_coal_rx.max_usecs, + vi->intr_coal_rx.max_packets); + if (err) + return err; + + vi->rq[i].intr_coal.max_usecs = vi->intr_coal_rx.max_usecs; + vi->rq[i].intr_coal.max_packets = vi->intr_coal_rx.max_packets; } } @@ -3233,6 +3260,7 @@ static int virtnet_send_notf_coal_cmds(struct virtnet_info *vi, struct ethtool_coalesce *ec) { struct scatterlist sgs_tx, sgs_rx; + int i; vi->ctrl->coal_tx.tx_usecs = cpu_to_le32(ec->tx_coalesce_usecs); vi->ctrl->coal_tx.tx_max_packets = cpu_to_le32(ec->tx_max_coalesced_frames); @@ -3246,6 +3274,10 @@ static int virtnet_send_notf_coal_cmds(struct virtnet_info *vi, /* Save parameters */ vi->intr_coal_tx.max_usecs = ec->tx_coalesce_usecs; vi->intr_coal_tx.max_packets = ec->tx_max_coalesced_frames; + for (i = 0; i < vi->max_queue_pairs; i++) { + vi->sq[i].intr_coal.max_usecs = ec->tx_coalesce_usecs; + vi->sq[i].intr_coal.max_packets = ec->tx_max_coalesced_frames; + } vi->ctrl->coal_rx.rx_usecs = cpu_to_le32(ec->rx_coalesce_usecs); vi->ctrl->coal_rx.rx_max_packets = cpu_to_le32(ec->rx_max_coalesced_frames); @@ -3259,6 +3291,10 @@ static int virtnet_send_notf_coal_cmds(struct virtnet_info *vi, /* Save parameters */ vi->intr_coal_rx.max_usecs = ec->rx_coalesce_usecs; vi->intr_coal_rx.max_packets = ec->rx_max_coalesced_frames; + for (i = 0; i < vi->max_queue_pairs; i++) { + vi->rq[i].intr_coal.max_usecs = ec->rx_coalesce_usecs; + vi->rq[i].intr_coal.max_packets = ec->rx_max_coalesced_frames; + } return 0; } @@ -3287,27 +3323,23 @@ static int virtnet_send_notf_coal_vq_cmds(struct virtnet_info *vi, { int err; - if (ec->rx_coalesce_usecs || ec->rx_max_coalesced_frames) { - err = virtnet_send_ctrl_coal_vq_cmd(vi, rxq2vq(queue), - ec->rx_coalesce_usecs, - ec->rx_max_coalesced_frames); - if (err) - return err; - /* Save parameters */ - vi->rq[queue].intr_coal.max_usecs = ec->rx_coalesce_usecs; - vi->rq[queue].intr_coal.max_packets = ec->rx_max_coalesced_frames; - } + err = virtnet_send_ctrl_coal_vq_cmd(vi, rxq2vq(queue), + ec->rx_coalesce_usecs, + ec->rx_max_coalesced_frames); + if (err) + return err; - if (ec->tx_coalesce_usecs || ec->tx_max_coalesced_frames) { - err = virtnet_send_ctrl_coal_vq_cmd(vi, txq2vq(queue), - ec->tx_coalesce_usecs, - ec->tx_max_coalesced_frames); - if (err) - return err; - /* Save parameters */ - vi->sq[queue].intr_coal.max_usecs = ec->tx_coalesce_usecs; - vi->sq[queue].intr_coal.max_packets = ec->tx_max_coalesced_frames; - } + vi->rq[queue].intr_coal.max_usecs = ec->rx_coalesce_usecs; + vi->rq[queue].intr_coal.max_packets = ec->rx_max_coalesced_frames; + + err = virtnet_send_ctrl_coal_vq_cmd(vi, txq2vq(queue), + ec->tx_coalesce_usecs, + ec->tx_max_coalesced_frames); + if (err) + return err; + + vi->sq[queue].intr_coal.max_usecs = ec->tx_coalesce_usecs; + vi->sq[queue].intr_coal.max_packets = ec->tx_max_coalesced_frames; return 0; } @@ -3315,7 +3347,7 @@ static int virtnet_send_notf_coal_vq_cmds(struct virtnet_info *vi, static int virtnet_coal_params_supported(struct ethtool_coalesce *ec) { /* usecs coalescing is supported only if VIRTIO_NET_F_NOTF_COAL - * feature is negotiated. + * or VIRTIO_NET_F_VQ_NOTF_COAL feature is negotiated. */ if (ec->rx_coalesce_usecs || ec->tx_coalesce_usecs) return -EOPNOTSUPP; @@ -3453,7 +3485,7 @@ static int virtnet_get_per_queue_coalesce(struct net_device *dev, } else { ec->rx_max_coalesced_frames = 1; - if (vi->sq[0].napi.weight) + if (vi->sq[queue].napi.weight) ec->tx_max_coalesced_frames = 1; } @@ -4442,13 +4474,6 @@ static int virtnet_probe(struct virtio_device *vdev) dev->xdp_features |= NETDEV_XDP_ACT_RX_SG; } - if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_NOTF_COAL)) { - vi->intr_coal_rx.max_usecs = 0; - vi->intr_coal_tx.max_usecs = 0; - vi->intr_coal_tx.max_packets = 0; - vi->intr_coal_rx.max_packets = 0; - } - if (virtio_has_feature(vdev, VIRTIO_NET_F_HASH_REPORT)) vi->has_rss_hash_report = true; @@ -4523,6 +4548,27 @@ static int virtnet_probe(struct virtio_device *vdev) if (err) goto free; + if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_NOTF_COAL)) { + vi->intr_coal_rx.max_usecs = 0; + vi->intr_coal_tx.max_usecs = 0; + vi->intr_coal_rx.max_packets = 0; + + /* Keep the default values of the coalescing parameters + * aligned with the default napi_tx state. + */ + if (vi->sq[0].napi.weight) + vi->intr_coal_tx.max_packets = 1; + else + vi->intr_coal_tx.max_packets = 0; + } + + if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL)) { + /* The reason is the same as VIRTIO_NET_F_NOTF_COAL. */ + for (i = 0; i < vi->max_queue_pairs; i++) + if (vi->sq[i].napi.weight) + vi->sq[i].intr_coal.max_packets = 1; + } + #ifdef CONFIG_SYSFS if (vi->mergeable_rx_bufs) dev->sysfs_rx_queue_group = &virtio_net_mrg_rx_group; diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c index 5b5597073b00..6f7d45e3cfa2 100644 --- a/drivers/net/vxlan/vxlan_core.c +++ b/drivers/net/vxlan/vxlan_core.c @@ -2215,57 +2215,6 @@ static int vxlan_build_skb(struct sk_buff *skb, struct dst_entry *dst, return 0; } -static struct rtable *vxlan_get_route(struct vxlan_dev *vxlan, struct net_device *dev, - struct vxlan_sock *sock4, - struct sk_buff *skb, int oif, u8 tos, - __be32 daddr, __be32 *saddr, __be16 dport, __be16 sport, - __u8 flow_flags, struct dst_cache *dst_cache, - const struct ip_tunnel_info *info) -{ - bool use_cache = ip_tunnel_dst_cache_usable(skb, info); - struct rtable *rt = NULL; - struct flowi4 fl4; - - if (!sock4) - return ERR_PTR(-EIO); - - if (tos && !info) - use_cache = false; - if (use_cache) { - rt = dst_cache_get_ip4(dst_cache, saddr); - if (rt) - return rt; - } - - memset(&fl4, 0, sizeof(fl4)); - fl4.flowi4_oif = oif; - fl4.flowi4_tos = RT_TOS(tos); - fl4.flowi4_mark = skb->mark; - fl4.flowi4_proto = IPPROTO_UDP; - fl4.daddr = daddr; - fl4.saddr = *saddr; - fl4.fl4_dport = dport; - fl4.fl4_sport = sport; - fl4.flowi4_flags = flow_flags; - - rt = ip_route_output_key(vxlan->net, &fl4); - if (!IS_ERR(rt)) { - if (rt->dst.dev == dev) { - netdev_dbg(dev, "circular route to %pI4\n", &daddr); - ip_rt_put(rt); - return ERR_PTR(-ELOOP); - } - - *saddr = fl4.saddr; - if (use_cache) - dst_cache_set_ip4(dst_cache, &rt->dst, fl4.saddr); - } else { - netdev_dbg(dev, "no route to %pI4\n", &daddr); - return ERR_PTR(-ENETUNREACH); - } - return rt; -} - #if IS_ENABLED(CONFIG_IPV6) static struct dst_entry *vxlan6_get_route(struct vxlan_dev *vxlan, struct net_device *dev, @@ -2418,30 +2367,38 @@ void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, { struct dst_cache *dst_cache; struct ip_tunnel_info *info; + struct ip_tunnel_key *pkey; + struct ip_tunnel_key key; struct vxlan_dev *vxlan = netdev_priv(dev); const struct iphdr *old_iph = ip_hdr(skb); union vxlan_addr *dst; - union vxlan_addr remote_ip, local_ip; + union vxlan_addr remote_ip; struct vxlan_metadata _md; struct vxlan_metadata *md = &_md; unsigned int pkt_len = skb->len; __be16 src_port = 0, dst_port; struct dst_entry *ndst = NULL; - __u8 tos, ttl, flow_flags = 0; + __u8 tos, ttl; int ifindex; int err; u32 flags = vxlan->cfg.flags; + bool use_cache; bool udp_sum = false; bool xnet = !net_eq(vxlan->net, dev_net(vxlan->dev)); __be32 vni = 0; #if IS_ENABLED(CONFIG_IPV6) + union vxlan_addr local_ip; __be32 label; #endif info = skb_tunnel_info(skb); + use_cache = ip_tunnel_dst_cache_usable(skb, info); if (rdst) { dst = &rdst->remote_ip; + memset(&key, 0, sizeof(key)); + pkey = &key; + if (vxlan_addr_any(dst)) { if (did_rsc) { /* short-circuited back to local bridge */ @@ -2455,7 +2412,15 @@ void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, dst_port = rdst->remote_port ? rdst->remote_port : vxlan->cfg.dst_port; vni = (rdst->remote_vni) ? : default_vni; ifindex = rdst->remote_ifindex; - local_ip = vxlan->cfg.saddr; + + if (dst->sa.sa_family == AF_INET) { + key.u.ipv4.src = vxlan->cfg.saddr.sin.sin_addr.s_addr; + key.u.ipv4.dst = rdst->remote_ip.sin.sin_addr.s_addr; + } else { + key.u.ipv6.src = vxlan->cfg.saddr.sin6.sin6_addr; + key.u.ipv6.dst = rdst->remote_ip.sin6.sin6_addr; + } + dst_cache = &rdst->dst_cache; md->gbp = skb->mark; if (flags & VXLAN_F_TTL_INHERIT) { @@ -2469,12 +2434,15 @@ void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, tos = vxlan->cfg.tos; if (tos == 1) tos = ip_tunnel_get_dsfield(old_iph, skb); + if (tos && !info) + use_cache = false; if (dst->sa.sa_family == AF_INET) udp_sum = !(flags & VXLAN_F_UDP_ZERO_CSUM_TX); else udp_sum = !(flags & VXLAN_F_UDP_ZERO_CSUM6_TX); #if IS_ENABLED(CONFIG_IPV6) + local_ip = vxlan->cfg.saddr; label = vxlan->cfg.label; #endif } else { @@ -2486,14 +2454,15 @@ void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, remote_ip.sa.sa_family = ip_tunnel_info_af(info); if (remote_ip.sa.sa_family == AF_INET) { remote_ip.sin.sin_addr.s_addr = info->key.u.ipv4.dst; - local_ip.sin.sin_addr.s_addr = info->key.u.ipv4.src; } else { remote_ip.sin6.sin6_addr = info->key.u.ipv6.dst; +#if IS_ENABLED(CONFIG_IPV6) local_ip.sin6.sin6_addr = info->key.u.ipv6.src; +#endif } dst = &remote_ip; + pkey = &info->key; dst_port = info->key.tp_dst ? : vxlan->cfg.dst_port; - flow_flags = info->key.flow_flags; vni = tunnel_id_to_key32(info->key.tun_id); ifindex = 0; dst_cache = &info->dst_cache; @@ -2517,15 +2486,14 @@ void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, struct vxlan_sock *sock4 = rcu_dereference(vxlan->vn4_sock); struct rtable *rt; __be16 df = 0; + __be32 saddr; if (!ifindex) ifindex = sock4->sock->sk->sk_bound_dev_if; - rt = vxlan_get_route(vxlan, dev, sock4, skb, ifindex, tos, - dst->sin.sin_addr.s_addr, - &local_ip.sin.sin_addr.s_addr, - dst_port, src_port, flow_flags, - dst_cache, info); + rt = udp_tunnel_dst_lookup(skb, dev, vxlan->net, ifindex, + &saddr, pkey, src_port, dst_port, + tos, use_cache ? dst_cache : NULL); if (IS_ERR(rt)) { err = PTR_ERR(rt); goto tx_error; @@ -2561,16 +2529,13 @@ void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, } else if (err) { if (info) { struct ip_tunnel_info *unclone; - struct in_addr src, dst; unclone = skb_tunnel_info_unclone(skb); if (unlikely(!unclone)) goto tx_error; - src = remote_ip.sin.sin_addr; - dst = local_ip.sin.sin_addr; - unclone->key.u.ipv4.src = src.s_addr; - unclone->key.u.ipv4.dst = dst.s_addr; + unclone->key.u.ipv4.src = pkey->u.ipv4.dst; + unclone->key.u.ipv4.dst = saddr; } vxlan_encap_bypass(skb, vxlan, vxlan, vni, false); dst_release(ndst); @@ -2584,8 +2549,8 @@ void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, if (err < 0) goto tx_error; - udp_tunnel_xmit_skb(rt, sock4->sock->sk, skb, local_ip.sin.sin_addr.s_addr, - dst->sin.sin_addr.s_addr, tos, ttl, df, + udp_tunnel_xmit_skb(rt, sock4->sock->sk, skb, saddr, + pkey->u.ipv4.dst, tos, ttl, df, src_port, dst_port, xnet, !udp_sum); #if IS_ENABLED(CONFIG_IPV6) } else { @@ -3022,9 +2987,101 @@ static int vxlan_open(struct net_device *dev) return ret; } +struct vxlan_fdb_flush_desc { + bool ignore_default_entry; + unsigned long state; + unsigned long state_mask; + unsigned long flags; + unsigned long flags_mask; + __be32 src_vni; + u32 nhid; + __be32 vni; + __be16 port; + union vxlan_addr dst_ip; +}; + +static bool vxlan_fdb_is_default_entry(const struct vxlan_fdb *f, + const struct vxlan_dev *vxlan) +{ + return is_zero_ether_addr(f->eth_addr) && f->vni == vxlan->cfg.vni; +} + +static bool vxlan_fdb_nhid_matches(const struct vxlan_fdb *f, u32 nhid) +{ + struct nexthop *nh = rtnl_dereference(f->nh); + + return nh && nh->id == nhid; +} + +static bool vxlan_fdb_flush_matches(const struct vxlan_fdb *f, + const struct vxlan_dev *vxlan, + const struct vxlan_fdb_flush_desc *desc) +{ + if (desc->state_mask && (f->state & desc->state_mask) != desc->state) + return false; + + if (desc->flags_mask && (f->flags & desc->flags_mask) != desc->flags) + return false; + + if (desc->ignore_default_entry && vxlan_fdb_is_default_entry(f, vxlan)) + return false; + + if (desc->src_vni && f->vni != desc->src_vni) + return false; + + if (desc->nhid && !vxlan_fdb_nhid_matches(f, desc->nhid)) + return false; + + return true; +} + +static bool +vxlan_fdb_flush_should_match_remotes(const struct vxlan_fdb_flush_desc *desc) +{ + return desc->vni || desc->port || desc->dst_ip.sa.sa_family; +} + +static bool +vxlan_fdb_flush_remote_matches(const struct vxlan_fdb_flush_desc *desc, + const struct vxlan_rdst *rd) +{ + if (desc->vni && rd->remote_vni != desc->vni) + return false; + + if (desc->port && rd->remote_port != desc->port) + return false; + + if (desc->dst_ip.sa.sa_family && + !vxlan_addr_equal(&rd->remote_ip, &desc->dst_ip)) + return false; + + return true; +} + +static void +vxlan_fdb_flush_match_remotes(struct vxlan_fdb *f, struct vxlan_dev *vxlan, + const struct vxlan_fdb_flush_desc *desc, + bool *p_destroy_fdb) +{ + bool remotes_flushed = false; + struct vxlan_rdst *rd, *tmp; + + list_for_each_entry_safe(rd, tmp, &f->remotes, list) { + if (!vxlan_fdb_flush_remote_matches(desc, rd)) + continue; + + vxlan_fdb_dst_destroy(vxlan, f, rd, true); + remotes_flushed = true; + } + + *p_destroy_fdb = remotes_flushed && list_empty(&f->remotes); +} + /* Purge the forwarding table */ -static void vxlan_flush(struct vxlan_dev *vxlan, bool do_all) +static void vxlan_flush(struct vxlan_dev *vxlan, + const struct vxlan_fdb_flush_desc *desc) { + bool match_remotes = vxlan_fdb_flush_should_match_remotes(desc); unsigned int h; for (h = 0; h < FDB_HASH_SIZE; ++h) { @@ -3034,28 +3091,122 @@ static void vxlan_flush(struct vxlan_dev *vxlan, bool do_all) hlist_for_each_safe(p, n, &vxlan->fdb_head[h]) { struct vxlan_fdb *f = container_of(p, struct vxlan_fdb, hlist); - if (!do_all && (f->state & (NUD_PERMANENT | NUD_NOARP))) - continue; - /* the all_zeros_mac entry is deleted at vxlan_uninit */ - if (is_zero_ether_addr(f->eth_addr) && - f->vni == vxlan->cfg.vni) + + if (!vxlan_fdb_flush_matches(f, vxlan, desc)) continue; + + if (match_remotes) { + bool destroy_fdb = false; + + vxlan_fdb_flush_match_remotes(f, vxlan, desc, + &destroy_fdb); + + if (!destroy_fdb) + continue; + } + vxlan_fdb_destroy(vxlan, f, true, true); } spin_unlock_bh(&vxlan->hash_lock[h]); } } +static const struct nla_policy vxlan_del_bulk_policy[NDA_MAX + 1] = { + [NDA_SRC_VNI] = { .type = NLA_U32 }, + [NDA_NH_ID] = { .type = NLA_U32 }, + [NDA_VNI] = { .type = NLA_U32 }, + [NDA_PORT] = { .type = NLA_U16 }, + [NDA_DST] = NLA_POLICY_RANGE(NLA_BINARY, sizeof(struct in_addr), + sizeof(struct in6_addr)), + [NDA_NDM_STATE_MASK] = { .type = NLA_U16 }, + [NDA_NDM_FLAGS_MASK] = { .type = NLA_U8 }, +}; + +#define VXLAN_FDB_FLUSH_IGNORED_NDM_FLAGS (NTF_MASTER | NTF_SELF) +#define VXLAN_FDB_FLUSH_ALLOWED_NDM_STATES (NUD_PERMANENT | NUD_NOARP) +#define VXLAN_FDB_FLUSH_ALLOWED_NDM_FLAGS (NTF_EXT_LEARNED | NTF_OFFLOADED | \ + NTF_ROUTER) + +static int vxlan_fdb_delete_bulk(struct nlmsghdr *nlh, struct net_device *dev, + struct netlink_ext_ack *extack) +{ + struct vxlan_dev *vxlan = netdev_priv(dev); + struct vxlan_fdb_flush_desc desc = {}; + struct ndmsg *ndm = nlmsg_data(nlh); + struct nlattr *tb[NDA_MAX + 1]; + u8 ndm_flags; + int err; + + ndm_flags = ndm->ndm_flags & ~VXLAN_FDB_FLUSH_IGNORED_NDM_FLAGS; + + err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, vxlan_del_bulk_policy, + extack); + if (err) + return err; + + if (ndm_flags & ~VXLAN_FDB_FLUSH_ALLOWED_NDM_FLAGS) { + NL_SET_ERR_MSG(extack, "Unsupported fdb flush ndm flag bits set"); + return -EINVAL; + } + if (ndm->ndm_state & ~VXLAN_FDB_FLUSH_ALLOWED_NDM_STATES) { + NL_SET_ERR_MSG(extack, "Unsupported fdb flush ndm state bits set"); + return -EINVAL; + } + + desc.state = ndm->ndm_state; + desc.flags = ndm_flags; + + if (tb[NDA_NDM_STATE_MASK]) + desc.state_mask = nla_get_u16(tb[NDA_NDM_STATE_MASK]); + + if (tb[NDA_NDM_FLAGS_MASK]) + desc.flags_mask = nla_get_u8(tb[NDA_NDM_FLAGS_MASK]); + + if (tb[NDA_SRC_VNI]) + desc.src_vni = cpu_to_be32(nla_get_u32(tb[NDA_SRC_VNI])); + + if (tb[NDA_NH_ID]) + desc.nhid = nla_get_u32(tb[NDA_NH_ID]); + + if (tb[NDA_VNI]) + desc.vni = cpu_to_be32(nla_get_u32(tb[NDA_VNI])); + + if (tb[NDA_PORT]) + desc.port = nla_get_be16(tb[NDA_PORT]); + + if (tb[NDA_DST]) { + union vxlan_addr ip; + + err = vxlan_nla_get_addr(&ip, tb[NDA_DST]); + if (err) { + NL_SET_ERR_MSG_ATTR(extack, tb[NDA_DST], + "Unsupported address family"); + return err; + } + desc.dst_ip = ip; + } + + vxlan_flush(vxlan, &desc); + + return 0; +} + /* Cleanup timer and forwarding table on shutdown */ static int vxlan_stop(struct net_device *dev) { struct vxlan_dev *vxlan = netdev_priv(dev); + struct vxlan_fdb_flush_desc desc = { + /* Default entry is deleted at vxlan_uninit. */ + .ignore_default_entry = true, + .state = 0, + .state_mask = NUD_PERMANENT | NUD_NOARP, + }; vxlan_multicast_leave(vxlan); del_timer_sync(&vxlan->age_timer); - vxlan_flush(vxlan, false); + vxlan_flush(vxlan, &desc); vxlan_sock_release(vxlan); return 0; @@ -3100,11 +3251,14 @@ static int vxlan_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb) struct vxlan_sock *sock4 = rcu_dereference(vxlan->vn4_sock); struct rtable *rt; - rt = vxlan_get_route(vxlan, dev, sock4, skb, 0, info->key.tos, - info->key.u.ipv4.dst, - &info->key.u.ipv4.src, dport, sport, - info->key.flow_flags, &info->dst_cache, - info); + if (!sock4) + return -EIO; + + rt = udp_tunnel_dst_lookup(skb, dev, vxlan->net, 0, + &info->key.u.ipv4.src, + &info->key, + sport, dport, info->key.tos, + &info->dst_cache); if (IS_ERR(rt)) return PTR_ERR(rt); ip_rt_put(rt); @@ -3142,6 +3296,7 @@ static const struct net_device_ops vxlan_netdev_ether_ops = { .ndo_set_mac_address = eth_mac_addr, .ndo_fdb_add = vxlan_fdb_add, .ndo_fdb_del = vxlan_fdb_delete, + .ndo_fdb_del_bulk = vxlan_fdb_delete_bulk, .ndo_fdb_dump = vxlan_fdb_dump, .ndo_fdb_get = vxlan_fdb_get, .ndo_mdb_add = vxlan_mdb_add, @@ -4294,8 +4449,12 @@ static int vxlan_changelink(struct net_device *dev, struct nlattr *tb[], static void vxlan_dellink(struct net_device *dev, struct list_head *head) { struct vxlan_dev *vxlan = netdev_priv(dev); + struct vxlan_fdb_flush_desc desc = { + /* Default entry is deleted at vxlan_uninit. */ + .ignore_default_entry = true, + }; - vxlan_flush(vxlan, true); + vxlan_flush(vxlan, &desc); list_del(&vxlan->next); unregister_netdevice_queue(dev, head); diff --git a/drivers/net/wan/ixp4xx_hss.c b/drivers/net/wan/ixp4xx_hss.c index e46b7f5ee49e..b09f4c235142 100644 --- a/drivers/net/wan/ixp4xx_hss.c +++ b/drivers/net/wan/ixp4xx_hss.c @@ -687,10 +687,10 @@ static int hss_hdlc_poll(struct napi_struct *napi, int budget) napi_complete(napi); qmgr_enable_irq(rxq); if (!qmgr_stat_empty(rxq) && - napi_reschedule(napi)) { + napi_schedule(napi)) { #if DEBUG_RX printk(KERN_DEBUG "%s: hss_hdlc_poll" - " napi_reschedule succeeded\n", + " napi_schedule succeeded\n", dev->name); #endif qmgr_disable_irq(rxq); diff --git a/drivers/net/wireless/ath/ath10k/pci.c b/drivers/net/wireless/ath/ath10k/pci.c index 23f366221939..2f8c785277af 100644 --- a/drivers/net/wireless/ath/ath10k/pci.c +++ b/drivers/net/wireless/ath/ath10k/pci.c @@ -3148,7 +3148,7 @@ static int ath10k_pci_napi_poll(struct napi_struct *ctx, int budget) * immediate servicing. */ if (ath10k_ce_interrupt_summary(ar)) { - napi_reschedule(ctx); + napi_schedule(ctx); goto out; } ath10k_pci_enable_legacy_irq(ar); diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/rx.c b/drivers/net/wireless/intel/iwlwifi/pcie/rx.c index 23b5a0adcbd6..146bc7bd14fb 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/rx.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/rx.c @@ -1660,9 +1660,7 @@ irqreturn_t iwl_pcie_irq_rx_msix_handler(int irq, void *dev_id) IWL_DEBUG_ISR(trans, "[%d] Got interrupt\n", entry->entry); local_bh_disable(); - if (napi_schedule_prep(&rxq->napi)) - __napi_schedule(&rxq->napi); - else + if (!napi_schedule(&rxq->napi)) iwl_pcie_clear_irq(trans, entry->entry); local_bh_enable(); diff --git a/drivers/net/wwan/t7xx/t7xx_hif_dpmaif_rx.c b/drivers/net/wwan/t7xx/t7xx_hif_dpmaif_rx.c index f4ff2198b5ef..210d84c67ef9 100644 --- a/drivers/net/wwan/t7xx/t7xx_hif_dpmaif_rx.c +++ b/drivers/net/wwan/t7xx/t7xx_hif_dpmaif_rx.c @@ -852,7 +852,7 @@ int t7xx_dpmaif_napi_rx_poll(struct napi_struct *napi, const int budget) if (!ret) { napi_complete_done(napi, work_done); rxq->sleep_lock_pending = true; - napi_reschedule(napi); + napi_schedule(napi); return work_done; } diff --git a/drivers/net/wwan/t7xx/t7xx_state_monitor.c b/drivers/net/wwan/t7xx/t7xx_state_monitor.c index 80edb8e75a6a..0bc97430211b 100644 --- a/drivers/net/wwan/t7xx/t7xx_state_monitor.c +++ b/drivers/net/wwan/t7xx/t7xx_state_monitor.c @@ -445,7 +445,8 @@ int t7xx_fsm_append_event(struct t7xx_fsm_ctl *ctl, enum t7xx_fsm_event_state ev return -EINVAL; } - event = kmalloc(sizeof(*event) + length, in_interrupt() ? GFP_ATOMIC : GFP_KERNEL); + event = kmalloc(struct_size(event, data, length), + in_interrupt() ? GFP_ATOMIC : GFP_KERNEL); if (!event) return -ENOMEM; diff --git a/drivers/net/wwan/t7xx/t7xx_state_monitor.h b/drivers/net/wwan/t7xx/t7xx_state_monitor.h index b6e76f3903c8..b0b3662ae6d7 100644 --- a/drivers/net/wwan/t7xx/t7xx_state_monitor.h +++ b/drivers/net/wwan/t7xx/t7xx_state_monitor.h @@ -102,7 +102,7 @@ struct t7xx_fsm_event { struct list_head entry; enum t7xx_fsm_event_state event_id; unsigned int length; - unsigned char data[]; + unsigned char data[] __counted_by(length); }; struct t7xx_fsm_command { diff --git a/drivers/net/wwan/wwan_core.c b/drivers/net/wwan/wwan_core.c index 87df60916960..72e01e550a16 100644 --- a/drivers/net/wwan/wwan_core.c +++ b/drivers/net/wwan/wwan_core.c @@ -302,7 +302,7 @@ static void wwan_remove_dev(struct wwan_device *wwandev) static const struct { const char * const name; /* Port type name */ - const char * const devsuf; /* Port devce name suffix */ + const char * const devsuf; /* Port device name suffix */ } wwan_port_types[WWAN_PORT_MAX + 1] = { [WWAN_PORT_AT] = { .name = "AT", @@ -1184,7 +1184,7 @@ void wwan_unregister_ops(struct device *parent) */ put_device(&wwandev->dev); - rtnl_lock(); /* Prevent concurent netdev(s) creation/destroying */ + rtnl_lock(); /* Prevent concurrent netdev(s) creation/destroying */ /* Remove all child netdev(s), using batch removing */ device_for_each_child(&wwandev->dev, &kill_list, diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c index f3f2c07423a6..db304f178136 100644 --- a/drivers/net/xen-netback/interface.c +++ b/drivers/net/xen-netback/interface.c @@ -41,8 +41,6 @@ #include <asm/xen/hypercall.h> #include <xen/balloon.h> -#define XENVIF_QUEUE_LENGTH 32 - /* Number of bytes allowed on the internal guest Rx queue. */ #define XENVIF_RX_QUEUE_BYTES (XEN_NETIF_RX_RING_SIZE/2 * PAGE_SIZE) @@ -254,6 +252,9 @@ xenvif_start_xmit(struct sk_buff *skb, struct net_device *dev) if (vif->hash.alg == XEN_NETIF_CTRL_HASH_ALGORITHM_NONE) skb_clear_hash(skb); + /* timestamp packet in software */ + skb_tx_timestamp(skb); + if (!xenvif_rx_queue_tail(queue, skb)) goto drop; @@ -460,7 +461,7 @@ static void xenvif_get_strings(struct net_device *dev, u32 stringset, u8 * data) static const struct ethtool_ops xenvif_ethtool_ops = { .get_link = ethtool_op_get_link, - + .get_ts_info = ethtool_op_get_ts_info, .get_sset_count = xenvif_get_sset_count, .get_ethtool_stats = xenvif_get_ethtool_stats, .get_strings = xenvif_get_strings, @@ -530,8 +531,6 @@ struct xenvif *xenvif_alloc(struct device *parent, domid_t domid, dev->features = dev->hw_features | NETIF_F_RXCSUM; dev->ethtool_ops = &xenvif_ethtool_ops; - dev->tx_queue_len = XENVIF_QUEUE_LENGTH; - dev->min_mtu = ETH_MIN_MTU; dev->max_mtu = ETH_MAX_MTU - VLAN_ETH_HLEN; diff --git a/drivers/of/dynamic.c b/drivers/of/dynamic.c index 0a3483e247a8..f63250c650ca 100644 --- a/drivers/of/dynamic.c +++ b/drivers/of/dynamic.c @@ -890,13 +890,13 @@ int of_changeset_action(struct of_changeset *ocs, unsigned long action, { struct of_changeset_entry *ce; + if (WARN_ON(action >= ARRAY_SIZE(action_names))) + return -EINVAL; + ce = kzalloc(sizeof(*ce), GFP_KERNEL); if (!ce) return -ENOMEM; - if (WARN_ON(action >= ARRAY_SIZE(action_names))) - return -EINVAL; - /* get a reference to the node */ ce->action = action; ce->np = of_node_get(np); diff --git a/drivers/of/overlay.c b/drivers/of/overlay.c index dfb6fb962fc7..a9a292d6d59b 100644 --- a/drivers/of/overlay.c +++ b/drivers/of/overlay.c @@ -45,8 +45,8 @@ struct target { /** * struct fragment - info about fragment nodes in overlay expanded device tree - * @target: target of the overlay operation * @overlay: pointer to the __overlay__ node + * @target: target of the overlay operation */ struct fragment { struct device_node *overlay; diff --git a/drivers/pci/controller/dwc/pcie-qcom.c b/drivers/pci/controller/dwc/pcie-qcom.c index e2f29404c84e..64420ecc24d1 100644 --- a/drivers/pci/controller/dwc/pcie-qcom.c +++ b/drivers/pci/controller/dwc/pcie-qcom.c @@ -43,7 +43,6 @@ #define PARF_PHY_REFCLK 0x4c #define PARF_CONFIG_BITS 0x50 #define PARF_DBI_BASE_ADDR 0x168 -#define PARF_SLV_ADDR_SPACE_SIZE_2_3_3 0x16c /* Register offset specific to IP ver 2.3.3 */ #define PARF_MHI_CLOCK_RESET_CTRL 0x174 #define PARF_AXI_MSTR_WR_ADDR_HALT 0x178 #define PARF_AXI_MSTR_WR_ADDR_HALT_V2 0x1a8 @@ -797,8 +796,7 @@ static int qcom_pcie_post_init_2_3_3(struct qcom_pcie *pcie) u16 offset = dw_pcie_find_capability(pci, PCI_CAP_ID_EXP); u32 val; - writel(SLV_ADDR_SPACE_SZ, - pcie->parf + PARF_SLV_ADDR_SPACE_SIZE_2_3_3); + writel(SLV_ADDR_SPACE_SZ, pcie->parf + PARF_SLV_ADDR_SPACE_SIZE); val = readl(pcie->parf + PARF_PHY_CTRL); val &= ~PHY_TEST_PWR_DOWN; diff --git a/drivers/pci/of.c b/drivers/pci/of.c index 2af64bcb7da3..51e3dd0ea5ab 100644 --- a/drivers/pci/of.c +++ b/drivers/pci/of.c @@ -657,30 +657,33 @@ void of_pci_make_dev_node(struct pci_dev *pdev) cset = kmalloc(sizeof(*cset), GFP_KERNEL); if (!cset) - goto failed; + goto out_free_name; of_changeset_init(cset); np = of_changeset_create_node(cset, ppnode, name); if (!np) - goto failed; - np->data = cset; + goto out_destroy_cset; ret = of_pci_add_properties(pdev, cset, np); if (ret) - goto failed; + goto out_free_node; ret = of_changeset_apply(cset); if (ret) - goto failed; + goto out_free_node; + np->data = cset; pdev->dev.of_node = np; kfree(name); return; -failed: - if (np) - of_node_put(np); +out_free_node: + of_node_put(np); +out_destroy_cset: + of_changeset_destroy(cset); + kfree(cset); +out_free_name: kfree(name); } #endif diff --git a/drivers/pci/of_property.c b/drivers/pci/of_property.c index 710ec35ba4a1..c2c7334152bc 100644 --- a/drivers/pci/of_property.c +++ b/drivers/pci/of_property.c @@ -186,8 +186,8 @@ static int of_pci_prop_interrupts(struct pci_dev *pdev, static int of_pci_prop_intr_map(struct pci_dev *pdev, struct of_changeset *ocs, struct device_node *np) { + u32 i, addr_sz[OF_PCI_MAX_INT_PIN] = { 0 }, map_sz = 0; struct of_phandle_args out_irq[OF_PCI_MAX_INT_PIN]; - u32 i, addr_sz[OF_PCI_MAX_INT_PIN], map_sz = 0; __be32 laddr[OF_PCI_ADDRESS_CELLS] = { 0 }; u32 int_map_mask[] = { 0xffff00, 0, 0, 7 }; struct device_node *pnode; @@ -213,33 +213,44 @@ static int of_pci_prop_intr_map(struct pci_dev *pdev, struct of_changeset *ocs, out_irq[i].args[0] = pin; ret = of_irq_parse_raw(laddr, &out_irq[i]); if (ret) { - pci_err(pdev, "parse irq %d failed, ret %d", pin, ret); + out_irq[i].np = NULL; + pci_dbg(pdev, "parse irq %d failed, ret %d", pin, ret); continue; } - ret = of_property_read_u32(out_irq[i].np, "#address-cells", - &addr_sz[i]); - if (ret) - addr_sz[i] = 0; + of_property_read_u32(out_irq[i].np, "#address-cells", + &addr_sz[i]); } list_for_each_entry(child, &pdev->subordinate->devices, bus_list) { for (pin = 1; pin <= OF_PCI_MAX_INT_PIN; pin++) { i = pci_swizzle_interrupt_pin(child, pin) - 1; + if (!out_irq[i].np) + continue; map_sz += 5 + addr_sz[i] + out_irq[i].args_count; } } + /* + * Parsing interrupt failed for all pins. In this case, it does not + * need to generate interrupt-map property. + */ + if (!map_sz) + return 0; + int_map = kcalloc(map_sz, sizeof(u32), GFP_KERNEL); mapp = int_map; list_for_each_entry(child, &pdev->subordinate->devices, bus_list) { for (pin = 1; pin <= OF_PCI_MAX_INT_PIN; pin++) { + i = pci_swizzle_interrupt_pin(child, pin) - 1; + if (!out_irq[i].np) + continue; + *mapp = (child->bus->number << 16) | (child->devfn << 8); mapp += OF_PCI_ADDRESS_CELLS; *mapp = pin; mapp++; - i = pci_swizzle_interrupt_pin(child, pin) - 1; *mapp = out_irq[i].np->phandle; mapp++; if (addr_sz[i]) { diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c index a79c110c7e51..51ec9e7e784f 100644 --- a/drivers/pci/pci-driver.c +++ b/drivers/pci/pci-driver.c @@ -572,7 +572,19 @@ static void pci_pm_default_resume_early(struct pci_dev *pci_dev) static void pci_pm_bridge_power_up_actions(struct pci_dev *pci_dev) { - pci_bridge_wait_for_secondary_bus(pci_dev, "resume"); + int ret; + + ret = pci_bridge_wait_for_secondary_bus(pci_dev, "resume"); + if (ret) { + /* + * The downstream link failed to come up, so mark the + * devices below as disconnected to make sure we don't + * attempt to resume them. + */ + pci_walk_bus(pci_dev->subordinate, pci_dev_set_disconnected, + NULL); + return; + } /* * When powering on a bridge from D3cold, the whole hierarchy may be diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c index 913dc04b3a40..6b50bc551984 100644 --- a/drivers/perf/arm-cmn.c +++ b/drivers/perf/arm-cmn.c @@ -1972,7 +1972,7 @@ static irqreturn_t arm_cmn_handle_irq(int irq, void *dev_id) u64 delta; int i; - for (i = 0; i < CMN_DTM_NUM_COUNTERS; i++) { + for (i = 0; i < CMN_DT_NUM_COUNTERS; i++) { if (status & (1U << i)) { ret = IRQ_HANDLED; if (WARN_ON(!dtc->counters[i])) diff --git a/drivers/phy/freescale/phy-fsl-lynx-28g.c b/drivers/phy/freescale/phy-fsl-lynx-28g.c index 4f036c77284e..e2187767ce00 100644 --- a/drivers/phy/freescale/phy-fsl-lynx-28g.c +++ b/drivers/phy/freescale/phy-fsl-lynx-28g.c @@ -127,6 +127,10 @@ struct lynx_28g_lane { struct lynx_28g_priv { void __iomem *base; struct device *dev; + /* Serialize concurrent access to registers shared between lanes, + * like PCCn + */ + spinlock_t pcc_lock; struct lynx_28g_pll pll[LYNX_28G_NUM_PLL]; struct lynx_28g_lane lane[LYNX_28G_NUM_LANE]; @@ -397,6 +401,8 @@ static int lynx_28g_set_mode(struct phy *phy, enum phy_mode mode, int submode) if (powered_up) lynx_28g_power_off(phy); + spin_lock(&priv->pcc_lock); + switch (submode) { case PHY_INTERFACE_MODE_SGMII: case PHY_INTERFACE_MODE_1000BASEX: @@ -413,6 +419,8 @@ static int lynx_28g_set_mode(struct phy *phy, enum phy_mode mode, int submode) lane->interface = submode; out: + spin_unlock(&priv->pcc_lock); + /* Power up the lane if necessary */ if (powered_up) lynx_28g_power_on(phy); @@ -508,11 +516,12 @@ static void lynx_28g_cdr_lock_check(struct work_struct *work) for (i = 0; i < LYNX_28G_NUM_LANE; i++) { lane = &priv->lane[i]; - if (!lane->init) - continue; + mutex_lock(&lane->phy->mutex); - if (!lane->powered_up) + if (!lane->init || !lane->powered_up) { + mutex_unlock(&lane->phy->mutex); continue; + } rrstctl = lynx_28g_lane_read(lane, LNaRRSTCTL); if (!(rrstctl & LYNX_28G_LNaRRSTCTL_CDR_LOCK)) { @@ -521,6 +530,8 @@ static void lynx_28g_cdr_lock_check(struct work_struct *work) rrstctl = lynx_28g_lane_read(lane, LNaRRSTCTL); } while (!(rrstctl & LYNX_28G_LNaRRSTCTL_RST_DONE)); } + + mutex_unlock(&lane->phy->mutex); } queue_delayed_work(system_power_efficient_wq, &priv->cdr_check, msecs_to_jiffies(1000)); @@ -593,6 +604,7 @@ static int lynx_28g_probe(struct platform_device *pdev) dev_set_drvdata(dev, priv); + spin_lock_init(&priv->pcc_lock); INIT_DELAYED_WORK(&priv->cdr_check, lynx_28g_cdr_lock_check); queue_delayed_work(system_power_efficient_wq, &priv->cdr_check, @@ -604,6 +616,14 @@ static int lynx_28g_probe(struct platform_device *pdev) return PTR_ERR_OR_ZERO(provider); } +static void lynx_28g_remove(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct lynx_28g_priv *priv = dev_get_drvdata(dev); + + cancel_delayed_work_sync(&priv->cdr_check); +} + static const struct of_device_id lynx_28g_of_match_table[] = { { .compatible = "fsl,lynx-28g" }, { }, @@ -612,6 +632,7 @@ MODULE_DEVICE_TABLE(of, lynx_28g_of_match_table); static struct platform_driver lynx_28g_driver = { .probe = lynx_28g_probe, + .remove_new = lynx_28g_remove, .driver = { .name = "lynx-28g", .of_match_table = lynx_28g_of_match_table, diff --git a/drivers/pinctrl/core.c b/drivers/pinctrl/core.c index e9dc9638120a..e2f7519bef04 100644 --- a/drivers/pinctrl/core.c +++ b/drivers/pinctrl/core.c @@ -1022,17 +1022,20 @@ static int add_setting(struct pinctrl *p, struct pinctrl_dev *pctldev, static struct pinctrl *find_pinctrl(struct device *dev) { - struct pinctrl *p; + struct pinctrl *entry, *p = NULL; mutex_lock(&pinctrl_list_mutex); - list_for_each_entry(p, &pinctrl_list, node) - if (p->dev == dev) { - mutex_unlock(&pinctrl_list_mutex); - return p; + + list_for_each_entry(entry, &pinctrl_list, node) { + if (entry->dev == dev) { + p = entry; + kref_get(&p->users); + break; } + } mutex_unlock(&pinctrl_list_mutex); - return NULL; + return p; } static void pinctrl_free(struct pinctrl *p, bool inlist); @@ -1140,7 +1143,6 @@ struct pinctrl *pinctrl_get(struct device *dev) p = find_pinctrl(dev); if (p) { dev_dbg(dev, "obtain a copy of previously claimed pinctrl\n"); - kref_get(&p->users); return p; } diff --git a/drivers/pinctrl/nuvoton/pinctrl-wpcm450.c b/drivers/pinctrl/nuvoton/pinctrl-wpcm450.c index 2d1c1652cfd9..8a9961ac8712 100644 --- a/drivers/pinctrl/nuvoton/pinctrl-wpcm450.c +++ b/drivers/pinctrl/nuvoton/pinctrl-wpcm450.c @@ -1062,13 +1062,13 @@ static int wpcm450_gpio_register(struct platform_device *pdev, if (ret < 0) return ret; - gpio = &pctrl->gpio_bank[reg]; - gpio->pctrl = pctrl; - if (reg >= WPCM450_NUM_BANKS) return dev_err_probe(dev, -EINVAL, "GPIO index %d out of range!\n", reg); + gpio = &pctrl->gpio_bank[reg]; + gpio->pctrl = pctrl; + bank = &wpcm450_banks[reg]; gpio->bank = bank; diff --git a/drivers/pinctrl/pinctrl-lantiq.h b/drivers/pinctrl/pinctrl-lantiq.h index efb25fc34f14..1ac49ae638de 100644 --- a/drivers/pinctrl/pinctrl-lantiq.h +++ b/drivers/pinctrl/pinctrl-lantiq.h @@ -198,5 +198,4 @@ enum ltq_pin { extern int ltq_pinctrl_register(struct platform_device *pdev, struct ltq_pinmux_info *info); -extern int ltq_pinctrl_unregister(struct platform_device *pdev); #endif /* __PINCTRL_LANTIQ_H */ diff --git a/drivers/pinctrl/renesas/Kconfig b/drivers/pinctrl/renesas/Kconfig index 77730dc548ed..c8d519ca53eb 100644 --- a/drivers/pinctrl/renesas/Kconfig +++ b/drivers/pinctrl/renesas/Kconfig @@ -235,6 +235,7 @@ config PINCTRL_RZN1 depends on OF depends on ARCH_RZN1 || COMPILE_TEST select GENERIC_PINCONF + select PINMUX help This selects pinctrl driver for Renesas RZ/N1 devices. diff --git a/drivers/pinctrl/starfive/pinctrl-starfive-jh7110-aon.c b/drivers/pinctrl/starfive/pinctrl-starfive-jh7110-aon.c index 4bfe3aa57f8a..cf42e204cbf0 100644 --- a/drivers/pinctrl/starfive/pinctrl-starfive-jh7110-aon.c +++ b/drivers/pinctrl/starfive/pinctrl-starfive-jh7110-aon.c @@ -31,6 +31,8 @@ #define JH7110_AON_NGPIO 4 #define JH7110_AON_GC_BASE 64 +#define JH7110_AON_REGS_NUM 37 + /* registers */ #define JH7110_AON_DOEN 0x0 #define JH7110_AON_DOUT 0x4 @@ -145,6 +147,7 @@ static const struct jh7110_pinctrl_soc_info jh7110_aon_pinctrl_info = { .gpi_mask = GENMASK(3, 0), .gpioin_reg_base = JH7110_AON_GPIOIN, .irq_reg = &jh7110_aon_irq_reg, + .nsaved_regs = JH7110_AON_REGS_NUM, .jh7110_set_one_pin_mux = jh7110_aon_set_one_pin_mux, .jh7110_get_padcfg_base = jh7110_aon_get_padcfg_base, .jh7110_gpio_irq_handler = jh7110_aon_irq_handler, @@ -165,6 +168,7 @@ static struct platform_driver jh7110_aon_pinctrl_driver = { .driver = { .name = "starfive-jh7110-aon-pinctrl", .of_match_table = jh7110_aon_pinctrl_of_match, + .pm = pm_sleep_ptr(&jh7110_pinctrl_pm_ops), }, }; module_platform_driver(jh7110_aon_pinctrl_driver); diff --git a/drivers/pinctrl/starfive/pinctrl-starfive-jh7110-sys.c b/drivers/pinctrl/starfive/pinctrl-starfive-jh7110-sys.c index 20c85db1cd3a..03c2ad808d61 100644 --- a/drivers/pinctrl/starfive/pinctrl-starfive-jh7110-sys.c +++ b/drivers/pinctrl/starfive/pinctrl-starfive-jh7110-sys.c @@ -31,6 +31,8 @@ #define JH7110_SYS_NGPIO 64 #define JH7110_SYS_GC_BASE 0 +#define JH7110_SYS_REGS_NUM 174 + /* registers */ #define JH7110_SYS_DOEN 0x000 #define JH7110_SYS_DOUT 0x040 @@ -417,6 +419,7 @@ static const struct jh7110_pinctrl_soc_info jh7110_sys_pinctrl_info = { .gpi_mask = GENMASK(6, 0), .gpioin_reg_base = JH7110_SYS_GPIOIN, .irq_reg = &jh7110_sys_irq_reg, + .nsaved_regs = JH7110_SYS_REGS_NUM, .jh7110_set_one_pin_mux = jh7110_sys_set_one_pin_mux, .jh7110_get_padcfg_base = jh7110_sys_get_padcfg_base, .jh7110_gpio_irq_handler = jh7110_sys_irq_handler, @@ -437,6 +440,7 @@ static struct platform_driver jh7110_sys_pinctrl_driver = { .driver = { .name = "starfive-jh7110-sys-pinctrl", .of_match_table = jh7110_sys_pinctrl_of_match, + .pm = pm_sleep_ptr(&jh7110_pinctrl_pm_ops), }, }; module_platform_driver(jh7110_sys_pinctrl_driver); diff --git a/drivers/pinctrl/starfive/pinctrl-starfive-jh7110.c b/drivers/pinctrl/starfive/pinctrl-starfive-jh7110.c index b9081805c8f6..640f827a9b2c 100644 --- a/drivers/pinctrl/starfive/pinctrl-starfive-jh7110.c +++ b/drivers/pinctrl/starfive/pinctrl-starfive-jh7110.c @@ -872,6 +872,13 @@ int jh7110_pinctrl_probe(struct platform_device *pdev) if (!sfp) return -ENOMEM; +#if IS_ENABLED(CONFIG_PM_SLEEP) + sfp->saved_regs = devm_kcalloc(dev, info->nsaved_regs, + sizeof(*sfp->saved_regs), GFP_KERNEL); + if (!sfp->saved_regs) + return -ENOMEM; +#endif + sfp->base = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(sfp->base)) return PTR_ERR(sfp->base); @@ -967,14 +974,45 @@ int jh7110_pinctrl_probe(struct platform_device *pdev) if (ret) return dev_err_probe(dev, ret, "could not register gpiochip\n"); - irq_domain_set_pm_device(sfp->gc.irq.domain, dev); - dev_info(dev, "StarFive GPIO chip registered %d GPIOs\n", sfp->gc.ngpio); return pinctrl_enable(sfp->pctl); } EXPORT_SYMBOL_GPL(jh7110_pinctrl_probe); +static int jh7110_pinctrl_suspend(struct device *dev) +{ + struct jh7110_pinctrl *sfp = dev_get_drvdata(dev); + unsigned long flags; + unsigned int i; + + raw_spin_lock_irqsave(&sfp->lock, flags); + for (i = 0 ; i < sfp->info->nsaved_regs ; i++) + sfp->saved_regs[i] = readl_relaxed(sfp->base + 4 * i); + + raw_spin_unlock_irqrestore(&sfp->lock, flags); + return 0; +} + +static int jh7110_pinctrl_resume(struct device *dev) +{ + struct jh7110_pinctrl *sfp = dev_get_drvdata(dev); + unsigned long flags; + unsigned int i; + + raw_spin_lock_irqsave(&sfp->lock, flags); + for (i = 0 ; i < sfp->info->nsaved_regs ; i++) + writel_relaxed(sfp->saved_regs[i], sfp->base + 4 * i); + + raw_spin_unlock_irqrestore(&sfp->lock, flags); + return 0; +} + +const struct dev_pm_ops jh7110_pinctrl_pm_ops = { + LATE_SYSTEM_SLEEP_PM_OPS(jh7110_pinctrl_suspend, jh7110_pinctrl_resume) +}; +EXPORT_SYMBOL_GPL(jh7110_pinctrl_pm_ops); + MODULE_DESCRIPTION("Pinctrl driver for the StarFive JH7110 SoC"); MODULE_AUTHOR("Emil Renner Berthing <kernel@esmil.dk>"); MODULE_AUTHOR("Jianlong Huang <jianlong.huang@starfivetech.com>"); diff --git a/drivers/pinctrl/starfive/pinctrl-starfive-jh7110.h b/drivers/pinctrl/starfive/pinctrl-starfive-jh7110.h index 3f20b7ff96dd..a33d0d4e1382 100644 --- a/drivers/pinctrl/starfive/pinctrl-starfive-jh7110.h +++ b/drivers/pinctrl/starfive/pinctrl-starfive-jh7110.h @@ -21,6 +21,7 @@ struct jh7110_pinctrl { /* register read/write mutex */ struct mutex mutex; const struct jh7110_pinctrl_soc_info *info; + u32 *saved_regs; }; struct jh7110_gpio_irq_reg { @@ -50,6 +51,8 @@ struct jh7110_pinctrl_soc_info { const struct jh7110_gpio_irq_reg *irq_reg; + unsigned int nsaved_regs; + /* generic pinmux */ int (*jh7110_set_one_pin_mux)(struct jh7110_pinctrl *sfp, unsigned int pin, @@ -66,5 +69,6 @@ void jh7110_set_gpiomux(struct jh7110_pinctrl *sfp, unsigned int pin, unsigned int din, u32 dout, u32 doen); int jh7110_pinctrl_probe(struct platform_device *pdev); struct jh7110_pinctrl *jh7110_from_irq_desc(struct irq_desc *desc); +extern const struct dev_pm_ops jh7110_pinctrl_pm_ops; #endif /* __PINCTRL_STARFIVE_JH7110_H__ */ diff --git a/drivers/pinctrl/tegra/pinctrl-tegra.c b/drivers/pinctrl/tegra/pinctrl-tegra.c index cfeda5b3e048..734c71ef005b 100644 --- a/drivers/pinctrl/tegra/pinctrl-tegra.c +++ b/drivers/pinctrl/tegra/pinctrl-tegra.c @@ -96,7 +96,6 @@ static const struct cfg_param { {"nvidia,slew-rate-falling", TEGRA_PINCONF_PARAM_SLEW_RATE_FALLING}, {"nvidia,slew-rate-rising", TEGRA_PINCONF_PARAM_SLEW_RATE_RISING}, {"nvidia,drive-type", TEGRA_PINCONF_PARAM_DRIVE_TYPE}, - {"nvidia,function", TEGRA_PINCONF_PARAM_FUNCTION}, }; static int tegra_pinctrl_dt_subnode_to_map(struct pinctrl_dev *pctldev, @@ -471,12 +470,6 @@ static int tegra_pinconf_reg(struct tegra_pmx *pmx, *bit = g->drvtype_bit; *width = 2; break; - case TEGRA_PINCONF_PARAM_FUNCTION: - *bank = g->mux_bank; - *reg = g->mux_reg; - *bit = g->mux_bit; - *width = 2; - break; default: dev_err(pmx->dev, "Invalid config param %04x\n", param); return -ENOTSUPP; @@ -640,16 +633,8 @@ static void tegra_pinconf_group_dbg_show(struct pinctrl_dev *pctldev, val >>= bit; val &= (1 << width) - 1; - if (cfg_params[i].param == TEGRA_PINCONF_PARAM_FUNCTION) { - u8 idx = pmx->soc->groups[group].funcs[val]; - - seq_printf(s, "\n\t%s=%s", - strip_prefix(cfg_params[i].property), - pmx->functions[idx].name); - } else { - seq_printf(s, "\n\t%s=%u", - strip_prefix(cfg_params[i].property), val); - } + seq_printf(s, "\n\t%s=%u", + strip_prefix(cfg_params[i].property), val); } } diff --git a/drivers/pinctrl/tegra/pinctrl-tegra.h b/drivers/pinctrl/tegra/pinctrl-tegra.h index e728efeaa4de..b3289bdf727d 100644 --- a/drivers/pinctrl/tegra/pinctrl-tegra.h +++ b/drivers/pinctrl/tegra/pinctrl-tegra.h @@ -54,8 +54,6 @@ enum tegra_pinconf_param { TEGRA_PINCONF_PARAM_SLEW_RATE_RISING, /* argument: Integer, range is HW-dependant */ TEGRA_PINCONF_PARAM_DRIVE_TYPE, - /* argument: pinmux settings */ - TEGRA_PINCONF_PARAM_FUNCTION, }; enum tegra_pinconf_pull { diff --git a/drivers/platform/mellanox/mlxbf-tmfifo.c b/drivers/platform/mellanox/mlxbf-tmfifo.c index f3696a54a2bd..fd38d8c8371e 100644 --- a/drivers/platform/mellanox/mlxbf-tmfifo.c +++ b/drivers/platform/mellanox/mlxbf-tmfifo.c @@ -53,7 +53,7 @@ struct mlxbf_tmfifo; /** - * mlxbf_tmfifo_vring - Structure of the TmFifo virtual ring + * struct mlxbf_tmfifo_vring - Structure of the TmFifo virtual ring * @va: virtual address of the ring * @dma: dma address of the ring * @vq: pointer to the virtio virtqueue @@ -113,12 +113,13 @@ enum { }; /** - * mlxbf_tmfifo_vdev - Structure of the TmFifo virtual device + * struct mlxbf_tmfifo_vdev - Structure of the TmFifo virtual device * @vdev: virtio device, in which the vdev.id.device field has the * VIRTIO_ID_xxx id to distinguish the virtual device. * @status: status of the device * @features: supported features of the device * @vrings: array of tmfifo vrings of this device + * @config: non-anonymous union for cons and net * @config.cons: virtual console config - * select if vdev.id.device is VIRTIO_ID_CONSOLE * @config.net: virtual network config - @@ -138,7 +139,7 @@ struct mlxbf_tmfifo_vdev { }; /** - * mlxbf_tmfifo_irq_info - Structure of the interrupt information + * struct mlxbf_tmfifo_irq_info - Structure of the interrupt information * @fifo: pointer to the tmfifo structure * @irq: interrupt number * @index: index into the interrupt array @@ -150,7 +151,7 @@ struct mlxbf_tmfifo_irq_info { }; /** - * mlxbf_tmfifo_io - Structure of the TmFifo IO resource (for both rx & tx) + * struct mlxbf_tmfifo_io - Structure of the TmFifo IO resource (for both rx & tx) * @ctl: control register offset (TMFIFO_RX_CTL / TMFIFO_TX_CTL) * @sts: status register offset (TMFIFO_RX_STS / TMFIFO_TX_STS) * @data: data register offset (TMFIFO_RX_DATA / TMFIFO_TX_DATA) @@ -162,7 +163,7 @@ struct mlxbf_tmfifo_io { }; /** - * mlxbf_tmfifo - Structure of the TmFifo + * struct mlxbf_tmfifo - Structure of the TmFifo * @vdev: array of the virtual devices running over the TmFifo * @lock: lock to protect the TmFifo access * @res0: mapped resource block 0 @@ -198,7 +199,7 @@ struct mlxbf_tmfifo { }; /** - * mlxbf_tmfifo_msg_hdr - Structure of the TmFifo message header + * struct mlxbf_tmfifo_msg_hdr - Structure of the TmFifo message header * @type: message type * @len: payload length in network byte order. Messages sent into the FIFO * will be read by the other side as data stream in the same byte order. @@ -208,6 +209,7 @@ struct mlxbf_tmfifo { struct mlxbf_tmfifo_msg_hdr { u8 type; __be16 len; + /* private: */ u8 unused[5]; } __packed __aligned(sizeof(u64)); diff --git a/drivers/platform/x86/hp/hp-bioscfg/bioscfg.c b/drivers/platform/x86/hp/hp-bioscfg/bioscfg.c index 8c4f9e12f018..5798b49ddaba 100644 --- a/drivers/platform/x86/hp/hp-bioscfg/bioscfg.c +++ b/drivers/platform/x86/hp/hp-bioscfg/bioscfg.c @@ -659,7 +659,7 @@ static int hp_init_bios_package_attribute(enum hp_wmi_data_type attr_type, const char *guid, int min_elements, int instance_id) { - struct kobject *attr_name_kobj; + struct kobject *attr_name_kobj, *duplicate; union acpi_object *elements; struct kset *temp_kset; @@ -704,8 +704,11 @@ static int hp_init_bios_package_attribute(enum hp_wmi_data_type attr_type, } /* All duplicate attributes found are ignored */ - if (kset_find_obj(temp_kset, str_value)) { + duplicate = kset_find_obj(temp_kset, str_value); + if (duplicate) { pr_debug("Duplicate attribute name found - %s\n", str_value); + /* kset_find_obj() returns a reference */ + kobject_put(duplicate); goto pack_attr_exit; } @@ -768,7 +771,7 @@ static int hp_init_bios_buffer_attribute(enum hp_wmi_data_type attr_type, const char *guid, int min_elements, int instance_id) { - struct kobject *attr_name_kobj; + struct kobject *attr_name_kobj, *duplicate; struct kset *temp_kset; char str[MAX_BUFF_SIZE]; @@ -794,8 +797,11 @@ static int hp_init_bios_buffer_attribute(enum hp_wmi_data_type attr_type, temp_kset = bioscfg_drv.main_dir_kset; /* All duplicate attributes found are ignored */ - if (kset_find_obj(temp_kset, str)) { + duplicate = kset_find_obj(temp_kset, str); + if (duplicate) { pr_debug("Duplicate attribute name found - %s\n", str); + /* kset_find_obj() returns a reference */ + kobject_put(duplicate); goto buff_attr_exit; } diff --git a/drivers/platform/x86/hp/hp-wmi.c b/drivers/platform/x86/hp/hp-wmi.c index e76e5458db35..8ebb7be52ee7 100644 --- a/drivers/platform/x86/hp/hp-wmi.c +++ b/drivers/platform/x86/hp/hp-wmi.c @@ -1548,7 +1548,13 @@ static const struct dev_pm_ops hp_wmi_pm_ops = { .restore = hp_wmi_resume_handler, }; -static struct platform_driver hp_wmi_driver = { +/* + * hp_wmi_bios_remove() lives in .exit.text. For drivers registered via + * module_platform_driver_probe() this is ok because they cannot get unbound at + * runtime. So mark the driver struct with __refdata to prevent modpost + * triggering a section mismatch warning. + */ +static struct platform_driver hp_wmi_driver __refdata = { .driver = { .name = "hp-wmi", .pm = &hp_wmi_pm_ops, diff --git a/drivers/platform/x86/intel/ifs/runtest.c b/drivers/platform/x86/intel/ifs/runtest.c index 1061eb7ec399..43c864add778 100644 --- a/drivers/platform/x86/intel/ifs/runtest.c +++ b/drivers/platform/x86/intel/ifs/runtest.c @@ -331,14 +331,15 @@ int do_core_test(int cpu, struct device *dev) switch (test->test_num) { case IFS_TYPE_SAF: if (!ifsd->loaded) - return -EPERM; - ifs_test_core(cpu, dev); + ret = -EPERM; + else + ifs_test_core(cpu, dev); break; case IFS_TYPE_ARRAY_BIST: ifs_array_test_core(cpu, dev); break; default: - return -EINVAL; + ret = -EINVAL; } out: cpus_read_unlock(); diff --git a/drivers/platform/x86/think-lmi.c b/drivers/platform/x86/think-lmi.c index 79346881cadb..aee869769843 100644 --- a/drivers/platform/x86/think-lmi.c +++ b/drivers/platform/x86/think-lmi.c @@ -1248,6 +1248,24 @@ static void tlmi_release_attr(void) kset_unregister(tlmi_priv.authentication_kset); } +static int tlmi_validate_setting_name(struct kset *attribute_kset, char *name) +{ + struct kobject *duplicate; + + if (!strcmp(name, "Reserved")) + return -EINVAL; + + duplicate = kset_find_obj(attribute_kset, name); + if (duplicate) { + pr_debug("Duplicate attribute name found - %s\n", name); + /* kset_find_obj() returns a reference */ + kobject_put(duplicate); + return -EBUSY; + } + + return 0; +} + static int tlmi_sysfs_init(void) { int i, ret; @@ -1276,10 +1294,8 @@ static int tlmi_sysfs_init(void) continue; /* check for duplicate or reserved values */ - if (kset_find_obj(tlmi_priv.attribute_kset, tlmi_priv.setting[i]->display_name) || - !strcmp(tlmi_priv.setting[i]->display_name, "Reserved")) { - pr_debug("duplicate or reserved attribute name found - %s\n", - tlmi_priv.setting[i]->display_name); + if (tlmi_validate_setting_name(tlmi_priv.attribute_kset, + tlmi_priv.setting[i]->display_name) < 0) { kfree(tlmi_priv.setting[i]->possible_values); kfree(tlmi_priv.setting[i]); tlmi_priv.setting[i] = NULL; diff --git a/drivers/platform/x86/touchscreen_dmi.c b/drivers/platform/x86/touchscreen_dmi.c index f9301a9382e7..0c6733772698 100644 --- a/drivers/platform/x86/touchscreen_dmi.c +++ b/drivers/platform/x86/touchscreen_dmi.c @@ -42,6 +42,21 @@ static const struct ts_dmi_data archos_101_cesium_educ_data = { .properties = archos_101_cesium_educ_props, }; +static const struct property_entry bush_bush_windows_tablet_props[] = { + PROPERTY_ENTRY_U32("touchscreen-size-x", 1850), + PROPERTY_ENTRY_U32("touchscreen-size-y", 1280), + PROPERTY_ENTRY_BOOL("touchscreen-swapped-x-y"), + PROPERTY_ENTRY_U32("silead,max-fingers", 10), + PROPERTY_ENTRY_BOOL("silead,home-button"), + PROPERTY_ENTRY_STRING("firmware-name", "gsl1680-bush-bush-windows-tablet.fw"), + { } +}; + +static const struct ts_dmi_data bush_bush_windows_tablet_data = { + .acpi_name = "MSSL1680:00", + .properties = bush_bush_windows_tablet_props, +}; + static const struct property_entry chuwi_hi8_props[] = { PROPERTY_ENTRY_U32("touchscreen-size-x", 1665), PROPERTY_ENTRY_U32("touchscreen-size-y", 1140), @@ -756,6 +771,21 @@ static const struct ts_dmi_data pipo_w11_data = { .properties = pipo_w11_props, }; +static const struct property_entry positivo_c4128b_props[] = { + PROPERTY_ENTRY_U32("touchscreen-min-x", 4), + PROPERTY_ENTRY_U32("touchscreen-min-y", 13), + PROPERTY_ENTRY_U32("touchscreen-size-x", 1915), + PROPERTY_ENTRY_U32("touchscreen-size-y", 1269), + PROPERTY_ENTRY_STRING("firmware-name", "gsl1680-positivo-c4128b.fw"), + PROPERTY_ENTRY_U32("silead,max-fingers", 10), + { } +}; + +static const struct ts_dmi_data positivo_c4128b_data = { + .acpi_name = "MSSL1680:00", + .properties = positivo_c4128b_props, +}; + static const struct property_entry pov_mobii_wintab_p800w_v20_props[] = { PROPERTY_ENTRY_U32("touchscreen-min-x", 32), PROPERTY_ENTRY_U32("touchscreen-min-y", 16), @@ -1071,6 +1101,13 @@ const struct dmi_system_id touchscreen_dmi_table[] = { }, }, { + /* Bush Windows tablet */ + .driver_data = (void *)&bush_bush_windows_tablet_data, + .matches = { + DMI_MATCH(DMI_PRODUCT_NAME, "Bush Windows tablet"), + }, + }, + { /* Chuwi Hi8 */ .driver_data = (void *)&chuwi_hi8_data, .matches = { @@ -1481,6 +1518,14 @@ const struct dmi_system_id touchscreen_dmi_table[] = { }, }, { + /* Positivo C4128B */ + .driver_data = (void *)&positivo_c4128b_data, + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Positivo Tecnologia SA"), + DMI_MATCH(DMI_PRODUCT_NAME, "C4128B-1"), + }, + }, + { /* Point of View mobii wintab p800w (v2.0) */ .driver_data = (void *)&pov_mobii_wintab_p800w_v20_data, .matches = { diff --git a/drivers/pmdomain/imx/scu-pd.c b/drivers/pmdomain/imx/scu-pd.c index 2f693b67ddb4..891c1d925a9d 100644 --- a/drivers/pmdomain/imx/scu-pd.c +++ b/drivers/pmdomain/imx/scu-pd.c @@ -150,7 +150,8 @@ static const struct imx_sc_pd_range imx8qxp_scu_pd_ranges[] = { { "mclk-out-1", IMX_SC_R_MCLK_OUT_1, 1, false, 0 }, { "dma0-ch", IMX_SC_R_DMA_0_CH0, 32, true, 0 }, { "dma1-ch", IMX_SC_R_DMA_1_CH0, 16, true, 0 }, - { "dma2-ch", IMX_SC_R_DMA_2_CH0, 32, true, 0 }, + { "dma2-ch-0", IMX_SC_R_DMA_2_CH0, 5, true, 0 }, + { "dma2-ch-1", IMX_SC_R_DMA_2_CH5, 27, true, 0 }, { "dma3-ch", IMX_SC_R_DMA_3_CH0, 32, true, 0 }, { "asrc0", IMX_SC_R_ASRC_0, 1, false, 0 }, { "asrc1", IMX_SC_R_ASRC_1, 1, false, 0 }, diff --git a/drivers/ptp/ptp_chardev.c b/drivers/ptp/ptp_chardev.c index 362bf756e6b7..282cd7d24077 100644 --- a/drivers/ptp/ptp_chardev.c +++ b/drivers/ptp/ptp_chardev.c @@ -10,6 +10,7 @@ #include <linux/sched.h> #include <linux/slab.h> #include <linux/timekeeping.h> +#include <linux/debugfs.h> #include <linux/nospec.h> @@ -101,19 +102,67 @@ int ptp_set_pinfunc(struct ptp_clock *ptp, unsigned int pin, return 0; } -int ptp_open(struct posix_clock *pc, fmode_t fmode) +int ptp_open(struct posix_clock_context *pccontext, fmode_t fmode) { + struct ptp_clock *ptp = + container_of(pccontext->clk, struct ptp_clock, clock); + struct timestamp_event_queue *queue; + char debugfsname[32]; + + queue = kzalloc(sizeof(*queue), GFP_KERNEL); + if (!queue) + return -EINVAL; + queue->mask = bitmap_alloc(PTP_MAX_CHANNELS, GFP_KERNEL); + if (!queue->mask) { + kfree(queue); + return -EINVAL; + } + bitmap_set(queue->mask, 0, PTP_MAX_CHANNELS); + spin_lock_init(&queue->lock); + list_add_tail(&queue->qlist, &ptp->tsevqs); + pccontext->private_clkdata = queue; + + /* Debugfs contents */ + sprintf(debugfsname, "0x%p", queue); + queue->debugfs_instance = + debugfs_create_dir(debugfsname, ptp->debugfs_root); + queue->dfs_bitmap.array = (u32 *)queue->mask; + queue->dfs_bitmap.n_elements = + DIV_ROUND_UP(PTP_MAX_CHANNELS, BITS_PER_BYTE * sizeof(u32)); + debugfs_create_u32_array("mask", 0444, queue->debugfs_instance, + &queue->dfs_bitmap); + return 0; } -long ptp_ioctl(struct posix_clock *pc, unsigned int cmd, unsigned long arg) +int ptp_release(struct posix_clock_context *pccontext) { - struct ptp_clock *ptp = container_of(pc, struct ptp_clock, clock); + struct timestamp_event_queue *queue = pccontext->private_clkdata; + unsigned long flags; + + if (queue) { + debugfs_remove(queue->debugfs_instance); + pccontext->private_clkdata = NULL; + spin_lock_irqsave(&queue->lock, flags); + list_del(&queue->qlist); + spin_unlock_irqrestore(&queue->lock, flags); + bitmap_free(queue->mask); + kfree(queue); + } + return 0; +} + +long ptp_ioctl(struct posix_clock_context *pccontext, unsigned int cmd, + unsigned long arg) +{ + struct ptp_clock *ptp = + container_of(pccontext->clk, struct ptp_clock, clock); struct ptp_sys_offset_extended *extoff = NULL; struct ptp_sys_offset_precise precise_offset; struct system_device_crosststamp xtstamp; struct ptp_clock_info *ops = ptp->info; struct ptp_sys_offset *sysoff = NULL; + struct timestamp_event_queue *tsevq; struct ptp_system_timestamp sts; struct ptp_clock_request req; struct ptp_clock_caps caps; @@ -123,6 +172,8 @@ long ptp_ioctl(struct posix_clock *pc, unsigned int cmd, unsigned long arg) struct timespec64 ts; int enable, err = 0; + tsevq = pccontext->private_clkdata; + switch (cmd) { case PTP_CLOCK_GETCAPS: @@ -421,6 +472,22 @@ long ptp_ioctl(struct posix_clock *pc, unsigned int cmd, unsigned long arg) mutex_unlock(&ptp->pincfg_mux); break; + case PTP_MASK_CLEAR_ALL: + bitmap_clear(tsevq->mask, 0, PTP_MAX_CHANNELS); + break; + + case PTP_MASK_EN_SINGLE: + if (copy_from_user(&i, (void __user *)arg, sizeof(i))) { + err = -EFAULT; + break; + } + if (i >= PTP_MAX_CHANNELS) { + err = -EFAULT; + break; + } + set_bit(i, tsevq->mask); + break; + default: err = -ENOTTY; break; @@ -432,53 +499,65 @@ out: return err; } -__poll_t ptp_poll(struct posix_clock *pc, struct file *fp, poll_table *wait) +__poll_t ptp_poll(struct posix_clock_context *pccontext, struct file *fp, + poll_table *wait) { - struct ptp_clock *ptp = container_of(pc, struct ptp_clock, clock); + struct ptp_clock *ptp = + container_of(pccontext->clk, struct ptp_clock, clock); + struct timestamp_event_queue *queue; + + queue = pccontext->private_clkdata; + if (!queue) + return EPOLLERR; poll_wait(fp, &ptp->tsev_wq, wait); - return queue_cnt(&ptp->tsevq) ? EPOLLIN : 0; + return queue_cnt(queue) ? EPOLLIN : 0; } #define EXTTS_BUFSIZE (PTP_BUF_TIMESTAMPS * sizeof(struct ptp_extts_event)) -ssize_t ptp_read(struct posix_clock *pc, - uint rdflags, char __user *buf, size_t cnt) +ssize_t ptp_read(struct posix_clock_context *pccontext, uint rdflags, + char __user *buf, size_t cnt) { - struct ptp_clock *ptp = container_of(pc, struct ptp_clock, clock); - struct timestamp_event_queue *queue = &ptp->tsevq; + struct ptp_clock *ptp = + container_of(pccontext->clk, struct ptp_clock, clock); + struct timestamp_event_queue *queue; struct ptp_extts_event *event; unsigned long flags; size_t qcnt, i; int result; - if (cnt % sizeof(struct ptp_extts_event) != 0) - return -EINVAL; + queue = pccontext->private_clkdata; + if (!queue) { + result = -EINVAL; + goto exit; + } + + if (cnt % sizeof(struct ptp_extts_event) != 0) { + result = -EINVAL; + goto exit; + } if (cnt > EXTTS_BUFSIZE) cnt = EXTTS_BUFSIZE; cnt = cnt / sizeof(struct ptp_extts_event); - if (mutex_lock_interruptible(&ptp->tsevq_mux)) - return -ERESTARTSYS; - if (wait_event_interruptible(ptp->tsev_wq, ptp->defunct || queue_cnt(queue))) { - mutex_unlock(&ptp->tsevq_mux); return -ERESTARTSYS; } if (ptp->defunct) { - mutex_unlock(&ptp->tsevq_mux); - return -ENODEV; + result = -ENODEV; + goto exit; } event = kmalloc(EXTTS_BUFSIZE, GFP_KERNEL); if (!event) { - mutex_unlock(&ptp->tsevq_mux); - return -ENOMEM; + result = -ENOMEM; + goto exit; } spin_lock_irqsave(&queue->lock, flags); @@ -497,12 +576,16 @@ ssize_t ptp_read(struct posix_clock *pc, cnt = cnt * sizeof(struct ptp_extts_event); - mutex_unlock(&ptp->tsevq_mux); - result = cnt; - if (copy_to_user(buf, event, cnt)) + if (copy_to_user(buf, event, cnt)) { result = -EFAULT; + goto free_event; + } +free_event: kfree(event); +exit: + if (result < 0) + ptp_release(pccontext); return result; } diff --git a/drivers/ptp/ptp_clock.c b/drivers/ptp/ptp_clock.c index 80f74e38c2da..2e801cd33220 100644 --- a/drivers/ptp/ptp_clock.c +++ b/drivers/ptp/ptp_clock.c @@ -15,6 +15,7 @@ #include <linux/slab.h> #include <linux/syscalls.h> #include <linux/uaccess.h> +#include <linux/debugfs.h> #include <uapi/linux/sched/types.h> #include "ptp_private.h" @@ -162,6 +163,7 @@ static struct posix_clock_operations ptp_clock_ops = { .clock_settime = ptp_clock_settime, .ioctl = ptp_ioctl, .open = ptp_open, + .release = ptp_release, .poll = ptp_poll, .read = ptp_read, }; @@ -169,12 +171,22 @@ static struct posix_clock_operations ptp_clock_ops = { static void ptp_clock_release(struct device *dev) { struct ptp_clock *ptp = container_of(dev, struct ptp_clock, dev); + struct timestamp_event_queue *tsevq; + unsigned long flags; ptp_cleanup_pin_groups(ptp); kfree(ptp->vclock_index); - mutex_destroy(&ptp->tsevq_mux); mutex_destroy(&ptp->pincfg_mux); mutex_destroy(&ptp->n_vclocks_mux); + /* Delete first entry */ + tsevq = list_first_entry(&ptp->tsevqs, struct timestamp_event_queue, + qlist); + spin_lock_irqsave(&tsevq->lock, flags); + list_del(&tsevq->qlist); + spin_unlock_irqrestore(&tsevq->lock, flags); + bitmap_free(tsevq->mask); + kfree(tsevq); + debugfs_remove(ptp->debugfs_root); ida_free(&ptp_clocks_map, ptp->index); kfree(ptp); } @@ -206,7 +218,9 @@ struct ptp_clock *ptp_clock_register(struct ptp_clock_info *info, struct device *parent) { struct ptp_clock *ptp; + struct timestamp_event_queue *queue = NULL; int err = 0, index, major = MAJOR(ptp_devt); + char debugfsname[8]; size_t size; if (info->n_alarm > PTP_MAX_ALARMS) @@ -228,8 +242,16 @@ struct ptp_clock *ptp_clock_register(struct ptp_clock_info *info, ptp->info = info; ptp->devid = MKDEV(major, index); ptp->index = index; - spin_lock_init(&ptp->tsevq.lock); - mutex_init(&ptp->tsevq_mux); + INIT_LIST_HEAD(&ptp->tsevqs); + queue = kzalloc(sizeof(*queue), GFP_KERNEL); + if (!queue) + goto no_memory_queue; + list_add_tail(&queue->qlist, &ptp->tsevqs); + queue->mask = bitmap_alloc(PTP_MAX_CHANNELS, GFP_KERNEL); + if (!queue->mask) + goto no_memory_bitmap; + bitmap_set(queue->mask, 0, PTP_MAX_CHANNELS); + spin_lock_init(&queue->lock); mutex_init(&ptp->pincfg_mux); mutex_init(&ptp->n_vclocks_mux); init_waitqueue_head(&ptp->tsev_wq); @@ -320,6 +342,10 @@ struct ptp_clock *ptp_clock_register(struct ptp_clock_info *info, return ERR_PTR(err); } + /* Debugfs initialization */ + sprintf(debugfsname, "ptp%d", ptp->index); + ptp->debugfs_root = debugfs_create_dir(debugfsname, NULL); + return ptp; no_pps: @@ -330,9 +356,13 @@ no_mem_for_vclocks: if (ptp->kworker) kthread_destroy_worker(ptp->kworker); kworker_err: - mutex_destroy(&ptp->tsevq_mux); mutex_destroy(&ptp->pincfg_mux); mutex_destroy(&ptp->n_vclocks_mux); + bitmap_free(queue->mask); +no_memory_bitmap: + list_del(&queue->qlist); + kfree(queue); +no_memory_queue: ida_free(&ptp_clocks_map, index); no_slot: kfree(ptp); @@ -375,6 +405,7 @@ EXPORT_SYMBOL(ptp_clock_unregister); void ptp_clock_event(struct ptp_clock *ptp, struct ptp_clock_event *event) { + struct timestamp_event_queue *tsevq; struct pps_event_time evt; switch (event->type) { @@ -383,7 +414,11 @@ void ptp_clock_event(struct ptp_clock *ptp, struct ptp_clock_event *event) break; case PTP_CLOCK_EXTTS: - enqueue_external_timestamp(&ptp->tsevq, event); + /* Enqueue timestamp on selected queues */ + list_for_each_entry(tsevq, &ptp->tsevqs, qlist) { + if (test_bit((unsigned int)event->index, tsevq->mask)) + enqueue_external_timestamp(tsevq, event); + } wake_up_interruptible(&ptp->tsev_wq); break; diff --git a/drivers/ptp/ptp_private.h b/drivers/ptp/ptp_private.h index 75f58fc468a7..52f87e394aa6 100644 --- a/drivers/ptp/ptp_private.h +++ b/drivers/ptp/ptp_private.h @@ -15,16 +15,24 @@ #include <linux/ptp_clock.h> #include <linux/ptp_clock_kernel.h> #include <linux/time.h> +#include <linux/list.h> +#include <linux/bitmap.h> +#include <linux/debugfs.h> #define PTP_MAX_TIMESTAMPS 128 #define PTP_BUF_TIMESTAMPS 30 #define PTP_DEFAULT_MAX_VCLOCKS 20 +#define PTP_MAX_CHANNELS 2048 struct timestamp_event_queue { struct ptp_extts_event buf[PTP_MAX_TIMESTAMPS]; int head; int tail; spinlock_t lock; + struct list_head qlist; + unsigned long *mask; + struct dentry *debugfs_instance; + struct debugfs_u32_array dfs_bitmap; }; struct ptp_clock { @@ -35,8 +43,7 @@ struct ptp_clock { int index; /* index into clocks.map */ struct pps_device *pps_source; long dialed_frequency; /* remembers the frequency adjustment */ - struct timestamp_event_queue tsevq; /* simple fifo for time stamps */ - struct mutex tsevq_mux; /* one process at a time reading the fifo */ + struct list_head tsevqs; /* timestamp fifo list */ struct mutex pincfg_mux; /* protect concurrent info->pin_config access */ wait_queue_head_t tsev_wq; int defunct; /* tells readers to go away when clock is being removed */ @@ -53,6 +60,7 @@ struct ptp_clock { struct mutex n_vclocks_mux; /* protect concurrent n_vclocks access */ bool is_virtual_clock; bool has_cycles; + struct dentry *debugfs_root; }; #define info_to_vclock(d) container_of((d), struct ptp_vclock, info) @@ -117,16 +125,18 @@ extern struct class *ptp_class; int ptp_set_pinfunc(struct ptp_clock *ptp, unsigned int pin, enum ptp_pin_function func, unsigned int chan); -long ptp_ioctl(struct posix_clock *pc, - unsigned int cmd, unsigned long arg); +long ptp_ioctl(struct posix_clock_context *pccontext, unsigned int cmd, + unsigned long arg); -int ptp_open(struct posix_clock *pc, fmode_t fmode); +int ptp_open(struct posix_clock_context *pccontext, fmode_t fmode); -ssize_t ptp_read(struct posix_clock *pc, - uint flags, char __user *buf, size_t cnt); +int ptp_release(struct posix_clock_context *pccontext); -__poll_t ptp_poll(struct posix_clock *pc, - struct file *fp, poll_table *wait); +ssize_t ptp_read(struct posix_clock_context *pccontext, uint flags, char __user *buf, + size_t cnt); + +__poll_t ptp_poll(struct posix_clock_context *pccontext, struct file *fp, + poll_table *wait); /* * see ptp_sysfs.c diff --git a/drivers/ptp/ptp_sysfs.c b/drivers/ptp/ptp_sysfs.c index 6e4d5456a885..7d023d9d0acb 100644 --- a/drivers/ptp/ptp_sysfs.c +++ b/drivers/ptp/ptp_sysfs.c @@ -75,17 +75,21 @@ static ssize_t extts_fifo_show(struct device *dev, struct device_attribute *attr, char *page) { struct ptp_clock *ptp = dev_get_drvdata(dev); - struct timestamp_event_queue *queue = &ptp->tsevq; + struct timestamp_event_queue *queue; struct ptp_extts_event event; unsigned long flags; size_t qcnt; int cnt = 0; - memset(&event, 0, sizeof(event)); + cnt = list_count_nodes(&ptp->tsevqs); + if (cnt <= 0) + goto out; - if (mutex_lock_interruptible(&ptp->tsevq_mux)) - return -ERESTARTSYS; + /* The sysfs fifo will always draw from the fist queue */ + queue = list_first_entry(&ptp->tsevqs, struct timestamp_event_queue, + qlist); + memset(&event, 0, sizeof(event)); spin_lock_irqsave(&queue->lock, flags); qcnt = queue_cnt(queue); if (qcnt) { @@ -100,7 +104,6 @@ static ssize_t extts_fifo_show(struct device *dev, cnt = snprintf(page, PAGE_SIZE, "%u %lld %u\n", event.index, event.t.sec, event.t.nsec); out: - mutex_unlock(&ptp->tsevq_mux); return cnt; } static DEVICE_ATTR(fifo, 0444, extts_fifo_show, NULL); diff --git a/drivers/s390/net/Kconfig b/drivers/s390/net/Kconfig index 74760c1a163b..4902d45e929c 100644 --- a/drivers/s390/net/Kconfig +++ b/drivers/s390/net/Kconfig @@ -102,7 +102,7 @@ config CCWGROUP config ISM tristate "Support for ISM vPCI Adapter" - depends on PCI && SMC + depends on PCI default n help Select this option if you want to use the Internal Shared Memory diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c index 902655d75947..44680f65ea14 100644 --- a/drivers/scsi/scsi_scan.c +++ b/drivers/scsi/scsi_scan.c @@ -1627,12 +1627,13 @@ int scsi_rescan_device(struct scsi_device *sdev) device_lock(dev); /* - * Bail out if the device is not running. Otherwise, the rescan may - * block waiting for commands to be executed, with us holding the - * device lock. This can result in a potential deadlock in the power - * management core code when system resume is on-going. + * Bail out if the device or its queue are not running. Otherwise, + * the rescan may block waiting for commands to be executed, with us + * holding the device lock. This can result in a potential deadlock + * in the power management core code when system resume is on-going. */ - if (sdev->sdev_state != SDEV_RUNNING) { + if (sdev->sdev_state != SDEV_RUNNING || + blk_queue_pm_only(sdev->request_queue)) { ret = -EWOULDBLOCK; goto unlock; } diff --git a/drivers/soc/renesas/Kconfig b/drivers/soc/renesas/Kconfig index 5a75ab64d1ed..12040ce116a5 100644 --- a/drivers/soc/renesas/Kconfig +++ b/drivers/soc/renesas/Kconfig @@ -333,6 +333,7 @@ if RISCV config ARCH_R9A07G043 bool "RISC-V Platform support for RZ/Five" + depends on NONPORTABLE select ARCH_RZG2L select AX45MP_L2_CACHE if RISCV_DMA_NONCOHERENT select DMA_GLOBAL_POOL diff --git a/drivers/staging/media/atomisp/Kconfig b/drivers/staging/media/atomisp/Kconfig index 5d8917160d41..75c985da75b5 100644 --- a/drivers/staging/media/atomisp/Kconfig +++ b/drivers/staging/media/atomisp/Kconfig @@ -12,12 +12,12 @@ menuconfig INTEL_ATOMISP config VIDEO_ATOMISP tristate "Intel Atom Image Signal Processor Driver" depends on VIDEO_DEV && INTEL_ATOMISP + depends on IPU_BRIDGE depends on MEDIA_PCI_SUPPORT depends on PMIC_OPREGION depends on I2C select V4L2_FWNODE select IOSF_MBI - select IPU_BRIDGE select VIDEOBUF2_VMALLOC select VIDEO_V4L2_SUBDEV_API help diff --git a/drivers/staging/media/tegra-video/vi.c b/drivers/staging/media/tegra-video/vi.c index e98b3010520e..94171e62dee9 100644 --- a/drivers/staging/media/tegra-video/vi.c +++ b/drivers/staging/media/tegra-video/vi.c @@ -1455,17 +1455,18 @@ static int __maybe_unused vi_runtime_suspend(struct device *dev) } /* - * Graph Management + * Find the entity matching a given fwnode in an v4l2_async_notifier list */ static struct tegra_vi_graph_entity * -tegra_vi_graph_find_entity(struct tegra_vi_channel *chan, +tegra_vi_graph_find_entity(struct list_head *list, const struct fwnode_handle *fwnode) { struct tegra_vi_graph_entity *entity; struct v4l2_async_connection *asd; - list_for_each_entry(asd, &chan->notifier.done_list, asc_entry) { + list_for_each_entry(asd, list, asc_entry) { entity = to_tegra_vi_graph_entity(asd); + if (entity->asd.match.fwnode == fwnode) return entity; } @@ -1532,7 +1533,8 @@ static int tegra_vi_graph_build(struct tegra_vi_channel *chan, } /* find the remote entity from notifier list */ - ent = tegra_vi_graph_find_entity(chan, link.remote_node); + ent = tegra_vi_graph_find_entity(&chan->notifier.done_list, + link.remote_node); if (!ent) { dev_err(vi->dev, "no entity found for %pOF\n", to_of_node(link.remote_node)); @@ -1664,7 +1666,8 @@ static int tegra_vi_graph_notify_bound(struct v4l2_async_notifier *notifier, * Locate the entity corresponding to the bound subdev and store the * subdev pointer. */ - entity = tegra_vi_graph_find_entity(chan, subdev->fwnode); + entity = tegra_vi_graph_find_entity(&chan->notifier.waiting_list, + subdev->fwnode); if (!entity) { dev_err(vi->dev, "no entity for subdev %s\n", subdev->name); return -EINVAL; @@ -1713,7 +1716,8 @@ static int tegra_vi_graph_parse_one(struct tegra_vi_channel *chan, /* skip entities that are already processed */ if (device_match_fwnode(vi->dev, remote) || - tegra_vi_graph_find_entity(chan, remote)) { + tegra_vi_graph_find_entity(&chan->notifier.waiting_list, + remote)) { fwnode_handle_put(remote); continue; } diff --git a/drivers/tee/amdtee/core.c b/drivers/tee/amdtee/core.c index 372d64756ed6..3c15f6a9e91c 100644 --- a/drivers/tee/amdtee/core.c +++ b/drivers/tee/amdtee/core.c @@ -217,12 +217,12 @@ unlock: return rc; } +/* mutex must be held by caller */ static void destroy_session(struct kref *ref) { struct amdtee_session *sess = container_of(ref, struct amdtee_session, refcount); - mutex_lock(&session_list_mutex); list_del(&sess->list_node); mutex_unlock(&session_list_mutex); kfree(sess); @@ -272,7 +272,8 @@ int amdtee_open_session(struct tee_context *ctx, if (arg->ret != TEEC_SUCCESS) { pr_err("open_session failed %d\n", arg->ret); handle_unload_ta(ta_handle); - kref_put(&sess->refcount, destroy_session); + kref_put_mutex(&sess->refcount, destroy_session, + &session_list_mutex); goto out; } @@ -290,7 +291,8 @@ int amdtee_open_session(struct tee_context *ctx, pr_err("reached maximum session count %d\n", TEE_NUM_SESSIONS); handle_close_session(ta_handle, session_info); handle_unload_ta(ta_handle); - kref_put(&sess->refcount, destroy_session); + kref_put_mutex(&sess->refcount, destroy_session, + &session_list_mutex); rc = -ENOMEM; goto out; } @@ -331,7 +333,7 @@ int amdtee_close_session(struct tee_context *ctx, u32 session) handle_close_session(ta_handle, session_info); handle_unload_ta(ta_handle); - kref_put(&sess->refcount, destroy_session); + kref_put_mutex(&sess->refcount, destroy_session, &session_list_mutex); return 0; } diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c index 83711aad855c..f75731396b7e 100644 --- a/drivers/vhost/vsock.c +++ b/drivers/vhost/vsock.c @@ -398,6 +398,11 @@ static bool vhost_vsock_more_replies(struct vhost_vsock *vsock) return val < vq->num; } +static bool vhost_transport_msgzerocopy_allow(void) +{ + return true; +} + static bool vhost_transport_seqpacket_allow(u32 remote_cid); static struct virtio_transport vhost_transport = { @@ -431,6 +436,8 @@ static struct virtio_transport vhost_transport = { .seqpacket_allow = vhost_transport_seqpacket_allow, .seqpacket_has_data = virtio_transport_seqpacket_has_data, + .msgzerocopy_allow = vhost_transport_msgzerocopy_allow, + .notify_poll_in = virtio_transport_notify_poll_in, .notify_poll_out = virtio_transport_notify_poll_out, .notify_recv_init = virtio_transport_notify_recv_init, diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c index 0bb86e6c4d0a..1b2136fe0fa5 100644 --- a/drivers/xen/events/events_base.c +++ b/drivers/xen/events/events_base.c @@ -33,6 +33,7 @@ #include <linux/slab.h> #include <linux/irqnr.h> #include <linux/pci.h> +#include <linux/rcupdate.h> #include <linux/spinlock.h> #include <linux/cpuhotplug.h> #include <linux/atomic.h> @@ -96,6 +97,7 @@ enum xen_irq_type { struct irq_info { struct list_head list; struct list_head eoi_list; + struct rcu_work rwork; short refcnt; u8 spurious_cnt; u8 is_accounted; @@ -147,22 +149,12 @@ const struct evtchn_ops *evtchn_ops; static DEFINE_MUTEX(irq_mapping_update_lock); /* - * Lock protecting event handling loop against removing event channels. - * Adding of event channels is no issue as the associated IRQ becomes active - * only after everything is setup (before request_[threaded_]irq() the handler - * can't be entered for an event, as the event channel will be unmasked only - * then). - */ -static DEFINE_RWLOCK(evtchn_rwlock); - -/* * Lock hierarchy: * * irq_mapping_update_lock - * evtchn_rwlock - * IRQ-desc lock - * percpu eoi_list_lock - * irq_info->lock + * IRQ-desc lock + * percpu eoi_list_lock + * irq_info->lock */ static LIST_HEAD(xen_irq_list_head); @@ -306,6 +298,22 @@ static void channels_on_cpu_inc(struct irq_info *info) info->is_accounted = 1; } +static void delayed_free_irq(struct work_struct *work) +{ + struct irq_info *info = container_of(to_rcu_work(work), struct irq_info, + rwork); + unsigned int irq = info->irq; + + /* Remove the info pointer only now, with no potential users left. */ + set_info_for_irq(irq, NULL); + + kfree(info); + + /* Legacy IRQ descriptors are managed by the arch. */ + if (irq >= nr_legacy_irqs()) + irq_free_desc(irq); +} + /* Constructors for packed IRQ information. */ static int xen_irq_info_common_setup(struct irq_info *info, unsigned irq, @@ -668,33 +676,36 @@ static void xen_irq_lateeoi_worker(struct work_struct *work) eoi = container_of(to_delayed_work(work), struct lateeoi_work, delayed); - read_lock_irqsave(&evtchn_rwlock, flags); + rcu_read_lock(); while (true) { - spin_lock(&eoi->eoi_list_lock); + spin_lock_irqsave(&eoi->eoi_list_lock, flags); info = list_first_entry_or_null(&eoi->eoi_list, struct irq_info, eoi_list); - if (info == NULL || now < info->eoi_time) { - spin_unlock(&eoi->eoi_list_lock); + if (info == NULL) + break; + + if (now < info->eoi_time) { + mod_delayed_work_on(info->eoi_cpu, system_wq, + &eoi->delayed, + info->eoi_time - now); break; } list_del_init(&info->eoi_list); - spin_unlock(&eoi->eoi_list_lock); + spin_unlock_irqrestore(&eoi->eoi_list_lock, flags); info->eoi_time = 0; xen_irq_lateeoi_locked(info, false); } - if (info) - mod_delayed_work_on(info->eoi_cpu, system_wq, - &eoi->delayed, info->eoi_time - now); + spin_unlock_irqrestore(&eoi->eoi_list_lock, flags); - read_unlock_irqrestore(&evtchn_rwlock, flags); + rcu_read_unlock(); } static void xen_cpu_init_eoi(unsigned int cpu) @@ -709,16 +720,15 @@ static void xen_cpu_init_eoi(unsigned int cpu) void xen_irq_lateeoi(unsigned int irq, unsigned int eoi_flags) { struct irq_info *info; - unsigned long flags; - read_lock_irqsave(&evtchn_rwlock, flags); + rcu_read_lock(); info = info_for_irq(irq); if (info) xen_irq_lateeoi_locked(info, eoi_flags & XEN_EOI_FLAG_SPURIOUS); - read_unlock_irqrestore(&evtchn_rwlock, flags); + rcu_read_unlock(); } EXPORT_SYMBOL_GPL(xen_irq_lateeoi); @@ -732,6 +742,7 @@ static void xen_irq_init(unsigned irq) info->type = IRQT_UNBOUND; info->refcnt = -1; + INIT_RCU_WORK(&info->rwork, delayed_free_irq); set_info_for_irq(irq, info); /* @@ -789,31 +800,18 @@ static int __must_check xen_allocate_irq_gsi(unsigned gsi) static void xen_free_irq(unsigned irq) { struct irq_info *info = info_for_irq(irq); - unsigned long flags; if (WARN_ON(!info)) return; - write_lock_irqsave(&evtchn_rwlock, flags); - if (!list_empty(&info->eoi_list)) lateeoi_list_del(info); list_del(&info->list); - set_info_for_irq(irq, NULL); - WARN_ON(info->refcnt > 0); - write_unlock_irqrestore(&evtchn_rwlock, flags); - - kfree(info); - - /* Legacy IRQ descriptors are managed by the arch. */ - if (irq < nr_legacy_irqs()) - return; - - irq_free_desc(irq); + queue_rcu_work(system_wq, &info->rwork); } /* Not called for lateeoi events. */ @@ -1711,7 +1709,14 @@ int xen_evtchn_do_upcall(void) int cpu = smp_processor_id(); struct evtchn_loop_ctrl ctrl = { 0 }; - read_lock(&evtchn_rwlock); + /* + * When closing an event channel the associated IRQ must not be freed + * until all cpus have left the event handling loop. This is ensured + * by taking the rcu_read_lock() while handling events, as freeing of + * the IRQ is handled via queue_rcu_work() _after_ closing the event + * channel. + */ + rcu_read_lock(); do { vcpu_info->evtchn_upcall_pending = 0; @@ -1724,7 +1729,7 @@ int xen_evtchn_do_upcall(void) } while (vcpu_info->evtchn_upcall_pending); - read_unlock(&evtchn_rwlock); + rcu_read_unlock(); /* * Increment irq_epoch only now to defer EOIs only for diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index a4cb4b642987..da519c1b6ad0 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -682,18 +682,30 @@ noinline int btrfs_cow_block(struct btrfs_trans_handle *trans, u64 search_start; int ret; - if (test_bit(BTRFS_ROOT_DELETING, &root->state)) - btrfs_err(fs_info, - "COW'ing blocks on a fs root that's being dropped"); - - if (trans->transaction != fs_info->running_transaction) - WARN(1, KERN_CRIT "trans %llu running %llu\n", - trans->transid, - fs_info->running_transaction->transid); + if (unlikely(test_bit(BTRFS_ROOT_DELETING, &root->state))) { + btrfs_abort_transaction(trans, -EUCLEAN); + btrfs_crit(fs_info, + "attempt to COW block %llu on root %llu that is being deleted", + buf->start, btrfs_root_id(root)); + return -EUCLEAN; + } - if (trans->transid != fs_info->generation) - WARN(1, KERN_CRIT "trans %llu running %llu\n", - trans->transid, fs_info->generation); + /* + * COWing must happen through a running transaction, which always + * matches the current fs generation (it's a transaction with a state + * less than TRANS_STATE_UNBLOCKED). If it doesn't, then turn the fs + * into error state to prevent the commit of any transaction. + */ + if (unlikely(trans->transaction != fs_info->running_transaction || + trans->transid != fs_info->generation)) { + btrfs_abort_transaction(trans, -EUCLEAN); + btrfs_crit(fs_info, +"unexpected transaction when attempting to COW block %llu on root %llu, transaction %llu running transaction %llu fs generation %llu", + buf->start, btrfs_root_id(root), trans->transid, + fs_info->running_transaction->transid, + fs_info->generation); + return -EUCLEAN; + } if (!should_cow_block(trans, root, buf)) { *cow_ret = buf; @@ -805,8 +817,22 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, int progress_passed = 0; struct btrfs_disk_key disk_key; - WARN_ON(trans->transaction != fs_info->running_transaction); - WARN_ON(trans->transid != fs_info->generation); + /* + * COWing must happen through a running transaction, which always + * matches the current fs generation (it's a transaction with a state + * less than TRANS_STATE_UNBLOCKED). If it doesn't, then turn the fs + * into error state to prevent the commit of any transaction. + */ + if (unlikely(trans->transaction != fs_info->running_transaction || + trans->transid != fs_info->generation)) { + btrfs_abort_transaction(trans, -EUCLEAN); + btrfs_crit(fs_info, +"unexpected transaction when attempting to reallocate parent %llu for root %llu, transaction %llu running transaction %llu fs generation %llu", + parent->start, btrfs_root_id(root), trans->transid, + fs_info->running_transaction->transid, + fs_info->generation); + return -EUCLEAN; + } parent_nritems = btrfs_header_nritems(parent); blocksize = fs_info->nodesize; diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index caf0bbd028d1..90aaedce1548 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c @@ -313,7 +313,7 @@ static struct btrfs_delayed_item *btrfs_alloc_delayed_item(u16 data_len, { struct btrfs_delayed_item *item; - item = kmalloc(sizeof(*item) + data_len, GFP_NOFS); + item = kmalloc(struct_size(item, data, data_len), GFP_NOFS); if (item) { item->data_len = data_len; item->type = type; diff --git a/fs/btrfs/delayed-inode.h b/fs/btrfs/delayed-inode.h index dc1085b2a397..1da213197f55 100644 --- a/fs/btrfs/delayed-inode.h +++ b/fs/btrfs/delayed-inode.h @@ -95,7 +95,7 @@ struct btrfs_delayed_item { bool logged; /* The maximum leaf size is 64K, so u16 is more than enough. */ u16 data_len; - char data[]; + char data[] __counted_by(data_len); }; static inline void btrfs_init_delayed_root( diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 75ab766fe156..8e7d03bc1b56 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -2978,7 +2978,7 @@ static void get_block_group_info(struct list_head *groups_list, static long btrfs_ioctl_space_info(struct btrfs_fs_info *fs_info, void __user *arg) { - struct btrfs_ioctl_space_args space_args; + struct btrfs_ioctl_space_args space_args = { 0 }; struct btrfs_ioctl_space_info space; struct btrfs_ioctl_space_info *dest; struct btrfs_ioctl_space_info *dest_orig; @@ -4338,7 +4338,7 @@ static int _btrfs_ioctl_send(struct inode *inode, void __user *argp, bool compat if (compat) { #if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT) - struct btrfs_ioctl_send_args_32 args32; + struct btrfs_ioctl_send_args_32 args32 = { 0 }; ret = copy_from_user(&args32, argp, sizeof(args32)); if (ret) diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 6b309f8a99a8..93869cda6af9 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -219,8 +219,8 @@ do { \ (errno))) { \ /* Stack trace printed. */ \ } else { \ - btrfs_debug((trans)->fs_info, \ - "Transaction aborted (error %d)", \ + btrfs_err((trans)->fs_info, \ + "Transaction aborted (error %d)", \ (errno)); \ } \ } \ diff --git a/fs/erofs/decompressor_lzma.c b/fs/erofs/decompressor_lzma.c index 73091fbe3ea4..dee10d22ada9 100644 --- a/fs/erofs/decompressor_lzma.c +++ b/fs/erofs/decompressor_lzma.c @@ -217,9 +217,12 @@ again: strm->buf.out_size = min_t(u32, outlen, PAGE_SIZE - pageofs); outlen -= strm->buf.out_size; - if (!rq->out[no] && rq->fillgaps) /* deduped */ + if (!rq->out[no] && rq->fillgaps) { /* deduped */ rq->out[no] = erofs_allocpage(pagepool, GFP_KERNEL | __GFP_NOFAIL); + set_page_private(rq->out[no], + Z_EROFS_SHORTLIVED_PAGE); + } if (rq->out[no]) strm->buf.out = kmap(rq->out[no]) + pageofs; pageofs = 0; diff --git a/fs/erofs/super.c b/fs/erofs/super.c index 44a24d573f1f..3700af9ee173 100644 --- a/fs/erofs/super.c +++ b/fs/erofs/super.c @@ -235,7 +235,7 @@ static int erofs_init_device(struct erofs_buf *buf, struct super_block *sb, return PTR_ERR(ptr); dis = ptr + erofs_blkoff(sb, *pos); - if (!dif->path) { + if (!sbi->devs->flatdev && !dif->path) { if (!dis->tag[0]) { erofs_err(sb, "empty device tag @ pos %llu", *pos); return -EINVAL; diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 9e72bfe8bbad..31e897ad5e6a 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -233,19 +233,18 @@ static void put_quota_format(struct quota_format_type *fmt) * All dquots are placed to the end of inuse_list when first created, and this * list is used for invalidate operation, which must look at every dquot. * - * When the last reference of a dquot will be dropped, the dquot will be - * added to releasing_dquots. We'd then queue work item which would call + * When the last reference of a dquot is dropped, the dquot is added to + * releasing_dquots. We'll then queue work item which will call * synchronize_srcu() and after that perform the final cleanup of all the - * dquots on the list. Both releasing_dquots and free_dquots use the - * dq_free list_head in the dquot struct. When a dquot is removed from - * releasing_dquots, a reference count is always subtracted, and if - * dq_count == 0 at that point, the dquot will be added to the free_dquots. + * dquots on the list. Each cleaned up dquot is moved to free_dquots list. + * Both releasing_dquots and free_dquots use the dq_free list_head in the dquot + * struct. * - * Unused dquots (dq_count == 0) are added to the free_dquots list when freed, - * and this list is searched whenever we need an available dquot. Dquots are - * removed from the list as soon as they are used again, and - * dqstats.free_dquots gives the number of dquots on the list. When - * dquot is invalidated it's completely released from memory. + * Unused and cleaned up dquots are in the free_dquots list and this list is + * searched whenever we need an available dquot. Dquots are removed from the + * list as soon as they are used again and dqstats.free_dquots gives the number + * of dquots on the list. When dquot is invalidated it's completely released + * from memory. * * Dirty dquots are added to the dqi_dirty_list of quota_info when mark * dirtied, and this list is searched when writing dirty dquots back to @@ -321,6 +320,7 @@ static inline void put_dquot_last(struct dquot *dquot) static inline void put_releasing_dquots(struct dquot *dquot) { list_add_tail(&dquot->dq_free, &releasing_dquots); + set_bit(DQ_RELEASING_B, &dquot->dq_flags); } static inline void remove_free_dquot(struct dquot *dquot) @@ -328,8 +328,10 @@ static inline void remove_free_dquot(struct dquot *dquot) if (list_empty(&dquot->dq_free)) return; list_del_init(&dquot->dq_free); - if (!atomic_read(&dquot->dq_count)) + if (!test_bit(DQ_RELEASING_B, &dquot->dq_flags)) dqstats_dec(DQST_FREE_DQUOTS); + else + clear_bit(DQ_RELEASING_B, &dquot->dq_flags); } static inline void put_inuse(struct dquot *dquot) @@ -581,12 +583,6 @@ restart: continue; /* Wait for dquot users */ if (atomic_read(&dquot->dq_count)) { - /* dquot in releasing_dquots, flush and retry */ - if (!list_empty(&dquot->dq_free)) { - spin_unlock(&dq_list_lock); - goto restart; - } - atomic_inc(&dquot->dq_count); spin_unlock(&dq_list_lock); /* @@ -606,6 +602,15 @@ restart: goto restart; } /* + * The last user already dropped its reference but dquot didn't + * get fully cleaned up yet. Restart the scan which flushes the + * work cleaning up released dquots. + */ + if (test_bit(DQ_RELEASING_B, &dquot->dq_flags)) { + spin_unlock(&dq_list_lock); + goto restart; + } + /* * Quota now has no users and it has been written on last * dqput() */ @@ -696,6 +701,13 @@ int dquot_writeback_dquots(struct super_block *sb, int type) dq_dirty); WARN_ON(!dquot_active(dquot)); + /* If the dquot is releasing we should not touch it */ + if (test_bit(DQ_RELEASING_B, &dquot->dq_flags)) { + spin_unlock(&dq_list_lock); + flush_delayed_work("a_release_work); + spin_lock(&dq_list_lock); + continue; + } /* Now we have active dquot from which someone is * holding reference so we can safely just increase @@ -809,18 +821,18 @@ static void quota_release_workfn(struct work_struct *work) /* Exchange the list head to avoid livelock. */ list_replace_init(&releasing_dquots, &rls_head); spin_unlock(&dq_list_lock); + synchronize_srcu(&dquot_srcu); restart: - synchronize_srcu(&dquot_srcu); spin_lock(&dq_list_lock); while (!list_empty(&rls_head)) { dquot = list_first_entry(&rls_head, struct dquot, dq_free); - /* Dquot got used again? */ - if (atomic_read(&dquot->dq_count) > 1) { - remove_free_dquot(dquot); - atomic_dec(&dquot->dq_count); - continue; - } + WARN_ON_ONCE(atomic_read(&dquot->dq_count)); + /* + * Note that DQ_RELEASING_B protects us from racing with + * invalidate_dquots() calls so we are safe to work with the + * dquot even after we drop dq_list_lock. + */ if (dquot_dirty(dquot)) { spin_unlock(&dq_list_lock); /* Commit dquot before releasing */ @@ -834,7 +846,6 @@ restart: } /* Dquot is inactive and clean, now move it to free list */ remove_free_dquot(dquot); - atomic_dec(&dquot->dq_count); put_dquot_last(dquot); } spin_unlock(&dq_list_lock); @@ -875,6 +886,7 @@ void dqput(struct dquot *dquot) BUG_ON(!list_empty(&dquot->dq_free)); #endif put_releasing_dquots(dquot); + atomic_dec(&dquot->dq_count); spin_unlock(&dq_list_lock); queue_delayed_work(system_unbound_wq, "a_release_work, 1); } @@ -963,7 +975,7 @@ we_slept: dqstats_inc(DQST_LOOKUPS); } /* Wait for dq_lock - after this we know that either dquot_release() is - * already finished or it will be canceled due to dq_count > 1 test */ + * already finished or it will be canceled due to dq_count > 0 test */ wait_on_dquot(dquot); /* Read the dquot / allocate space in quota file */ if (!dquot_active(dquot)) { diff --git a/fs/smb/client/connect.c b/fs/smb/client/connect.c index 3902e90dca6b..7b923e36501b 100644 --- a/fs/smb/client/connect.c +++ b/fs/smb/client/connect.c @@ -2474,8 +2474,9 @@ cifs_put_tcon(struct cifs_tcon *tcon) static struct cifs_tcon * cifs_get_tcon(struct cifs_ses *ses, struct smb3_fs_context *ctx) { - int rc, xid; struct cifs_tcon *tcon; + bool nohandlecache; + int rc, xid; tcon = cifs_find_tcon(ses, ctx); if (tcon) { @@ -2493,14 +2494,17 @@ cifs_get_tcon(struct cifs_ses *ses, struct smb3_fs_context *ctx) goto out_fail; } - if (ses->server->capabilities & SMB2_GLOBAL_CAP_DIRECTORY_LEASING) - tcon = tcon_info_alloc(true); + if (ses->server->dialect >= SMB20_PROT_ID && + (ses->server->capabilities & SMB2_GLOBAL_CAP_DIRECTORY_LEASING)) + nohandlecache = ctx->nohandlecache; else - tcon = tcon_info_alloc(false); + nohandlecache = true; + tcon = tcon_info_alloc(!nohandlecache); if (tcon == NULL) { rc = -ENOMEM; goto out_fail; } + tcon->nohandlecache = nohandlecache; if (ctx->snapshot_time) { if (ses->server->vals->protocol_id == 0) { @@ -2662,10 +2666,6 @@ cifs_get_tcon(struct cifs_ses *ses, struct smb3_fs_context *ctx) tcon->nocase = ctx->nocase; tcon->broken_sparse_sup = ctx->no_sparse; tcon->max_cached_dirs = ctx->max_cached_dirs; - if (ses->server->capabilities & SMB2_GLOBAL_CAP_DIRECTORY_LEASING) - tcon->nohandlecache = ctx->nohandlecache; - else - tcon->nohandlecache = true; tcon->nodelete = ctx->nodelete; tcon->local_lease = ctx->local_lease; INIT_LIST_HEAD(&tcon->pending_opens); @@ -2895,9 +2895,9 @@ bind_socket(struct TCP_Server_Info *server) if (server->srcaddr.ss_family != AF_UNSPEC) { /* Bind to the specified local IP address */ struct socket *socket = server->ssocket; - rc = socket->ops->bind(socket, - (struct sockaddr *) &server->srcaddr, - sizeof(server->srcaddr)); + rc = kernel_bind(socket, + (struct sockaddr *) &server->srcaddr, + sizeof(server->srcaddr)); if (rc < 0) { struct sockaddr_in *saddr4; struct sockaddr_in6 *saddr6; @@ -3046,8 +3046,8 @@ generic_ip_connect(struct TCP_Server_Info *server) socket->sk->sk_sndbuf, socket->sk->sk_rcvbuf, socket->sk->sk_rcvtimeo); - rc = socket->ops->connect(socket, saddr, slen, - server->noblockcnt ? O_NONBLOCK : 0); + rc = kernel_connect(socket, saddr, slen, + server->noblockcnt ? O_NONBLOCK : 0); /* * When mounting SMB root file systems, we do not want to block in * connect. Otherwise bail out and then let cifs_reconnect() perform diff --git a/fs/smb/server/connection.c b/fs/smb/server/connection.c index db7fa704a3f6..4b38c3a285f6 100644 --- a/fs/smb/server/connection.c +++ b/fs/smb/server/connection.c @@ -84,6 +84,8 @@ struct ksmbd_conn *ksmbd_conn_alloc(void) spin_lock_init(&conn->llist_lock); INIT_LIST_HEAD(&conn->lock_list); + init_rwsem(&conn->session_lock); + down_write(&conn_list_lock); list_add(&conn->conns_list, &conn_list); up_write(&conn_list_lock); diff --git a/fs/smb/server/connection.h b/fs/smb/server/connection.h index ab2583f030ce..3c005246a32e 100644 --- a/fs/smb/server/connection.h +++ b/fs/smb/server/connection.h @@ -50,6 +50,7 @@ struct ksmbd_conn { struct nls_table *local_nls; struct unicode_map *um; struct list_head conns_list; + struct rw_semaphore session_lock; /* smb session 1 per user */ struct xarray sessions; unsigned long last_active; diff --git a/fs/smb/server/mgmt/tree_connect.c b/fs/smb/server/mgmt/tree_connect.c index 408cddf2f094..d2c81a8a11dd 100644 --- a/fs/smb/server/mgmt/tree_connect.c +++ b/fs/smb/server/mgmt/tree_connect.c @@ -73,7 +73,10 @@ ksmbd_tree_conn_connect(struct ksmbd_conn *conn, struct ksmbd_session *sess, tree_conn->user = sess->user; tree_conn->share_conf = sc; + tree_conn->t_state = TREE_NEW; status.tree_conn = tree_conn; + atomic_set(&tree_conn->refcount, 1); + init_waitqueue_head(&tree_conn->refcount_q); ret = xa_err(xa_store(&sess->tree_conns, tree_conn->id, tree_conn, GFP_KERNEL)); @@ -93,14 +96,33 @@ out_error: return status; } +void ksmbd_tree_connect_put(struct ksmbd_tree_connect *tcon) +{ + /* + * Checking waitqueue to releasing tree connect on + * tree disconnect. waitqueue_active is safe because it + * uses atomic operation for condition. + */ + if (!atomic_dec_return(&tcon->refcount) && + waitqueue_active(&tcon->refcount_q)) + wake_up(&tcon->refcount_q); +} + int ksmbd_tree_conn_disconnect(struct ksmbd_session *sess, struct ksmbd_tree_connect *tree_conn) { int ret; + write_lock(&sess->tree_conns_lock); + xa_erase(&sess->tree_conns, tree_conn->id); + write_unlock(&sess->tree_conns_lock); + + if (!atomic_dec_and_test(&tree_conn->refcount)) + wait_event(tree_conn->refcount_q, + atomic_read(&tree_conn->refcount) == 0); + ret = ksmbd_ipc_tree_disconnect_request(sess->id, tree_conn->id); ksmbd_release_tree_conn_id(sess, tree_conn->id); - xa_erase(&sess->tree_conns, tree_conn->id); ksmbd_share_config_put(tree_conn->share_conf); kfree(tree_conn); return ret; @@ -111,11 +133,15 @@ struct ksmbd_tree_connect *ksmbd_tree_conn_lookup(struct ksmbd_session *sess, { struct ksmbd_tree_connect *tcon; + read_lock(&sess->tree_conns_lock); tcon = xa_load(&sess->tree_conns, id); if (tcon) { - if (test_bit(TREE_CONN_EXPIRE, &tcon->status)) + if (tcon->t_state != TREE_CONNECTED) + tcon = NULL; + else if (!atomic_inc_not_zero(&tcon->refcount)) tcon = NULL; } + read_unlock(&sess->tree_conns_lock); return tcon; } @@ -129,8 +155,18 @@ int ksmbd_tree_conn_session_logoff(struct ksmbd_session *sess) if (!sess) return -EINVAL; - xa_for_each(&sess->tree_conns, id, tc) + xa_for_each(&sess->tree_conns, id, tc) { + write_lock(&sess->tree_conns_lock); + if (tc->t_state == TREE_DISCONNECTED) { + write_unlock(&sess->tree_conns_lock); + ret = -ENOENT; + continue; + } + tc->t_state = TREE_DISCONNECTED; + write_unlock(&sess->tree_conns_lock); + ret |= ksmbd_tree_conn_disconnect(sess, tc); + } xa_destroy(&sess->tree_conns); return ret; } diff --git a/fs/smb/server/mgmt/tree_connect.h b/fs/smb/server/mgmt/tree_connect.h index 562d647ad9fa..6377a70b811c 100644 --- a/fs/smb/server/mgmt/tree_connect.h +++ b/fs/smb/server/mgmt/tree_connect.h @@ -14,7 +14,11 @@ struct ksmbd_share_config; struct ksmbd_user; struct ksmbd_conn; -#define TREE_CONN_EXPIRE 1 +enum { + TREE_NEW = 0, + TREE_CONNECTED, + TREE_DISCONNECTED +}; struct ksmbd_tree_connect { int id; @@ -27,7 +31,9 @@ struct ksmbd_tree_connect { int maximal_access; bool posix_extensions; - unsigned long status; + atomic_t refcount; + wait_queue_head_t refcount_q; + unsigned int t_state; }; struct ksmbd_tree_conn_status { @@ -46,6 +52,7 @@ struct ksmbd_session; struct ksmbd_tree_conn_status ksmbd_tree_conn_connect(struct ksmbd_conn *conn, struct ksmbd_session *sess, const char *share_name); +void ksmbd_tree_connect_put(struct ksmbd_tree_connect *tcon); int ksmbd_tree_conn_disconnect(struct ksmbd_session *sess, struct ksmbd_tree_connect *tree_conn); diff --git a/fs/smb/server/mgmt/user_session.c b/fs/smb/server/mgmt/user_session.c index 8a5dcab05614..15f68ee05089 100644 --- a/fs/smb/server/mgmt/user_session.c +++ b/fs/smb/server/mgmt/user_session.c @@ -174,7 +174,7 @@ static void ksmbd_expire_session(struct ksmbd_conn *conn) unsigned long id; struct ksmbd_session *sess; - down_write(&sessions_table_lock); + down_write(&conn->session_lock); xa_for_each(&conn->sessions, id, sess) { if (sess->state != SMB2_SESSION_VALID || time_after(jiffies, @@ -185,7 +185,7 @@ static void ksmbd_expire_session(struct ksmbd_conn *conn) continue; } } - up_write(&sessions_table_lock); + up_write(&conn->session_lock); } int ksmbd_session_register(struct ksmbd_conn *conn, @@ -227,7 +227,9 @@ void ksmbd_sessions_deregister(struct ksmbd_conn *conn) } } } + up_write(&sessions_table_lock); + down_write(&conn->session_lock); xa_for_each(&conn->sessions, id, sess) { unsigned long chann_id; struct channel *chann; @@ -244,7 +246,7 @@ void ksmbd_sessions_deregister(struct ksmbd_conn *conn) ksmbd_session_destroy(sess); } } - up_write(&sessions_table_lock); + up_write(&conn->session_lock); } struct ksmbd_session *ksmbd_session_lookup(struct ksmbd_conn *conn, @@ -252,9 +254,11 @@ struct ksmbd_session *ksmbd_session_lookup(struct ksmbd_conn *conn, { struct ksmbd_session *sess; + down_read(&conn->session_lock); sess = xa_load(&conn->sessions, id); if (sess) sess->last_active = jiffies; + up_read(&conn->session_lock); return sess; } @@ -351,6 +355,7 @@ static struct ksmbd_session *__session_create(int protocol) xa_init(&sess->ksmbd_chann_list); xa_init(&sess->rpc_handle_list); sess->sequence_number = 1; + rwlock_init(&sess->tree_conns_lock); ret = __init_smb2_session(sess); if (ret) diff --git a/fs/smb/server/mgmt/user_session.h b/fs/smb/server/mgmt/user_session.h index f99d475b28db..63cb08fffde8 100644 --- a/fs/smb/server/mgmt/user_session.h +++ b/fs/smb/server/mgmt/user_session.h @@ -60,6 +60,7 @@ struct ksmbd_session { struct ksmbd_file_table file_table; unsigned long last_active; + rwlock_t tree_conns_lock; }; static inline int test_session_flag(struct ksmbd_session *sess, int bit) diff --git a/fs/smb/server/server.c b/fs/smb/server/server.c index 32347fec33c4..3079e607c5fe 100644 --- a/fs/smb/server/server.c +++ b/fs/smb/server/server.c @@ -241,6 +241,8 @@ static void __handle_ksmbd_work(struct ksmbd_work *work, } while (is_chained == true); send: + if (work->tcon) + ksmbd_tree_connect_put(work->tcon); smb3_preauth_hash_rsp(work); if (work->sess && work->sess->enc && work->encrypted && conn->ops->encrypt_resp) { diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index 544022dd6d20..898860adf929 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -1993,6 +1993,9 @@ int smb2_tree_connect(struct ksmbd_work *work) if (conn->posix_ext_supported) status.tree_conn->posix_extensions = true; + write_lock(&sess->tree_conns_lock); + status.tree_conn->t_state = TREE_CONNECTED; + write_unlock(&sess->tree_conns_lock); rsp->StructureSize = cpu_to_le16(16); out_err1: rsp->Capabilities = 0; @@ -2122,27 +2125,50 @@ int smb2_tree_disconnect(struct ksmbd_work *work) ksmbd_debug(SMB, "request\n"); + if (!tcon) { + ksmbd_debug(SMB, "Invalid tid %d\n", req->hdr.Id.SyncId.TreeId); + + rsp->hdr.Status = STATUS_NETWORK_NAME_DELETED; + err = -ENOENT; + goto err_out; + } + + ksmbd_close_tree_conn_fds(work); + + write_lock(&sess->tree_conns_lock); + if (tcon->t_state == TREE_DISCONNECTED) { + write_unlock(&sess->tree_conns_lock); + rsp->hdr.Status = STATUS_NETWORK_NAME_DELETED; + err = -ENOENT; + goto err_out; + } + + WARN_ON_ONCE(atomic_dec_and_test(&tcon->refcount)); + tcon->t_state = TREE_DISCONNECTED; + write_unlock(&sess->tree_conns_lock); + + err = ksmbd_tree_conn_disconnect(sess, tcon); + if (err) { + rsp->hdr.Status = STATUS_NETWORK_NAME_DELETED; + goto err_out; + } + + work->tcon = NULL; + rsp->StructureSize = cpu_to_le16(4); err = ksmbd_iov_pin_rsp(work, rsp, sizeof(struct smb2_tree_disconnect_rsp)); if (err) { rsp->hdr.Status = STATUS_INSUFFICIENT_RESOURCES; - smb2_set_err_rsp(work); - return err; + goto err_out; } - if (!tcon || test_and_set_bit(TREE_CONN_EXPIRE, &tcon->status)) { - ksmbd_debug(SMB, "Invalid tid %d\n", req->hdr.Id.SyncId.TreeId); + return 0; - rsp->hdr.Status = STATUS_NETWORK_NAME_DELETED; - smb2_set_err_rsp(work); - return -ENOENT; - } +err_out: + smb2_set_err_rsp(work); + return err; - ksmbd_close_tree_conn_fds(work); - ksmbd_tree_conn_disconnect(sess, tcon); - work->tcon = NULL; - return 0; } /** @@ -2164,17 +2190,17 @@ int smb2_session_logoff(struct ksmbd_work *work) ksmbd_debug(SMB, "request\n"); - sess_id = le64_to_cpu(req->hdr.SessionId); - - rsp->StructureSize = cpu_to_le16(4); - err = ksmbd_iov_pin_rsp(work, rsp, sizeof(struct smb2_logoff_rsp)); - if (err) { - rsp->hdr.Status = STATUS_INSUFFICIENT_RESOURCES; + ksmbd_conn_lock(conn); + if (!ksmbd_conn_good(conn)) { + ksmbd_conn_unlock(conn); + rsp->hdr.Status = STATUS_NETWORK_NAME_DELETED; smb2_set_err_rsp(work); - return err; + return -ENOENT; } - + sess_id = le64_to_cpu(req->hdr.SessionId); ksmbd_all_conn_set_status(sess_id, KSMBD_SESS_NEED_RECONNECT); + ksmbd_conn_unlock(conn); + ksmbd_close_session_fds(work); ksmbd_conn_wait_idle(conn, sess_id); @@ -2196,6 +2222,14 @@ int smb2_session_logoff(struct ksmbd_work *work) ksmbd_free_user(sess->user); sess->user = NULL; ksmbd_all_conn_set_status(sess_id, KSMBD_SESS_NEED_NEGOTIATE); + + rsp->StructureSize = cpu_to_le16(4); + err = ksmbd_iov_pin_rsp(work, rsp, sizeof(struct smb2_logoff_rsp)); + if (err) { + rsp->hdr.Status = STATUS_INSUFFICIENT_RESOURCES; + smb2_set_err_rsp(work); + return err; + } return 0; } @@ -3370,8 +3404,10 @@ err_out: } ksmbd_revert_fsids(work); err_out1: - if (!rc) + if (!rc) { + ksmbd_update_fstate(&work->sess->file_table, fp, FP_INITED); rc = ksmbd_iov_pin_rsp(work, (void *)rsp, iov_len); + } if (rc) { if (rc == -EINVAL) rsp->hdr.Status = STATUS_INVALID_PARAMETER; @@ -7028,10 +7064,6 @@ skip: ksmbd_debug(SMB, "would have to wait for getting lock\n"); - spin_lock(&work->conn->llist_lock); - list_add_tail(&smb_lock->clist, - &work->conn->lock_list); - spin_unlock(&work->conn->llist_lock); list_add(&smb_lock->llist, &rollback_list); argv = kmalloc(sizeof(void *), GFP_KERNEL); @@ -7062,9 +7094,6 @@ skip: if (work->state != KSMBD_WORK_ACTIVE) { list_del(&smb_lock->llist); - spin_lock(&work->conn->llist_lock); - list_del(&smb_lock->clist); - spin_unlock(&work->conn->llist_lock); locks_free_lock(flock); if (work->state == KSMBD_WORK_CANCELLED) { @@ -7084,19 +7113,16 @@ skip: } list_del(&smb_lock->llist); - spin_lock(&work->conn->llist_lock); - list_del(&smb_lock->clist); - spin_unlock(&work->conn->llist_lock); release_async_work(work); goto retry; } else if (!rc) { + list_add(&smb_lock->llist, &rollback_list); spin_lock(&work->conn->llist_lock); list_add_tail(&smb_lock->clist, &work->conn->lock_list); list_add_tail(&smb_lock->flist, &fp->lock_list); spin_unlock(&work->conn->llist_lock); - list_add(&smb_lock->llist, &rollback_list); ksmbd_debug(SMB, "successful in taking lock\n"); } else { goto out; @@ -8036,10 +8062,10 @@ static void smb20_oplock_break_ack(struct ksmbd_work *work) goto err_out; } - opinfo_put(opinfo); - ksmbd_fd_put(work, fp); opinfo->op_state = OPLOCK_STATE_NONE; wake_up_interruptible_all(&opinfo->oplock_q); + opinfo_put(opinfo); + ksmbd_fd_put(work, fp); rsp->StructureSize = cpu_to_le16(24); rsp->OplockLevel = rsp_oplevel; diff --git a/fs/smb/server/vfs_cache.c b/fs/smb/server/vfs_cache.c index f41f8d6108ce..c4b80ab7df74 100644 --- a/fs/smb/server/vfs_cache.c +++ b/fs/smb/server/vfs_cache.c @@ -333,6 +333,9 @@ static void __ksmbd_close_fd(struct ksmbd_file_table *ft, struct ksmbd_file *fp) static struct ksmbd_file *ksmbd_fp_get(struct ksmbd_file *fp) { + if (fp->f_state != FP_INITED) + return NULL; + if (!atomic_inc_not_zero(&fp->refcount)) return NULL; return fp; @@ -382,15 +385,20 @@ int ksmbd_close_fd(struct ksmbd_work *work, u64 id) return 0; ft = &work->sess->file_table; - read_lock(&ft->lock); + write_lock(&ft->lock); fp = idr_find(ft->idr, id); if (fp) { set_close_state_blocked_works(fp); - if (!atomic_dec_and_test(&fp->refcount)) + if (fp->f_state != FP_INITED) fp = NULL; + else { + fp->f_state = FP_CLOSED; + if (!atomic_dec_and_test(&fp->refcount)) + fp = NULL; + } } - read_unlock(&ft->lock); + write_unlock(&ft->lock); if (!fp) return -EINVAL; @@ -570,6 +578,7 @@ struct ksmbd_file *ksmbd_open_fd(struct ksmbd_work *work, struct file *filp) fp->tcon = work->tcon; fp->volatile_id = KSMBD_NO_FID; fp->persistent_id = KSMBD_NO_FID; + fp->f_state = FP_NEW; fp->f_ci = ksmbd_inode_get(fp); if (!fp->f_ci) { @@ -591,6 +600,14 @@ err_out: return ERR_PTR(ret); } +void ksmbd_update_fstate(struct ksmbd_file_table *ft, struct ksmbd_file *fp, + unsigned int state) +{ + write_lock(&ft->lock); + fp->f_state = state; + write_unlock(&ft->lock); +} + static int __close_file_table_ids(struct ksmbd_file_table *ft, struct ksmbd_tree_connect *tcon, diff --git a/fs/smb/server/vfs_cache.h b/fs/smb/server/vfs_cache.h index fcb13413fa8d..03d0bf941216 100644 --- a/fs/smb/server/vfs_cache.h +++ b/fs/smb/server/vfs_cache.h @@ -60,6 +60,12 @@ struct ksmbd_inode { __le32 m_fattr; }; +enum { + FP_NEW = 0, + FP_INITED, + FP_CLOSED +}; + struct ksmbd_file { struct file *filp; u64 persistent_id; @@ -98,6 +104,7 @@ struct ksmbd_file { /* if ls is happening on directory, below is valid*/ struct ksmbd_readdir_data readdir_data; int dot_dotdot[2]; + unsigned int f_state; }; static inline void set_ctx_actor(struct dir_context *ctx, @@ -142,6 +149,8 @@ int ksmbd_close_inode_fds(struct ksmbd_work *work, struct inode *inode); int ksmbd_init_global_file_table(void); void ksmbd_free_global_file_table(void); void ksmbd_set_fd_limit(unsigned long limit); +void ksmbd_update_fstate(struct ksmbd_file_table *ft, struct ksmbd_file *fp, + unsigned int state); /* * INODE hash diff --git a/fs/xfs/xfs_discard.c b/fs/xfs/xfs_discard.c index afc4c78b9eed..d5787991bb5b 100644 --- a/fs/xfs/xfs_discard.c +++ b/fs/xfs/xfs_discard.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* - * Copyright (C) 2010 Red Hat, Inc. + * Copyright (C) 2010, 2023 Red Hat, Inc. * All Rights Reserved. */ #include "xfs.h" @@ -19,21 +19,147 @@ #include "xfs_log.h" #include "xfs_ag.h" -STATIC int -xfs_trim_extents( +/* + * Notes on an efficient, low latency fstrim algorithm + * + * We need to walk the filesystem free space and issue discards on the free + * space that meet the search criteria (size and location). We cannot issue + * discards on extents that might be in use, or are so recently in use they are + * still marked as busy. To serialise against extent state changes whilst we are + * gathering extents to trim, we must hold the AGF lock to lock out other + * allocations and extent free operations that might change extent state. + * + * However, we cannot just hold the AGF for the entire AG free space walk whilst + * we issue discards on each free space that is found. Storage devices can have + * extremely slow discard implementations (e.g. ceph RBD) and so walking a + * couple of million free extents and issuing synchronous discards on each + * extent can take a *long* time. Whilst we are doing this walk, nothing else + * can access the AGF, and we can stall transactions and hence the log whilst + * modifications wait for the AGF lock to be released. This can lead hung tasks + * kicking the hung task timer and rebooting the system. This is bad. + * + * Hence we need to take a leaf from the bulkstat playbook. It takes the AGI + * lock, gathers a range of inode cluster buffers that are allocated, drops the + * AGI lock and then reads all the inode cluster buffers and processes them. It + * loops doing this, using a cursor to keep track of where it is up to in the AG + * for each iteration to restart the INOBT lookup from. + * + * We can't do this exactly with free space - once we drop the AGF lock, the + * state of the free extent is out of our control and we cannot run a discard + * safely on it in this situation. Unless, of course, we've marked the free + * extent as busy and undergoing a discard operation whilst we held the AGF + * locked. + * + * This is exactly how online discard works - free extents are marked busy when + * they are freed, and once the extent free has been committed to the journal, + * the busy extent record is marked as "undergoing discard" and the discard is + * then issued on the free extent. Once the discard completes, the busy extent + * record is removed and the extent is able to be allocated again. + * + * In the context of fstrim, if we find a free extent we need to discard, we + * don't have to discard it immediately. All we need to do it record that free + * extent as being busy and under discard, and all the allocation routines will + * now avoid trying to allocate it. Hence if we mark the extent as busy under + * the AGF lock, we can safely discard it without holding the AGF lock because + * nothing will attempt to allocate that free space until the discard completes. + * + * This also allows us to issue discards asynchronously like we do with online + * discard, and so for fast devices fstrim will run much faster as we can have + * multiple discard operations in flight at once, as well as pipeline the free + * extent search so that it overlaps in flight discard IO. + */ + +struct workqueue_struct *xfs_discard_wq; + +static void +xfs_discard_endio_work( + struct work_struct *work) +{ + struct xfs_busy_extents *extents = + container_of(work, struct xfs_busy_extents, endio_work); + + xfs_extent_busy_clear(extents->mount, &extents->extent_list, false); + kmem_free(extents->owner); +} + +/* + * Queue up the actual completion to a thread to avoid IRQ-safe locking for + * pagb_lock. + */ +static void +xfs_discard_endio( + struct bio *bio) +{ + struct xfs_busy_extents *extents = bio->bi_private; + + INIT_WORK(&extents->endio_work, xfs_discard_endio_work); + queue_work(xfs_discard_wq, &extents->endio_work); + bio_put(bio); +} + +/* + * Walk the discard list and issue discards on all the busy extents in the + * list. We plug and chain the bios so that we only need a single completion + * call to clear all the busy extents once the discards are complete. + */ +int +xfs_discard_extents( + struct xfs_mount *mp, + struct xfs_busy_extents *extents) +{ + struct xfs_extent_busy *busyp; + struct bio *bio = NULL; + struct blk_plug plug; + int error = 0; + + blk_start_plug(&plug); + list_for_each_entry(busyp, &extents->extent_list, list) { + trace_xfs_discard_extent(mp, busyp->agno, busyp->bno, + busyp->length); + + error = __blkdev_issue_discard(mp->m_ddev_targp->bt_bdev, + XFS_AGB_TO_DADDR(mp, busyp->agno, busyp->bno), + XFS_FSB_TO_BB(mp, busyp->length), + GFP_NOFS, &bio); + if (error && error != -EOPNOTSUPP) { + xfs_info(mp, + "discard failed for extent [0x%llx,%u], error %d", + (unsigned long long)busyp->bno, + busyp->length, + error); + break; + } + } + + if (bio) { + bio->bi_private = extents; + bio->bi_end_io = xfs_discard_endio; + submit_bio(bio); + } else { + xfs_discard_endio_work(&extents->endio_work); + } + blk_finish_plug(&plug); + + return error; +} + + +static int +xfs_trim_gather_extents( struct xfs_perag *pag, xfs_daddr_t start, xfs_daddr_t end, xfs_daddr_t minlen, + struct xfs_alloc_rec_incore *tcur, + struct xfs_busy_extents *extents, uint64_t *blocks_trimmed) { struct xfs_mount *mp = pag->pag_mount; - struct block_device *bdev = mp->m_ddev_targp->bt_bdev; struct xfs_btree_cur *cur; struct xfs_buf *agbp; - struct xfs_agf *agf; int error; int i; + int batch = 100; /* * Force out the log. This means any transactions that might have freed @@ -45,20 +171,28 @@ xfs_trim_extents( error = xfs_alloc_read_agf(pag, NULL, 0, &agbp); if (error) return error; - agf = agbp->b_addr; cur = xfs_allocbt_init_cursor(mp, NULL, agbp, pag, XFS_BTNUM_CNT); /* - * Look up the longest btree in the AGF and start with it. + * Look up the extent length requested in the AGF and start with it. */ - error = xfs_alloc_lookup_ge(cur, 0, be32_to_cpu(agf->agf_longest), &i); + if (tcur->ar_startblock == NULLAGBLOCK) + error = xfs_alloc_lookup_ge(cur, 0, tcur->ar_blockcount, &i); + else + error = xfs_alloc_lookup_le(cur, tcur->ar_startblock, + tcur->ar_blockcount, &i); if (error) goto out_del_cursor; + if (i == 0) { + /* nothing of that length left in the AG, we are done */ + tcur->ar_blockcount = 0; + goto out_del_cursor; + } /* * Loop until we are done with all extents that are large - * enough to be worth discarding. + * enough to be worth discarding or we hit batch limits. */ while (i) { xfs_agblock_t fbno; @@ -73,7 +207,16 @@ xfs_trim_extents( error = -EFSCORRUPTED; break; } - ASSERT(flen <= be32_to_cpu(agf->agf_longest)); + + if (--batch <= 0) { + /* + * Update the cursor to point at this extent so we + * restart the next batch from this extent. + */ + tcur->ar_startblock = fbno; + tcur->ar_blockcount = flen; + break; + } /* * use daddr format for all range/len calculations as that is @@ -88,6 +231,7 @@ xfs_trim_extents( */ if (dlen < minlen) { trace_xfs_discard_toosmall(mp, pag->pag_agno, fbno, flen); + tcur->ar_blockcount = 0; break; } @@ -110,29 +254,103 @@ xfs_trim_extents( goto next_extent; } - trace_xfs_discard_extent(mp, pag->pag_agno, fbno, flen); - error = blkdev_issue_discard(bdev, dbno, dlen, GFP_NOFS); - if (error) - break; + xfs_extent_busy_insert_discard(pag, fbno, flen, + &extents->extent_list); *blocks_trimmed += flen; - next_extent: error = xfs_btree_decrement(cur, 0, &i); if (error) break; - if (fatal_signal_pending(current)) { - error = -ERESTARTSYS; - break; - } + /* + * If there's no more records in the tree, we are done. Set the + * cursor block count to 0 to indicate to the caller that there + * is no more extents to search. + */ + if (i == 0) + tcur->ar_blockcount = 0; } + /* + * If there was an error, release all the gathered busy extents because + * we aren't going to issue a discard on them any more. + */ + if (error) + xfs_extent_busy_clear(mp, &extents->extent_list, false); out_del_cursor: xfs_btree_del_cursor(cur, error); xfs_buf_relse(agbp); return error; } +static bool +xfs_trim_should_stop(void) +{ + return fatal_signal_pending(current) || freezing(current); +} + +/* + * Iterate the free list gathering extents and discarding them. We need a cursor + * for the repeated iteration of gather/discard loop, so use the longest extent + * we found in the last batch as the key to start the next. + */ +static int +xfs_trim_extents( + struct xfs_perag *pag, + xfs_daddr_t start, + xfs_daddr_t end, + xfs_daddr_t minlen, + uint64_t *blocks_trimmed) +{ + struct xfs_alloc_rec_incore tcur = { + .ar_blockcount = pag->pagf_longest, + .ar_startblock = NULLAGBLOCK, + }; + int error = 0; + + do { + struct xfs_busy_extents *extents; + + extents = kzalloc(sizeof(*extents), GFP_KERNEL); + if (!extents) { + error = -ENOMEM; + break; + } + + extents->mount = pag->pag_mount; + extents->owner = extents; + INIT_LIST_HEAD(&extents->extent_list); + + error = xfs_trim_gather_extents(pag, start, end, minlen, + &tcur, extents, blocks_trimmed); + if (error) { + kfree(extents); + break; + } + + /* + * We hand the extent list to the discard function here so the + * discarded extents can be removed from the busy extent list. + * This allows the discards to run asynchronously with gathering + * the next round of extents to discard. + * + * However, we must ensure that we do not reference the extent + * list after this function call, as it may have been freed by + * the time control returns to us. + */ + error = xfs_discard_extents(pag->pag_mount, extents); + if (error) + break; + + if (xfs_trim_should_stop()) + break; + + } while (tcur.ar_blockcount != 0); + + return error; + +} + /* * trim a range of the filesystem. * @@ -195,12 +413,12 @@ xfs_ioc_trim( for_each_perag_range(mp, agno, xfs_daddr_to_agno(mp, end), pag) { error = xfs_trim_extents(pag, start, end, minlen, &blocks_trimmed); - if (error) { + if (error) last_error = error; - if (error == -ERESTARTSYS) { - xfs_perag_rele(pag); - break; - } + + if (xfs_trim_should_stop()) { + xfs_perag_rele(pag); + break; } } diff --git a/fs/xfs/xfs_discard.h b/fs/xfs/xfs_discard.h index de92d9cc958f..2b1a85223a56 100644 --- a/fs/xfs/xfs_discard.h +++ b/fs/xfs/xfs_discard.h @@ -3,8 +3,10 @@ #define XFS_DISCARD_H 1 struct fstrim_range; -struct list_head; +struct xfs_mount; +struct xfs_busy_extents; -extern int xfs_ioc_trim(struct xfs_mount *, struct fstrim_range __user *); +int xfs_discard_extents(struct xfs_mount *mp, struct xfs_busy_extents *busy); +int xfs_ioc_trim(struct xfs_mount *mp, struct fstrim_range __user *fstrim); #endif /* XFS_DISCARD_H */ diff --git a/fs/xfs/xfs_extent_busy.c b/fs/xfs/xfs_extent_busy.c index 7c2fdc71e42d..746814815b1d 100644 --- a/fs/xfs/xfs_extent_busy.c +++ b/fs/xfs/xfs_extent_busy.c @@ -19,13 +19,13 @@ #include "xfs_log.h" #include "xfs_ag.h" -void -xfs_extent_busy_insert( - struct xfs_trans *tp, +static void +xfs_extent_busy_insert_list( struct xfs_perag *pag, xfs_agblock_t bno, xfs_extlen_t len, - unsigned int flags) + unsigned int flags, + struct list_head *busy_list) { struct xfs_extent_busy *new; struct xfs_extent_busy *busyp; @@ -40,7 +40,7 @@ xfs_extent_busy_insert( new->flags = flags; /* trace before insert to be able to see failed inserts */ - trace_xfs_extent_busy(tp->t_mountp, pag->pag_agno, bno, len); + trace_xfs_extent_busy(pag->pag_mount, pag->pag_agno, bno, len); spin_lock(&pag->pagb_lock); rbp = &pag->pagb_tree.rb_node; @@ -62,10 +62,32 @@ xfs_extent_busy_insert( rb_link_node(&new->rb_node, parent, rbp); rb_insert_color(&new->rb_node, &pag->pagb_tree); - list_add(&new->list, &tp->t_busy); + list_add(&new->list, busy_list); spin_unlock(&pag->pagb_lock); } +void +xfs_extent_busy_insert( + struct xfs_trans *tp, + struct xfs_perag *pag, + xfs_agblock_t bno, + xfs_extlen_t len, + unsigned int flags) +{ + xfs_extent_busy_insert_list(pag, bno, len, flags, &tp->t_busy); +} + +void +xfs_extent_busy_insert_discard( + struct xfs_perag *pag, + xfs_agblock_t bno, + xfs_extlen_t len, + struct list_head *busy_list) +{ + xfs_extent_busy_insert_list(pag, bno, len, XFS_EXTENT_BUSY_DISCARDED, + busy_list); +} + /* * Search for a busy extent within the range of the extent we are about to * allocate. You need to be holding the busy extent tree lock when calling diff --git a/fs/xfs/xfs_extent_busy.h b/fs/xfs/xfs_extent_busy.h index c37bf87e6781..0639aab336f3 100644 --- a/fs/xfs/xfs_extent_busy.h +++ b/fs/xfs/xfs_extent_busy.h @@ -16,9 +16,6 @@ struct xfs_alloc_arg; /* * Busy block/extent entry. Indexed by a rbtree in perag to mark blocks that * have been freed but whose transactions aren't committed to disk yet. - * - * Note that we use the transaction ID to record the transaction, not the - * transaction structure itself. See xfs_extent_busy_insert() for details. */ struct xfs_extent_busy { struct rb_node rb_node; /* ag by-bno indexed search tree */ @@ -31,11 +28,32 @@ struct xfs_extent_busy { #define XFS_EXTENT_BUSY_SKIP_DISCARD 0x02 /* do not discard */ }; +/* + * List used to track groups of related busy extents all the way through + * to discard completion. + */ +struct xfs_busy_extents { + struct xfs_mount *mount; + struct list_head extent_list; + struct work_struct endio_work; + + /* + * Owner is the object containing the struct xfs_busy_extents to free + * once the busy extents have been processed. If only the + * xfs_busy_extents object needs freeing, then point this at itself. + */ + void *owner; +}; + void xfs_extent_busy_insert(struct xfs_trans *tp, struct xfs_perag *pag, xfs_agblock_t bno, xfs_extlen_t len, unsigned int flags); void +xfs_extent_busy_insert_discard(struct xfs_perag *pag, xfs_agblock_t bno, + xfs_extlen_t len, struct list_head *busy_list); + +void xfs_extent_busy_clear(struct xfs_mount *mp, struct list_head *list, bool do_discard); diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c index ebc70aaa299c..67a99d94701e 100644 --- a/fs/xfs/xfs_log_cil.c +++ b/fs/xfs/xfs_log_cil.c @@ -16,8 +16,7 @@ #include "xfs_log.h" #include "xfs_log_priv.h" #include "xfs_trace.h" - -struct workqueue_struct *xfs_discard_wq; +#include "xfs_discard.h" /* * Allocate a new ticket. Failing to get a new ticket makes it really hard to @@ -103,7 +102,7 @@ xlog_cil_ctx_alloc(void) ctx = kmem_zalloc(sizeof(*ctx), KM_NOFS); INIT_LIST_HEAD(&ctx->committing); - INIT_LIST_HEAD(&ctx->busy_extents); + INIT_LIST_HEAD(&ctx->busy_extents.extent_list); INIT_LIST_HEAD(&ctx->log_items); INIT_LIST_HEAD(&ctx->lv_chain); INIT_WORK(&ctx->push_work, xlog_cil_push_work); @@ -132,7 +131,7 @@ xlog_cil_push_pcp_aggregate( if (!list_empty(&cilpcp->busy_extents)) { list_splice_init(&cilpcp->busy_extents, - &ctx->busy_extents); + &ctx->busy_extents.extent_list); } if (!list_empty(&cilpcp->log_items)) list_splice_init(&cilpcp->log_items, &ctx->log_items); @@ -708,76 +707,6 @@ xlog_cil_free_logvec( } } -static void -xlog_discard_endio_work( - struct work_struct *work) -{ - struct xfs_cil_ctx *ctx = - container_of(work, struct xfs_cil_ctx, discard_endio_work); - struct xfs_mount *mp = ctx->cil->xc_log->l_mp; - - xfs_extent_busy_clear(mp, &ctx->busy_extents, false); - kmem_free(ctx); -} - -/* - * Queue up the actual completion to a thread to avoid IRQ-safe locking for - * pagb_lock. Note that we need a unbounded workqueue, otherwise we might - * get the execution delayed up to 30 seconds for weird reasons. - */ -static void -xlog_discard_endio( - struct bio *bio) -{ - struct xfs_cil_ctx *ctx = bio->bi_private; - - INIT_WORK(&ctx->discard_endio_work, xlog_discard_endio_work); - queue_work(xfs_discard_wq, &ctx->discard_endio_work); - bio_put(bio); -} - -static void -xlog_discard_busy_extents( - struct xfs_mount *mp, - struct xfs_cil_ctx *ctx) -{ - struct list_head *list = &ctx->busy_extents; - struct xfs_extent_busy *busyp; - struct bio *bio = NULL; - struct blk_plug plug; - int error = 0; - - ASSERT(xfs_has_discard(mp)); - - blk_start_plug(&plug); - list_for_each_entry(busyp, list, list) { - trace_xfs_discard_extent(mp, busyp->agno, busyp->bno, - busyp->length); - - error = __blkdev_issue_discard(mp->m_ddev_targp->bt_bdev, - XFS_AGB_TO_DADDR(mp, busyp->agno, busyp->bno), - XFS_FSB_TO_BB(mp, busyp->length), - GFP_NOFS, &bio); - if (error && error != -EOPNOTSUPP) { - xfs_info(mp, - "discard failed for extent [0x%llx,%u], error %d", - (unsigned long long)busyp->bno, - busyp->length, - error); - break; - } - } - - if (bio) { - bio->bi_private = ctx; - bio->bi_end_io = xlog_discard_endio; - submit_bio(bio); - } else { - xlog_discard_endio_work(&ctx->discard_endio_work); - } - blk_finish_plug(&plug); -} - /* * Mark all items committed and clear busy extents. We free the log vector * chains in a separate pass so that we unpin the log items as quickly as @@ -807,8 +736,8 @@ xlog_cil_committed( xfs_trans_committed_bulk(ctx->cil->xc_log->l_ailp, &ctx->lv_chain, ctx->start_lsn, abort); - xfs_extent_busy_sort(&ctx->busy_extents); - xfs_extent_busy_clear(mp, &ctx->busy_extents, + xfs_extent_busy_sort(&ctx->busy_extents.extent_list); + xfs_extent_busy_clear(mp, &ctx->busy_extents.extent_list, xfs_has_discard(mp) && !abort); spin_lock(&ctx->cil->xc_push_lock); @@ -817,10 +746,14 @@ xlog_cil_committed( xlog_cil_free_logvec(&ctx->lv_chain); - if (!list_empty(&ctx->busy_extents)) - xlog_discard_busy_extents(mp, ctx); - else - kmem_free(ctx); + if (!list_empty(&ctx->busy_extents.extent_list)) { + ctx->busy_extents.mount = mp; + ctx->busy_extents.owner = ctx; + xfs_discard_extents(mp, &ctx->busy_extents); + return; + } + + kmem_free(ctx); } void diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h index af87648331d5..fa3ad1d7b31c 100644 --- a/fs/xfs/xfs_log_priv.h +++ b/fs/xfs/xfs_log_priv.h @@ -6,6 +6,8 @@ #ifndef __XFS_LOG_PRIV_H__ #define __XFS_LOG_PRIV_H__ +#include "xfs_extent_busy.h" /* for struct xfs_busy_extents */ + struct xfs_buf; struct xlog; struct xlog_ticket; @@ -223,12 +225,11 @@ struct xfs_cil_ctx { struct xlog_in_core *commit_iclog; struct xlog_ticket *ticket; /* chkpt ticket */ atomic_t space_used; /* aggregate size of regions */ - struct list_head busy_extents; /* busy extents in chkpt */ + struct xfs_busy_extents busy_extents; struct list_head log_items; /* log items in chkpt */ struct list_head lv_chain; /* logvecs being pushed */ struct list_head iclog_entry; struct list_head committing; /* ctx committing list */ - struct work_struct discard_endio_work; struct work_struct push_work; atomic_t order_id; diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h index cecd2b7bd033..430f0ae0dde2 100644 --- a/include/asm-generic/mshyperv.h +++ b/include/asm-generic/mshyperv.h @@ -36,6 +36,7 @@ struct ms_hyperv_info { u32 nested_features; u32 max_vp_index; u32 max_lp_index; + u8 vtl; union { u32 isolation_config_a; struct { @@ -54,7 +55,6 @@ struct ms_hyperv_info { }; }; u64 shared_gpa_boundary; - u8 vtl; }; extern struct ms_hyperv_info ms_hyperv; extern bool hv_nested; diff --git a/include/linux/acpi.h b/include/linux/acpi.h index a73246c3c35e..afd94c9b8b8a 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -1480,6 +1480,15 @@ static inline int lpit_read_residency_count_address(u64 *address) } #endif +#ifdef CONFIG_ACPI_PROCESSOR_IDLE +#ifndef arch_get_idle_state_flags +static inline unsigned int arch_get_idle_state_flags(u32 arch_flags) +{ + return 0; +} +#endif +#endif /* CONFIG_ACPI_PROCESSOR_IDLE */ + #ifdef CONFIG_ACPI_PPTT int acpi_pptt_cpu_is_thread(unsigned int cpu); int find_acpi_cpu_topology(unsigned int cpu, int level); diff --git a/include/linux/bpf-cgroup-defs.h b/include/linux/bpf-cgroup-defs.h index 7b121bd780eb..0985221d5478 100644 --- a/include/linux/bpf-cgroup-defs.h +++ b/include/linux/bpf-cgroup-defs.h @@ -28,19 +28,24 @@ enum cgroup_bpf_attach_type { CGROUP_INET6_BIND, CGROUP_INET4_CONNECT, CGROUP_INET6_CONNECT, + CGROUP_UNIX_CONNECT, CGROUP_INET4_POST_BIND, CGROUP_INET6_POST_BIND, CGROUP_UDP4_SENDMSG, CGROUP_UDP6_SENDMSG, + CGROUP_UNIX_SENDMSG, CGROUP_SYSCTL, CGROUP_UDP4_RECVMSG, CGROUP_UDP6_RECVMSG, + CGROUP_UNIX_RECVMSG, CGROUP_GETSOCKOPT, CGROUP_SETSOCKOPT, CGROUP_INET4_GETPEERNAME, CGROUP_INET6_GETPEERNAME, + CGROUP_UNIX_GETPEERNAME, CGROUP_INET4_GETSOCKNAME, CGROUP_INET6_GETSOCKNAME, + CGROUP_UNIX_GETSOCKNAME, CGROUP_INET_SOCK_RELEASE, CGROUP_LSM_START, CGROUP_LSM_END = CGROUP_LSM_START + CGROUP_LSM_NUM - 1, diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index 8506690dbb9c..98b8cea904fe 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -48,19 +48,24 @@ to_cgroup_bpf_attach_type(enum bpf_attach_type attach_type) CGROUP_ATYPE(CGROUP_INET6_BIND); CGROUP_ATYPE(CGROUP_INET4_CONNECT); CGROUP_ATYPE(CGROUP_INET6_CONNECT); + CGROUP_ATYPE(CGROUP_UNIX_CONNECT); CGROUP_ATYPE(CGROUP_INET4_POST_BIND); CGROUP_ATYPE(CGROUP_INET6_POST_BIND); CGROUP_ATYPE(CGROUP_UDP4_SENDMSG); CGROUP_ATYPE(CGROUP_UDP6_SENDMSG); + CGROUP_ATYPE(CGROUP_UNIX_SENDMSG); CGROUP_ATYPE(CGROUP_SYSCTL); CGROUP_ATYPE(CGROUP_UDP4_RECVMSG); CGROUP_ATYPE(CGROUP_UDP6_RECVMSG); + CGROUP_ATYPE(CGROUP_UNIX_RECVMSG); CGROUP_ATYPE(CGROUP_GETSOCKOPT); CGROUP_ATYPE(CGROUP_SETSOCKOPT); CGROUP_ATYPE(CGROUP_INET4_GETPEERNAME); CGROUP_ATYPE(CGROUP_INET6_GETPEERNAME); + CGROUP_ATYPE(CGROUP_UNIX_GETPEERNAME); CGROUP_ATYPE(CGROUP_INET4_GETSOCKNAME); CGROUP_ATYPE(CGROUP_INET6_GETSOCKNAME); + CGROUP_ATYPE(CGROUP_UNIX_GETSOCKNAME); CGROUP_ATYPE(CGROUP_INET_SOCK_RELEASE); default: return CGROUP_BPF_ATTACH_TYPE_INVALID; @@ -120,6 +125,7 @@ int __cgroup_bpf_run_filter_sk(struct sock *sk, int __cgroup_bpf_run_filter_sock_addr(struct sock *sk, struct sockaddr *uaddr, + int *uaddrlen, enum cgroup_bpf_attach_type atype, void *t_ctx, u32 *flags); @@ -230,22 +236,22 @@ static inline bool cgroup_bpf_sock_enabled(struct sock *sk, #define BPF_CGROUP_RUN_PROG_INET6_POST_BIND(sk) \ BPF_CGROUP_RUN_SK_PROG(sk, CGROUP_INET6_POST_BIND) -#define BPF_CGROUP_RUN_SA_PROG(sk, uaddr, atype) \ +#define BPF_CGROUP_RUN_SA_PROG(sk, uaddr, uaddrlen, atype) \ ({ \ int __ret = 0; \ if (cgroup_bpf_enabled(atype)) \ - __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, atype, \ - NULL, NULL); \ + __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, uaddrlen, \ + atype, NULL, NULL); \ __ret; \ }) -#define BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, atype, t_ctx) \ +#define BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, atype, t_ctx) \ ({ \ int __ret = 0; \ if (cgroup_bpf_enabled(atype)) { \ lock_sock(sk); \ - __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, atype, \ - t_ctx, NULL); \ + __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, uaddrlen, \ + atype, t_ctx, NULL); \ release_sock(sk); \ } \ __ret; \ @@ -256,14 +262,14 @@ static inline bool cgroup_bpf_sock_enabled(struct sock *sk, * (at bit position 0) is to indicate CAP_NET_BIND_SERVICE capability check * should be bypassed (BPF_RET_BIND_NO_CAP_NET_BIND_SERVICE). */ -#define BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr, atype, bind_flags) \ +#define BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr, uaddrlen, atype, bind_flags) \ ({ \ u32 __flags = 0; \ int __ret = 0; \ if (cgroup_bpf_enabled(atype)) { \ lock_sock(sk); \ - __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, atype, \ - NULL, &__flags); \ + __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, uaddrlen, \ + atype, NULL, &__flags); \ release_sock(sk); \ if (__flags & BPF_RET_BIND_NO_CAP_NET_BIND_SERVICE) \ *bind_flags |= BIND_NO_CAP_NET_BIND_SERVICE; \ @@ -276,29 +282,38 @@ static inline bool cgroup_bpf_sock_enabled(struct sock *sk, cgroup_bpf_enabled(CGROUP_INET6_CONNECT)) && \ (sk)->sk_prot->pre_connect) -#define BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr) \ - BPF_CGROUP_RUN_SA_PROG(sk, uaddr, CGROUP_INET4_CONNECT) +#define BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr, uaddrlen) \ + BPF_CGROUP_RUN_SA_PROG(sk, uaddr, uaddrlen, CGROUP_INET4_CONNECT) -#define BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr) \ - BPF_CGROUP_RUN_SA_PROG(sk, uaddr, CGROUP_INET6_CONNECT) +#define BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr, uaddrlen) \ + BPF_CGROUP_RUN_SA_PROG(sk, uaddr, uaddrlen, CGROUP_INET6_CONNECT) -#define BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr) \ - BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, CGROUP_INET4_CONNECT, NULL) +#define BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr, uaddrlen) \ + BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_INET4_CONNECT, NULL) -#define BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr) \ - BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, CGROUP_INET6_CONNECT, NULL) +#define BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr, uaddrlen) \ + BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_INET6_CONNECT, NULL) -#define BPF_CGROUP_RUN_PROG_UDP4_SENDMSG_LOCK(sk, uaddr, t_ctx) \ - BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, CGROUP_UDP4_SENDMSG, t_ctx) +#define BPF_CGROUP_RUN_PROG_UNIX_CONNECT_LOCK(sk, uaddr, uaddrlen) \ + BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_UNIX_CONNECT, NULL) -#define BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk, uaddr, t_ctx) \ - BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, CGROUP_UDP6_SENDMSG, t_ctx) +#define BPF_CGROUP_RUN_PROG_UDP4_SENDMSG_LOCK(sk, uaddr, uaddrlen, t_ctx) \ + BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_UDP4_SENDMSG, t_ctx) -#define BPF_CGROUP_RUN_PROG_UDP4_RECVMSG_LOCK(sk, uaddr) \ - BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, CGROUP_UDP4_RECVMSG, NULL) +#define BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk, uaddr, uaddrlen, t_ctx) \ + BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_UDP6_SENDMSG, t_ctx) -#define BPF_CGROUP_RUN_PROG_UDP6_RECVMSG_LOCK(sk, uaddr) \ - BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, CGROUP_UDP6_RECVMSG, NULL) +#define BPF_CGROUP_RUN_PROG_UNIX_SENDMSG_LOCK(sk, uaddr, uaddrlen, t_ctx) \ + BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_UNIX_SENDMSG, t_ctx) + +#define BPF_CGROUP_RUN_PROG_UDP4_RECVMSG_LOCK(sk, uaddr, uaddrlen) \ + BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_UDP4_RECVMSG, NULL) + +#define BPF_CGROUP_RUN_PROG_UDP6_RECVMSG_LOCK(sk, uaddr, uaddrlen) \ + BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_UDP6_RECVMSG, NULL) + +#define BPF_CGROUP_RUN_PROG_UNIX_RECVMSG_LOCK(sk, uaddr, uaddrlen) \ + BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_UNIX_RECVMSG, NULL) /* The SOCK_OPS"_SK" macro should be used when sock_ops->sk is not a * fullsock and its parent fullsock cannot be traced by @@ -477,24 +492,27 @@ static inline int bpf_percpu_cgroup_storage_update(struct bpf_map *map, } #define cgroup_bpf_enabled(atype) (0) -#define BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, atype, t_ctx) ({ 0; }) -#define BPF_CGROUP_RUN_SA_PROG(sk, uaddr, atype) ({ 0; }) +#define BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, atype, t_ctx) ({ 0; }) +#define BPF_CGROUP_RUN_SA_PROG(sk, uaddr, uaddrlen, atype) ({ 0; }) #define BPF_CGROUP_PRE_CONNECT_ENABLED(sk) (0) #define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk,skb) ({ 0; }) #define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk,skb) ({ 0; }) #define BPF_CGROUP_RUN_PROG_INET_SOCK(sk) ({ 0; }) #define BPF_CGROUP_RUN_PROG_INET_SOCK_RELEASE(sk) ({ 0; }) -#define BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr, atype, flags) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr, uaddrlen, atype, flags) ({ 0; }) #define BPF_CGROUP_RUN_PROG_INET4_POST_BIND(sk) ({ 0; }) #define BPF_CGROUP_RUN_PROG_INET6_POST_BIND(sk) ({ 0; }) -#define BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr) ({ 0; }) -#define BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr) ({ 0; }) -#define BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr) ({ 0; }) -#define BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr) ({ 0; }) -#define BPF_CGROUP_RUN_PROG_UDP4_SENDMSG_LOCK(sk, uaddr, t_ctx) ({ 0; }) -#define BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk, uaddr, t_ctx) ({ 0; }) -#define BPF_CGROUP_RUN_PROG_UDP4_RECVMSG_LOCK(sk, uaddr) ({ 0; }) -#define BPF_CGROUP_RUN_PROG_UDP6_RECVMSG_LOCK(sk, uaddr) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr, uaddrlen) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr, uaddrlen) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr, uaddrlen) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr, uaddrlen) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_UNIX_CONNECT_LOCK(sk, uaddr, uaddrlen) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_UDP4_SENDMSG_LOCK(sk, uaddr, uaddrlen, t_ctx) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk, uaddr, uaddrlen, t_ctx) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_UNIX_SENDMSG_LOCK(sk, uaddr, uaddrlen, t_ctx) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_UDP4_RECVMSG_LOCK(sk, uaddr, uaddrlen) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_UDP6_RECVMSG_LOCK(sk, uaddr, uaddrlen) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_UNIX_RECVMSG_LOCK(sk, uaddr, uaddrlen) ({ 0; }) #define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) ({ 0; }) #define BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(atype, major, minor, access) ({ 0; }) #define BPF_CGROUP_RUN_PROG_SYSCTL(head,table,write,buf,count,pos) ({ 0; }) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index a34ac7f00c86..d3c51a507508 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -2164,12 +2164,12 @@ static inline bool bpf_allow_uninit_stack(void) static inline bool bpf_bypass_spec_v1(void) { - return perfmon_capable(); + return cpu_mitigations_off() || perfmon_capable(); } static inline bool bpf_bypass_spec_v4(void) { - return perfmon_capable(); + return cpu_mitigations_off() || perfmon_capable(); } int bpf_map_new_fd(struct bpf_map *map, int flags); @@ -2922,6 +2922,22 @@ static inline int sock_map_bpf_prog_query(const union bpf_attr *attr, #endif /* CONFIG_BPF_SYSCALL */ #endif /* CONFIG_NET && CONFIG_BPF_SYSCALL */ +static __always_inline void +bpf_prog_inc_misses_counters(const struct bpf_prog_array *array) +{ + const struct bpf_prog_array_item *item; + struct bpf_prog *prog; + + if (unlikely(!array)) + return; + + item = &array->items[0]; + while ((prog = READ_ONCE(item->prog))) { + bpf_prog_inc_misses_counter(prog); + item++; + } +} + #if defined(CONFIG_INET) && defined(CONFIG_BPF_SYSCALL) void bpf_sk_reuseport_detach(struct sock *sk); int bpf_fd_reuseport_array_lookup_elem(struct bpf_map *map, void *key, diff --git a/include/linux/dpll.h b/include/linux/dpll.h index bbc480cd2932..578fc5fa3750 100644 --- a/include/linux/dpll.h +++ b/include/linux/dpll.h @@ -68,6 +68,18 @@ struct dpll_pin_ops { int (*prio_set)(const struct dpll_pin *pin, void *pin_priv, const struct dpll_device *dpll, void *dpll_priv, const u32 prio, struct netlink_ext_ack *extack); + int (*phase_offset_get)(const struct dpll_pin *pin, void *pin_priv, + const struct dpll_device *dpll, void *dpll_priv, + s64 *phase_offset, + struct netlink_ext_ack *extack); + int (*phase_adjust_get)(const struct dpll_pin *pin, void *pin_priv, + const struct dpll_device *dpll, void *dpll_priv, + s32 *phase_adjust, + struct netlink_ext_ack *extack); + int (*phase_adjust_set)(const struct dpll_pin *pin, void *pin_priv, + const struct dpll_device *dpll, void *dpll_priv, + const s32 phase_adjust, + struct netlink_ext_ack *extack); }; struct dpll_pin_frequency { @@ -91,6 +103,11 @@ struct dpll_pin_frequency { #define DPLL_PIN_FREQUENCY_DCF77 \ DPLL_PIN_FREQUENCY(DPLL_PIN_FREQUENCY_77_5_KHZ) +struct dpll_pin_phase_adjust_range { + s32 min; + s32 max; +}; + struct dpll_pin_properties { const char *board_label; const char *panel_label; @@ -99,6 +116,7 @@ struct dpll_pin_properties { unsigned long capabilities; u32 freq_supported_num; struct dpll_pin_frequency *freq_supported; + struct dpll_pin_phase_adjust_range phase_range; }; #if IS_ENABLED(CONFIG_DPLL) diff --git a/include/linux/filter.h b/include/linux/filter.h index e8822bd595f9..a4953fafc8cb 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -736,7 +736,7 @@ static inline void bpf_compute_and_save_data_end( cb->data_end = skb->data + skb_headlen(skb); } -/* Restore data saved by bpf_compute_data_pointers(). */ +/* Restore data saved by bpf_compute_and_save_data_end(). */ static inline void bpf_restore_data_end( struct sk_buff *skb, void *saved_data_end) { @@ -1329,6 +1329,7 @@ struct bpf_sock_addr_kern { */ u64 tmp_reg; void *t_ctx; /* Attach type specific context. */ + u32 uaddrlen; }; struct bpf_sock_ops_kern { diff --git a/include/linux/i3c/master.h b/include/linux/i3c/master.h index 0b52da4f2346..db909ef79be4 100644 --- a/include/linux/i3c/master.h +++ b/include/linux/i3c/master.h @@ -24,6 +24,12 @@ struct i2c_client; +/* notifier actions. notifier call data is the struct i3c_bus */ +enum { + I3C_NOTIFY_BUS_ADD, + I3C_NOTIFY_BUS_REMOVE, +}; + struct i3c_master_controller; struct i3c_bus; struct i3c_device; @@ -652,4 +658,9 @@ void i3c_master_queue_ibi(struct i3c_dev_desc *dev, struct i3c_ibi_slot *slot); struct i3c_ibi_slot *i3c_master_get_free_ibi_slot(struct i3c_dev_desc *dev); +void i3c_for_each_bus_locked(int (*fn)(struct i3c_bus *bus, void *data), + void *data); +int i3c_register_notifier(struct notifier_block *nb); +int i3c_unregister_notifier(struct notifier_block *nb); + #endif /* I3C_MASTER_H */ diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 8fbe22de16ef..820bca965fb6 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -367,6 +367,8 @@ enum mlx5_driver_event { MLX5_DRIVER_EVENT_MACSEC_SA_ADDED, MLX5_DRIVER_EVENT_MACSEC_SA_DELETED, MLX5_DRIVER_EVENT_SF_PEER_DEVLINK, + MLX5_DRIVER_EVENT_AFFILIATION_DONE, + MLX5_DRIVER_EVENT_AFFILIATION_REMOVED, }; enum { diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 92434814c855..52e982bc0f50 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -1029,6 +1029,8 @@ bool mlx5_cmd_is_down(struct mlx5_core_dev *dev); void mlx5_core_uplink_netdev_set(struct mlx5_core_dev *mdev, struct net_device *netdev); void mlx5_core_uplink_netdev_event_replay(struct mlx5_core_dev *mdev); +void mlx5_core_mp_event_replay(struct mlx5_core_dev *dev, u32 event, void *data); + void mlx5_health_cleanup(struct mlx5_core_dev *dev); int mlx5_health_init(struct mlx5_core_dev *dev); void mlx5_start_health_poll(struct mlx5_core_dev *dev); diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index b23d8ff286a1..4df6d1c12437 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -312,6 +312,7 @@ enum { MLX5_CMD_OP_QUERY_VHCA_STATE = 0xb0d, MLX5_CMD_OP_MODIFY_VHCA_STATE = 0xb0e, MLX5_CMD_OP_SYNC_CRYPTO = 0xb12, + MLX5_CMD_OP_ALLOW_OTHER_VHCA_ACCESS = 0xb16, MLX5_CMD_OP_MAX }; @@ -1934,6 +1935,14 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 match_definer_format_supported[0x40]; }; +enum { + MLX5_CROSS_VHCA_OBJ_TO_OBJ_SUPPORTED_LOCAL_FLOW_TABLE_TO_REMOTE_FLOW_TABLE_MISS = 0x80000, +}; + +enum { + MLX5_ALLOWED_OBJ_FOR_OTHER_VHCA_ACCESS_FLOW_TABLE = 0x200, +}; + struct mlx5_ifc_cmd_hca_cap_2_bits { u8 reserved_at_0[0x80]; @@ -1948,9 +1957,15 @@ struct mlx5_ifc_cmd_hca_cap_2_bits { u8 reserved_at_c0[0x8]; u8 migration_multi_load[0x1]; u8 migration_tracking_state[0x1]; - u8 reserved_at_ca[0x16]; + u8 reserved_at_ca[0x6]; + u8 migration_in_chunks[0x1]; + u8 reserved_at_d1[0xf]; + + u8 cross_vhca_object_to_object_supported[0x20]; + + u8 allowed_object_for_other_vhca_access[0x40]; - u8 reserved_at_e0[0xc0]; + u8 reserved_at_140[0x60]; u8 flow_table_type_2_type[0x8]; u8 reserved_at_1a8[0x3]; @@ -6369,6 +6384,28 @@ struct mlx5_ifc_general_obj_out_cmd_hdr_bits { u8 reserved_at_60[0x20]; }; +struct mlx5_ifc_allow_other_vhca_access_in_bits { + u8 opcode[0x10]; + u8 uid[0x10]; + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + u8 reserved_at_40[0x50]; + u8 object_type_to_be_accessed[0x10]; + u8 object_id_to_be_accessed[0x20]; + u8 reserved_at_c0[0x40]; + union { + u8 access_key_raw[0x100]; + u8 access_key[8][0x20]; + }; +}; + +struct mlx5_ifc_allow_other_vhca_access_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + u8 syndrome[0x20]; + u8 reserved_at_40[0x40]; +}; + struct mlx5_ifc_modify_header_arg_bits { u8 reserved_at_0[0x80]; @@ -6391,6 +6428,24 @@ struct mlx5_ifc_create_match_definer_out_bits { struct mlx5_ifc_general_obj_out_cmd_hdr_bits general_obj_out_cmd_hdr; }; +struct mlx5_ifc_alias_context_bits { + u8 vhca_id_to_be_accessed[0x10]; + u8 reserved_at_10[0xd]; + u8 status[0x3]; + u8 object_id_to_be_accessed[0x20]; + u8 reserved_at_40[0x40]; + union { + u8 access_key_raw[0x100]; + u8 access_key[8][0x20]; + }; + u8 metadata[0x80]; +}; + +struct mlx5_ifc_create_alias_obj_in_bits { + struct mlx5_ifc_general_obj_in_cmd_hdr_bits hdr; + struct mlx5_ifc_alias_context_bits alias_ctx; +}; + enum { MLX5_QUERY_FLOW_GROUP_OUT_MATCH_CRITERIA_ENABLE_OUTER_HEADERS = 0x0, MLX5_QUERY_FLOW_GROUP_OUT_MATCH_CRITERIA_ENABLE_MISC_PARAMETERS = 0x1, @@ -11920,6 +11975,7 @@ enum { MLX5_GENERAL_OBJECT_TYPES_FLOW_METER_ASO = 0x24, MLX5_GENERAL_OBJECT_TYPES_MACSEC = 0x27, MLX5_GENERAL_OBJECT_TYPES_INT_KEK = 0x47, + MLX5_GENERAL_OBJECT_TYPES_FLOW_TABLE_ALIAS = 0xff15, }; enum { @@ -12395,7 +12451,8 @@ struct mlx5_ifc_query_vhca_migration_state_in_bits { u8 op_mod[0x10]; u8 incremental[0x1]; - u8 reserved_at_41[0xf]; + u8 chunk[0x1]; + u8 reserved_at_42[0xe]; u8 vhca_id[0x10]; u8 reserved_at_60[0x20]; @@ -12411,7 +12468,11 @@ struct mlx5_ifc_query_vhca_migration_state_out_bits { u8 required_umem_size[0x20]; - u8 reserved_at_a0[0x160]; + u8 reserved_at_a0[0x20]; + + u8 remaining_total_size[0x40]; + + u8 reserved_at_100[0x100]; }; struct mlx5_ifc_save_vhca_state_in_bits { @@ -12443,7 +12504,7 @@ struct mlx5_ifc_save_vhca_state_out_bits { u8 actual_image_size[0x20]; - u8 reserved_at_60[0x20]; + u8 next_required_umem_size[0x20]; }; struct mlx5_ifc_load_vhca_state_in_bits { diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 36c5b43999e6..74b49c4c7a52 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -125,18 +125,7 @@ struct page { struct page_pool *pp; unsigned long _pp_mapping_pad; unsigned long dma_addr; - union { - /** - * dma_addr_upper: might require a 64-bit - * value on 32-bit architectures. - */ - unsigned long dma_addr_upper; - /** - * For frag page support, not supported in - * 32-bit architectures with 64-bit DMA. - */ - atomic_long_t pp_frag_count; - }; + atomic_long_t pp_frag_count; }; struct { /* Tail pages of compound page */ unsigned long compound_head; /* Bit zero is set */ diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index e070a4540fba..1c7681263d30 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -490,11 +490,18 @@ bool napi_schedule_prep(struct napi_struct *n); * * Schedule NAPI poll routine to be called if it is not already * running. + * Return true if we schedule a NAPI or false if not. + * Refer to napi_schedule_prep() for additional reason on why + * a NAPI might not be scheduled. */ -static inline void napi_schedule(struct napi_struct *n) +static inline bool napi_schedule(struct napi_struct *n) { - if (napi_schedule_prep(n)) + if (napi_schedule_prep(n)) { __napi_schedule(n); + return true; + } + + return false; } /** @@ -509,16 +516,6 @@ static inline void napi_schedule_irqoff(struct napi_struct *n) __napi_schedule_irqoff(n); } -/* Try to reschedule poll. Called by dev->poll() after napi_complete(). */ -static inline bool napi_reschedule(struct napi_struct *napi) -{ - if (napi_schedule_prep(napi)) { - __napi_schedule(napi); - return true; - } - return false; -} - /** * napi_complete_done - NAPI processing complete * @n: NAPI context @@ -1290,9 +1287,7 @@ struct netdev_net_notifier { * struct net_device *dev, * const unsigned char *addr, u16 vid) * Deletes the FDB entry from dev coresponding to addr. - * int (*ndo_fdb_del_bulk)(struct ndmsg *ndm, struct nlattr *tb[], - * struct net_device *dev, - * u16 vid, + * int (*ndo_fdb_del_bulk)(struct nlmsghdr *nlh, struct net_device *dev, * struct netlink_ext_ack *extack); * int (*ndo_fdb_dump)(struct sk_buff *skb, struct netlink_callback *cb, * struct net_device *dev, struct net_device *filter_dev, @@ -1567,10 +1562,8 @@ struct net_device_ops { struct net_device *dev, const unsigned char *addr, u16 vid, struct netlink_ext_ack *extack); - int (*ndo_fdb_del_bulk)(struct ndmsg *ndm, - struct nlattr *tb[], + int (*ndo_fdb_del_bulk)(struct nlmsghdr *nlh, struct net_device *dev, - u16 vid, struct netlink_ext_ack *extack); int (*ndo_fdb_dump)(struct sk_buff *skb, struct netlink_callback *cb, @@ -4002,32 +3995,19 @@ static __always_inline bool __is_skb_forwardable(const struct net_device *dev, return false; } -struct net_device_core_stats __percpu *netdev_core_stats_alloc(struct net_device *dev); - -static inline struct net_device_core_stats __percpu *dev_core_stats(struct net_device *dev) -{ - /* This READ_ONCE() pairs with the write in netdev_core_stats_alloc() */ - struct net_device_core_stats __percpu *p = READ_ONCE(dev->core_stats); - - if (likely(p)) - return p; - - return netdev_core_stats_alloc(dev); -} +void netdev_core_stats_inc(struct net_device *dev, u32 offset); #define DEV_CORE_STATS_INC(FIELD) \ static inline void dev_core_stats_##FIELD##_inc(struct net_device *dev) \ { \ - struct net_device_core_stats __percpu *p; \ - \ - p = dev_core_stats(dev); \ - if (p) \ - this_cpu_inc(p->FIELD); \ + netdev_core_stats_inc(dev, \ + offsetof(struct net_device_core_stats, FIELD)); \ } DEV_CORE_STATS_INC(rx_dropped) DEV_CORE_STATS_INC(tx_dropped) DEV_CORE_STATS_INC(rx_nohandler) DEV_CORE_STATS_INC(rx_otherhost_dropped) +#undef DEV_CORE_STATS_INC static __always_inline int ____dev_forward_skb(struct net_device *dev, struct sk_buff *skb, diff --git a/include/linux/posix-clock.h b/include/linux/posix-clock.h index 468328b1e1dd..ef8619f48920 100644 --- a/include/linux/posix-clock.h +++ b/include/linux/posix-clock.h @@ -14,6 +14,7 @@ #include <linux/rwsem.h> struct posix_clock; +struct posix_clock_context; /** * struct posix_clock_operations - functional interface to the clock @@ -50,18 +51,18 @@ struct posix_clock_operations { /* * Optional character device methods: */ - long (*ioctl) (struct posix_clock *pc, - unsigned int cmd, unsigned long arg); + long (*ioctl)(struct posix_clock_context *pccontext, unsigned int cmd, + unsigned long arg); - int (*open) (struct posix_clock *pc, fmode_t f_mode); + int (*open)(struct posix_clock_context *pccontext, fmode_t f_mode); - __poll_t (*poll) (struct posix_clock *pc, - struct file *file, poll_table *wait); + __poll_t (*poll)(struct posix_clock_context *pccontext, struct file *file, + poll_table *wait); - int (*release) (struct posix_clock *pc); + int (*release)(struct posix_clock_context *pccontext); - ssize_t (*read) (struct posix_clock *pc, - uint flags, char __user *buf, size_t cnt); + ssize_t (*read)(struct posix_clock_context *pccontext, uint flags, + char __user *buf, size_t cnt); }; /** @@ -91,6 +92,24 @@ struct posix_clock { }; /** + * struct posix_clock_context - represents clock file operations context + * + * @clk: Pointer to the clock + * @private_clkdata: Pointer to user data + * + * Drivers should use struct posix_clock_context during specific character + * device file operation methods to access the posix clock. + * + * Drivers can store a private data structure during the open operation + * if they have specific information that is required in other file + * operations. + */ +struct posix_clock_context { + struct posix_clock *clk; + void *private_clkdata; +}; + +/** * posix_clock_register() - register a new clock * @clk: Pointer to the clock. Caller must provide 'ops' field * @dev: Pointer to the initialized device. Caller must provide diff --git a/include/linux/quota.h b/include/linux/quota.h index fd692b4a41d5..07071e64abf3 100644 --- a/include/linux/quota.h +++ b/include/linux/quota.h @@ -285,7 +285,9 @@ static inline void dqstats_dec(unsigned int type) #define DQ_FAKE_B 3 /* no limits only usage */ #define DQ_READ_B 4 /* dquot was read into memory */ #define DQ_ACTIVE_B 5 /* dquot is active (dquot_release not called) */ -#define DQ_LASTSET_B 6 /* Following 6 bits (see QIF_) are reserved\ +#define DQ_RELEASING_B 6 /* dquot is in releasing_dquots list waiting + * to be cleaned up */ +#define DQ_LASTSET_B 7 /* Following 6 bits (see QIF_) are reserved\ * for the mask of entries set via SETQUOTA\ * quotactl. They are set under dq_data_lock\ * and the quota format handling dquot can\ diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h index 11a4becff3a9..4fa4ef0a173a 100644 --- a/include/linux/quotaops.h +++ b/include/linux/quotaops.h @@ -57,7 +57,7 @@ static inline bool dquot_is_busy(struct dquot *dquot) { if (test_bit(DQ_MOD_B, &dquot->dq_flags)) return true; - if (atomic_read(&dquot->dq_count) > 1) + if (atomic_read(&dquot->dq_count) > 0) return true; return false; } diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 4174c4b82d13..97bfef071255 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1309,7 +1309,7 @@ struct sk_buff_fclones { * * Returns true if skb is a fast clone, and its clone is not freed. * Some drivers call skb_orphan() in their ndo_start_xmit(), - * so we also check that this didnt happen. + * so we also check that didn't happen. */ static inline bool skb_fclone_busy(const struct sock *sk, const struct sk_buff *skb) @@ -2016,7 +2016,7 @@ static inline struct sk_buff *skb_share_check(struct sk_buff *skb, gfp_t pri) * Copy shared buffers into a new sk_buff. We effectively do COW on * packets to handle cases where we have a local reader and forward * and a couple of other messy ones. The normal one is tcpdumping - * a packet thats being forwarded. + * a packet that's being forwarded. */ /** diff --git a/include/linux/socket.h b/include/linux/socket.h index 39b74d83c7c4..cfcb7e2c3813 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -383,6 +383,7 @@ struct ucred { #define SOL_MPTCP 284 #define SOL_MCTP 285 #define SOL_SMC 286 +#define SOL_VSOCK 287 /* IPX options */ #define IPX_TYPE 1 diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 21ae37e49319..5eb88a66eb68 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -761,7 +761,8 @@ struct bpf_raw_event_map *bpf_get_raw_tracepoint(const char *name); void bpf_put_raw_tracepoint(struct bpf_raw_event_map *btp); int bpf_get_perf_event_info(const struct perf_event *event, u32 *prog_id, u32 *fd_type, const char **buf, - u64 *probe_offset, u64 *probe_addr); + u64 *probe_offset, u64 *probe_addr, + unsigned long *missed); int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog); int bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog); #else @@ -801,7 +802,7 @@ static inline void bpf_put_raw_tracepoint(struct bpf_raw_event_map *btp) static inline int bpf_get_perf_event_info(const struct perf_event *event, u32 *prog_id, u32 *fd_type, const char **buf, u64 *probe_offset, - u64 *probe_addr) + u64 *probe_addr, unsigned long *missed) { return -EOPNOTSUPP; } @@ -877,6 +878,7 @@ extern void perf_kprobe_destroy(struct perf_event *event); extern int bpf_get_kprobe_info(const struct perf_event *event, u32 *fd_type, const char **symbol, u64 *probe_offset, u64 *probe_addr, + unsigned long *missed, bool perf_type_tracepoint); #endif #ifdef CONFIG_UPROBE_EVENTS diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h index b01cf9ac2437..e302c0e804d0 100644 --- a/include/net/af_vsock.h +++ b/include/net/af_vsock.h @@ -177,6 +177,9 @@ struct vsock_transport { /* Read a single skb */ int (*read_skb)(struct vsock_sock *, skb_read_actor_t); + + /* Zero-copy. */ + bool (*msgzerocopy_allow)(void); }; /**** CORE ****/ @@ -241,4 +244,8 @@ static inline void __init vsock_bpf_build_proto(void) {} #endif +static inline bool vsock_msgzerocopy_allow(const struct vsock_transport *t) +{ + return t->msgzerocopy_allow && t->msgzerocopy_allow(); +} #endif /* __AF_VSOCK_H__ */ diff --git a/include/net/dropreason-core.h b/include/net/dropreason-core.h index a587e83fc169..845dce805de7 100644 --- a/include/net/dropreason-core.h +++ b/include/net/dropreason-core.h @@ -80,6 +80,7 @@ FN(IPV6_NDISC_BAD_OPTIONS) \ FN(IPV6_NDISC_NS_OTHERHOST) \ FN(QUEUE_PURGE) \ + FN(TC_ERROR) \ FNe(MAX) /** @@ -345,6 +346,8 @@ enum skb_drop_reason { SKB_DROP_REASON_IPV6_NDISC_NS_OTHERHOST, /** @SKB_DROP_REASON_QUEUE_PURGE: bulk free. */ SKB_DROP_REASON_QUEUE_PURGE, + /** @SKB_DROP_REASON_TC_ERROR: generic internal tc error. */ + SKB_DROP_REASON_TC_ERROR, /** * @SKB_DROP_REASON_MAX: the maximum of core drop reasons, which * shouldn't be used as a real 'reason' - only for tracing code gen diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index 5d2fcc137b88..086d1193c9ef 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -114,7 +114,10 @@ struct inet_connection_sock { __u8 quick; /* Scheduled number of quick acks */ __u8 pingpong; /* The session is interactive */ __u8 retry; /* Number of attempts */ - __u32 ato; /* Predicted tick of soft clock */ + #define ATO_BITS 8 + __u32 ato:ATO_BITS, /* Predicted tick of soft clock */ + lrcv_flowlabel:20, /* last received ipv6 flowlabel */ + unused:4; unsigned long timeout; /* Currently scheduled timeout */ __u32 lrcvtime; /* timestamp of last received data packet */ __u16 last_seg_size; /* Size of last incoming segment */ @@ -325,11 +328,10 @@ void inet_csk_update_fastreuse(struct inet_bind_bucket *tb, struct dst_entry *inet_csk_update_pmtu(struct sock *sk, u32 mtu); -#define TCP_PINGPONG_THRESH 1 - static inline void inet_csk_enter_pingpong_mode(struct sock *sk) { - inet_csk(sk)->icsk_ack.pingpong = TCP_PINGPONG_THRESH; + inet_csk(sk)->icsk_ack.pingpong = + READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_pingpong_thresh); } static inline void inet_csk_exit_pingpong_mode(struct sock *sk) @@ -339,7 +341,16 @@ static inline void inet_csk_exit_pingpong_mode(struct sock *sk) static inline bool inet_csk_in_pingpong_mode(struct sock *sk) { - return inet_csk(sk)->icsk_ack.pingpong >= TCP_PINGPONG_THRESH; + return inet_csk(sk)->icsk_ack.pingpong >= + READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_pingpong_thresh); +} + +static inline void inet_csk_inc_pingpong_cnt(struct sock *sk) +{ + struct inet_connection_sock *icsk = inet_csk(sk); + + if (icsk->icsk_ack.pingpong < U8_MAX) + icsk->icsk_ack.pingpong++; } static inline bool inet_csk_has_ulp(const struct sock *sk) diff --git a/include/net/ipv6_stubs.h b/include/net/ipv6_stubs.h index c48186bf4737..21da31e1dff5 100644 --- a/include/net/ipv6_stubs.h +++ b/include/net/ipv6_stubs.h @@ -85,6 +85,11 @@ struct ipv6_bpf_stub { sockptr_t optval, unsigned int optlen); int (*ipv6_getsockopt)(struct sock *sk, int level, int optname, sockptr_t optval, sockptr_t optlen); + int (*ipv6_dev_get_saddr)(struct net *net, + const struct net_device *dst_dev, + const struct in6_addr *daddr, + unsigned int prefs, + struct in6_addr *saddr); }; extern const struct ipv6_bpf_stub *ipv6_bpf_stub __read_mostly; diff --git a/include/net/macsec.h b/include/net/macsec.h index 75a6f4863c83..ebf9bc54036a 100644 --- a/include/net/macsec.h +++ b/include/net/macsec.h @@ -258,6 +258,7 @@ struct macsec_context { struct macsec_secy *secy; struct macsec_rx_sc *rx_sc; struct { + bool update_pn; unsigned char assoc_num; u8 key[MACSEC_MAX_KEY_LEN]; union { diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index 4085765c3370..cba3ccf03fcc 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -160,10 +160,6 @@ static inline struct net *nf_ct_net(const struct nf_conn *ct) return read_pnet(&ct->ct_net); } -/* Alter reply tuple (maybe alter helper). */ -void nf_conntrack_alter_reply(struct nf_conn *ct, - const struct nf_conntrack_tuple *newreply); - /* Is this tuple taken? (ignoring any belonging to the given conntrack). */ int nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple, @@ -284,6 +280,16 @@ static inline bool nf_is_loopback_packet(const struct sk_buff *skb) return skb->dev && skb->skb_iif && skb->dev->flags & IFF_LOOPBACK; } +static inline void nf_conntrack_alter_reply(struct nf_conn *ct, + const struct nf_conntrack_tuple *newreply) +{ + /* Must be unconfirmed, so not in hash table yet */ + if (WARN_ON(nf_ct_is_confirmed(ct))) + return; + + ct->tuplehash[IP_CT_DIR_REPLY].tuple = *newreply; +} + #define nfct_time_stamp ((u32)(jiffies)) /* jiffies until ct expires, 0 if already expired */ diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 7c816359d5a9..9fb16485d08f 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -1198,10 +1198,13 @@ static inline void nft_use_inc_restore(u32 *use) * @hgenerator: handle generator state * @handle: table handle * @use: number of chain references to this table + * @family:address family * @flags: table flag (see enum nft_table_flags) * @genmask: generation mask - * @afinfo: address family info + * @nlpid: netlink port ID * @name: name of the table + * @udlen: length of the user data + * @udata: user data * @validate_state: internal, set when transaction adds jumps */ struct nft_table { diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index d96d05b08819..73f43f699199 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -133,6 +133,8 @@ struct netns_ipv4 { u8 sysctl_tcp_migrate_req; u8 sysctl_tcp_comp_sack_nr; u8 sysctl_tcp_backlog_ack_defer; + u8 sysctl_tcp_pingpong_thresh; + int sysctl_tcp_reordering; u8 sysctl_tcp_retries1; u8 sysctl_tcp_retries2; diff --git a/include/net/page_pool/helpers.h b/include/net/page_pool/helpers.h index 8e7751464ff5..8f64adf86f5b 100644 --- a/include/net/page_pool/helpers.h +++ b/include/net/page_pool/helpers.h @@ -197,7 +197,7 @@ static inline void page_pool_recycle_direct(struct page_pool *pool, page_pool_put_full_page(pool, page, true); } -#define PAGE_POOL_DMA_USE_PP_FRAG_COUNT \ +#define PAGE_POOL_32BIT_ARCH_WITH_64BIT_DMA \ (sizeof(dma_addr_t) > sizeof(unsigned long)) /** @@ -211,17 +211,25 @@ static inline dma_addr_t page_pool_get_dma_addr(struct page *page) { dma_addr_t ret = page->dma_addr; - if (PAGE_POOL_DMA_USE_PP_FRAG_COUNT) - ret |= (dma_addr_t)page->dma_addr_upper << 16 << 16; + if (PAGE_POOL_32BIT_ARCH_WITH_64BIT_DMA) + ret <<= PAGE_SHIFT; return ret; } -static inline void page_pool_set_dma_addr(struct page *page, dma_addr_t addr) +static inline bool page_pool_set_dma_addr(struct page *page, dma_addr_t addr) { + if (PAGE_POOL_32BIT_ARCH_WITH_64BIT_DMA) { + page->dma_addr = addr >> PAGE_SHIFT; + + /* We assume page alignment to shave off bottom bits, + * if this "compression" doesn't work we need to drop. + */ + return addr != (dma_addr_t)page->dma_addr << PAGE_SHIFT; + } + page->dma_addr = addr; - if (PAGE_POOL_DMA_USE_PP_FRAG_COUNT) - page->dma_addr_upper = upper_32_bits(addr); + return false; } static inline bool page_pool_put(struct page_pool *pool) diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index f308e8268651..a76c9171db0e 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -154,6 +154,12 @@ __cls_set_class(unsigned long *clp, unsigned long cl) return xchg(clp, cl); } +static inline void tcf_set_drop_reason(struct tcf_result *res, + enum skb_drop_reason reason) +{ + res->drop_reason = reason; +} + static inline void __tcf_bind_filter(struct Qdisc *q, struct tcf_result *r, unsigned long base) { diff --git a/include/net/route.h b/include/net/route.h index 5c248a8e3d0e..980ab474eabd 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -136,12 +136,6 @@ static inline struct rtable *__ip_route_output_key(struct net *net, struct rtable *ip_route_output_flow(struct net *, struct flowi4 *flp, const struct sock *sk); -struct rtable *ip_route_output_tunnel(struct sk_buff *skb, - struct net_device *dev, - struct net *net, __be32 *saddr, - const struct ip_tunnel_info *info, - u8 protocol, bool use_cache); - struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_orig); diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index c7318c73cfd6..dcb9160e6467 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -324,7 +324,6 @@ struct Qdisc_ops { struct module *owner; }; - struct tcf_result { union { struct { @@ -332,8 +331,8 @@ struct tcf_result { u32 classid; }; const struct tcf_proto *goto_tp; - }; + enum skb_drop_reason drop_reason; }; struct tcf_chain; diff --git a/include/net/tcp.h b/include/net/tcp.h index 9eb0a2855311..32146088a095 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -131,6 +131,8 @@ void tcp_time_wait(struct sock *sk, int state, int timeo); #define TCP_FIN_TIMEOUT_MAX (120 * HZ) /* max TCP_LINGER2 value (two minutes) */ #define TCP_DELACK_MAX ((unsigned)(HZ/5)) /* maximal time to delay before sending an ACK */ +static_assert((1 << ATO_BITS) > TCP_DELACK_MAX); + #if HZ >= 100 #define TCP_DELACK_MIN ((unsigned)(HZ/25)) /* minimal time to delay before sending an ACK */ #define TCP_ATO_MIN ((unsigned)(HZ/25)) @@ -2079,7 +2081,11 @@ INDIRECT_CALLABLE_DECLARE(int tcp4_gro_complete(struct sk_buff *skb, int thoff)) INDIRECT_CALLABLE_DECLARE(struct sk_buff *tcp4_gro_receive(struct list_head *head, struct sk_buff *skb)); INDIRECT_CALLABLE_DECLARE(int tcp6_gro_complete(struct sk_buff *skb, int thoff)); INDIRECT_CALLABLE_DECLARE(struct sk_buff *tcp6_gro_receive(struct list_head *head, struct sk_buff *skb)); +#ifdef CONFIG_INET void tcp_gro_complete(struct sk_buff *skb); +#else +static inline void tcp_gro_complete(struct sk_buff *skb) { } +#endif void __tcp_v4_send_check(struct sk_buff *skb, __be32 saddr, __be32 daddr); diff --git a/include/net/tls.h b/include/net/tls.h index a2b44578dcb7..962f0c501111 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -61,7 +61,8 @@ struct tls_rec; #define TLS_AAD_SPACE_SIZE 13 -#define MAX_IV_SIZE 16 +#define TLS_MAX_IV_SIZE 16 +#define TLS_MAX_SALT_SIZE 4 #define TLS_TAG_SIZE 16 #define TLS_MAX_REC_SEQ_SIZE 8 #define TLS_MAX_AAD_SIZE TLS_AAD_SPACE_SIZE @@ -149,6 +150,7 @@ struct tls_record_info { skb_frag_t frags[MAX_SKB_FRAGS]; }; +#define TLS_DRIVER_STATE_SIZE_TX 16 struct tls_offload_context_tx { struct crypto_aead *aead_send; spinlock_t lock; /* protects records list */ @@ -162,17 +164,13 @@ struct tls_offload_context_tx { void (*sk_destruct)(struct sock *sk); struct work_struct destruct_work; struct tls_context *ctx; - u8 driver_state[] __aligned(8); /* The TLS layer reserves room for driver specific state * Currently the belief is that there is not enough * driver specific state to justify another layer of indirection */ -#define TLS_DRIVER_STATE_SIZE_TX 16 + u8 driver_state[TLS_DRIVER_STATE_SIZE_TX] __aligned(8); }; -#define TLS_OFFLOAD_CONTEXT_SIZE_TX \ - (sizeof(struct tls_offload_context_tx) + TLS_DRIVER_STATE_SIZE_TX) - enum tls_context_flags { /* tls_device_down was called after the netdev went down, device state * was released, and kTLS works in software, even though rx_conf is @@ -193,8 +191,8 @@ enum tls_context_flags { }; struct cipher_context { - char *iv; - char *rec_seq; + char iv[TLS_MAX_IV_SIZE + TLS_MAX_SALT_SIZE]; + char rec_seq[TLS_MAX_REC_SEQ_SIZE]; }; union tls_crypto_context { @@ -302,6 +300,7 @@ struct tls_offload_resync_async { u32 log[TLS_DEVICE_RESYNC_ASYNC_LOGMAX]; }; +#define TLS_DRIVER_STATE_SIZE_RX 8 struct tls_offload_context_rx { /* sw must be the first member of tls_offload_context_rx */ struct tls_sw_context_rx sw; @@ -325,17 +324,13 @@ struct tls_offload_context_rx { struct tls_offload_resync_async *resync_async; }; }; - u8 driver_state[] __aligned(8); /* The TLS layer reserves room for driver specific state * Currently the belief is that there is not enough * driver specific state to justify another layer of indirection */ -#define TLS_DRIVER_STATE_SIZE_RX 8 + u8 driver_state[TLS_DRIVER_STATE_SIZE_RX] __aligned(8); }; -#define TLS_OFFLOAD_CONTEXT_SIZE_RX \ - (sizeof(struct tls_offload_context_rx) + TLS_DRIVER_STATE_SIZE_RX) - struct tls_record_info *tls_get_record(struct tls_offload_context_tx *context, u32 seq, u64 *p_record_sn); diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h index 21ba0a25f936..4d0578fab01a 100644 --- a/include/net/udp_tunnel.h +++ b/include/net/udp_tunnel.h @@ -162,6 +162,14 @@ int udp_tunnel6_xmit_skb(struct dst_entry *dst, struct sock *sk, void udp_tunnel_sock_release(struct socket *sock); +struct rtable *udp_tunnel_dst_lookup(struct sk_buff *skb, + struct net_device *dev, + struct net *net, int oif, + __be32 *saddr, + const struct ip_tunnel_key *key, + __be16 sport, __be16 dport, u8 tos, + struct dst_cache *dst_cache); + struct metadata_dst *udp_tun_rx_dst(struct sk_buff *skb, unsigned short family, __be16 flags, __be64 tunnel_id, int md_size); diff --git a/include/sound/soc.h b/include/sound/soc.h index fa2337a3cf4c..37f9d3fe302a 100644 --- a/include/sound/soc.h +++ b/include/sound/soc.h @@ -1126,6 +1126,8 @@ struct snd_soc_pcm_runtime { unsigned int pop_wait:1; unsigned int fe_compr:1; /* for Dynamic PCM */ + bool initialized; + int num_components; struct snd_soc_component *components[]; /* CPU/Codec/Platform */ }; diff --git a/include/uapi/drm/nouveau_drm.h b/include/uapi/drm/nouveau_drm.h index 8d7402c13e56..eaf9f248619f 100644 --- a/include/uapi/drm/nouveau_drm.h +++ b/include/uapi/drm/nouveau_drm.h @@ -44,6 +44,16 @@ extern "C" { #define NOUVEAU_GETPARAM_PTIMER_TIME 14 #define NOUVEAU_GETPARAM_HAS_BO_USAGE 15 #define NOUVEAU_GETPARAM_HAS_PAGEFLIP 16 + +/** + * @NOUVEAU_GETPARAM_EXEC_PUSH_MAX + * + * Query the maximum amount of IBs that can be pushed through a single + * &drm_nouveau_exec structure and hence a single &DRM_IOCTL_NOUVEAU_EXEC + * ioctl(). + */ +#define NOUVEAU_GETPARAM_EXEC_PUSH_MAX 17 + struct drm_nouveau_getparam { __u64 param; __u64 value; diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 5f13db15a3c7..7ba61b75bc0e 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1047,6 +1047,11 @@ enum bpf_attach_type { BPF_TCX_INGRESS, BPF_TCX_EGRESS, BPF_TRACE_UPROBE_MULTI, + BPF_CGROUP_UNIX_CONNECT, + BPF_CGROUP_UNIX_SENDMSG, + BPF_CGROUP_UNIX_RECVMSG, + BPF_CGROUP_UNIX_GETPEERNAME, + BPF_CGROUP_UNIX_GETSOCKNAME, __MAX_BPF_ATTACH_TYPE }; @@ -2704,8 +2709,8 @@ union bpf_attr { * *bpf_socket* should be one of the following: * * * **struct bpf_sock_ops** for **BPF_PROG_TYPE_SOCK_OPS**. - * * **struct bpf_sock_addr** for **BPF_CGROUP_INET4_CONNECT** - * and **BPF_CGROUP_INET6_CONNECT**. + * * **struct bpf_sock_addr** for **BPF_CGROUP_INET4_CONNECT**, + * **BPF_CGROUP_INET6_CONNECT** and **BPF_CGROUP_UNIX_CONNECT**. * * This helper actually implements a subset of **setsockopt()**. * It supports the following *level*\ s: @@ -2943,8 +2948,8 @@ union bpf_attr { * *bpf_socket* should be one of the following: * * * **struct bpf_sock_ops** for **BPF_PROG_TYPE_SOCK_OPS**. - * * **struct bpf_sock_addr** for **BPF_CGROUP_INET4_CONNECT** - * and **BPF_CGROUP_INET6_CONNECT**. + * * **struct bpf_sock_addr** for **BPF_CGROUP_INET4_CONNECT**, + * **BPF_CGROUP_INET6_CONNECT** and **BPF_CGROUP_UNIX_CONNECT**. * * This helper actually implements a subset of **getsockopt()**. * It supports the same set of *optname*\ s that is supported by @@ -3264,6 +3269,11 @@ union bpf_attr { * and *params*->smac will not be set as output. A common * use case is to call **bpf_redirect_neigh**\ () after * doing **bpf_fib_lookup**\ (). + * **BPF_FIB_LOOKUP_SRC** + * Derive and set source IP addr in *params*->ipv{4,6}_src + * for the nexthop. If the src addr cannot be derived, + * **BPF_FIB_LKUP_RET_NO_SRC_ADDR** is returned. In this + * case, *params*->dmac and *params*->smac are not set either. * * *ctx* is either **struct xdp_md** for XDP programs or * **struct sk_buff** tc cls_act programs. @@ -5096,6 +5106,8 @@ union bpf_attr { * **BPF_F_TIMER_ABS** * Start the timer in absolute expire value instead of the * default relative one. + * **BPF_F_TIMER_CPU_PIN** + * Timer will be pinned to the CPU of the caller. * * Return * 0 on success. @@ -6532,6 +6544,7 @@ struct bpf_link_info { __aligned_u64 addrs; __u32 count; /* in/out: kprobe_multi function count */ __u32 flags; + __u64 missed; } kprobe_multi; struct { __u32 type; /* enum bpf_perf_event_type */ @@ -6547,6 +6560,7 @@ struct bpf_link_info { __u32 name_len; __u32 offset; /* offset from func_name */ __u64 addr; + __u64 missed; } kprobe; /* BPF_PERF_EVENT_KPROBE, BPF_PERF_EVENT_KRETPROBE */ struct { __aligned_u64 tp_name; /* in/out */ @@ -6960,6 +6974,7 @@ enum { BPF_FIB_LOOKUP_OUTPUT = (1U << 1), BPF_FIB_LOOKUP_SKIP_NEIGH = (1U << 2), BPF_FIB_LOOKUP_TBID = (1U << 3), + BPF_FIB_LOOKUP_SRC = (1U << 4), }; enum { @@ -6972,6 +6987,7 @@ enum { BPF_FIB_LKUP_RET_UNSUPP_LWT, /* fwd requires encapsulation */ BPF_FIB_LKUP_RET_NO_NEIGH, /* no neighbor entry for nh */ BPF_FIB_LKUP_RET_FRAG_NEEDED, /* fragmentation required to fwd */ + BPF_FIB_LKUP_RET_NO_SRC_ADDR, /* failed to derive IP src addr */ }; struct bpf_fib_lookup { @@ -7006,6 +7022,9 @@ struct bpf_fib_lookup { __u32 rt_metric; }; + /* input: source address to consider for lookup + * output: source address result from lookup + */ union { __be32 ipv4_src; __u32 ipv6_src[4]; /* in6_addr; network order */ @@ -7307,9 +7326,11 @@ struct bpf_core_relo { * Flags to control bpf_timer_start() behaviour. * - BPF_F_TIMER_ABS: Timeout passed is absolute time, by default it is * relative to current time. + * - BPF_F_TIMER_CPU_PIN: Timer will be pinned to the CPU of the caller. */ enum { BPF_F_TIMER_ABS = (1ULL << 0), + BPF_F_TIMER_CPU_PIN = (1ULL << 1), }; /* BPF numbers iterator state */ diff --git a/include/uapi/linux/dpll.h b/include/uapi/linux/dpll.h index 20ef0718f8dc..715a491d2727 100644 --- a/include/uapi/linux/dpll.h +++ b/include/uapi/linux/dpll.h @@ -138,6 +138,8 @@ enum dpll_pin_capabilities { DPLL_PIN_CAPABILITIES_STATE_CAN_CHANGE = 4, }; +#define DPLL_PHASE_OFFSET_DIVIDER 1000 + enum dpll_a { DPLL_A_ID = 1, DPLL_A_MODULE_NAME, @@ -173,6 +175,10 @@ enum dpll_a_pin { DPLL_A_PIN_CAPABILITIES, DPLL_A_PIN_PARENT_DEVICE, DPLL_A_PIN_PARENT_PIN, + DPLL_A_PIN_PHASE_ADJUST_MIN, + DPLL_A_PIN_PHASE_ADJUST_MAX, + DPLL_A_PIN_PHASE_ADJUST, + DPLL_A_PIN_PHASE_OFFSET, __DPLL_A_PIN_MAX, DPLL_A_PIN_MAX = (__DPLL_A_PIN_MAX - 1) diff --git a/include/uapi/linux/if_packet.h b/include/uapi/linux/if_packet.h index 4d0ad22f83b5..9efc42382fdb 100644 --- a/include/uapi/linux/if_packet.h +++ b/include/uapi/linux/if_packet.h @@ -18,11 +18,7 @@ struct sockaddr_ll { unsigned short sll_hatype; unsigned char sll_pkttype; unsigned char sll_halen; - union { - unsigned char sll_addr[8]; - /* Actual length is in sll_halen. */ - __DECLARE_FLEX_ARRAY(unsigned char, sll_addr_flex); - }; + unsigned char sll_addr[8]; }; /* Packet types */ diff --git a/include/uapi/linux/ptp_clock.h b/include/uapi/linux/ptp_clock.h index 05cc35fc94ac..da700999cad4 100644 --- a/include/uapi/linux/ptp_clock.h +++ b/include/uapi/linux/ptp_clock.h @@ -224,6 +224,8 @@ struct ptp_pin_desc { _IOWR(PTP_CLK_MAGIC, 17, struct ptp_sys_offset_precise) #define PTP_SYS_OFFSET_EXTENDED2 \ _IOWR(PTP_CLK_MAGIC, 18, struct ptp_sys_offset_extended) +#define PTP_MASK_CLEAR_ALL _IO(PTP_CLK_MAGIC, 19) +#define PTP_MASK_EN_SINGLE _IOW(PTP_CLK_MAGIC, 20, unsigned int) struct ptp_extts_event { struct ptp_clock_time t; /* Time event occured. */ diff --git a/include/uapi/linux/vm_sockets.h b/include/uapi/linux/vm_sockets.h index c60ca33eac59..ed07181d4eff 100644 --- a/include/uapi/linux/vm_sockets.h +++ b/include/uapi/linux/vm_sockets.h @@ -191,4 +191,21 @@ struct sockaddr_vm { #define IOCTL_VM_SOCKETS_GET_LOCAL_CID _IO(7, 0xb9) +/* MSG_ZEROCOPY notifications are encoded in the standard error format, + * sock_extended_err. See Documentation/networking/msg_zerocopy.rst in + * kernel source tree for more details. + */ + +/* 'cmsg_level' field value of 'struct cmsghdr' for notification parsing + * when MSG_ZEROCOPY flag is used on transmissions. + */ + +#define SOL_VSOCK 287 + +/* 'cmsg_type' field value of 'struct cmsghdr' for notification parsing + * when MSG_ZEROCOPY flag is used on transmissions. + */ + +#define VSOCK_RECVERR 1 + #endif /* _UAPI_VM_SOCKETS_H */ diff --git a/io_uring/io-wq.c b/io_uring/io-wq.c index 1ecc8c748768..522196dfb0ff 100644 --- a/io_uring/io-wq.c +++ b/io_uring/io-wq.c @@ -1151,9 +1151,6 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data) wq = kzalloc(sizeof(struct io_wq), GFP_KERNEL); if (!wq) return ERR_PTR(-ENOMEM); - ret = cpuhp_state_add_instance_nocalls(io_wq_online, &wq->cpuhp_node); - if (ret) - goto err_wq; refcount_inc(&data->hash->refs); wq->hash = data->hash; @@ -1186,13 +1183,14 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data) wq->task = get_task_struct(data->task); atomic_set(&wq->worker_refs, 1); init_completion(&wq->worker_done); + ret = cpuhp_state_add_instance_nocalls(io_wq_online, &wq->cpuhp_node); + if (ret) + goto err; + return wq; err: io_wq_put_hash(data->hash); - cpuhp_state_remove_instance_nocalls(io_wq_online, &wq->cpuhp_node); - free_cpumask_var(wq->cpu_mask); -err_wq: kfree(wq); return ERR_PTR(ret); } diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 783ed0fff71b..d839a80a6751 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -2686,7 +2686,7 @@ static void *__io_uaddr_map(struct page ***pages, unsigned short *npages, { struct page **page_array; unsigned int nr_pages; - int ret; + int ret, i; *npages = 0; @@ -2716,6 +2716,20 @@ err: */ if (page_array[0] != page_array[ret - 1]) goto err; + + /* + * Can't support mapping user allocated ring memory on 32-bit archs + * where it could potentially reside in highmem. Just fail those with + * -EINVAL, just like we did on kernels that didn't support this + * feature. + */ + for (i = 0; i < nr_pages; i++) { + if (PageHighMem(page_array[i])) { + ret = -EINVAL; + goto err; + } + } + *pages = page_array; *npages = nr_pages; return page_to_virt(page_array[0]); diff --git a/io_uring/io_uring.h b/io_uring/io_uring.h index 547c30582fb8..0bc145614a6e 100644 --- a/io_uring/io_uring.h +++ b/io_uring/io_uring.h @@ -86,20 +86,33 @@ bool __io_alloc_req_refill(struct io_ring_ctx *ctx); bool io_match_task_safe(struct io_kiocb *head, struct task_struct *task, bool cancel_all); -#define io_lockdep_assert_cq_locked(ctx) \ - do { \ - lockdep_assert(in_task()); \ - \ - if (ctx->flags & IORING_SETUP_IOPOLL) { \ - lockdep_assert_held(&ctx->uring_lock); \ - } else if (!ctx->task_complete) { \ - lockdep_assert_held(&ctx->completion_lock); \ - } else if (ctx->submitter_task->flags & PF_EXITING) { \ - lockdep_assert(current_work()); \ - } else { \ - lockdep_assert(current == ctx->submitter_task); \ - } \ - } while (0) +#if defined(CONFIG_PROVE_LOCKING) +static inline void io_lockdep_assert_cq_locked(struct io_ring_ctx *ctx) +{ + lockdep_assert(in_task()); + + if (ctx->flags & IORING_SETUP_IOPOLL) { + lockdep_assert_held(&ctx->uring_lock); + } else if (!ctx->task_complete) { + lockdep_assert_held(&ctx->completion_lock); + } else if (ctx->submitter_task) { + /* + * ->submitter_task may be NULL and we can still post a CQE, + * if the ring has been setup with IORING_SETUP_R_DISABLED. + * Not from an SQE, as those cannot be submitted, but via + * updating tagged resources. + */ + if (ctx->submitter_task->flags & PF_EXITING) + lockdep_assert(current_work()); + else + lockdep_assert(current == ctx->submitter_task); + } +} +#else +static inline void io_lockdep_assert_cq_locked(struct io_ring_ctx *ctx) +{ +} +#endif static inline void io_req_task_work_add(struct io_kiocb *req) { diff --git a/io_uring/kbuf.c b/io_uring/kbuf.c index 556f4df25b0f..9123138aa9f4 100644 --- a/io_uring/kbuf.c +++ b/io_uring/kbuf.c @@ -477,7 +477,7 @@ static int io_pin_pbuf_ring(struct io_uring_buf_reg *reg, { struct io_uring_buf_ring *br; struct page **pages; - int nr_pages; + int i, nr_pages; pages = io_pin_pages(reg->ring_addr, flex_array_size(br, bufs, reg->ring_entries), @@ -485,6 +485,17 @@ static int io_pin_pbuf_ring(struct io_uring_buf_reg *reg, if (IS_ERR(pages)) return PTR_ERR(pages); + /* + * Apparently some 32-bit boxes (ARM) will return highmem pages, + * which then need to be mapped. We could support that, but it'd + * complicate the code and slowdown the common cases quite a bit. + * So just error out, returning -EINVAL just like we did on kernels + * that didn't support mapped buffer rings. + */ + for (i = 0; i < nr_pages; i++) + if (PageHighMem(pages[i])) + goto error_unpin; + br = page_address(pages[0]); #ifdef SHM_COLOUR /* @@ -496,13 +507,8 @@ static int io_pin_pbuf_ring(struct io_uring_buf_reg *reg, * should use IOU_PBUF_RING_MMAP instead, and liburing will handle * this transparently. */ - if ((reg->ring_addr | (unsigned long) br) & (SHM_COLOUR - 1)) { - int i; - - for (i = 0; i < nr_pages; i++) - unpin_user_page(pages[i]); - return -EINVAL; - } + if ((reg->ring_addr | (unsigned long) br) & (SHM_COLOUR - 1)) + goto error_unpin; #endif bl->buf_pages = pages; bl->buf_nr_pages = nr_pages; @@ -510,6 +516,11 @@ static int io_pin_pbuf_ring(struct io_uring_buf_reg *reg, bl->is_mapped = 1; bl->is_mmap = 0; return 0; +error_unpin: + for (i = 0; i < nr_pages; i++) + unpin_user_page(pages[i]); + kvfree(pages); + return -EINVAL; } static int io_alloc_pbuf_ring(struct io_uring_buf_reg *reg, diff --git a/kernel/bpf/bpf_iter.c b/kernel/bpf/bpf_iter.c index 96856f130cbf..833faa04461b 100644 --- a/kernel/bpf/bpf_iter.c +++ b/kernel/bpf/bpf_iter.c @@ -793,8 +793,6 @@ __bpf_kfunc int bpf_iter_num_new(struct bpf_iter_num *it, int start, int end) BUILD_BUG_ON(sizeof(struct bpf_iter_num_kern) != sizeof(struct bpf_iter_num)); BUILD_BUG_ON(__alignof__(struct bpf_iter_num_kern) != __alignof__(struct bpf_iter_num)); - BTF_TYPE_EMIT(struct btf_iter_num); - /* start == end is legit, it's an empty range and we'll just get NULL * on first (and any subsequent) bpf_iter_num_next() call */ diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 69101200c124..15d71d2986d3 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -7850,6 +7850,7 @@ static int bpf_prog_type_to_kfunc_hook(enum bpf_prog_type prog_type) case BPF_PROG_TYPE_SYSCALL: return BTF_KFUNC_HOOK_SYSCALL; case BPF_PROG_TYPE_CGROUP_SKB: + case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: return BTF_KFUNC_HOOK_CGROUP_SKB; case BPF_PROG_TYPE_SCHED_ACT: return BTF_KFUNC_HOOK_SCHED_ACT; diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index 03b3d4492980..74ad2215e1ba 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -1450,18 +1450,22 @@ EXPORT_SYMBOL(__cgroup_bpf_run_filter_sk); * provided by user sockaddr * @sk: sock struct that will use sockaddr * @uaddr: sockaddr struct provided by user + * @uaddrlen: Pointer to the size of the sockaddr struct provided by user. It is + * read-only for AF_INET[6] uaddr but can be modified for AF_UNIX + * uaddr. * @atype: The type of program to be executed * @t_ctx: Pointer to attach type specific context * @flags: Pointer to u32 which contains higher bits of BPF program * return value (OR'ed together). * - * socket is expected to be of type INET or INET6. + * socket is expected to be of type INET, INET6 or UNIX. * * This function will return %-EPERM if an attached program is found and * returned value != 1 during execution. In all other cases, 0 is returned. */ int __cgroup_bpf_run_filter_sock_addr(struct sock *sk, struct sockaddr *uaddr, + int *uaddrlen, enum cgroup_bpf_attach_type atype, void *t_ctx, u32 *flags) @@ -1473,21 +1477,31 @@ int __cgroup_bpf_run_filter_sock_addr(struct sock *sk, }; struct sockaddr_storage unspec; struct cgroup *cgrp; + int ret; /* Check socket family since not all sockets represent network * endpoint (e.g. AF_UNIX). */ - if (sk->sk_family != AF_INET && sk->sk_family != AF_INET6) + if (sk->sk_family != AF_INET && sk->sk_family != AF_INET6 && + sk->sk_family != AF_UNIX) return 0; if (!ctx.uaddr) { memset(&unspec, 0, sizeof(unspec)); ctx.uaddr = (struct sockaddr *)&unspec; + ctx.uaddrlen = 0; + } else { + ctx.uaddrlen = *uaddrlen; } cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data); - return bpf_prog_run_array_cg(&cgrp->bpf, atype, &ctx, bpf_prog_run, - 0, flags); + ret = bpf_prog_run_array_cg(&cgrp->bpf, atype, &ctx, bpf_prog_run, + 0, flags); + + if (!ret && uaddr) + *uaddrlen = ctx.uaddrlen; + + return ret; } EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_addr); @@ -2520,10 +2534,13 @@ cgroup_common_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) case BPF_CGROUP_SOCK_OPS: case BPF_CGROUP_UDP4_RECVMSG: case BPF_CGROUP_UDP6_RECVMSG: + case BPF_CGROUP_UNIX_RECVMSG: case BPF_CGROUP_INET4_GETPEERNAME: case BPF_CGROUP_INET6_GETPEERNAME: + case BPF_CGROUP_UNIX_GETPEERNAME: case BPF_CGROUP_INET4_GETSOCKNAME: case BPF_CGROUP_INET6_GETSOCKNAME: + case BPF_CGROUP_UNIX_GETSOCKNAME: return NULL; default: return &bpf_get_retval_proto; @@ -2535,10 +2552,13 @@ cgroup_common_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) case BPF_CGROUP_SOCK_OPS: case BPF_CGROUP_UDP4_RECVMSG: case BPF_CGROUP_UDP6_RECVMSG: + case BPF_CGROUP_UNIX_RECVMSG: case BPF_CGROUP_INET4_GETPEERNAME: case BPF_CGROUP_INET6_GETPEERNAME: + case BPF_CGROUP_UNIX_GETPEERNAME: case BPF_CGROUP_INET4_GETSOCKNAME: case BPF_CGROUP_INET6_GETSOCKNAME: + case BPF_CGROUP_UNIX_GETSOCKNAME: return NULL; default: return &bpf_set_retval_proto; diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index dd1c69ee3375..62a53ebfedf9 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -1272,7 +1272,7 @@ BPF_CALL_3(bpf_timer_start, struct bpf_timer_kern *, timer, u64, nsecs, u64, fla if (in_nmi()) return -EOPNOTSUPP; - if (flags > BPF_F_TIMER_ABS) + if (flags & ~(BPF_F_TIMER_ABS | BPF_F_TIMER_CPU_PIN)) return -EINVAL; __bpf_spin_lock_irqsave(&timer->lock); t = timer->timer; @@ -1286,6 +1286,9 @@ BPF_CALL_3(bpf_timer_start, struct bpf_timer_kern *, timer, u64, nsecs, u64, fla else mode = HRTIMER_MODE_REL_SOFT; + if (flags & BPF_F_TIMER_CPU_PIN) + mode |= HRTIMER_MODE_PINNED; + hrtimer_start(&t->timer, ns_to_ktime(nsecs), mode); out: __bpf_spin_unlock_irqrestore(&timer->lock); @@ -2549,6 +2552,9 @@ BTF_ID_FLAGS(func, bpf_dynptr_slice_rdwr, KF_RET_NULL) BTF_ID_FLAGS(func, bpf_iter_num_new, KF_ITER_NEW) BTF_ID_FLAGS(func, bpf_iter_num_next, KF_ITER_NEXT | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_iter_num_destroy, KF_ITER_DESTROY) +BTF_ID_FLAGS(func, bpf_iter_task_vma_new, KF_ITER_NEW | KF_RCU) +BTF_ID_FLAGS(func, bpf_iter_task_vma_next, KF_ITER_NEXT | KF_RET_NULL) +BTF_ID_FLAGS(func, bpf_iter_task_vma_destroy, KF_ITER_DESTROY) BTF_ID_FLAGS(func, bpf_dynptr_adjust) BTF_ID_FLAGS(func, bpf_dynptr_is_null) BTF_ID_FLAGS(func, bpf_dynptr_is_rdonly) diff --git a/kernel/bpf/mprog.c b/kernel/bpf/mprog.c index 007d98c799e2..1394168062e8 100644 --- a/kernel/bpf/mprog.c +++ b/kernel/bpf/mprog.c @@ -401,14 +401,16 @@ int bpf_mprog_query(const union bpf_attr *attr, union bpf_attr __user *uattr, struct bpf_mprog_cp *cp; struct bpf_prog *prog; const u32 flags = 0; + u32 id, count = 0; + u64 revision = 1; int i, ret = 0; - u32 id, count; - u64 revision; if (attr->query.query_flags || attr->query.attach_flags) return -EINVAL; - revision = bpf_mprog_revision(entry); - count = bpf_mprog_total(entry); + if (entry) { + revision = bpf_mprog_revision(entry); + count = bpf_mprog_total(entry); + } if (copy_to_user(&uattr->query.attach_flags, &flags, sizeof(flags))) return -EFAULT; if (copy_to_user(&uattr->query.revision, &revision, sizeof(revision))) diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c index 458bb80b14d5..d6b277482085 100644 --- a/kernel/bpf/stackmap.c +++ b/kernel/bpf/stackmap.c @@ -28,7 +28,7 @@ struct bpf_stack_map { void *elems; struct pcpu_freelist freelist; u32 n_buckets; - struct stack_map_bucket *buckets[]; + struct stack_map_bucket *buckets[] __counted_by(n_buckets); }; static inline bool stack_map_use_build_id(struct bpf_map *map) diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 85c1d908f70f..341f8cb4405c 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -2446,14 +2446,19 @@ bpf_prog_load_check_attach(enum bpf_prog_type prog_type, case BPF_CGROUP_INET6_BIND: case BPF_CGROUP_INET4_CONNECT: case BPF_CGROUP_INET6_CONNECT: + case BPF_CGROUP_UNIX_CONNECT: case BPF_CGROUP_INET4_GETPEERNAME: case BPF_CGROUP_INET6_GETPEERNAME: + case BPF_CGROUP_UNIX_GETPEERNAME: case BPF_CGROUP_INET4_GETSOCKNAME: case BPF_CGROUP_INET6_GETSOCKNAME: + case BPF_CGROUP_UNIX_GETSOCKNAME: case BPF_CGROUP_UDP4_SENDMSG: case BPF_CGROUP_UDP6_SENDMSG: + case BPF_CGROUP_UNIX_SENDMSG: case BPF_CGROUP_UDP4_RECVMSG: case BPF_CGROUP_UDP6_RECVMSG: + case BPF_CGROUP_UNIX_RECVMSG: return 0; default: return -EINVAL; @@ -3374,7 +3379,7 @@ static void bpf_perf_link_dealloc(struct bpf_link *link) static int bpf_perf_link_fill_common(const struct perf_event *event, char __user *uname, u32 ulen, u64 *probe_offset, u64 *probe_addr, - u32 *fd_type) + u32 *fd_type, unsigned long *missed) { const char *buf; u32 prog_id; @@ -3385,7 +3390,7 @@ static int bpf_perf_link_fill_common(const struct perf_event *event, return -EINVAL; err = bpf_get_perf_event_info(event, &prog_id, fd_type, &buf, - probe_offset, probe_addr); + probe_offset, probe_addr, missed); if (err) return err; if (!uname) @@ -3408,6 +3413,7 @@ static int bpf_perf_link_fill_common(const struct perf_event *event, static int bpf_perf_link_fill_kprobe(const struct perf_event *event, struct bpf_link_info *info) { + unsigned long missed; char __user *uname; u64 addr, offset; u32 ulen, type; @@ -3416,7 +3422,7 @@ static int bpf_perf_link_fill_kprobe(const struct perf_event *event, uname = u64_to_user_ptr(info->perf_event.kprobe.func_name); ulen = info->perf_event.kprobe.name_len; err = bpf_perf_link_fill_common(event, uname, ulen, &offset, &addr, - &type); + &type, &missed); if (err) return err; if (type == BPF_FD_TYPE_KRETPROBE) @@ -3425,6 +3431,7 @@ static int bpf_perf_link_fill_kprobe(const struct perf_event *event, info->perf_event.type = BPF_PERF_EVENT_KPROBE; info->perf_event.kprobe.offset = offset; + info->perf_event.kprobe.missed = missed; if (!kallsyms_show_value(current_cred())) addr = 0; info->perf_event.kprobe.addr = addr; @@ -3444,7 +3451,7 @@ static int bpf_perf_link_fill_uprobe(const struct perf_event *event, uname = u64_to_user_ptr(info->perf_event.uprobe.file_name); ulen = info->perf_event.uprobe.name_len; err = bpf_perf_link_fill_common(event, uname, ulen, &offset, &addr, - &type); + &type, NULL); if (err) return err; @@ -3480,7 +3487,7 @@ static int bpf_perf_link_fill_tracepoint(const struct perf_event *event, uname = u64_to_user_ptr(info->perf_event.tracepoint.tp_name); ulen = info->perf_event.tracepoint.name_len; info->perf_event.type = BPF_PERF_EVENT_TRACEPOINT; - return bpf_perf_link_fill_common(event, uname, ulen, NULL, NULL, NULL); + return bpf_perf_link_fill_common(event, uname, ulen, NULL, NULL, NULL, NULL); } static int bpf_perf_link_fill_perf_event(const struct perf_event *event, @@ -3676,14 +3683,19 @@ attach_type_to_prog_type(enum bpf_attach_type attach_type) case BPF_CGROUP_INET6_BIND: case BPF_CGROUP_INET4_CONNECT: case BPF_CGROUP_INET6_CONNECT: + case BPF_CGROUP_UNIX_CONNECT: case BPF_CGROUP_INET4_GETPEERNAME: case BPF_CGROUP_INET6_GETPEERNAME: + case BPF_CGROUP_UNIX_GETPEERNAME: case BPF_CGROUP_INET4_GETSOCKNAME: case BPF_CGROUP_INET6_GETSOCKNAME: + case BPF_CGROUP_UNIX_GETSOCKNAME: case BPF_CGROUP_UDP4_SENDMSG: case BPF_CGROUP_UDP6_SENDMSG: + case BPF_CGROUP_UNIX_SENDMSG: case BPF_CGROUP_UDP4_RECVMSG: case BPF_CGROUP_UDP6_RECVMSG: + case BPF_CGROUP_UNIX_RECVMSG: return BPF_PROG_TYPE_CGROUP_SOCK_ADDR; case BPF_CGROUP_SOCK_OPS: return BPF_PROG_TYPE_SOCK_OPS; @@ -3800,7 +3812,6 @@ static int bpf_prog_attach(const union bpf_attr *attr) { enum bpf_prog_type ptype; struct bpf_prog *prog; - u32 mask; int ret; if (CHECK_ATTR(BPF_PROG_ATTACH)) @@ -3809,10 +3820,16 @@ static int bpf_prog_attach(const union bpf_attr *attr) ptype = attach_type_to_prog_type(attr->attach_type); if (ptype == BPF_PROG_TYPE_UNSPEC) return -EINVAL; - mask = bpf_mprog_supported(ptype) ? - BPF_F_ATTACH_MASK_MPROG : BPF_F_ATTACH_MASK_BASE; - if (attr->attach_flags & ~mask) - return -EINVAL; + if (bpf_mprog_supported(ptype)) { + if (attr->attach_flags & ~BPF_F_ATTACH_MASK_MPROG) + return -EINVAL; + } else { + if (attr->attach_flags & ~BPF_F_ATTACH_MASK_BASE) + return -EINVAL; + if (attr->relative_fd || + attr->expected_revision) + return -EINVAL; + } prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype); if (IS_ERR(prog)) @@ -3882,6 +3899,10 @@ static int bpf_prog_detach(const union bpf_attr *attr) if (IS_ERR(prog)) return PTR_ERR(prog); } + } else if (attr->attach_flags || + attr->relative_fd || + attr->expected_revision) { + return -EINVAL; } switch (ptype) { @@ -3917,7 +3938,7 @@ static int bpf_prog_detach(const union bpf_attr *attr) return ret; } -#define BPF_PROG_QUERY_LAST_FIELD query.link_attach_flags +#define BPF_PROG_QUERY_LAST_FIELD query.revision static int bpf_prog_query(const union bpf_attr *attr, union bpf_attr __user *uattr) @@ -3940,14 +3961,19 @@ static int bpf_prog_query(const union bpf_attr *attr, case BPF_CGROUP_INET6_POST_BIND: case BPF_CGROUP_INET4_CONNECT: case BPF_CGROUP_INET6_CONNECT: + case BPF_CGROUP_UNIX_CONNECT: case BPF_CGROUP_INET4_GETPEERNAME: case BPF_CGROUP_INET6_GETPEERNAME: + case BPF_CGROUP_UNIX_GETPEERNAME: case BPF_CGROUP_INET4_GETSOCKNAME: case BPF_CGROUP_INET6_GETSOCKNAME: + case BPF_CGROUP_UNIX_GETSOCKNAME: case BPF_CGROUP_UDP4_SENDMSG: case BPF_CGROUP_UDP6_SENDMSG: + case BPF_CGROUP_UNIX_SENDMSG: case BPF_CGROUP_UDP4_RECVMSG: case BPF_CGROUP_UDP6_RECVMSG: + case BPF_CGROUP_UNIX_RECVMSG: case BPF_CGROUP_SOCK_OPS: case BPF_CGROUP_DEVICE: case BPF_CGROUP_SYSCTL: @@ -4813,7 +4839,7 @@ static int bpf_task_fd_query(const union bpf_attr *attr, err = bpf_get_perf_event_info(event, &prog_id, &fd_type, &buf, &probe_offset, - &probe_addr); + &probe_addr, NULL); if (!err) err = bpf_task_fd_query_copy(attr, uattr, prog_id, fd_type, buf, diff --git a/kernel/bpf/task_iter.c b/kernel/bpf/task_iter.c index 7473068ed313..fef17628341f 100644 --- a/kernel/bpf/task_iter.c +++ b/kernel/bpf/task_iter.c @@ -7,7 +7,9 @@ #include <linux/fs.h> #include <linux/fdtable.h> #include <linux/filter.h> +#include <linux/bpf_mem_alloc.h> #include <linux/btf_ids.h> +#include <linux/mm_types.h> #include "mmap_unlock_work.h" static const char * const iter_task_type_names[] = { @@ -803,6 +805,95 @@ const struct bpf_func_proto bpf_find_vma_proto = { .arg5_type = ARG_ANYTHING, }; +struct bpf_iter_task_vma_kern_data { + struct task_struct *task; + struct mm_struct *mm; + struct mmap_unlock_irq_work *work; + struct vma_iterator vmi; +}; + +struct bpf_iter_task_vma { + /* opaque iterator state; having __u64 here allows to preserve correct + * alignment requirements in vmlinux.h, generated from BTF + */ + __u64 __opaque[1]; +} __attribute__((aligned(8))); + +/* Non-opaque version of bpf_iter_task_vma */ +struct bpf_iter_task_vma_kern { + struct bpf_iter_task_vma_kern_data *data; +} __attribute__((aligned(8))); + +__diag_push(); +__diag_ignore_all("-Wmissing-prototypes", + "Global functions as their definitions will be in vmlinux BTF"); + +__bpf_kfunc int bpf_iter_task_vma_new(struct bpf_iter_task_vma *it, + struct task_struct *task, u64 addr) +{ + struct bpf_iter_task_vma_kern *kit = (void *)it; + bool irq_work_busy = false; + int err; + + BUILD_BUG_ON(sizeof(struct bpf_iter_task_vma_kern) != sizeof(struct bpf_iter_task_vma)); + BUILD_BUG_ON(__alignof__(struct bpf_iter_task_vma_kern) != __alignof__(struct bpf_iter_task_vma)); + + /* is_iter_reg_valid_uninit guarantees that kit hasn't been initialized + * before, so non-NULL kit->data doesn't point to previously + * bpf_mem_alloc'd bpf_iter_task_vma_kern_data + */ + kit->data = bpf_mem_alloc(&bpf_global_ma, sizeof(struct bpf_iter_task_vma_kern_data)); + if (!kit->data) + return -ENOMEM; + + kit->data->task = get_task_struct(task); + kit->data->mm = task->mm; + if (!kit->data->mm) { + err = -ENOENT; + goto err_cleanup_iter; + } + + /* kit->data->work == NULL is valid after bpf_mmap_unlock_get_irq_work */ + irq_work_busy = bpf_mmap_unlock_get_irq_work(&kit->data->work); + if (irq_work_busy || !mmap_read_trylock(kit->data->mm)) { + err = -EBUSY; + goto err_cleanup_iter; + } + + vma_iter_init(&kit->data->vmi, kit->data->mm, addr); + return 0; + +err_cleanup_iter: + if (kit->data->task) + put_task_struct(kit->data->task); + bpf_mem_free(&bpf_global_ma, kit->data); + /* NULL kit->data signals failed bpf_iter_task_vma initialization */ + kit->data = NULL; + return err; +} + +__bpf_kfunc struct vm_area_struct *bpf_iter_task_vma_next(struct bpf_iter_task_vma *it) +{ + struct bpf_iter_task_vma_kern *kit = (void *)it; + + if (!kit->data) /* bpf_iter_task_vma_new failed */ + return NULL; + return vma_next(&kit->data->vmi); +} + +__bpf_kfunc void bpf_iter_task_vma_destroy(struct bpf_iter_task_vma *it) +{ + struct bpf_iter_task_vma_kern *kit = (void *)it; + + if (kit->data) { + bpf_mmap_unlock_mm(kit->data->work, kit->data->mm); + put_task_struct(kit->data->task); + bpf_mem_free(&bpf_global_ma, kit->data); + } +} + +__diag_pop(); + DEFINE_PER_CPU(struct mmap_unlock_irq_work, mmap_unlock_work); static void do_mmap_read_unlock(struct irq_work *entry) diff --git a/kernel/bpf/tcx.c b/kernel/bpf/tcx.c index 13f0b5dc8262..1338a13a8b64 100644 --- a/kernel/bpf/tcx.c +++ b/kernel/bpf/tcx.c @@ -123,7 +123,6 @@ int tcx_prog_query(const union bpf_attr *attr, union bpf_attr __user *uattr) { bool ingress = attr->query.attach_type == BPF_TCX_INGRESS; struct net *net = current->nsproxy->net_ns; - struct bpf_mprog_entry *entry; struct net_device *dev; int ret; @@ -133,12 +132,7 @@ int tcx_prog_query(const union bpf_attr *attr, union bpf_attr __user *uattr) ret = -ENODEV; goto out; } - entry = tcx_entry_fetch(dev, ingress); - if (!entry) { - ret = -ENOENT; - goto out; - } - ret = bpf_mprog_query(attr, uattr, entry); + ret = bpf_mprog_query(attr, uattr, tcx_entry_fetch(dev, ingress)); out: rtnl_unlock(); return ret; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index c8dae2fde115..bb58987e4844 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1342,6 +1342,50 @@ static void scrub_spilled_slot(u8 *stype) *stype = STACK_MISC; } +static void print_scalar_ranges(struct bpf_verifier_env *env, + const struct bpf_reg_state *reg, + const char **sep) +{ + struct { + const char *name; + u64 val; + bool omit; + } minmaxs[] = { + {"smin", reg->smin_value, reg->smin_value == S64_MIN}, + {"smax", reg->smax_value, reg->smax_value == S64_MAX}, + {"umin", reg->umin_value, reg->umin_value == 0}, + {"umax", reg->umax_value, reg->umax_value == U64_MAX}, + {"smin32", (s64)reg->s32_min_value, reg->s32_min_value == S32_MIN}, + {"smax32", (s64)reg->s32_max_value, reg->s32_max_value == S32_MAX}, + {"umin32", reg->u32_min_value, reg->u32_min_value == 0}, + {"umax32", reg->u32_max_value, reg->u32_max_value == U32_MAX}, + }, *m1, *m2, *mend = &minmaxs[ARRAY_SIZE(minmaxs)]; + bool neg1, neg2; + + for (m1 = &minmaxs[0]; m1 < mend; m1++) { + if (m1->omit) + continue; + + neg1 = m1->name[0] == 's' && (s64)m1->val < 0; + + verbose(env, "%s%s=", *sep, m1->name); + *sep = ","; + + for (m2 = m1 + 2; m2 < mend; m2 += 2) { + if (m2->omit || m2->val != m1->val) + continue; + /* don't mix negatives with positives */ + neg2 = m2->name[0] == 's' && (s64)m2->val < 0; + if (neg2 != neg1) + continue; + m2->omit = true; + verbose(env, "%s=", m2->name); + } + + verbose(env, m1->name[0] == 's' ? "%lld" : "%llu", m1->val); + } +} + static void print_verifier_state(struct bpf_verifier_env *env, const struct bpf_func_state *state, bool print_all) @@ -1405,34 +1449,13 @@ static void print_verifier_state(struct bpf_verifier_env *env, */ verbose_a("imm=%llx", reg->var_off.value); } else { - if (reg->smin_value != reg->umin_value && - reg->smin_value != S64_MIN) - verbose_a("smin=%lld", (long long)reg->smin_value); - if (reg->smax_value != reg->umax_value && - reg->smax_value != S64_MAX) - verbose_a("smax=%lld", (long long)reg->smax_value); - if (reg->umin_value != 0) - verbose_a("umin=%llu", (unsigned long long)reg->umin_value); - if (reg->umax_value != U64_MAX) - verbose_a("umax=%llu", (unsigned long long)reg->umax_value); + print_scalar_ranges(env, reg, &sep); if (!tnum_is_unknown(reg->var_off)) { char tn_buf[48]; tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); verbose_a("var_off=%s", tn_buf); } - if (reg->s32_min_value != reg->smin_value && - reg->s32_min_value != S32_MIN) - verbose_a("s32_min=%d", (int)(reg->s32_min_value)); - if (reg->s32_max_value != reg->smax_value && - reg->s32_max_value != S32_MAX) - verbose_a("s32_max=%d", (int)(reg->s32_max_value)); - if (reg->u32_min_value != reg->umin_value && - reg->u32_min_value != U32_MIN) - verbose_a("u32_min=%d", (int)(reg->u32_min_value)); - if (reg->u32_max_value != reg->umax_value && - reg->u32_max_value != U32_MAX) - verbose_a("u32_max=%d", (int)(reg->u32_max_value)); } #undef verbose_a @@ -1516,7 +1539,8 @@ static void print_verifier_state(struct bpf_verifier_env *env, if (state->in_async_callback_fn) verbose(env, " async_cb"); verbose(env, "\n"); - mark_verifier_state_clean(env); + if (!print_all) + mark_verifier_state_clean(env); } static inline u32 vlog_alignment(u32 pos) @@ -3114,7 +3138,7 @@ static bool is_reg64(struct bpf_verifier_env *env, struct bpf_insn *insn, if (class == BPF_LDX) { if (t != SRC_OP) - return BPF_SIZE(code) == BPF_DW; + return BPF_SIZE(code) == BPF_DW || BPF_MODE(code) == BPF_MEMSX; /* LDX source must be ptr. */ return true; } @@ -14383,6 +14407,8 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env, !sanitize_speculative_path(env, insn, *insn_idx + 1, *insn_idx)) return -EFAULT; + if (env->log.level & BPF_LOG_LEVEL) + print_insn_state(env, this_branch->frame[this_branch->curframe]); *insn_idx += insn->off; return 0; } else if (pred == 0) { @@ -14395,6 +14421,8 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env, *insn_idx + insn->off + 1, *insn_idx)) return -EFAULT; + if (env->log.level & BPF_LOG_LEVEL) + print_insn_state(env, this_branch->frame[this_branch->curframe]); return 0; } @@ -14727,7 +14755,7 @@ static int check_return_code(struct bpf_verifier_env *env, int regno) struct tnum enforce_attach_type_range = tnum_unknown; const struct bpf_prog *prog = env->prog; struct bpf_reg_state *reg; - struct tnum range = tnum_range(0, 1); + struct tnum range = tnum_range(0, 1), const_0 = tnum_const(0); enum bpf_prog_type prog_type = resolve_prog_type(env->prog); int err; struct bpf_func_state *frame = env->cur_state->frame[0]; @@ -14775,8 +14803,8 @@ static int check_return_code(struct bpf_verifier_env *env, int regno) return -EINVAL; } - if (!tnum_in(tnum_const(0), reg->var_off)) { - verbose_invalid_scalar(env, reg, &range, "async callback", "R0"); + if (!tnum_in(const_0, reg->var_off)) { + verbose_invalid_scalar(env, reg, &const_0, "async callback", "R0"); return -EINVAL; } return 0; @@ -14795,10 +14823,13 @@ static int check_return_code(struct bpf_verifier_env *env, int regno) case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: if (env->prog->expected_attach_type == BPF_CGROUP_UDP4_RECVMSG || env->prog->expected_attach_type == BPF_CGROUP_UDP6_RECVMSG || + env->prog->expected_attach_type == BPF_CGROUP_UNIX_RECVMSG || env->prog->expected_attach_type == BPF_CGROUP_INET4_GETPEERNAME || env->prog->expected_attach_type == BPF_CGROUP_INET6_GETPEERNAME || + env->prog->expected_attach_type == BPF_CGROUP_UNIX_GETPEERNAME || env->prog->expected_attach_type == BPF_CGROUP_INET4_GETSOCKNAME || - env->prog->expected_attach_type == BPF_CGROUP_INET6_GETSOCKNAME) + env->prog->expected_attach_type == BPF_CGROUP_INET6_GETSOCKNAME || + env->prog->expected_attach_type == BPF_CGROUP_UNIX_GETSOCKNAME) range = tnum_range(1, 1); if (env->prog->expected_attach_type == BPF_CGROUP_INET4_BIND || env->prog->expected_attach_type == BPF_CGROUP_INET6_BIND) diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index 87e9f7e2bdc0..0f12e0a97e43 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -2647,7 +2647,7 @@ static int prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm, memory_bm_free(bm, PG_UNSAFE_KEEP); /* Make a copy of zero_bm so it can be created in safe pages */ - error = memory_bm_create(&tmp, GFP_ATOMIC, PG_ANY); + error = memory_bm_create(&tmp, GFP_ATOMIC, PG_SAFE); if (error) goto Free; @@ -2660,7 +2660,7 @@ static int prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm, goto Free; duplicate_memory_bitmap(zero_bm, &tmp); - memory_bm_free(&tmp, PG_UNSAFE_KEEP); + memory_bm_free(&tmp, PG_UNSAFE_CLEAR); /* At this point zero_bm is in safe pages and it can be used for restoring. */ if (nr_highmem > 0) { diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 7e0b4dd02398..0b3af1529778 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -3740,12 +3740,18 @@ static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progre seq = prb_next_seq(prb); + /* Flush the consoles so that records up to @seq are printed. */ + console_lock(); + console_unlock(); + for (;;) { diff = 0; /* * Hold the console_lock to guarantee safe access to - * console->seq. + * console->seq. Releasing console_lock flushes more + * records in case @seq is still not printed on all + * usable consoles. */ console_lock(); diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index 4492608b7d7f..458d359f5991 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -350,7 +350,8 @@ static void sugov_update_single_freq(struct update_util_data *hook, u64 time, * Except when the rq is capped by uclamp_max. */ if (!uclamp_rq_is_capped(cpu_rq(sg_cpu->cpu)) && - sugov_cpu_is_busy(sg_cpu) && next_f < sg_policy->next_freq) { + sugov_cpu_is_busy(sg_cpu) && next_f < sg_policy->next_freq && + !sg_policy->need_freq_update) { next_f = sg_policy->next_freq; /* Restore cached freq as next_freq has changed */ diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index cb225921bbca..ef7490c4b8b4 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -664,6 +664,10 @@ void avg_vruntime_update(struct cfs_rq *cfs_rq, s64 delta) cfs_rq->avg_vruntime -= cfs_rq->avg_load * delta; } +/* + * Specifically: avg_runtime() + 0 must result in entity_eligible() := true + * For this to be so, the result of this function must have a left bias. + */ u64 avg_vruntime(struct cfs_rq *cfs_rq) { struct sched_entity *curr = cfs_rq->curr; @@ -677,8 +681,12 @@ u64 avg_vruntime(struct cfs_rq *cfs_rq) load += weight; } - if (load) + if (load) { + /* sign flips effective floor / ceil */ + if (avg < 0) + avg -= (load - 1); avg = div_s64(avg, load); + } return cfs_rq->min_vruntime + avg; } @@ -4919,10 +4927,12 @@ static inline void update_misfit_status(struct task_struct *p, struct rq *rq) {} static void place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) { - u64 vslice = calc_delta_fair(se->slice, se); - u64 vruntime = avg_vruntime(cfs_rq); + u64 vslice, vruntime = avg_vruntime(cfs_rq); s64 lag = 0; + se->slice = sysctl_sched_base_slice; + vslice = calc_delta_fair(se->slice, se); + /* * Due to how V is constructed as the weighted average of entities, * adding tasks with positive lag, or removing tasks with negative lag diff --git a/kernel/time/posix-clock.c b/kernel/time/posix-clock.c index 77c0c2370b6d..9de66bbbb3d1 100644 --- a/kernel/time/posix-clock.c +++ b/kernel/time/posix-clock.c @@ -19,7 +19,8 @@ */ static struct posix_clock *get_posix_clock(struct file *fp) { - struct posix_clock *clk = fp->private_data; + struct posix_clock_context *pccontext = fp->private_data; + struct posix_clock *clk = pccontext->clk; down_read(&clk->rwsem); @@ -39,6 +40,7 @@ static void put_posix_clock(struct posix_clock *clk) static ssize_t posix_clock_read(struct file *fp, char __user *buf, size_t count, loff_t *ppos) { + struct posix_clock_context *pccontext = fp->private_data; struct posix_clock *clk = get_posix_clock(fp); int err = -EINVAL; @@ -46,7 +48,7 @@ static ssize_t posix_clock_read(struct file *fp, char __user *buf, return -ENODEV; if (clk->ops.read) - err = clk->ops.read(clk, fp->f_flags, buf, count); + err = clk->ops.read(pccontext, fp->f_flags, buf, count); put_posix_clock(clk); @@ -55,6 +57,7 @@ static ssize_t posix_clock_read(struct file *fp, char __user *buf, static __poll_t posix_clock_poll(struct file *fp, poll_table *wait) { + struct posix_clock_context *pccontext = fp->private_data; struct posix_clock *clk = get_posix_clock(fp); __poll_t result = 0; @@ -62,7 +65,7 @@ static __poll_t posix_clock_poll(struct file *fp, poll_table *wait) return EPOLLERR; if (clk->ops.poll) - result = clk->ops.poll(clk, fp, wait); + result = clk->ops.poll(pccontext, fp, wait); put_posix_clock(clk); @@ -72,6 +75,7 @@ static __poll_t posix_clock_poll(struct file *fp, poll_table *wait) static long posix_clock_ioctl(struct file *fp, unsigned int cmd, unsigned long arg) { + struct posix_clock_context *pccontext = fp->private_data; struct posix_clock *clk = get_posix_clock(fp); int err = -ENOTTY; @@ -79,7 +83,7 @@ static long posix_clock_ioctl(struct file *fp, return -ENODEV; if (clk->ops.ioctl) - err = clk->ops.ioctl(clk, cmd, arg); + err = clk->ops.ioctl(pccontext, cmd, arg); put_posix_clock(clk); @@ -90,6 +94,7 @@ static long posix_clock_ioctl(struct file *fp, static long posix_clock_compat_ioctl(struct file *fp, unsigned int cmd, unsigned long arg) { + struct posix_clock_context *pccontext = fp->private_data; struct posix_clock *clk = get_posix_clock(fp); int err = -ENOTTY; @@ -97,7 +102,7 @@ static long posix_clock_compat_ioctl(struct file *fp, return -ENODEV; if (clk->ops.ioctl) - err = clk->ops.ioctl(clk, cmd, arg); + err = clk->ops.ioctl(pccontext, cmd, arg); put_posix_clock(clk); @@ -110,6 +115,7 @@ static int posix_clock_open(struct inode *inode, struct file *fp) int err; struct posix_clock *clk = container_of(inode->i_cdev, struct posix_clock, cdev); + struct posix_clock_context *pccontext; down_read(&clk->rwsem); @@ -117,14 +123,20 @@ static int posix_clock_open(struct inode *inode, struct file *fp) err = -ENODEV; goto out; } + pccontext = kzalloc(sizeof(*pccontext), GFP_KERNEL); + if (!pccontext) { + err = -ENOMEM; + goto out; + } + pccontext->clk = clk; + fp->private_data = pccontext; if (clk->ops.open) - err = clk->ops.open(clk, fp->f_mode); + err = clk->ops.open(pccontext, fp->f_mode); else err = 0; if (!err) { get_device(clk->dev); - fp->private_data = clk; } out: up_read(&clk->rwsem); @@ -133,14 +145,20 @@ out: static int posix_clock_release(struct inode *inode, struct file *fp) { - struct posix_clock *clk = fp->private_data; + struct posix_clock_context *pccontext = fp->private_data; + struct posix_clock *clk; int err = 0; + if (!pccontext) + return -ENODEV; + clk = pccontext->clk; + if (clk->ops.release) - err = clk->ops.release(clk); + err = clk->ops.release(pccontext); put_device(clk->dev); + kfree(pccontext); fp->private_data = NULL; return err; diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 868008f56fec..df697c74d519 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -117,6 +117,9 @@ unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx) * and don't send kprobe event into ring-buffer, * so return zero here */ + rcu_read_lock(); + bpf_prog_inc_misses_counters(rcu_dereference(call->prog_array)); + rcu_read_unlock(); ret = 0; goto out; } @@ -2384,7 +2387,8 @@ int bpf_probe_unregister(struct bpf_raw_event_map *btp, struct bpf_prog *prog) int bpf_get_perf_event_info(const struct perf_event *event, u32 *prog_id, u32 *fd_type, const char **buf, - u64 *probe_offset, u64 *probe_addr) + u64 *probe_offset, u64 *probe_addr, + unsigned long *missed) { bool is_tracepoint, is_syscall_tp; struct bpf_prog *prog; @@ -2419,7 +2423,7 @@ int bpf_get_perf_event_info(const struct perf_event *event, u32 *prog_id, #ifdef CONFIG_KPROBE_EVENTS if (flags & TRACE_EVENT_FL_KPROBE) err = bpf_get_kprobe_info(event, fd_type, buf, - probe_offset, probe_addr, + probe_offset, probe_addr, missed, event->attr.type == PERF_TYPE_TRACEPOINT); #endif #ifdef CONFIG_UPROBE_EVENTS @@ -2614,6 +2618,7 @@ static int bpf_kprobe_multi_link_fill_link_info(const struct bpf_link *link, kmulti_link = container_of(link, struct bpf_kprobe_multi_link, link); info->kprobe_multi.count = kmulti_link->cnt; info->kprobe_multi.flags = kmulti_link->flags; + info->kprobe_multi.missed = kmulti_link->fp.nmissed; if (!uaddrs) return 0; @@ -2710,6 +2715,7 @@ kprobe_multi_link_prog_run(struct bpf_kprobe_multi_link *link, int err; if (unlikely(__this_cpu_inc_return(bpf_prog_active) != 1)) { + bpf_prog_inc_misses_counter(link->link.prog); err = 0; goto out; } diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 3d7a180a8427..961a78ffd6d2 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -1189,6 +1189,12 @@ static const struct file_operations kprobe_events_ops = { .write = probes_write, }; +static unsigned long trace_kprobe_missed(struct trace_kprobe *tk) +{ + return trace_kprobe_is_return(tk) ? + tk->rp.kp.nmissed + tk->rp.nmissed : tk->rp.kp.nmissed; +} + /* Probes profiling interfaces */ static int probes_profile_seq_show(struct seq_file *m, void *v) { @@ -1200,8 +1206,7 @@ static int probes_profile_seq_show(struct seq_file *m, void *v) return 0; tk = to_trace_kprobe(ev); - nmissed = trace_kprobe_is_return(tk) ? - tk->rp.kp.nmissed + tk->rp.nmissed : tk->rp.kp.nmissed; + nmissed = trace_kprobe_missed(tk); seq_printf(m, " %-44s %15lu %15lu\n", trace_probe_name(&tk->tp), trace_kprobe_nhit(tk), @@ -1547,7 +1552,8 @@ NOKPROBE_SYMBOL(kretprobe_perf_func); int bpf_get_kprobe_info(const struct perf_event *event, u32 *fd_type, const char **symbol, u64 *probe_offset, - u64 *probe_addr, bool perf_type_tracepoint) + u64 *probe_addr, unsigned long *missed, + bool perf_type_tracepoint) { const char *pevent = trace_event_name(event->tp_event); const char *group = event->tp_event->class->system; @@ -1566,6 +1572,8 @@ int bpf_get_kprobe_info(const struct perf_event *event, u32 *fd_type, *probe_addr = kallsyms_show_value(current_cred()) ? (unsigned long)tk->rp.kp.addr : 0; *symbol = tk->symbol; + if (missed) + *missed = trace_kprobe_missed(tk); return 0; } #endif /* CONFIG_PERF_EVENTS */ diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index de753403cdaf..9c581d6da843 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -556,7 +556,7 @@ static int perf_call_bpf_enter(struct trace_event_call *call, struct pt_regs *re { struct syscall_tp_t { struct trace_entry ent; - unsigned long syscall_nr; + int syscall_nr; unsigned long args[SYSCALL_DEFINE_MAXARGS]; } __aligned(8) param; int i; @@ -661,7 +661,7 @@ static int perf_call_bpf_exit(struct trace_event_call *call, struct pt_regs *reg { struct syscall_tp_t { struct trace_entry ent; - unsigned long syscall_nr; + int syscall_nr; unsigned long ret; } __aligned(8) param; diff --git a/net/Kconfig b/net/Kconfig index d532ec33f1fe..3ec6bc98fa05 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -246,7 +246,7 @@ source "net/bridge/Kconfig" source "net/dsa/Kconfig" source "net/8021q/Kconfig" source "net/llc/Kconfig" -source "drivers/net/appletalk/Kconfig" +source "net/appletalk/Kconfig" source "net/x25/Kconfig" source "net/lapb/Kconfig" source "net/phonet/Kconfig" @@ -508,4 +508,13 @@ config NETDEV_ADDR_LIST_TEST default KUNIT_ALL_TESTS depends on KUNIT +config NET_TEST + tristate "KUnit tests for networking" if !KUNIT_ALL_TESTS + depends on KUNIT + default KUNIT_ALL_TESTS + help + KUnit tests covering core networking infra, such as sk_buff. + + If unsure, say N. + endif # if NET diff --git a/net/appletalk/Kconfig b/net/appletalk/Kconfig new file mode 100644 index 000000000000..041141abf925 --- /dev/null +++ b/net/appletalk/Kconfig @@ -0,0 +1,30 @@ +# SPDX-License-Identifier: GPL-2.0-only +# +# Appletalk configuration +# +config ATALK + tristate "Appletalk protocol support" + select LLC + help + AppleTalk is the protocol that Apple computers can use to communicate + on a network. If your Linux box is connected to such a network and you + wish to connect to it, say Y. You will need to use the netatalk package + so that your Linux box can act as a print and file server for Macs as + well as access AppleTalk printers. Check out + <http://www.zettabyte.net/netatalk/> on the WWW for details. + EtherTalk is the name used for AppleTalk over Ethernet and the + cheaper and slower LocalTalk is AppleTalk over a proprietary Apple + network using serial links. EtherTalk and LocalTalk are fully + supported by Linux. + + General information about how to connect Linux, Windows machines and + Macs is on the WWW at <http://www.eats.com/linux_mac_win.html>. The + NET3-4-HOWTO, available from + <http://www.tldp.org/docs.html#howto>, contains valuable + information as well. + + To compile this driver as a module, choose M here: the module will be + called appletalk. You almost certainly want to compile it as a + module so you can restart your AppleTalk stack without rebooting + your machine. I hear that the GNU boycott of Apple is over, so + even politically correct people are allowed to say Y here. diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index 8978fb6212ff..9ba04a69ec2a 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -1284,39 +1284,6 @@ out: return err; } -#if IS_ENABLED(CONFIG_IPDDP) -static __inline__ int is_ip_over_ddp(struct sk_buff *skb) -{ - return skb->data[12] == 22; -} - -static int handle_ip_over_ddp(struct sk_buff *skb) -{ - struct net_device *dev = __dev_get_by_name(&init_net, "ipddp0"); - struct net_device_stats *stats; - - /* This needs to be able to handle ipddp"N" devices */ - if (!dev) { - kfree_skb(skb); - return NET_RX_DROP; - } - - skb->protocol = htons(ETH_P_IP); - skb_pull(skb, 13); - skb->dev = dev; - skb_reset_transport_header(skb); - - stats = netdev_priv(dev); - stats->rx_packets++; - stats->rx_bytes += skb->len + 13; - return netif_rx(skb); /* Send the SKB up to a higher place. */ -} -#else -/* make it easy for gcc to optimize this test out, i.e. kill the code */ -#define is_ip_over_ddp(skb) 0 -#define handle_ip_over_ddp(skb) 0 -#endif - static int atalk_route_packet(struct sk_buff *skb, struct net_device *dev, struct ddpehdr *ddp, __u16 len_hops, int origlen) { @@ -1480,9 +1447,6 @@ static int atalk_rcv(struct sk_buff *skb, struct net_device *dev, return atalk_route_packet(skb, dev, ddp, len_hops, origlen); } - /* if IP over DDP is not selected this code will be optimized out */ - if (is_ip_over_ddp(skb)) - return handle_ip_over_ddp(skb); /* * Which socket - atalk_search_socket() looks for a *full match* * of the <net, node, port> tuple. diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index e69a872bfc1d..a98ad763b368 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -661,14 +661,30 @@ static int __fdb_flush_validate_ifindex(const struct net_bridge *br, return 0; } -int br_fdb_delete_bulk(struct ndmsg *ndm, struct nlattr *tb[], - struct net_device *dev, u16 vid, +static const struct nla_policy br_fdb_del_bulk_policy[NDA_MAX + 1] = { + [NDA_VLAN] = NLA_POLICY_RANGE(NLA_U16, 1, VLAN_N_VID - 2), + [NDA_IFINDEX] = NLA_POLICY_MIN(NLA_S32, 1), + [NDA_NDM_STATE_MASK] = { .type = NLA_U16 }, + [NDA_NDM_FLAGS_MASK] = { .type = NLA_U8 }, +}; + +int br_fdb_delete_bulk(struct nlmsghdr *nlh, struct net_device *dev, struct netlink_ext_ack *extack) { - u8 ndm_flags = ndm->ndm_flags & ~FDB_FLUSH_IGNORED_NDM_FLAGS; - struct net_bridge_fdb_flush_desc desc = { .vlan_id = vid }; + struct net_bridge_fdb_flush_desc desc = {}; + struct ndmsg *ndm = nlmsg_data(nlh); struct net_bridge_port *p = NULL; + struct nlattr *tb[NDA_MAX + 1]; struct net_bridge *br; + u8 ndm_flags; + int err; + + ndm_flags = ndm->ndm_flags & ~FDB_FLUSH_IGNORED_NDM_FLAGS; + + err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, + br_fdb_del_bulk_policy, extack); + if (err) + return err; if (netif_is_bridge_master(dev)) { br = netdev_priv(dev); @@ -681,6 +697,9 @@ int br_fdb_delete_bulk(struct ndmsg *ndm, struct nlattr *tb[], br = p->br; } + if (tb[NDA_VLAN]) + desc.vlan_id = nla_get_u16(tb[NDA_VLAN]); + if (ndm_flags & ~FDB_FLUSH_ALLOWED_NDM_FLAGS) { NL_SET_ERR_MSG(extack, "Unsupported fdb flush ndm flag bits set"); return -EINVAL; @@ -703,7 +722,7 @@ int br_fdb_delete_bulk(struct ndmsg *ndm, struct nlattr *tb[], desc.flags_mask |= __ndm_flags_to_fdb_flags(ndm_flags_mask); } if (tb[NDA_IFINDEX]) { - int err, ifidx = nla_get_s32(tb[NDA_IFINDEX]); + int ifidx = nla_get_s32(tb[NDA_IFINDEX]); err = __fdb_flush_validate_ifindex(br, ifidx, extack); if (err) diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index a1f4acfa6994..cbbe35278459 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -847,8 +847,7 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source, int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, const unsigned char *addr, u16 vid, struct netlink_ext_ack *extack); -int br_fdb_delete_bulk(struct ndmsg *ndm, struct nlattr *tb[], - struct net_device *dev, u16 vid, +int br_fdb_delete_bulk(struct nlmsghdr *nlh, struct net_device *dev, struct netlink_ext_ack *extack); int br_fdb_add(struct ndmsg *nlh, struct nlattr *tb[], struct net_device *dev, const unsigned char *addr, u16 vid, u16 nlh_flags, diff --git a/net/can/isotp.c b/net/can/isotp.c index f02b5d3e4733..d1c6f206f429 100644 --- a/net/can/isotp.c +++ b/net/can/isotp.c @@ -948,21 +948,18 @@ static int isotp_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) if (!so->bound || so->tx.state == ISOTP_SHUTDOWN) return -EADDRNOTAVAIL; -wait_free_buffer: - /* we do not support multiple buffers - for now */ - if (wq_has_sleeper(&so->wait) && (msg->msg_flags & MSG_DONTWAIT)) - return -EAGAIN; + while (cmpxchg(&so->tx.state, ISOTP_IDLE, ISOTP_SENDING) != ISOTP_IDLE) { + /* we do not support multiple buffers - for now */ + if (msg->msg_flags & MSG_DONTWAIT) + return -EAGAIN; - /* wait for complete transmission of current pdu */ - err = wait_event_interruptible(so->wait, so->tx.state == ISOTP_IDLE); - if (err) - goto err_event_drop; - - if (cmpxchg(&so->tx.state, ISOTP_IDLE, ISOTP_SENDING) != ISOTP_IDLE) { if (so->tx.state == ISOTP_SHUTDOWN) return -EADDRNOTAVAIL; - goto wait_free_buffer; + /* wait for complete transmission of current pdu */ + err = wait_event_interruptible(so->wait, so->tx.state == ISOTP_IDLE); + if (err) + goto err_event_drop; } /* PDU size > default => try max_pdu_size */ diff --git a/net/core/Makefile b/net/core/Makefile index 731db2eaa610..0cb734cbc24b 100644 --- a/net/core/Makefile +++ b/net/core/Makefile @@ -40,3 +40,4 @@ obj-$(CONFIG_NET_SOCK_MSG) += skmsg.o obj-$(CONFIG_BPF_SYSCALL) += sock_map.o obj-$(CONFIG_BPF_SYSCALL) += bpf_sk_storage.o obj-$(CONFIG_OF) += of_net.o +obj-$(CONFIG_NET_TEST) += gso_test.o diff --git a/net/core/dev.c b/net/core/dev.c index 606a366cc209..97e7b9833db9 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3292,15 +3292,19 @@ int skb_checksum_help(struct sk_buff *skb) offset = skb_checksum_start_offset(skb); ret = -EINVAL; - if (WARN_ON_ONCE(offset >= skb_headlen(skb))) { + if (unlikely(offset >= skb_headlen(skb))) { DO_ONCE_LITE(skb_dump, KERN_ERR, skb, false); + WARN_ONCE(true, "offset (%d) >= skb_headlen() (%u)\n", + offset, skb_headlen(skb)); goto out; } csum = skb_checksum(skb, offset, skb->len - offset, 0); offset += skb->csum_offset; - if (WARN_ON_ONCE(offset + sizeof(__sum16) > skb_headlen(skb))) { + if (unlikely(offset + sizeof(__sum16) > skb_headlen(skb))) { DO_ONCE_LITE(skb_dump, KERN_ERR, skb, false); + WARN_ONCE(true, "offset+2 (%zu) > skb_headlen() (%u)\n", + offset + sizeof(__sum16), skb_headlen(skb)); goto out; } ret = skb_ensure_writable(skb, offset + sizeof(__sum16)); @@ -3910,7 +3914,8 @@ EXPORT_SYMBOL_GPL(netdev_xmit_skip_txqueue); #endif /* CONFIG_NET_EGRESS */ #ifdef CONFIG_NET_XGRESS -static int tc_run(struct tcx_entry *entry, struct sk_buff *skb) +static int tc_run(struct tcx_entry *entry, struct sk_buff *skb, + enum skb_drop_reason *drop_reason) { int ret = TC_ACT_UNSPEC; #ifdef CONFIG_NET_CLS_ACT @@ -3922,12 +3927,14 @@ static int tc_run(struct tcx_entry *entry, struct sk_buff *skb) tc_skb_cb(skb)->mru = 0; tc_skb_cb(skb)->post_ct = false; + res.drop_reason = *drop_reason; mini_qdisc_bstats_cpu_update(miniq, skb); ret = tcf_classify(skb, miniq->block, miniq->filter_list, &res, false); /* Only tcf related quirks below. */ switch (ret) { case TC_ACT_SHOT: + *drop_reason = res.drop_reason; mini_qdisc_qstats_cpu_drop(miniq); break; case TC_ACT_OK: @@ -3977,6 +3984,7 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret, struct net_device *orig_dev, bool *another) { struct bpf_mprog_entry *entry = rcu_dereference_bh(skb->dev->tcx_ingress); + enum skb_drop_reason drop_reason = SKB_DROP_REASON_TC_INGRESS; int sch_ret; if (!entry) @@ -3994,7 +4002,7 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret, if (sch_ret != TC_ACT_UNSPEC) goto ingress_verdict; } - sch_ret = tc_run(tcx_entry(entry), skb); + sch_ret = tc_run(tcx_entry(entry), skb, &drop_reason); ingress_verdict: switch (sch_ret) { case TC_ACT_REDIRECT: @@ -4011,7 +4019,7 @@ ingress_verdict: *ret = NET_RX_SUCCESS; return NULL; case TC_ACT_SHOT: - kfree_skb_reason(skb, SKB_DROP_REASON_TC_INGRESS); + kfree_skb_reason(skb, drop_reason); *ret = NET_RX_DROP; return NULL; /* used by tc_run */ @@ -4032,6 +4040,7 @@ static __always_inline struct sk_buff * sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev) { struct bpf_mprog_entry *entry = rcu_dereference_bh(dev->tcx_egress); + enum skb_drop_reason drop_reason = SKB_DROP_REASON_TC_EGRESS; int sch_ret; if (!entry) @@ -4045,7 +4054,7 @@ sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev) if (sch_ret != TC_ACT_UNSPEC) goto egress_verdict; } - sch_ret = tc_run(tcx_entry(entry), skb); + sch_ret = tc_run(tcx_entry(entry), skb, &drop_reason); egress_verdict: switch (sch_ret) { case TC_ACT_REDIRECT: @@ -4054,7 +4063,7 @@ egress_verdict: *ret = NET_XMIT_SUCCESS; return NULL; case TC_ACT_SHOT: - kfree_skb_reason(skb, SKB_DROP_REASON_TC_EGRESS); + kfree_skb_reason(skb, drop_reason); *ret = NET_XMIT_DROP; return NULL; /* used by tc_run */ @@ -10497,7 +10506,8 @@ void netdev_stats_to_stats64(struct rtnl_link_stats64 *stats64, } EXPORT_SYMBOL(netdev_stats_to_stats64); -struct net_device_core_stats __percpu *netdev_core_stats_alloc(struct net_device *dev) +static __cold struct net_device_core_stats __percpu *netdev_core_stats_alloc( + struct net_device *dev) { struct net_device_core_stats __percpu *p; @@ -10510,7 +10520,23 @@ struct net_device_core_stats __percpu *netdev_core_stats_alloc(struct net_device /* This READ_ONCE() pairs with the cmpxchg() above */ return READ_ONCE(dev->core_stats); } -EXPORT_SYMBOL(netdev_core_stats_alloc); + +noinline void netdev_core_stats_inc(struct net_device *dev, u32 offset) +{ + /* This READ_ONCE() pairs with the write in netdev_core_stats_alloc() */ + struct net_device_core_stats __percpu *p = READ_ONCE(dev->core_stats); + unsigned long __percpu *field; + + if (unlikely(!p)) { + p = netdev_core_stats_alloc(dev); + if (!p) + return; + } + + field = (__force unsigned long __percpu *)((__force void *)p + offset); + this_cpu_inc(*field); +} +EXPORT_SYMBOL_GPL(netdev_core_stats_inc); /** * dev_get_stats - get network device statistics diff --git a/net/core/filter.c b/net/core/filter.c index a094694899c9..cc2e4babc85f 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -81,6 +81,7 @@ #include <net/xdp.h> #include <net/mptcp.h> #include <net/netfilter/nf_conntrack_bpf.h> +#include <linux/un.h> static const struct bpf_func_proto * bpf_sk_base_func_proto(enum bpf_func_id func_id); @@ -5850,6 +5851,9 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params, params->rt_metric = res.fi->fib_priority; params->ifindex = dev->ifindex; + if (flags & BPF_FIB_LOOKUP_SRC) + params->ipv4_src = fib_result_prefsrc(net, &res); + /* xdp and cls_bpf programs are run in RCU-bh so * rcu_read_lock_bh is not needed here */ @@ -5992,6 +5996,18 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params, params->rt_metric = res.f6i->fib6_metric; params->ifindex = dev->ifindex; + if (flags & BPF_FIB_LOOKUP_SRC) { + if (res.f6i->fib6_prefsrc.plen) { + *src = res.f6i->fib6_prefsrc.addr; + } else { + err = ipv6_bpf_stub->ipv6_dev_get_saddr(net, dev, + &fl6.daddr, 0, + src); + if (err) + return BPF_FIB_LKUP_RET_NO_SRC_ADDR; + } + } + if (flags & BPF_FIB_LOOKUP_SKIP_NEIGH) goto set_fwd_params; @@ -6010,7 +6026,8 @@ set_fwd_params: #endif #define BPF_FIB_LOOKUP_MASK (BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_OUTPUT | \ - BPF_FIB_LOOKUP_SKIP_NEIGH | BPF_FIB_LOOKUP_TBID) + BPF_FIB_LOOKUP_SKIP_NEIGH | BPF_FIB_LOOKUP_TBID | \ + BPF_FIB_LOOKUP_SRC) BPF_CALL_4(bpf_xdp_fib_lookup, struct xdp_buff *, ctx, struct bpf_fib_lookup *, params, int, plen, u32, flags) @@ -7858,14 +7875,19 @@ sock_addr_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) case BPF_CGROUP_INET6_BIND: case BPF_CGROUP_INET4_CONNECT: case BPF_CGROUP_INET6_CONNECT: + case BPF_CGROUP_UNIX_CONNECT: case BPF_CGROUP_UDP4_RECVMSG: case BPF_CGROUP_UDP6_RECVMSG: + case BPF_CGROUP_UNIX_RECVMSG: case BPF_CGROUP_UDP4_SENDMSG: case BPF_CGROUP_UDP6_SENDMSG: + case BPF_CGROUP_UNIX_SENDMSG: case BPF_CGROUP_INET4_GETPEERNAME: case BPF_CGROUP_INET6_GETPEERNAME: + case BPF_CGROUP_UNIX_GETPEERNAME: case BPF_CGROUP_INET4_GETSOCKNAME: case BPF_CGROUP_INET6_GETSOCKNAME: + case BPF_CGROUP_UNIX_GETSOCKNAME: return &bpf_sock_addr_setsockopt_proto; default: return NULL; @@ -7876,14 +7898,19 @@ sock_addr_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) case BPF_CGROUP_INET6_BIND: case BPF_CGROUP_INET4_CONNECT: case BPF_CGROUP_INET6_CONNECT: + case BPF_CGROUP_UNIX_CONNECT: case BPF_CGROUP_UDP4_RECVMSG: case BPF_CGROUP_UDP6_RECVMSG: + case BPF_CGROUP_UNIX_RECVMSG: case BPF_CGROUP_UDP4_SENDMSG: case BPF_CGROUP_UDP6_SENDMSG: + case BPF_CGROUP_UNIX_SENDMSG: case BPF_CGROUP_INET4_GETPEERNAME: case BPF_CGROUP_INET6_GETPEERNAME: + case BPF_CGROUP_UNIX_GETPEERNAME: case BPF_CGROUP_INET4_GETSOCKNAME: case BPF_CGROUP_INET6_GETSOCKNAME: + case BPF_CGROUP_UNIX_GETSOCKNAME: return &bpf_sock_addr_getsockopt_proto; default: return NULL; @@ -8931,8 +8958,8 @@ static bool sock_addr_is_valid_access(int off, int size, if (off % size != 0) return false; - /* Disallow access to IPv6 fields from IPv4 contex and vise - * versa. + /* Disallow access to fields not belonging to the attach type's address + * family. */ switch (off) { case bpf_ctx_range(struct bpf_sock_addr, user_ip4): @@ -11752,6 +11779,27 @@ __bpf_kfunc int bpf_dynptr_from_xdp(struct xdp_buff *xdp, u64 flags, return 0; } + +__bpf_kfunc int bpf_sock_addr_set_sun_path(struct bpf_sock_addr_kern *sa_kern, + const u8 *sun_path, u32 sun_path__sz) +{ + struct sockaddr_un *un; + + if (sa_kern->sk->sk_family != AF_UNIX) + return -EINVAL; + + /* We do not allow changing the address to unnamed or larger than the + * maximum allowed address size for a unix sockaddr. + */ + if (sun_path__sz == 0 || sun_path__sz > UNIX_PATH_MAX) + return -EINVAL; + + un = (struct sockaddr_un *)sa_kern->uaddr; + memcpy(un->sun_path, sun_path, sun_path__sz); + sa_kern->uaddrlen = offsetof(struct sockaddr_un, sun_path) + sun_path__sz; + + return 0; +} __diag_pop(); int bpf_dynptr_from_skb_rdonly(struct sk_buff *skb, u64 flags, @@ -11776,6 +11824,10 @@ BTF_SET8_START(bpf_kfunc_check_set_xdp) BTF_ID_FLAGS(func, bpf_dynptr_from_xdp) BTF_SET8_END(bpf_kfunc_check_set_xdp) +BTF_SET8_START(bpf_kfunc_check_set_sock_addr) +BTF_ID_FLAGS(func, bpf_sock_addr_set_sun_path) +BTF_SET8_END(bpf_kfunc_check_set_sock_addr) + static const struct btf_kfunc_id_set bpf_kfunc_set_skb = { .owner = THIS_MODULE, .set = &bpf_kfunc_check_set_skb, @@ -11786,6 +11838,11 @@ static const struct btf_kfunc_id_set bpf_kfunc_set_xdp = { .set = &bpf_kfunc_check_set_xdp, }; +static const struct btf_kfunc_id_set bpf_kfunc_set_sock_addr = { + .owner = THIS_MODULE, + .set = &bpf_kfunc_check_set_sock_addr, +}; + static int __init bpf_kfunc_init(void) { int ret; @@ -11800,7 +11857,9 @@ static int __init bpf_kfunc_init(void) ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_LWT_XMIT, &bpf_kfunc_set_skb); ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_LWT_SEG6LOCAL, &bpf_kfunc_set_skb); ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_NETFILTER, &bpf_kfunc_set_skb); - return ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP, &bpf_kfunc_set_xdp); + ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP, &bpf_kfunc_set_xdp); + return ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_CGROUP_SOCK_ADDR, + &bpf_kfunc_set_sock_addr); } late_initcall(bpf_kfunc_init); diff --git a/net/core/gso_test.c b/net/core/gso_test.c new file mode 100644 index 000000000000..ceb684be4cbf --- /dev/null +++ b/net/core/gso_test.c @@ -0,0 +1,278 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +#include <kunit/test.h> +#include <linux/skbuff.h> + +static const char hdr[] = "abcdefgh"; +#define GSO_TEST_SIZE 1000 + +static void __init_skb(struct sk_buff *skb) +{ + skb_reset_mac_header(skb); + memcpy(skb_mac_header(skb), hdr, sizeof(hdr)); + + /* skb_segment expects skb->data at start of payload */ + skb_pull(skb, sizeof(hdr)); + skb_reset_network_header(skb); + skb_reset_transport_header(skb); + + /* proto is arbitrary, as long as not ETH_P_TEB or vlan */ + skb->protocol = htons(ETH_P_ATALK); + skb_shinfo(skb)->gso_size = GSO_TEST_SIZE; +} + +enum gso_test_nr { + GSO_TEST_LINEAR, + GSO_TEST_NO_GSO, + GSO_TEST_FRAGS, + GSO_TEST_FRAGS_PURE, + GSO_TEST_GSO_PARTIAL, + GSO_TEST_FRAG_LIST, + GSO_TEST_FRAG_LIST_PURE, + GSO_TEST_FRAG_LIST_NON_UNIFORM, + GSO_TEST_GSO_BY_FRAGS, +}; + +struct gso_test_case { + enum gso_test_nr id; + const char *name; + + /* input */ + unsigned int linear_len; + unsigned int nr_frags; + const unsigned int *frags; + unsigned int nr_frag_skbs; + const unsigned int *frag_skbs; + + /* output as expected */ + unsigned int nr_segs; + const unsigned int *segs; +}; + +static struct gso_test_case cases[] = { + { + .id = GSO_TEST_NO_GSO, + .name = "no_gso", + .linear_len = GSO_TEST_SIZE, + .nr_segs = 1, + .segs = (const unsigned int[]) { GSO_TEST_SIZE }, + }, + { + .id = GSO_TEST_LINEAR, + .name = "linear", + .linear_len = GSO_TEST_SIZE + GSO_TEST_SIZE + 1, + .nr_segs = 3, + .segs = (const unsigned int[]) { GSO_TEST_SIZE, GSO_TEST_SIZE, 1 }, + }, + { + .id = GSO_TEST_FRAGS, + .name = "frags", + .linear_len = GSO_TEST_SIZE, + .nr_frags = 2, + .frags = (const unsigned int[]) { GSO_TEST_SIZE, 1 }, + .nr_segs = 3, + .segs = (const unsigned int[]) { GSO_TEST_SIZE, GSO_TEST_SIZE, 1 }, + }, + { + .id = GSO_TEST_FRAGS_PURE, + .name = "frags_pure", + .nr_frags = 3, + .frags = (const unsigned int[]) { GSO_TEST_SIZE, GSO_TEST_SIZE, 2 }, + .nr_segs = 3, + .segs = (const unsigned int[]) { GSO_TEST_SIZE, GSO_TEST_SIZE, 2 }, + }, + { + .id = GSO_TEST_GSO_PARTIAL, + .name = "gso_partial", + .linear_len = GSO_TEST_SIZE, + .nr_frags = 2, + .frags = (const unsigned int[]) { GSO_TEST_SIZE, 3 }, + .nr_segs = 2, + .segs = (const unsigned int[]) { 2 * GSO_TEST_SIZE, 3 }, + }, + { + /* commit 89319d3801d1: frag_list on mss boundaries */ + .id = GSO_TEST_FRAG_LIST, + .name = "frag_list", + .linear_len = GSO_TEST_SIZE, + .nr_frag_skbs = 2, + .frag_skbs = (const unsigned int[]) { GSO_TEST_SIZE, GSO_TEST_SIZE }, + .nr_segs = 3, + .segs = (const unsigned int[]) { GSO_TEST_SIZE, GSO_TEST_SIZE, GSO_TEST_SIZE }, + }, + { + .id = GSO_TEST_FRAG_LIST_PURE, + .name = "frag_list_pure", + .nr_frag_skbs = 2, + .frag_skbs = (const unsigned int[]) { GSO_TEST_SIZE, GSO_TEST_SIZE }, + .nr_segs = 2, + .segs = (const unsigned int[]) { GSO_TEST_SIZE, GSO_TEST_SIZE }, + }, + { + /* commit 43170c4e0ba7: GRO of frag_list trains */ + .id = GSO_TEST_FRAG_LIST_NON_UNIFORM, + .name = "frag_list_non_uniform", + .linear_len = GSO_TEST_SIZE, + .nr_frag_skbs = 4, + .frag_skbs = (const unsigned int[]) { GSO_TEST_SIZE, 1, GSO_TEST_SIZE, 2 }, + .nr_segs = 4, + .segs = (const unsigned int[]) { GSO_TEST_SIZE, GSO_TEST_SIZE, GSO_TEST_SIZE, 3 }, + }, + { + /* commit 3953c46c3ac7 ("sk_buff: allow segmenting based on frag sizes") and + * commit 90017accff61 ("sctp: Add GSO support") + * + * "there will be a cover skb with protocol headers and + * children ones containing the actual segments" + */ + .id = GSO_TEST_GSO_BY_FRAGS, + .name = "gso_by_frags", + .nr_frag_skbs = 4, + .frag_skbs = (const unsigned int[]) { 100, 200, 300, 400 }, + .nr_segs = 4, + .segs = (const unsigned int[]) { 100, 200, 300, 400 }, + }, +}; + +static void gso_test_case_to_desc(struct gso_test_case *t, char *desc) +{ + sprintf(desc, "%s", t->name); +} + +KUNIT_ARRAY_PARAM(gso_test, cases, gso_test_case_to_desc); + +static void gso_test_func(struct kunit *test) +{ + const int shinfo_size = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); + struct sk_buff *skb, *segs, *cur, *next, *last; + const struct gso_test_case *tcase; + netdev_features_t features; + struct page *page; + int i; + + tcase = test->param_value; + + page = alloc_page(GFP_KERNEL); + KUNIT_ASSERT_NOT_NULL(test, page); + skb = build_skb(page_address(page), sizeof(hdr) + tcase->linear_len + shinfo_size); + KUNIT_ASSERT_NOT_NULL(test, skb); + __skb_put(skb, sizeof(hdr) + tcase->linear_len); + + __init_skb(skb); + + if (tcase->nr_frags) { + unsigned int pg_off = 0; + + page = alloc_page(GFP_KERNEL); + KUNIT_ASSERT_NOT_NULL(test, page); + page_ref_add(page, tcase->nr_frags - 1); + + for (i = 0; i < tcase->nr_frags; i++) { + skb_fill_page_desc(skb, i, page, pg_off, tcase->frags[i]); + pg_off += tcase->frags[i]; + } + + KUNIT_ASSERT_LE(test, pg_off, PAGE_SIZE); + + skb->data_len = pg_off; + skb->len += skb->data_len; + skb->truesize += skb->data_len; + } + + if (tcase->frag_skbs) { + unsigned int total_size = 0, total_true_size = 0, alloc_size = 0; + struct sk_buff *frag_skb, *prev = NULL; + + page = alloc_page(GFP_KERNEL); + KUNIT_ASSERT_NOT_NULL(test, page); + page_ref_add(page, tcase->nr_frag_skbs - 1); + + for (i = 0; i < tcase->nr_frag_skbs; i++) { + unsigned int frag_size; + + frag_size = tcase->frag_skbs[i]; + frag_skb = build_skb(page_address(page) + alloc_size, + frag_size + shinfo_size); + KUNIT_ASSERT_NOT_NULL(test, frag_skb); + __skb_put(frag_skb, frag_size); + + if (prev) + prev->next = frag_skb; + else + skb_shinfo(skb)->frag_list = frag_skb; + prev = frag_skb; + + total_size += frag_size; + total_true_size += frag_skb->truesize; + alloc_size += frag_size + shinfo_size; + } + + KUNIT_ASSERT_LE(test, alloc_size, PAGE_SIZE); + + skb->len += total_size; + skb->data_len += total_size; + skb->truesize += total_true_size; + + if (tcase->id == GSO_TEST_GSO_BY_FRAGS) + skb_shinfo(skb)->gso_size = GSO_BY_FRAGS; + } + + features = NETIF_F_SG | NETIF_F_HW_CSUM; + if (tcase->id == GSO_TEST_GSO_PARTIAL) + features |= NETIF_F_GSO_PARTIAL; + + /* TODO: this should also work with SG, + * rather than hit BUG_ON(i >= nfrags) + */ + if (tcase->id == GSO_TEST_FRAG_LIST_NON_UNIFORM) + features &= ~NETIF_F_SG; + + segs = skb_segment(skb, features); + if (IS_ERR(segs)) { + KUNIT_FAIL(test, "segs error %lld", PTR_ERR(segs)); + goto free_gso_skb; + } else if (!segs) { + KUNIT_FAIL(test, "no segments"); + goto free_gso_skb; + } + + last = segs->prev; + for (cur = segs, i = 0; cur; cur = next, i++) { + next = cur->next; + + KUNIT_ASSERT_EQ(test, cur->len, sizeof(hdr) + tcase->segs[i]); + + /* segs have skb->data pointing to the mac header */ + KUNIT_ASSERT_PTR_EQ(test, skb_mac_header(cur), cur->data); + KUNIT_ASSERT_PTR_EQ(test, skb_network_header(cur), cur->data + sizeof(hdr)); + + /* header was copied to all segs */ + KUNIT_ASSERT_EQ(test, memcmp(skb_mac_header(cur), hdr, sizeof(hdr)), 0); + + /* last seg can be found through segs->prev pointer */ + if (!next) + KUNIT_ASSERT_PTR_EQ(test, cur, last); + + consume_skb(cur); + } + + KUNIT_ASSERT_EQ(test, i, tcase->nr_segs); + +free_gso_skb: + consume_skb(skb); +} + +static struct kunit_case gso_test_cases[] = { + KUNIT_CASE_PARAM(gso_test_func, gso_test_gen_params), + {} +}; + +static struct kunit_suite gso_test_suite = { + .name = "net_core_gso", + .test_cases = gso_test_cases, +}; + +kunit_test_suite(gso_test_suite); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("KUnit tests for segmentation offload"); diff --git a/net/core/netclassid_cgroup.c b/net/core/netclassid_cgroup.c index d6a70aeaa503..d22f0919821e 100644 --- a/net/core/netclassid_cgroup.c +++ b/net/core/netclassid_cgroup.c @@ -88,6 +88,12 @@ static void update_classid_task(struct task_struct *p, u32 classid) }; unsigned int fd = 0; + /* Only update the leader task, when many threads in this task, + * so it can avoid the useless traversal. + */ + if (p != p->group_leader) + return; + do { task_lock(p); fd = iterate_fd(p->files, fd, update_classid_sock, &ctx); diff --git a/net/core/page_pool.c b/net/core/page_pool.c index 77cb75e63aca..8a9868ea5067 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -211,10 +211,6 @@ static int page_pool_init(struct page_pool *pool, */ } - if (PAGE_POOL_DMA_USE_PP_FRAG_COUNT && - pool->p.flags & PP_FLAG_PAGE_FRAG) - return -EINVAL; - #ifdef CONFIG_PAGE_POOL_STATS pool->recycle_stats = alloc_percpu(struct page_pool_recycle_stats); if (!pool->recycle_stats) @@ -359,12 +355,20 @@ static bool page_pool_dma_map(struct page_pool *pool, struct page *page) if (dma_mapping_error(pool->p.dev, dma)) return false; - page_pool_set_dma_addr(page, dma); + if (page_pool_set_dma_addr(page, dma)) + goto unmap_failed; if (pool->p.flags & PP_FLAG_DMA_SYNC_DEV) page_pool_dma_sync_for_device(pool, page, pool->p.max_len); return true; + +unmap_failed: + WARN_ON_ONCE("unexpected DMA address, please report to netdev@"); + dma_unmap_page_attrs(pool->p.dev, dma, + PAGE_SIZE << pool->p.order, pool->p.dma_dir, + DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING); + return false; } static void page_pool_set_pp_info(struct page_pool *pool, diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 7452a6d190c5..eef7f7788996 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -4367,13 +4367,6 @@ int ndo_dflt_fdb_del(struct ndmsg *ndm, } EXPORT_SYMBOL(ndo_dflt_fdb_del); -static const struct nla_policy fdb_del_bulk_policy[NDA_MAX + 1] = { - [NDA_VLAN] = { .type = NLA_U16 }, - [NDA_IFINDEX] = NLA_POLICY_MIN(NLA_S32, 1), - [NDA_NDM_STATE_MASK] = { .type = NLA_U16 }, - [NDA_NDM_FLAGS_MASK] = { .type = NLA_U8 }, -}; - static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { @@ -4394,8 +4387,10 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, err = nlmsg_parse_deprecated(nlh, sizeof(*ndm), tb, NDA_MAX, NULL, extack); } else { - err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, - fdb_del_bulk_policy, extack); + /* For bulk delete, the drivers will parse the message with + * policy. + */ + err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL, extack); } if (err < 0) return err; @@ -4418,6 +4413,10 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, return -EINVAL; } addr = nla_data(tb[NDA_LLADDR]); + + err = fdb_vid_parse(tb[NDA_VLAN], &vid, extack); + if (err) + return err; } if (dev->type != ARPHRD_ETHER) { @@ -4425,10 +4424,6 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, return -EINVAL; } - err = fdb_vid_parse(tb[NDA_VLAN], &vid, extack); - if (err) - return err; - err = -EOPNOTSUPP; /* Support fdb on master device the net/bridge default case */ @@ -4442,8 +4437,7 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, err = ops->ndo_fdb_del(ndm, tb, dev, addr, vid, extack); } else { if (ops->ndo_fdb_del_bulk) - err = ops->ndo_fdb_del_bulk(ndm, tb, dev, vid, - extack); + err = ops->ndo_fdb_del_bulk(nlh, dev, extack); } if (err) @@ -4464,8 +4458,7 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, /* in case err was cleared by NTF_MASTER call */ err = -EOPNOTSUPP; if (ops->ndo_fdb_del_bulk) - err = ops->ndo_fdb_del_bulk(ndm, tb, dev, vid, - extack); + err = ops->ndo_fdb_del_bulk(nlh, dev, extack); } if (!err) { diff --git a/net/dccp/timer.c b/net/dccp/timer.c index b3255e87cc7e..a4cfb47b60e5 100644 --- a/net/dccp/timer.c +++ b/net/dccp/timer.c @@ -196,8 +196,8 @@ static void dccp_delack_timer(struct timer_list *t) if (inet_csk_ack_scheduled(sk)) { if (!inet_csk_in_pingpong_mode(sk)) { /* Delayed ACK missed: inflate ATO. */ - icsk->icsk_ack.ato = min(icsk->icsk_ack.ato << 1, - icsk->icsk_rto); + icsk->icsk_ack.ato = min_t(u32, icsk->icsk_ack.ato << 1, + icsk->icsk_rto); } else { /* Delayed ACK missed: leave pingpong mode and * deflate ATO. diff --git a/net/devlink/health.c b/net/devlink/health.c index 638cad8d5c65..51e6e81e31bb 100644 --- a/net/devlink/health.c +++ b/net/devlink/health.c @@ -58,7 +58,6 @@ struct devlink_health_reporter { struct devlink *devlink; struct devlink_port *devlink_port; struct devlink_fmsg *dump_fmsg; - struct mutex dump_lock; /* lock parallel read/write from dump buffers */ u64 graceful_period; bool auto_recover; bool auto_dump; @@ -125,7 +124,6 @@ __devlink_health_reporter_create(struct devlink *devlink, reporter->graceful_period = graceful_period; reporter->auto_recover = !!ops->recover; reporter->auto_dump = !!ops->dump; - mutex_init(&reporter->dump_lock); return reporter; } @@ -226,7 +224,6 @@ EXPORT_SYMBOL_GPL(devlink_health_reporter_create); static void devlink_health_reporter_free(struct devlink_health_reporter *reporter) { - mutex_destroy(&reporter->dump_lock); if (reporter->dump_fmsg) devlink_fmsg_free(reporter->dump_fmsg); kfree(reporter); @@ -625,10 +622,10 @@ int devlink_health_report(struct devlink_health_reporter *reporter, } if (reporter->auto_dump) { - mutex_lock(&reporter->dump_lock); + devl_lock(devlink); /* store current dump of current error, for later analysis */ devlink_health_do_dump(reporter, priv_ctx, NULL); - mutex_unlock(&reporter->dump_lock); + devl_unlock(devlink); } if (!reporter->auto_recover) @@ -1262,7 +1259,7 @@ out: } static struct devlink_health_reporter * -devlink_health_reporter_get_from_cb(struct netlink_callback *cb) +devlink_health_reporter_get_from_cb_lock(struct netlink_callback *cb) { const struct genl_info *info = genl_info_dump(cb); struct devlink_health_reporter *reporter; @@ -1272,10 +1269,12 @@ devlink_health_reporter_get_from_cb(struct netlink_callback *cb) devlink = devlink_get_from_attrs_lock(sock_net(cb->skb->sk), attrs); if (IS_ERR(devlink)) return NULL; - devl_unlock(devlink); reporter = devlink_health_reporter_get_from_attrs(devlink, attrs); - devlink_put(devlink); + if (!reporter) { + devl_unlock(devlink); + devlink_put(devlink); + } return reporter; } @@ -1284,16 +1283,20 @@ int devlink_nl_cmd_health_reporter_dump_get_dumpit(struct sk_buff *skb, { struct devlink_nl_dump_state *state = devlink_dump_state(cb); struct devlink_health_reporter *reporter; + struct devlink *devlink; int err; - reporter = devlink_health_reporter_get_from_cb(cb); + reporter = devlink_health_reporter_get_from_cb_lock(cb); if (!reporter) return -EINVAL; - if (!reporter->ops->dump) + devlink = reporter->devlink; + if (!reporter->ops->dump) { + devl_unlock(devlink); + devlink_put(devlink); return -EOPNOTSUPP; + } - mutex_lock(&reporter->dump_lock); if (!state->idx) { err = devlink_health_do_dump(reporter, NULL, cb->extack); if (err) @@ -1309,7 +1312,8 @@ int devlink_nl_cmd_health_reporter_dump_get_dumpit(struct sk_buff *skb, err = devlink_fmsg_dumpit(reporter->dump_fmsg, skb, cb, DEVLINK_CMD_HEALTH_REPORTER_DUMP_GET); unlock: - mutex_unlock(&reporter->dump_lock); + devl_unlock(devlink); + devlink_put(devlink); return err; } @@ -1326,9 +1330,7 @@ int devlink_nl_cmd_health_reporter_dump_clear_doit(struct sk_buff *skb, if (!reporter->ops->dump) return -EOPNOTSUPP; - mutex_lock(&reporter->dump_lock); devlink_health_dump_clear(reporter); - mutex_unlock(&reporter->dump_lock); return 0; } diff --git a/net/dsa/port.c b/net/dsa/port.c index 5f01bd4f9dec..6e0d000a97c4 100644 --- a/net/dsa/port.c +++ b/net/dsa/port.c @@ -1554,20 +1554,6 @@ static struct phy_device *dsa_port_get_phy_device(struct dsa_port *dp) return phydev; } -static void dsa_port_phylink_validate(struct phylink_config *config, - unsigned long *supported, - struct phylink_link_state *state) -{ - /* Skip call for drivers which don't yet set mac_capabilities, - * since validating in that case would mean their PHY will advertise - * nothing. In turn, skipping validation makes them advertise - * everything that the PHY supports, so those drivers should be - * converted ASAP. - */ - if (config->mac_capabilities) - phylink_generic_validate(config, supported, state); -} - static struct phylink_pcs * dsa_port_phylink_mac_select_pcs(struct phylink_config *config, phy_interface_t interface) @@ -1666,7 +1652,6 @@ static void dsa_port_phylink_mac_link_up(struct phylink_config *config, } static const struct phylink_mac_ops dsa_port_phylink_mac_ops = { - .validate = dsa_port_phylink_validate, .mac_select_pcs = dsa_port_phylink_mac_select_pcs, .mac_prepare = dsa_port_phylink_mac_prepare, .mac_config = dsa_port_phylink_mac_config, diff --git a/net/ethtool/bitset.c b/net/ethtool/bitset.c index 0515d6604b3b..883ed9be81f9 100644 --- a/net/ethtool/bitset.c +++ b/net/ethtool/bitset.c @@ -431,8 +431,10 @@ ethnl_update_bitset32_verbose(u32 *bitmap, unsigned int nbits, ethnl_string_array_t names, struct netlink_ext_ack *extack, bool *mod) { + u32 *orig_bitmap, *saved_bitmap = NULL; struct nlattr *bit_attr; bool no_mask; + bool dummy; int rem; int ret; @@ -448,8 +450,22 @@ ethnl_update_bitset32_verbose(u32 *bitmap, unsigned int nbits, } no_mask = tb[ETHTOOL_A_BITSET_NOMASK]; - if (no_mask) - ethnl_bitmap32_clear(bitmap, 0, nbits, mod); + if (no_mask) { + unsigned int nwords = DIV_ROUND_UP(nbits, 32); + unsigned int nbytes = nwords * sizeof(u32); + + /* The bitmap size is only the size of the map part without + * its mask part. + */ + saved_bitmap = kcalloc(nwords, sizeof(u32), GFP_KERNEL); + if (!saved_bitmap) + return -ENOMEM; + memcpy(saved_bitmap, bitmap, nbytes); + ethnl_bitmap32_clear(bitmap, 0, nbits, &dummy); + orig_bitmap = saved_bitmap; + } else { + orig_bitmap = bitmap; + } nla_for_each_nested(bit_attr, tb[ETHTOOL_A_BITSET_BITS], rem) { bool old_val, new_val; @@ -458,13 +474,14 @@ ethnl_update_bitset32_verbose(u32 *bitmap, unsigned int nbits, if (nla_type(bit_attr) != ETHTOOL_A_BITSET_BITS_BIT) { NL_SET_ERR_MSG_ATTR(extack, bit_attr, "only ETHTOOL_A_BITSET_BITS_BIT allowed in ETHTOOL_A_BITSET_BITS"); - return -EINVAL; + ret = -EINVAL; + goto out; } ret = ethnl_parse_bit(&idx, &new_val, nbits, bit_attr, no_mask, names, extack); if (ret < 0) - return ret; - old_val = bitmap[idx / 32] & ((u32)1 << (idx % 32)); + goto out; + old_val = orig_bitmap[idx / 32] & ((u32)1 << (idx % 32)); if (new_val != old_val) { if (new_val) bitmap[idx / 32] |= ((u32)1 << (idx % 32)); @@ -474,7 +491,10 @@ ethnl_update_bitset32_verbose(u32 *bitmap, unsigned int nbits, } } - return 0; + ret = 0; +out: + kfree(saved_bitmap); + return ret; } static int ethnl_compact_sanity_checks(unsigned int nbits, diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 3d2e30e20473..5ce275b2d7ef 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -452,7 +452,7 @@ int inet_bind_sk(struct sock *sk, struct sockaddr *uaddr, int addr_len) /* BPF prog is run before any checks are done so that if the prog * changes context in a wrong way it will be caught. */ - err = BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr, + err = BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr, &addr_len, CGROUP_INET4_BIND, &flags); if (err) return err; @@ -788,6 +788,7 @@ int inet_getname(struct socket *sock, struct sockaddr *uaddr, struct sock *sk = sock->sk; struct inet_sock *inet = inet_sk(sk); DECLARE_SOCKADDR(struct sockaddr_in *, sin, uaddr); + int sin_addr_len = sizeof(*sin); sin->sin_family = AF_INET; lock_sock(sk); @@ -800,7 +801,7 @@ int inet_getname(struct socket *sock, struct sockaddr *uaddr, } sin->sin_port = inet->inet_dport; sin->sin_addr.s_addr = inet->inet_daddr; - BPF_CGROUP_RUN_SA_PROG(sk, (struct sockaddr *)sin, + BPF_CGROUP_RUN_SA_PROG(sk, (struct sockaddr *)sin, &sin_addr_len, CGROUP_INET4_GETPEERNAME); } else { __be32 addr = inet->inet_rcv_saddr; @@ -808,12 +809,12 @@ int inet_getname(struct socket *sock, struct sockaddr *uaddr, addr = inet->inet_saddr; sin->sin_port = inet->inet_sport; sin->sin_addr.s_addr = addr; - BPF_CGROUP_RUN_SA_PROG(sk, (struct sockaddr *)sin, + BPF_CGROUP_RUN_SA_PROG(sk, (struct sockaddr *)sin, &sin_addr_len, CGROUP_INET4_GETSOCKNAME); } release_sock(sk); memset(sin->sin_zero, 0, sizeof(sin->sin_zero)); - return sizeof(*sin); + return sin_addr_len; } EXPORT_SYMBOL(inet_getname); diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c index 66fac1216d46..8b65f12583eb 100644 --- a/net/ipv4/ip_forward.c +++ b/net/ipv4/ip_forward.c @@ -66,8 +66,6 @@ static int ip_forward_finish(struct net *net, struct sock *sk, struct sk_buff *s { struct ip_options *opt = &(IPCB(skb)->opt); - __IP_INC_STATS(net, IPSTATS_MIB_OUTFORWDATAGRAMS); - #ifdef CONFIG_NET_SWITCHDEV if (skb->offload_l3_fwd_mark) { consume_skb(skb); @@ -130,6 +128,8 @@ int ip_forward(struct sk_buff *skb) if (opt->is_strictroute && rt->rt_uses_gateway) goto sr_failed; + __IP_INC_STATS(net, IPSTATS_MIB_OUTFORWDATAGRAMS); + IPCB(skb)->flags |= IPSKB_FORWARDED; mtu = ip_dst_mtu_maybe_forward(&rt->dst, true); if (ip_exceeds_mtu(skb, mtu)) { diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 2c61f444e1c7..823306487a82 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -301,7 +301,7 @@ static int ping_pre_connect(struct sock *sk, struct sockaddr *uaddr, if (addr_len < sizeof(struct sockaddr_in)) return -EINVAL; - return BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr); + return BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr, &addr_len); } /* Checks the bind address and possibly modifies sk->sk_bound_dev_if. */ diff --git a/net/ipv4/route.c b/net/ipv4/route.c index e2bf4602b559..3290a4442b4a 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2885,54 +2885,6 @@ struct rtable *ip_route_output_flow(struct net *net, struct flowi4 *flp4, } EXPORT_SYMBOL_GPL(ip_route_output_flow); -struct rtable *ip_route_output_tunnel(struct sk_buff *skb, - struct net_device *dev, - struct net *net, __be32 *saddr, - const struct ip_tunnel_info *info, - u8 protocol, bool use_cache) -{ -#ifdef CONFIG_DST_CACHE - struct dst_cache *dst_cache; -#endif - struct rtable *rt = NULL; - struct flowi4 fl4; - __u8 tos; - -#ifdef CONFIG_DST_CACHE - dst_cache = (struct dst_cache *)&info->dst_cache; - if (use_cache) { - rt = dst_cache_get_ip4(dst_cache, saddr); - if (rt) - return rt; - } -#endif - memset(&fl4, 0, sizeof(fl4)); - fl4.flowi4_mark = skb->mark; - fl4.flowi4_proto = protocol; - fl4.daddr = info->key.u.ipv4.dst; - fl4.saddr = info->key.u.ipv4.src; - tos = info->key.tos; - fl4.flowi4_tos = RT_TOS(tos); - - rt = ip_route_output_key(net, &fl4); - if (IS_ERR(rt)) { - netdev_dbg(dev, "no route to %pI4\n", &fl4.daddr); - return ERR_PTR(-ENETUNREACH); - } - if (rt->dst.dev == dev) { /* is this necessary? */ - netdev_dbg(dev, "circular route to %pI4\n", &fl4.daddr); - ip_rt_put(rt); - return ERR_PTR(-ELOOP); - } -#ifdef CONFIG_DST_CACHE - if (use_cache) - dst_cache_set_ip4(dst_cache, &rt->dst, fl4.saddr); -#endif - *saddr = fl4.saddr; - return rt; -} -EXPORT_SYMBOL_GPL(ip_route_output_tunnel); - /* called with rcu_read_lock held */ static int rt_fill_info(struct net *net, __be32 dst, __be32 src, struct rtable *rt, u32 table_id, struct flowi4 *fl4, diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index e7f024d93572..f63a545a7374 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -1498,6 +1498,14 @@ static struct ctl_table ipv4_net_table[] = { .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE, }, + { + .procname = "tcp_pingpong_thresh", + .data = &init_net.ipv4.sysctl_tcp_pingpong_thresh, + .maxlen = sizeof(u8), + .mode = 0644, + .proc_handler = proc_dou8vec_minmax, + .extra1 = SYSCTL_ONE, + }, { } }; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 9a8b134d8ada..faabb5a4a378 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3756,8 +3756,8 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) info->tcpi_options |= TCPI_OPT_SYN_DATA; info->tcpi_rto = jiffies_to_usecs(icsk->icsk_rto); - info->tcpi_ato = jiffies_to_usecs(min(icsk->icsk_ack.ato, - tcp_delack_max(sk))); + info->tcpi_ato = jiffies_to_usecs(min_t(u32, icsk->icsk_ack.ato, + tcp_delack_max(sk))); info->tcpi_snd_mss = tp->mss_cache; info->tcpi_rcv_mss = icsk->icsk_ack.rcv_mss; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 4b8f2e74d71d..ab87f0285b72 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -778,6 +778,16 @@ new_measure: tp->rcvq_space.time = tp->tcp_mstamp; } +static void tcp_save_lrcv_flowlabel(struct sock *sk, const struct sk_buff *skb) +{ +#if IS_ENABLED(CONFIG_IPV6) + struct inet_connection_sock *icsk = inet_csk(sk); + + if (skb->protocol == htons(ETH_P_IPV6)) + icsk->icsk_ack.lrcv_flowlabel = ntohl(ip6_flowlabel(ipv6_hdr(skb))); +#endif +} + /* There is something which you must keep in mind when you analyze the * behavior of the tp->ato delayed ack timeout interval. When a * connection starts up, we want to ack as quickly as possible. The @@ -826,6 +836,7 @@ static void tcp_event_data_recv(struct sock *sk, struct sk_buff *skb) } } icsk->icsk_ack.lrcvtime = now; + tcp_save_lrcv_flowlabel(sk, skb); tcp_ecn_check_ce(sk, skb); @@ -4513,12 +4524,23 @@ static void tcp_rcv_spurious_retrans(struct sock *sk, const struct sk_buff *skb) { /* When the ACK path fails or drops most ACKs, the sender would * timeout and spuriously retransmit the same segment repeatedly. - * The receiver remembers and reflects via DSACKs. Leverage the - * DSACK state and change the txhash to re-route speculatively. + * If it seems our ACKs are not reaching the other side, + * based on receiving a duplicate data segment with new flowlabel + * (suggesting the sender suffered an RTO), and we are not already + * repathing due to our own RTO, then rehash the socket to repath our + * packets. */ - if (TCP_SKB_CB(skb)->seq == tcp_sk(sk)->duplicate_sack[0].start_seq && +#if IS_ENABLED(CONFIG_IPV6) + if (inet_csk(sk)->icsk_ca_state != TCP_CA_Loss && + skb->protocol == htons(ETH_P_IPV6) && + (tcp_sk(sk)->inet_conn.icsk_ack.lrcv_flowlabel != + ntohl(ip6_flowlabel(ipv6_hdr(skb)))) && sk_rethink_txhash(sk)) NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPDUPLICATEDATAREHASH); + + /* Save last flowlabel after a spurious retrans. */ + tcp_save_lrcv_flowlabel(sk, skb); +#endif } static void tcp_send_dupack(struct sock *sk, const struct sk_buff *skb) @@ -4835,6 +4857,7 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb) u32 seq, end_seq; bool fragstolen; + tcp_save_lrcv_flowlabel(sk, skb); tcp_ecn_check_ce(sk, skb); if (unlikely(tcp_try_rmem_schedule(sk, skb, skb->truesize))) { diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index a441740616d7..2519f530b114 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -194,7 +194,7 @@ static int tcp_v4_pre_connect(struct sock *sk, struct sockaddr *uaddr, sock_owned_by_me(sk); - return BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr); + return BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr, &addr_len); } /* This will initiate an outgoing connection. */ @@ -3288,6 +3288,8 @@ static int __net_init tcp_sk_init(struct net *net) net->ipv4.sysctl_tcp_syn_linear_timeouts = 4; net->ipv4.sysctl_tcp_shrink_window = 0; + net->ipv4.sysctl_tcp_pingpong_thresh = 1; + return 0; } diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index f207712eece1..8e6ebf35ed58 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -170,10 +170,10 @@ static void tcp_event_data_sent(struct tcp_sock *tp, tp->lsndtime = now; /* If it is a reply for ato after last received - * packet, enter pingpong mode. + * packet, increase pingpong count. */ if ((u32)(now - icsk->icsk_ack.lrcvtime) < icsk->icsk_ack.ato) - inet_csk_enter_pingpong_mode(sk); + inet_csk_inc_pingpong_cnt(sk); } /* Account for an ACK we sent. */ @@ -2449,6 +2449,7 @@ static int tcp_mtu_probe(struct sock *sk) /* build the payload, and be prepared to abort if this fails. */ if (tcp_clone_payload(sk, nskb, probe_size)) { + tcp_skb_tsorted_anchor_cleanup(nskb); consume_skb(nskb); return -1; } diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 3f61c6a70a1f..0862b73dd3b5 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -322,7 +322,7 @@ void tcp_delack_timer_handler(struct sock *sk) if (inet_csk_ack_scheduled(sk)) { if (!inet_csk_in_pingpong_mode(sk)) { /* Delayed ACK missed: inflate ATO. */ - icsk->icsk_ack.ato = min(icsk->icsk_ack.ato << 1, icsk->icsk_rto); + icsk->icsk_ack.ato = min_t(u32, icsk->icsk_ack.ato << 1, icsk->icsk_rto); } else { /* Delayed ACK missed: leave pingpong mode and * deflate ATO. diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 7f7724beca33..1734fd6a1ce0 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1144,7 +1144,9 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) if (cgroup_bpf_enabled(CGROUP_UDP4_SENDMSG) && !connected) { err = BPF_CGROUP_RUN_PROG_UDP4_SENDMSG_LOCK(sk, - (struct sockaddr *)usin, &ipc.addr); + (struct sockaddr *)usin, + &msg->msg_namelen, + &ipc.addr); if (err) goto out_free; if (usin) { @@ -1867,7 +1869,8 @@ try_again: *addr_len = sizeof(*sin); BPF_CGROUP_RUN_PROG_UDP4_RECVMSG_LOCK(sk, - (struct sockaddr *)sin); + (struct sockaddr *)sin, + addr_len); } if (udp_test_bit(GRO_ENABLED, sk)) @@ -1906,7 +1909,7 @@ int udp_pre_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) if (addr_len < sizeof(struct sockaddr_in)) return -EINVAL; - return BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr); + return BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr, &addr_len); } EXPORT_SYMBOL(udp_pre_connect); diff --git a/net/ipv4/udp_tunnel_core.c b/net/ipv4/udp_tunnel_core.c index 1e7e4aecdc48..a87defb2b167 100644 --- a/net/ipv4/udp_tunnel_core.c +++ b/net/ipv4/udp_tunnel_core.c @@ -204,4 +204,53 @@ struct metadata_dst *udp_tun_rx_dst(struct sk_buff *skb, unsigned short family, } EXPORT_SYMBOL_GPL(udp_tun_rx_dst); +struct rtable *udp_tunnel_dst_lookup(struct sk_buff *skb, + struct net_device *dev, + struct net *net, int oif, + __be32 *saddr, + const struct ip_tunnel_key *key, + __be16 sport, __be16 dport, u8 tos, + struct dst_cache *dst_cache) +{ + struct rtable *rt = NULL; + struct flowi4 fl4; + +#ifdef CONFIG_DST_CACHE + if (dst_cache) { + rt = dst_cache_get_ip4(dst_cache, saddr); + if (rt) + return rt; + } +#endif + + memset(&fl4, 0, sizeof(fl4)); + fl4.flowi4_mark = skb->mark; + fl4.flowi4_proto = IPPROTO_UDP; + fl4.flowi4_oif = oif; + fl4.daddr = key->u.ipv4.dst; + fl4.saddr = key->u.ipv4.src; + fl4.fl4_dport = dport; + fl4.fl4_sport = sport; + fl4.flowi4_tos = RT_TOS(tos); + fl4.flowi4_flags = key->flow_flags; + + rt = ip_route_output_key(net, &fl4); + if (IS_ERR(rt)) { + netdev_dbg(dev, "no route to %pI4\n", &fl4.daddr); + return ERR_PTR(-ENETUNREACH); + } + if (rt->dst.dev == dev) { /* is this necessary? */ + netdev_dbg(dev, "circular route to %pI4\n", &fl4.daddr); + ip_rt_put(rt); + return ERR_PTR(-ELOOP); + } +#ifdef CONFIG_DST_CACHE + if (dst_cache) + dst_cache_set_ip4(dst_cache, &rt->dst, fl4.saddr); +#endif + *saddr = fl4.saddr; + return rt; +} +EXPORT_SYMBOL_GPL(udp_tunnel_dst_lookup); + MODULE_LICENSE("GPL"); diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index c6ad0d6e99b5..c35d302a3da9 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -454,7 +454,7 @@ int inet6_bind_sk(struct sock *sk, struct sockaddr *uaddr, int addr_len) /* BPF prog is run before any checks are done so that if the prog * changes context in a wrong way it will be caught. */ - err = BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr, + err = BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr, &addr_len, CGROUP_INET6_BIND, &flags); if (err) return err; @@ -520,6 +520,7 @@ int inet6_getname(struct socket *sock, struct sockaddr *uaddr, int peer) { struct sockaddr_in6 *sin = (struct sockaddr_in6 *)uaddr; + int sin_addr_len = sizeof(*sin); struct sock *sk = sock->sk; struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); @@ -539,7 +540,7 @@ int inet6_getname(struct socket *sock, struct sockaddr *uaddr, sin->sin6_addr = sk->sk_v6_daddr; if (inet6_test_bit(SNDFLOW, sk)) sin->sin6_flowinfo = np->flow_label; - BPF_CGROUP_RUN_SA_PROG(sk, (struct sockaddr *)sin, + BPF_CGROUP_RUN_SA_PROG(sk, (struct sockaddr *)sin, &sin_addr_len, CGROUP_INET6_GETPEERNAME); } else { if (ipv6_addr_any(&sk->sk_v6_rcv_saddr)) @@ -547,13 +548,13 @@ int inet6_getname(struct socket *sock, struct sockaddr *uaddr, else sin->sin6_addr = sk->sk_v6_rcv_saddr; sin->sin6_port = inet->inet_sport; - BPF_CGROUP_RUN_SA_PROG(sk, (struct sockaddr *)sin, + BPF_CGROUP_RUN_SA_PROG(sk, (struct sockaddr *)sin, &sin_addr_len, CGROUP_INET6_GETSOCKNAME); } sin->sin6_scope_id = ipv6_iface_scope_id(&sin->sin6_addr, sk->sk_bound_dev_if); release_sock(sk); - return sizeof(*sin); + return sin_addr_len; } EXPORT_SYMBOL(inet6_getname); @@ -1061,6 +1062,7 @@ static const struct ipv6_bpf_stub ipv6_bpf_stub_impl = { .udp6_lib_lookup = __udp6_lib_lookup, .ipv6_setsockopt = do_ipv6_setsockopt, .ipv6_getsockopt = do_ipv6_getsockopt, + .ipv6_dev_get_saddr = ipv6_dev_get_saddr, }; static int __init inet6_init(void) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index cdaa9275e990..a471c7e91761 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -446,10 +446,6 @@ static int ip6_forward_proxy_check(struct sk_buff *skb) static inline int ip6_forward_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { - struct dst_entry *dst = skb_dst(skb); - - __IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS); - #ifdef CONFIG_NET_SWITCHDEV if (skb->offload_l3_fwd_mark) { consume_skb(skb); @@ -617,6 +613,8 @@ int ip6_forward(struct sk_buff *skb) } } + __IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS); + mtu = ip6_dst_mtu_maybe_forward(dst, true); if (mtu < IPV6_MIN_MTU) mtu = IPV6_MIN_MTU; diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c index e8fb0d275cc2..d2098dd4ceae 100644 --- a/net/ipv6/ping.c +++ b/net/ipv6/ping.c @@ -56,7 +56,7 @@ static int ping_v6_pre_connect(struct sock *sk, struct sockaddr *uaddr, if (addr_len < SIN6_LEN_RFC2133) return -EINVAL; - return BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr); + return BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr, &addr_len); } static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index bfe7d19ff4fd..d410703bb5a1 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -135,7 +135,7 @@ static int tcp_v6_pre_connect(struct sock *sk, struct sockaddr *uaddr, sock_owned_by_me(sk); - return BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr); + return BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr, &addr_len); } static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 5e9312eefed0..622b10a549f7 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -410,7 +410,8 @@ try_again: *addr_len = sizeof(*sin6); BPF_CGROUP_RUN_PROG_UDP6_RECVMSG_LOCK(sk, - (struct sockaddr *)sin6); + (struct sockaddr *)sin6, + addr_len); } if (udp_test_bit(GRO_ENABLED, sk)) @@ -1157,7 +1158,7 @@ static int udpv6_pre_connect(struct sock *sk, struct sockaddr *uaddr, if (addr_len < SIN6_LEN_RFC2133) return -EINVAL; - return BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr); + return BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr, &addr_len); } /** @@ -1510,6 +1511,7 @@ do_udp_sendmsg: if (cgroup_bpf_enabled(CGROUP_UDP6_SENDMSG) && !connected) { err = BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk, (struct sockaddr *)sin6, + &addr_len, &fl6->saddr); if (err) goto out_no_dst; diff --git a/net/mctp/route.c b/net/mctp/route.c index ab62fe447038..7a47a58aa54b 100644 --- a/net/mctp/route.c +++ b/net/mctp/route.c @@ -737,6 +737,8 @@ struct mctp_route *mctp_route_lookup(struct net *net, unsigned int dnet, { struct mctp_route *tmp, *rt = NULL; + rcu_read_lock(); + list_for_each_entry_rcu(tmp, &net->mctp.routes, list) { /* TODO: add metrics */ if (mctp_rt_match_eid(tmp, dnet, daddr)) { @@ -747,21 +749,29 @@ struct mctp_route *mctp_route_lookup(struct net *net, unsigned int dnet, } } + rcu_read_unlock(); + return rt; } static struct mctp_route *mctp_route_lookup_null(struct net *net, struct net_device *dev) { - struct mctp_route *rt; + struct mctp_route *tmp, *rt = NULL; - list_for_each_entry_rcu(rt, &net->mctp.routes, list) { - if (rt->dev->dev == dev && rt->type == RTN_LOCAL && - refcount_inc_not_zero(&rt->refs)) - return rt; + rcu_read_lock(); + + list_for_each_entry_rcu(tmp, &net->mctp.routes, list) { + if (tmp->dev->dev == dev && tmp->type == RTN_LOCAL && + refcount_inc_not_zero(&tmp->refs)) { + rt = tmp; + break; + } } - return NULL; + rcu_read_unlock(); + + return rt; } static int mctp_do_fragment_route(struct mctp_route *rt, struct sk_buff *skb, diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 9f6f2e643575..124136b5a79a 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -2042,24 +2042,6 @@ out: } EXPORT_SYMBOL_GPL(nf_conntrack_in); -/* Alter reply tuple (maybe alter helper). This is for NAT, and is - implicitly racy: see __nf_conntrack_confirm */ -void nf_conntrack_alter_reply(struct nf_conn *ct, - const struct nf_conntrack_tuple *newreply) -{ - struct nf_conn_help *help = nfct_help(ct); - - /* Should be unconfirmed, so not in hash table yet */ - WARN_ON(nf_ct_is_confirmed(ct)); - - nf_ct_dump_tuple(newreply); - - ct->tuplehash[IP_CT_DIR_REPLY].tuple = *newreply; - if (ct->master || (help && !hlist_empty(&help->expectations))) - return; -} -EXPORT_SYMBOL_GPL(nf_conntrack_alter_reply); - /* Refresh conntrack for this many jiffies and do accounting if do_acct is 1 */ void __nf_ct_refresh_acct(struct nf_conn *ct, enum ip_conntrack_info ctinfo, diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index f22691f83853..4ed5878cb25b 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -194,12 +194,7 @@ int __nf_ct_try_assign_helper(struct nf_conn *ct, struct nf_conn *tmpl, struct nf_conntrack_helper *helper = NULL; struct nf_conn_help *help; - /* We already got a helper explicitly attached. The function - * nf_conntrack_alter_reply - in case NAT is in use - asks for looking - * the helper up again. Since now the user is in full control of - * making consistent helper configurations, skip this automatic - * re-lookup, otherwise we'll lose the helper. - */ + /* We already got a helper explicitly attached (e.g. nft_ct) */ if (test_bit(IPS_HELPER_BIT, &ct->status)) return 0; diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 4018acb1d674..e573be5afde7 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -835,7 +835,8 @@ static bool tcp_error(const struct tcphdr *th, static noinline bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb, unsigned int dataoff, - const struct tcphdr *th) + const struct tcphdr *th, + const struct nf_hook_state *state) { enum tcp_conntrack new_state; struct net *net = nf_ct_net(ct); @@ -846,7 +847,7 @@ static noinline bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb, /* Invalid: delete conntrack */ if (new_state >= TCP_CONNTRACK_MAX) { - pr_debug("nf_ct_tcp: invalid new deleting.\n"); + tcp_error_log(skb, state, "invalid new"); return false; } @@ -980,7 +981,7 @@ int nf_conntrack_tcp_packet(struct nf_conn *ct, if (tcp_error(th, skb, dataoff, state)) return -NF_ACCEPT; - if (!nf_ct_is_confirmed(ct) && !tcp_new(ct, skb, dataoff, th)) + if (!nf_ct_is_confirmed(ct) && !tcp_new(ct, skb, dataoff, th, state)) return -NF_ACCEPT; spin_lock_bh(&ct->lock); diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index b4405db710b0..68321345bb6d 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -3441,20 +3441,21 @@ static void audit_log_rule_reset(const struct nft_table *table, } struct nft_rule_dump_ctx { + unsigned int s_idx; char *table; char *chain; + bool reset; }; static int __nf_tables_dump_rules(struct sk_buff *skb, unsigned int *idx, struct netlink_callback *cb, const struct nft_table *table, - const struct nft_chain *chain, - bool reset) + const struct nft_chain *chain) { + struct nft_rule_dump_ctx *ctx = (void *)cb->ctx; struct net *net = sock_net(skb->sk); const struct nft_rule *rule, *prule; - unsigned int s_idx = cb->args[0]; unsigned int entries = 0; int ret = 0; u64 handle; @@ -3463,12 +3464,8 @@ static int __nf_tables_dump_rules(struct sk_buff *skb, list_for_each_entry_rcu(rule, &chain->rules, list) { if (!nft_is_active(net, rule)) goto cont_skip; - if (*idx < s_idx) + if (*idx < ctx->s_idx) goto cont; - if (*idx > s_idx) { - memset(&cb->args[1], 0, - sizeof(cb->args) - sizeof(cb->args[0])); - } if (prule) handle = prule->handle; else @@ -3479,7 +3476,7 @@ static int __nf_tables_dump_rules(struct sk_buff *skb, NFT_MSG_NEWRULE, NLM_F_MULTI | NLM_F_APPEND, table->family, - table, chain, rule, handle, reset) < 0) { + table, chain, rule, handle, ctx->reset) < 0) { ret = 1; break; } @@ -3491,7 +3488,7 @@ cont_skip: (*idx)++; } - if (reset && entries) + if (ctx->reset && entries) audit_log_rule_reset(table, cb->seq, entries); return ret; @@ -3501,17 +3498,13 @@ static int nf_tables_dump_rules(struct sk_buff *skb, struct netlink_callback *cb) { const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh); - const struct nft_rule_dump_ctx *ctx = cb->data; + struct nft_rule_dump_ctx *ctx = (void *)cb->ctx; struct nft_table *table; const struct nft_chain *chain; unsigned int idx = 0; struct net *net = sock_net(skb->sk); int family = nfmsg->nfgen_family; struct nftables_pernet *nft_net; - bool reset = false; - - if (NFNL_MSG_TYPE(cb->nlh->nlmsg_type) == NFT_MSG_GETRULE_RESET) - reset = true; rcu_read_lock(); nft_net = nft_pernet(net); @@ -3521,10 +3514,10 @@ static int nf_tables_dump_rules(struct sk_buff *skb, if (family != NFPROTO_UNSPEC && family != table->family) continue; - if (ctx && ctx->table && strcmp(ctx->table, table->name) != 0) + if (ctx->table && strcmp(ctx->table, table->name) != 0) continue; - if (ctx && ctx->table && ctx->chain) { + if (ctx->table && ctx->chain) { struct rhlist_head *list, *tmp; list = rhltable_lookup(&table->chains_ht, ctx->chain, @@ -3536,7 +3529,7 @@ static int nf_tables_dump_rules(struct sk_buff *skb, if (!nft_is_active(net, chain)) continue; __nf_tables_dump_rules(skb, &idx, - cb, table, chain, reset); + cb, table, chain); break; } goto done; @@ -3544,62 +3537,51 @@ static int nf_tables_dump_rules(struct sk_buff *skb, list_for_each_entry_rcu(chain, &table->chains, list) { if (__nf_tables_dump_rules(skb, &idx, - cb, table, chain, reset)) + cb, table, chain)) goto done; } - if (ctx && ctx->table) + if (ctx->table) break; } done: rcu_read_unlock(); - cb->args[0] = idx; + ctx->s_idx = idx; return skb->len; } static int nf_tables_dump_rules_start(struct netlink_callback *cb) { + struct nft_rule_dump_ctx *ctx = (void *)cb->ctx; const struct nlattr * const *nla = cb->data; - struct nft_rule_dump_ctx *ctx = NULL; - if (nla[NFTA_RULE_TABLE] || nla[NFTA_RULE_CHAIN]) { - ctx = kzalloc(sizeof(*ctx), GFP_ATOMIC); - if (!ctx) - return -ENOMEM; + BUILD_BUG_ON(sizeof(*ctx) > sizeof(cb->ctx)); - if (nla[NFTA_RULE_TABLE]) { - ctx->table = nla_strdup(nla[NFTA_RULE_TABLE], - GFP_ATOMIC); - if (!ctx->table) { - kfree(ctx); - return -ENOMEM; - } - } - if (nla[NFTA_RULE_CHAIN]) { - ctx->chain = nla_strdup(nla[NFTA_RULE_CHAIN], - GFP_ATOMIC); - if (!ctx->chain) { - kfree(ctx->table); - kfree(ctx); - return -ENOMEM; - } + if (nla[NFTA_RULE_TABLE]) { + ctx->table = nla_strdup(nla[NFTA_RULE_TABLE], GFP_ATOMIC); + if (!ctx->table) + return -ENOMEM; + } + if (nla[NFTA_RULE_CHAIN]) { + ctx->chain = nla_strdup(nla[NFTA_RULE_CHAIN], GFP_ATOMIC); + if (!ctx->chain) { + kfree(ctx->table); + return -ENOMEM; } } + if (NFNL_MSG_TYPE(cb->nlh->nlmsg_type) == NFT_MSG_GETRULE_RESET) + ctx->reset = true; - cb->data = ctx; return 0; } static int nf_tables_dump_rules_done(struct netlink_callback *cb) { - struct nft_rule_dump_ctx *ctx = cb->data; + struct nft_rule_dump_ctx *ctx = (void *)cb->ctx; - if (ctx) { - kfree(ctx->table); - kfree(ctx->chain); - kfree(ctx); - } + kfree(ctx->table); + kfree(ctx->chain); return 0; } diff --git a/net/nfc/llcp_core.c b/net/nfc/llcp_core.c index 6705bb895e23..1dac28136e6a 100644 --- a/net/nfc/llcp_core.c +++ b/net/nfc/llcp_core.c @@ -203,17 +203,13 @@ static struct nfc_llcp_sock *nfc_llcp_sock_get(struct nfc_llcp_local *local, if (tmp_sock->ssap == ssap && tmp_sock->dsap == dsap) { llcp_sock = tmp_sock; + sock_hold(&llcp_sock->sk); break; } } read_unlock(&local->sockets.lock); - if (llcp_sock == NULL) - return NULL; - - sock_hold(&llcp_sock->sk); - return llcp_sock; } @@ -346,7 +342,8 @@ static int nfc_llcp_wks_sap(const char *service_name, size_t service_name_len) static struct nfc_llcp_sock *nfc_llcp_sock_from_sn(struct nfc_llcp_local *local, - const u8 *sn, size_t sn_len) + const u8 *sn, size_t sn_len, + bool needref) { struct sock *sk; struct nfc_llcp_sock *llcp_sock, *tmp_sock; @@ -382,6 +379,8 @@ struct nfc_llcp_sock *nfc_llcp_sock_from_sn(struct nfc_llcp_local *local, if (memcmp(sn, tmp_sock->service_name, sn_len) == 0) { llcp_sock = tmp_sock; + if (needref) + sock_hold(&llcp_sock->sk); break; } } @@ -423,7 +422,8 @@ u8 nfc_llcp_get_sdp_ssap(struct nfc_llcp_local *local, * to this service name. */ if (nfc_llcp_sock_from_sn(local, sock->service_name, - sock->service_name_len) != NULL) { + sock->service_name_len, + false) != NULL) { mutex_unlock(&local->sdp_lock); return LLCP_SAP_MAX; @@ -824,16 +824,7 @@ out: static struct nfc_llcp_sock *nfc_llcp_sock_get_sn(struct nfc_llcp_local *local, const u8 *sn, size_t sn_len) { - struct nfc_llcp_sock *llcp_sock; - - llcp_sock = nfc_llcp_sock_from_sn(local, sn, sn_len); - - if (llcp_sock == NULL) - return NULL; - - sock_hold(&llcp_sock->sk); - - return llcp_sock; + return nfc_llcp_sock_from_sn(local, sn, sn_len, true); } static const u8 *nfc_llcp_connect_sn(const struct sk_buff *skb, size_t *sn_len) @@ -1298,7 +1289,8 @@ static void nfc_llcp_recv_snl(struct nfc_llcp_local *local, } llcp_sock = nfc_llcp_sock_from_sn(local, service_name, - service_name_len); + service_name_len, + true); if (!llcp_sock) { sap = 0; goto add_snl; @@ -1318,6 +1310,7 @@ static void nfc_llcp_recv_snl(struct nfc_llcp_local *local, if (sap == LLCP_SAP_MAX) { sap = 0; + nfc_llcp_sock_put(llcp_sock); goto add_snl; } @@ -1335,6 +1328,7 @@ static void nfc_llcp_recv_snl(struct nfc_llcp_local *local, pr_debug("%p %d\n", llcp_sock, sap); + nfc_llcp_sock_put(llcp_sock); add_snl: sdp = nfc_llcp_build_sdres_tlv(tid, sap); if (sdp == NULL) diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c index fff755dde30d..6c9592d05120 100644 --- a/net/nfc/nci/core.c +++ b/net/nfc/nci/core.c @@ -909,6 +909,11 @@ static int nci_activate_target(struct nfc_dev *nfc_dev, return -EINVAL; } + if (protocol >= NFC_PROTO_MAX) { + pr_err("the requested nfc protocol is invalid\n"); + return -EINVAL; + } + if (!(nci_target->supported_protocols & (1 << protocol))) { pr_err("target does not support the requested protocol 0x%x\n", protocol); diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c index 4f3b1798e0b2..d108ae0bd0ee 100644 --- a/net/openvswitch/flow_table.c +++ b/net/openvswitch/flow_table.c @@ -220,16 +220,13 @@ static struct mask_array *tbl_mask_array_alloc(int size) struct mask_array *new; size = max(MASK_ARRAY_SIZE_MIN, size); - new = kzalloc(sizeof(struct mask_array) + - sizeof(struct sw_flow_mask *) * size + + new = kzalloc(struct_size(new, masks, size) + sizeof(u64) * size, GFP_KERNEL); if (!new) return NULL; new->masks_usage_zero_cntr = (u64 *)((u8 *)new + - sizeof(struct mask_array) + - sizeof(struct sw_flow_mask *) * - size); + struct_size(new, masks, size)); new->masks_usage_stats = __alloc_percpu(sizeof(struct mask_array_stats) + sizeof(u64) * size, diff --git a/net/openvswitch/flow_table.h b/net/openvswitch/flow_table.h index 9e659db78c05..f524dc3e4862 100644 --- a/net/openvswitch/flow_table.h +++ b/net/openvswitch/flow_table.h @@ -48,7 +48,7 @@ struct mask_array { int count, max; struct mask_array_stats __percpu *masks_usage_stats; u64 *masks_usage_zero_cntr; - struct sw_flow_mask __rcu *masks[]; + struct sw_flow_mask __rcu *masks[] __counted_by(max); }; struct table_instance { diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 8f97648d652f..a84e00b5904b 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -3607,7 +3607,12 @@ static int packet_getname(struct socket *sock, struct sockaddr *uaddr, if (dev) { sll->sll_hatype = dev->type; sll->sll_halen = dev->addr_len; - memcpy(sll->sll_addr_flex, dev->dev_addr, dev->addr_len); + + /* Let __fortify_memcpy_chk() know the actual buffer size. */ + memcpy(((struct sockaddr_storage *)sll)->__data + + offsetof(struct sockaddr_ll, sll_addr) - + offsetofend(struct sockaddr_ll, sll_family), + dev->dev_addr, dev->addr_len); } else { sll->sll_hatype = 0; /* Bad: we have no ARPHRD_UNSPEC */ sll->sll_halen = 0; diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index a193cc7b3241..1daeb2182b70 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -1681,12 +1681,16 @@ reclassify: * time we got here with a cookie from hardware. */ if (unlikely(n->tp != tp || n->tp->chain != n->chain || - !tp->ops->get_exts)) + !tp->ops->get_exts)) { + tcf_set_drop_reason(res, SKB_DROP_REASON_TC_ERROR); return TC_ACT_SHOT; + } exts = tp->ops->get_exts(tp, n->handle); - if (unlikely(!exts || n->exts != exts)) + if (unlikely(!exts || n->exts != exts)) { + tcf_set_drop_reason(res, SKB_DROP_REASON_TC_ERROR); return TC_ACT_SHOT; + } n = NULL; err = tcf_exts_exec_ex(skb, exts, act_index, res); @@ -1712,8 +1716,10 @@ reclassify: return err; } - if (unlikely(n)) + if (unlikely(n)) { + tcf_set_drop_reason(res, SKB_DROP_REASON_TC_ERROR); return TC_ACT_SHOT; + } return TC_ACT_UNSPEC; /* signal: continue lookup */ #ifdef CONFIG_NET_CLS_ACT @@ -1723,6 +1729,7 @@ reset: tp->chain->block->index, tp->prio & 0xffff, ntohs(tp->protocol)); + tcf_set_drop_reason(res, SKB_DROP_REASON_TC_ERROR); return TC_ACT_SHOT; } @@ -1759,8 +1766,10 @@ int tcf_classify(struct sk_buff *skb, if (ext->act_miss) { n = tcf_exts_miss_cookie_lookup(ext->act_miss_cookie, &act_index); - if (!n) + if (!n) { + tcf_set_drop_reason(res, SKB_DROP_REASON_TC_ERROR); return TC_ACT_SHOT; + } chain = n->chain_index; } else { @@ -1768,8 +1777,10 @@ int tcf_classify(struct sk_buff *skb, } fchain = tcf_chain_lookup_rcu(block, chain); - if (!fchain) + if (!fchain) { + tcf_set_drop_reason(res, SKB_DROP_REASON_TC_ERROR); return TC_ACT_SHOT; + } /* Consume, so cloned/redirect skbs won't inherit ext */ skb_ext_del(skb, TC_SKB_EXT); @@ -1788,8 +1799,11 @@ int tcf_classify(struct sk_buff *skb, struct tc_skb_cb *cb = tc_skb_cb(skb); ext = tc_skb_ext_alloc(skb); - if (WARN_ON_ONCE(!ext)) + if (WARN_ON_ONCE(!ext)) { + tcf_set_drop_reason(res, SKB_DROP_REASON_TC_ERROR); return TC_ACT_SHOT; + } + ext->chain = last_executed_chain; ext->mru = cb->mru; ext->post_ct = cb->post_ct; diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index da4c179a4d41..6663e971a13e 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -366,7 +366,7 @@ static int u32_init(struct tcf_proto *tp) idr_init(&root_ht->handle_idr); if (tp_c == NULL) { - tp_c = kzalloc(struct_size(tp_c, hlist->ht, 1), GFP_KERNEL); + tp_c = kzalloc(sizeof(*tp_c), GFP_KERNEL); if (tp_c == NULL) { kfree(root_ht); return -ENOBUFS; diff --git a/net/smc/Kconfig b/net/smc/Kconfig index 1ab3c5a2c5ad..746be3996768 100644 --- a/net/smc/Kconfig +++ b/net/smc/Kconfig @@ -2,6 +2,7 @@ config SMC tristate "SMC socket protocol family" depends on INET && INFINIBAND + depends on m || ISM != m help SMC-R provides a "sockets over RDMA" solution making use of RDMA over Converged Ethernet (RoCE) technology to upgrade diff --git a/net/smc/smc_stats.h b/net/smc/smc_stats.h index aa8928975cc6..9d32058db2b5 100644 --- a/net/smc/smc_stats.h +++ b/net/smc/smc_stats.h @@ -92,13 +92,14 @@ do { \ typeof(_smc_stats) stats = (_smc_stats); \ typeof(_tech) t = (_tech); \ typeof(_len) l = (_len); \ - int _pos = fls64((l) >> 13); \ + int _pos; \ typeof(_rc) r = (_rc); \ int m = SMC_BUF_MAX - 1; \ this_cpu_inc((*stats).smc[t].key ## _cnt); \ - if (r <= 0) \ + if (r <= 0 || l <= 0) \ break; \ - _pos = (_pos < m) ? ((l == 1 << (_pos + 12)) ? _pos - 1 : _pos) : m; \ + _pos = fls64((l - 1) >> 13); \ + _pos = (_pos <= m) ? _pos : m; \ this_cpu_inc((*stats).smc[t].key ## _pd.buf[_pos]); \ this_cpu_add((*stats).smc[t].key ## _bytes, r); \ } \ @@ -138,9 +139,12 @@ while (0) do { \ typeof(_len) _l = (_len); \ typeof(_tech) t = (_tech); \ - int _pos = fls((_l) >> 13); \ + int _pos; \ int m = SMC_BUF_MAX - 1; \ - _pos = (_pos < m) ? ((_l == 1 << (_pos + 12)) ? _pos - 1 : _pos) : m; \ + if (_l <= 0) \ + break; \ + _pos = fls((_l - 1) >> 13); \ + _pos = (_pos <= m) ? _pos : m; \ this_cpu_inc((*(_smc_stats)).smc[t].k ## _rmbsize.buf[_pos]); \ } \ while (0) diff --git a/net/tls/tls.h b/net/tls/tls.h index 28a8c0e80e3c..478b2c0060aa 100644 --- a/net/tls/tls.h +++ b/net/tls/tls.h @@ -127,7 +127,7 @@ struct tls_rec { struct sock *sk; char aad_space[TLS_AAD_SPACE_SIZE]; - u8 iv_data[MAX_IV_SIZE]; + u8 iv_data[TLS_MAX_IV_SIZE]; struct aead_request aead_req; u8 aead_req_ctx[]; }; @@ -142,7 +142,11 @@ void update_sk_prot(struct sock *sk, struct tls_context *ctx); int wait_on_pending_writer(struct sock *sk, long *timeo); void tls_err_abort(struct sock *sk, int err); -int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx); +int init_prot_info(struct tls_prot_info *prot, + const struct tls_crypto_info *crypto_info, + const struct tls_cipher_desc *cipher_desc, + int mode); +int tls_set_sw_offload(struct sock *sk, int tx); void tls_update_rx_zc_capable(struct tls_context *tls_ctx); void tls_sw_strparser_arm(struct sock *sk, struct tls_context *ctx); void tls_sw_strparser_done(struct tls_context *tls_ctx); @@ -223,7 +227,7 @@ static inline bool tls_strp_msg_mixed_decrypted(struct tls_sw_context_rx *ctx) #ifdef CONFIG_TLS_DEVICE int tls_device_init(void); void tls_device_cleanup(void); -int tls_set_device_offload(struct sock *sk, struct tls_context *ctx); +int tls_set_device_offload(struct sock *sk); void tls_device_free_resources_tx(struct sock *sk); int tls_set_device_offload_rx(struct sock *sk, struct tls_context *ctx); void tls_device_offload_cleanup_rx(struct sock *sk); @@ -234,7 +238,7 @@ static inline int tls_device_init(void) { return 0; } static inline void tls_device_cleanup(void) {} static inline int -tls_set_device_offload(struct sock *sk, struct tls_context *ctx) +tls_set_device_offload(struct sock *sk) { return -EOPNOTSUPP; } diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c index 8c94c926606a..f01543557a60 100644 --- a/net/tls/tls_device.c +++ b/net/tls/tls_device.c @@ -56,11 +56,8 @@ static struct page *dummy_page; static void tls_device_free_ctx(struct tls_context *ctx) { - if (ctx->tx_conf == TLS_HW) { + if (ctx->tx_conf == TLS_HW) kfree(tls_offload_ctx_tx(ctx)); - kfree(ctx->tx.rec_seq); - kfree(ctx->tx.iv); - } if (ctx->rx_conf == TLS_HW) kfree(tls_offload_ctx_rx(ctx)); @@ -891,14 +888,8 @@ tls_device_reencrypt(struct sock *sk, struct tls_context *tls_ctx) struct strp_msg *rxm; char *orig_buf, *buf; - switch (tls_ctx->crypto_recv.info.cipher_type) { - case TLS_CIPHER_AES_GCM_128: - case TLS_CIPHER_AES_GCM_256: - break; - default: - return -EINVAL; - } cipher_desc = get_cipher_desc(tls_ctx->crypto_recv.info.cipher_type); + DEBUG_NET_WARN_ON_ONCE(!cipher_desc || !cipher_desc->offloadable); rxm = strp_msg(tls_strp_msg(sw_ctx)); orig_buf = kmalloc(rxm->full_len + TLS_HEADER_SIZE + cipher_desc->iv, @@ -1042,22 +1033,45 @@ static void tls_device_attach(struct tls_context *ctx, struct sock *sk, } } -int tls_set_device_offload(struct sock *sk, struct tls_context *ctx) +static struct tls_offload_context_tx *alloc_offload_ctx_tx(struct tls_context *ctx) +{ + struct tls_offload_context_tx *offload_ctx; + __be64 rcd_sn; + + offload_ctx = kzalloc(sizeof(*offload_ctx), GFP_KERNEL); + if (!offload_ctx) + return NULL; + + INIT_WORK(&offload_ctx->destruct_work, tls_device_tx_del_task); + INIT_LIST_HEAD(&offload_ctx->records_list); + spin_lock_init(&offload_ctx->lock); + sg_init_table(offload_ctx->sg_tx_data, + ARRAY_SIZE(offload_ctx->sg_tx_data)); + + /* start at rec_seq - 1 to account for the start marker record */ + memcpy(&rcd_sn, ctx->tx.rec_seq, sizeof(rcd_sn)); + offload_ctx->unacked_record_sn = be64_to_cpu(rcd_sn) - 1; + + offload_ctx->ctx = ctx; + + return offload_ctx; +} + +int tls_set_device_offload(struct sock *sk) { - struct tls_context *tls_ctx = tls_get_ctx(sk); - struct tls_prot_info *prot = &tls_ctx->prot_info; - const struct tls_cipher_desc *cipher_desc; struct tls_record_info *start_marker_record; struct tls_offload_context_tx *offload_ctx; + const struct tls_cipher_desc *cipher_desc; struct tls_crypto_info *crypto_info; + struct tls_prot_info *prot; struct net_device *netdev; - char *iv, *rec_seq; + struct tls_context *ctx; struct sk_buff *skb; - __be64 rcd_sn; + char *iv, *rec_seq; int rc; - if (!ctx) - return -EINVAL; + ctx = tls_get_ctx(sk); + prot = &ctx->prot_info; if (ctx->priv_ctx_tx) return -EEXIST; @@ -1085,38 +1099,23 @@ int tls_set_device_offload(struct sock *sk, struct tls_context *ctx) goto release_netdev; } + rc = init_prot_info(prot, crypto_info, cipher_desc, TLS_HW); + if (rc) + goto release_netdev; + iv = crypto_info_iv(crypto_info, cipher_desc); rec_seq = crypto_info_rec_seq(crypto_info, cipher_desc); - prot->version = crypto_info->version; - prot->cipher_type = crypto_info->cipher_type; - prot->prepend_size = TLS_HEADER_SIZE + cipher_desc->iv; - prot->tag_size = cipher_desc->tag; - prot->overhead_size = prot->prepend_size + prot->tag_size; - prot->iv_size = cipher_desc->iv; - prot->salt_size = cipher_desc->salt; - ctx->tx.iv = kmalloc(cipher_desc->iv + cipher_desc->salt, GFP_KERNEL); - if (!ctx->tx.iv) { - rc = -ENOMEM; - goto release_netdev; - } - memcpy(ctx->tx.iv + cipher_desc->salt, iv, cipher_desc->iv); - - prot->rec_seq_size = cipher_desc->rec_seq; - ctx->tx.rec_seq = kmemdup(rec_seq, cipher_desc->rec_seq, GFP_KERNEL); - if (!ctx->tx.rec_seq) { - rc = -ENOMEM; - goto free_iv; - } + memcpy(ctx->tx.rec_seq, rec_seq, cipher_desc->rec_seq); start_marker_record = kmalloc(sizeof(*start_marker_record), GFP_KERNEL); if (!start_marker_record) { rc = -ENOMEM; - goto free_rec_seq; + goto release_netdev; } - offload_ctx = kzalloc(TLS_OFFLOAD_CONTEXT_SIZE_TX, GFP_KERNEL); + offload_ctx = alloc_offload_ctx_tx(ctx); if (!offload_ctx) { rc = -ENOMEM; goto free_marker_record; @@ -1126,22 +1125,10 @@ int tls_set_device_offload(struct sock *sk, struct tls_context *ctx) if (rc) goto free_offload_ctx; - /* start at rec_seq - 1 to account for the start marker record */ - memcpy(&rcd_sn, ctx->tx.rec_seq, sizeof(rcd_sn)); - offload_ctx->unacked_record_sn = be64_to_cpu(rcd_sn) - 1; - start_marker_record->end_seq = tcp_sk(sk)->write_seq; start_marker_record->len = 0; start_marker_record->num_frags = 0; - - INIT_WORK(&offload_ctx->destruct_work, tls_device_tx_del_task); - offload_ctx->ctx = ctx; - - INIT_LIST_HEAD(&offload_ctx->records_list); list_add_tail(&start_marker_record->list, &offload_ctx->records_list); - spin_lock_init(&offload_ctx->lock); - sg_init_table(offload_ctx->sg_tx_data, - ARRAY_SIZE(offload_ctx->sg_tx_data)); clean_acked_data_enable(inet_csk(sk), &tls_icsk_clean_acked); ctx->push_pending_record = tls_device_push_pending_record; @@ -1198,10 +1185,6 @@ free_offload_ctx: ctx->priv_ctx_tx = NULL; free_marker_record: kfree(start_marker_record); -free_rec_seq: - kfree(ctx->tx.rec_seq); -free_iv: - kfree(ctx->tx.iv); release_netdev: dev_put(netdev); return rc; @@ -1242,7 +1225,7 @@ int tls_set_device_offload_rx(struct sock *sk, struct tls_context *ctx) goto release_lock; } - context = kzalloc(TLS_OFFLOAD_CONTEXT_SIZE_RX, GFP_KERNEL); + context = kzalloc(sizeof(*context), GFP_KERNEL); if (!context) { rc = -ENOMEM; goto release_lock; @@ -1250,7 +1233,7 @@ int tls_set_device_offload_rx(struct sock *sk, struct tls_context *ctx) context->resync_nh_reset = 1; ctx->priv_ctx_rx = context; - rc = tls_set_sw_offload(sk, ctx, 0); + rc = tls_set_sw_offload(sk, 0); if (rc) goto release_ctx; diff --git a/net/tls/tls_device_fallback.c b/net/tls/tls_device_fallback.c index 1d743f310f4f..4e7228f275fa 100644 --- a/net/tls/tls_device_fallback.c +++ b/net/tls/tls_device_fallback.c @@ -54,7 +54,7 @@ static int tls_enc_record(struct aead_request *aead_req, struct scatter_walk *out, int *in_len, struct tls_prot_info *prot) { - unsigned char buf[TLS_HEADER_SIZE + MAX_IV_SIZE]; + unsigned char buf[TLS_HEADER_SIZE + TLS_MAX_IV_SIZE]; const struct tls_cipher_desc *cipher_desc; struct scatterlist sg_in[3]; struct scatterlist sg_out[3]; @@ -62,14 +62,8 @@ static int tls_enc_record(struct aead_request *aead_req, u16 len; int rc; - switch (prot->cipher_type) { - case TLS_CIPHER_AES_GCM_128: - case TLS_CIPHER_AES_GCM_256: - break; - default: - return -EINVAL; - } cipher_desc = get_cipher_desc(prot->cipher_type); + DEBUG_NET_WARN_ON_ONCE(!cipher_desc || !cipher_desc->offloadable); buf_size = TLS_HEADER_SIZE + cipher_desc->iv; len = min_t(int, *in_len, buf_size); @@ -338,17 +332,9 @@ static struct sk_buff *tls_enc_skb(struct tls_context *tls_ctx, if (!aead_req) return NULL; - switch (tls_ctx->crypto_send.info.cipher_type) { - case TLS_CIPHER_AES_GCM_128: - salt = tls_ctx->crypto_send.aes_gcm_128.salt; - break; - case TLS_CIPHER_AES_GCM_256: - salt = tls_ctx->crypto_send.aes_gcm_256.salt; - break; - default: - goto free_req; - } cipher_desc = get_cipher_desc(tls_ctx->crypto_send.info.cipher_type); + DEBUG_NET_WARN_ON_ONCE(!cipher_desc || !cipher_desc->offloadable); + buf_len = cipher_desc->salt + cipher_desc->iv + TLS_AAD_SPACE_SIZE + sync_size + cipher_desc->tag; buf = kmalloc(buf_len, GFP_ATOMIC); @@ -356,6 +342,7 @@ static struct sk_buff *tls_enc_skb(struct tls_context *tls_ctx, goto free_req; iv = buf; + salt = crypto_info_salt(&tls_ctx->crypto_send.info, cipher_desc); memcpy(iv, salt, cipher_desc->salt); aad = buf + cipher_desc->salt + cipher_desc->iv; dummy_buf = aad + TLS_AAD_SPACE_SIZE; diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index 02f583ff9239..b125a08a618a 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -59,7 +59,8 @@ enum { }; #define CHECK_CIPHER_DESC(cipher,ci) \ - static_assert(cipher ## _IV_SIZE <= MAX_IV_SIZE); \ + static_assert(cipher ## _IV_SIZE <= TLS_MAX_IV_SIZE); \ + static_assert(cipher ## _SALT_SIZE <= TLS_MAX_SALT_SIZE); \ static_assert(cipher ## _REC_SEQ_SIZE <= TLS_MAX_REC_SEQ_SIZE); \ static_assert(cipher ## _TAG_SIZE == TLS_TAG_SIZE); \ static_assert(sizeof_field(struct ci, iv) == cipher ## _IV_SIZE); \ @@ -344,8 +345,6 @@ static void tls_sk_proto_cleanup(struct sock *sk, /* We need these for tls_sw_fallback handling of other packets */ if (ctx->tx_conf == TLS_SW) { - kfree(ctx->tx.rec_seq); - kfree(ctx->tx.iv); tls_sw_release_resources_tx(sk); TLS_DEC_STATS(sock_net(sk), LINUX_MIB_TLSCURRTXSW); } else if (ctx->tx_conf == TLS_HW) { @@ -581,6 +580,31 @@ static int tls_getsockopt(struct sock *sk, int level, int optname, return do_tls_getsockopt(sk, optname, optval, optlen); } +static int validate_crypto_info(const struct tls_crypto_info *crypto_info, + const struct tls_crypto_info *alt_crypto_info) +{ + if (crypto_info->version != TLS_1_2_VERSION && + crypto_info->version != TLS_1_3_VERSION) + return -EINVAL; + + switch (crypto_info->cipher_type) { + case TLS_CIPHER_ARIA_GCM_128: + case TLS_CIPHER_ARIA_GCM_256: + if (crypto_info->version != TLS_1_2_VERSION) + return -EINVAL; + break; + } + + /* Ensure that TLS version and ciphers are same in both directions */ + if (TLS_CRYPTO_INFO_READY(alt_crypto_info)) { + if (alt_crypto_info->version != crypto_info->version || + alt_crypto_info->cipher_type != crypto_info->cipher_type) + return -EINVAL; + } + + return 0; +} + static int do_tls_setsockopt_conf(struct sock *sk, sockptr_t optval, unsigned int optlen, int tx) { @@ -612,21 +636,9 @@ static int do_tls_setsockopt_conf(struct sock *sk, sockptr_t optval, goto err_crypto_info; } - /* check version */ - if (crypto_info->version != TLS_1_2_VERSION && - crypto_info->version != TLS_1_3_VERSION) { - rc = -EINVAL; + rc = validate_crypto_info(crypto_info, alt_crypto_info); + if (rc) goto err_crypto_info; - } - - /* Ensure that TLS version and ciphers are same in both directions */ - if (TLS_CRYPTO_INFO_READY(alt_crypto_info)) { - if (alt_crypto_info->version != crypto_info->version || - alt_crypto_info->cipher_type != crypto_info->cipher_type) { - rc = -EINVAL; - goto err_crypto_info; - } - } cipher_desc = get_cipher_desc(crypto_info->cipher_type); if (!cipher_desc) { @@ -634,16 +646,6 @@ static int do_tls_setsockopt_conf(struct sock *sk, sockptr_t optval, goto err_crypto_info; } - switch (crypto_info->cipher_type) { - case TLS_CIPHER_ARIA_GCM_128: - case TLS_CIPHER_ARIA_GCM_256: - if (crypto_info->version != TLS_1_2_VERSION) { - rc = -EINVAL; - goto err_crypto_info; - } - break; - } - if (optlen != cipher_desc->crypto_info) { rc = -EINVAL; goto err_crypto_info; @@ -658,13 +660,13 @@ static int do_tls_setsockopt_conf(struct sock *sk, sockptr_t optval, } if (tx) { - rc = tls_set_device_offload(sk, ctx); + rc = tls_set_device_offload(sk); conf = TLS_HW; if (!rc) { TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSTXDEVICE); TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSCURRTXDEVICE); } else { - rc = tls_set_sw_offload(sk, ctx, 1); + rc = tls_set_sw_offload(sk, 1); if (rc) goto err_crypto_info; TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSTXSW); @@ -678,7 +680,7 @@ static int do_tls_setsockopt_conf(struct sock *sk, sockptr_t optval, TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSRXDEVICE); TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSCURRRXDEVICE); } else { - rc = tls_set_sw_offload(sk, ctx, 0); + rc = tls_set_sw_offload(sk, 0); if (rc) goto err_crypto_info; TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSRXSW); diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 270712b8d391..0f6da4ce3ed7 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -60,7 +60,7 @@ struct tls_decrypt_arg { struct tls_decrypt_ctx { struct sock *sk; - u8 iv[MAX_IV_SIZE]; + u8 iv[TLS_MAX_IV_SIZE]; u8 aad[TLS_MAX_AAD_SIZE]; u8 tail; struct scatterlist sg[]; @@ -2319,7 +2319,7 @@ int tls_rx_msg_size(struct tls_strparser *strp, struct sk_buff *skb) { struct tls_context *tls_ctx = tls_get_ctx(strp->sk); struct tls_prot_info *prot = &tls_ctx->prot_info; - char header[TLS_HEADER_SIZE + MAX_IV_SIZE]; + char header[TLS_HEADER_SIZE + TLS_MAX_IV_SIZE]; size_t cipher_overhead; size_t data_len = 0; int ret; @@ -2467,9 +2467,6 @@ void tls_sw_release_resources_rx(struct sock *sk) struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx); - kfree(tls_ctx->rx.rec_seq); - kfree(tls_ctx->rx.iv); - if (ctx->aead_recv) { __skb_queue_purge(&ctx->rx_list); crypto_free_aead(ctx->aead_recv); @@ -2581,69 +2578,119 @@ void tls_update_rx_zc_capable(struct tls_context *tls_ctx) tls_ctx->prot_info.version != TLS_1_3_VERSION; } -int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx) +static struct tls_sw_context_tx *init_ctx_tx(struct tls_context *ctx, struct sock *sk) +{ + struct tls_sw_context_tx *sw_ctx_tx; + + if (!ctx->priv_ctx_tx) { + sw_ctx_tx = kzalloc(sizeof(*sw_ctx_tx), GFP_KERNEL); + if (!sw_ctx_tx) + return NULL; + } else { + sw_ctx_tx = ctx->priv_ctx_tx; + } + + crypto_init_wait(&sw_ctx_tx->async_wait); + spin_lock_init(&sw_ctx_tx->encrypt_compl_lock); + INIT_LIST_HEAD(&sw_ctx_tx->tx_list); + INIT_DELAYED_WORK(&sw_ctx_tx->tx_work.work, tx_work_handler); + sw_ctx_tx->tx_work.sk = sk; + + return sw_ctx_tx; +} + +static struct tls_sw_context_rx *init_ctx_rx(struct tls_context *ctx) +{ + struct tls_sw_context_rx *sw_ctx_rx; + + if (!ctx->priv_ctx_rx) { + sw_ctx_rx = kzalloc(sizeof(*sw_ctx_rx), GFP_KERNEL); + if (!sw_ctx_rx) + return NULL; + } else { + sw_ctx_rx = ctx->priv_ctx_rx; + } + + crypto_init_wait(&sw_ctx_rx->async_wait); + spin_lock_init(&sw_ctx_rx->decrypt_compl_lock); + init_waitqueue_head(&sw_ctx_rx->wq); + skb_queue_head_init(&sw_ctx_rx->rx_list); + skb_queue_head_init(&sw_ctx_rx->async_hold); + + return sw_ctx_rx; +} + +int init_prot_info(struct tls_prot_info *prot, + const struct tls_crypto_info *crypto_info, + const struct tls_cipher_desc *cipher_desc, + int mode) +{ + u16 nonce_size = cipher_desc->nonce; + + if (crypto_info->version == TLS_1_3_VERSION) { + nonce_size = 0; + prot->aad_size = TLS_HEADER_SIZE; + prot->tail_size = 1; + } else { + prot->aad_size = TLS_AAD_SPACE_SIZE; + prot->tail_size = 0; + } + + if (mode == TLS_HW) { + prot->aad_size = 0; + prot->tail_size = 0; + } + + /* Sanity-check the sizes for stack allocations. */ + if (nonce_size > TLS_MAX_IV_SIZE || prot->aad_size > TLS_MAX_AAD_SIZE) + return -EINVAL; + + prot->version = crypto_info->version; + prot->cipher_type = crypto_info->cipher_type; + prot->prepend_size = TLS_HEADER_SIZE + nonce_size; + prot->tag_size = cipher_desc->tag; + prot->overhead_size = prot->prepend_size + prot->tag_size + prot->tail_size; + prot->iv_size = cipher_desc->iv; + prot->salt_size = cipher_desc->salt; + prot->rec_seq_size = cipher_desc->rec_seq; + + return 0; +} + +int tls_set_sw_offload(struct sock *sk, int tx) { - struct tls_context *tls_ctx = tls_get_ctx(sk); - struct tls_prot_info *prot = &tls_ctx->prot_info; - struct tls_crypto_info *crypto_info; struct tls_sw_context_tx *sw_ctx_tx = NULL; struct tls_sw_context_rx *sw_ctx_rx = NULL; + const struct tls_cipher_desc *cipher_desc; + struct tls_crypto_info *crypto_info; + char *iv, *rec_seq, *key, *salt; struct cipher_context *cctx; + struct tls_prot_info *prot; struct crypto_aead **aead; + struct tls_context *ctx; struct crypto_tfm *tfm; - char *iv, *rec_seq, *key, *salt; - const struct tls_cipher_desc *cipher_desc; - u16 nonce_size; int rc = 0; - if (!ctx) { - rc = -EINVAL; - goto out; - } + ctx = tls_get_ctx(sk); + prot = &ctx->prot_info; if (tx) { - if (!ctx->priv_ctx_tx) { - sw_ctx_tx = kzalloc(sizeof(*sw_ctx_tx), GFP_KERNEL); - if (!sw_ctx_tx) { - rc = -ENOMEM; - goto out; - } - ctx->priv_ctx_tx = sw_ctx_tx; - } else { - sw_ctx_tx = - (struct tls_sw_context_tx *)ctx->priv_ctx_tx; - } - } else { - if (!ctx->priv_ctx_rx) { - sw_ctx_rx = kzalloc(sizeof(*sw_ctx_rx), GFP_KERNEL); - if (!sw_ctx_rx) { - rc = -ENOMEM; - goto out; - } - ctx->priv_ctx_rx = sw_ctx_rx; - } else { - sw_ctx_rx = - (struct tls_sw_context_rx *)ctx->priv_ctx_rx; - } - } + ctx->priv_ctx_tx = init_ctx_tx(ctx, sk); + if (!ctx->priv_ctx_tx) + return -ENOMEM; - if (tx) { - crypto_init_wait(&sw_ctx_tx->async_wait); - spin_lock_init(&sw_ctx_tx->encrypt_compl_lock); + sw_ctx_tx = ctx->priv_ctx_tx; crypto_info = &ctx->crypto_send.info; cctx = &ctx->tx; aead = &sw_ctx_tx->aead_send; - INIT_LIST_HEAD(&sw_ctx_tx->tx_list); - INIT_DELAYED_WORK(&sw_ctx_tx->tx_work.work, tx_work_handler); - sw_ctx_tx->tx_work.sk = sk; } else { - crypto_init_wait(&sw_ctx_rx->async_wait); - spin_lock_init(&sw_ctx_rx->decrypt_compl_lock); - init_waitqueue_head(&sw_ctx_rx->wq); + ctx->priv_ctx_rx = init_ctx_rx(ctx); + if (!ctx->priv_ctx_rx) + return -ENOMEM; + + sw_ctx_rx = ctx->priv_ctx_rx; crypto_info = &ctx->crypto_recv.info; cctx = &ctx->rx; - skb_queue_head_init(&sw_ctx_rx->rx_list); - skb_queue_head_init(&sw_ctx_rx->async_hold); aead = &sw_ctx_rx->aead_recv; } @@ -2653,58 +2700,25 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx) goto free_priv; } - nonce_size = cipher_desc->nonce; + rc = init_prot_info(prot, crypto_info, cipher_desc, TLS_SW); + if (rc) + goto free_priv; iv = crypto_info_iv(crypto_info, cipher_desc); key = crypto_info_key(crypto_info, cipher_desc); salt = crypto_info_salt(crypto_info, cipher_desc); rec_seq = crypto_info_rec_seq(crypto_info, cipher_desc); - if (crypto_info->version == TLS_1_3_VERSION) { - nonce_size = 0; - prot->aad_size = TLS_HEADER_SIZE; - prot->tail_size = 1; - } else { - prot->aad_size = TLS_AAD_SPACE_SIZE; - prot->tail_size = 0; - } - - /* Sanity-check the sizes for stack allocations. */ - if (nonce_size > MAX_IV_SIZE || prot->aad_size > TLS_MAX_AAD_SIZE) { - rc = -EINVAL; - goto free_priv; - } - - prot->version = crypto_info->version; - prot->cipher_type = crypto_info->cipher_type; - prot->prepend_size = TLS_HEADER_SIZE + nonce_size; - prot->tag_size = cipher_desc->tag; - prot->overhead_size = prot->prepend_size + - prot->tag_size + prot->tail_size; - prot->iv_size = cipher_desc->iv; - prot->salt_size = cipher_desc->salt; - cctx->iv = kmalloc(cipher_desc->iv + cipher_desc->salt, GFP_KERNEL); - if (!cctx->iv) { - rc = -ENOMEM; - goto free_priv; - } - /* Note: 128 & 256 bit salt are the same size */ - prot->rec_seq_size = cipher_desc->rec_seq; memcpy(cctx->iv, salt, cipher_desc->salt); memcpy(cctx->iv + cipher_desc->salt, iv, cipher_desc->iv); - - cctx->rec_seq = kmemdup(rec_seq, cipher_desc->rec_seq, GFP_KERNEL); - if (!cctx->rec_seq) { - rc = -ENOMEM; - goto free_iv; - } + memcpy(cctx->rec_seq, rec_seq, cipher_desc->rec_seq); if (!*aead) { *aead = crypto_alloc_aead(cipher_desc->cipher_name, 0, 0); if (IS_ERR(*aead)) { rc = PTR_ERR(*aead); *aead = NULL; - goto free_rec_seq; + goto free_priv; } } @@ -2736,12 +2750,6 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx) free_aead: crypto_free_aead(*aead); *aead = NULL; -free_rec_seq: - kfree(cctx->rec_seq); - cctx->rec_seq = NULL; -free_iv: - kfree(cctx->iv); - cctx->iv = NULL; free_priv: if (tx) { kfree(ctx->priv_ctx_tx); diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 3e8a04a13668..e10d07c76044 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -116,6 +116,7 @@ #include <linux/freezer.h> #include <linux/file.h> #include <linux/btf_ids.h> +#include <linux/bpf-cgroup.h> #include "scm.h" @@ -1381,6 +1382,10 @@ static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr, if (err) goto out; + err = BPF_CGROUP_RUN_PROG_UNIX_CONNECT_LOCK(sk, addr, &alen); + if (err) + goto out; + if ((test_bit(SOCK_PASSCRED, &sock->flags) || test_bit(SOCK_PASSPIDFD, &sock->flags)) && !unix_sk(sk)->addr) { @@ -1490,6 +1495,10 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr, if (err) goto out; + err = BPF_CGROUP_RUN_PROG_UNIX_CONNECT_LOCK(sk, uaddr, &addr_len); + if (err) + goto out; + if ((test_bit(SOCK_PASSCRED, &sock->flags) || test_bit(SOCK_PASSPIDFD, &sock->flags)) && !u->addr) { err = unix_autobind(sk); @@ -1770,6 +1779,13 @@ static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int peer) } else { err = addr->len; memcpy(sunaddr, addr->name, addr->len); + + if (peer) + BPF_CGROUP_RUN_SA_PROG(sk, uaddr, &err, + CGROUP_UNIX_GETPEERNAME); + else + BPF_CGROUP_RUN_SA_PROG(sk, uaddr, &err, + CGROUP_UNIX_GETSOCKNAME); } sock_put(sk); out: @@ -1922,6 +1938,13 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg, err = unix_validate_addr(sunaddr, msg->msg_namelen); if (err) goto out; + + err = BPF_CGROUP_RUN_PROG_UNIX_SENDMSG_LOCK(sk, + msg->msg_name, + &msg->msg_namelen, + NULL); + if (err) + goto out; } else { sunaddr = NULL; err = -ENOTCONN; @@ -2390,9 +2413,14 @@ int __unix_dgram_recvmsg(struct sock *sk, struct msghdr *msg, size_t size, EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND); - if (msg->msg_name) + if (msg->msg_name) { unix_copy_addr(msg, skb->sk); + BPF_CGROUP_RUN_PROG_UNIX_RECVMSG_LOCK(sk, + msg->msg_name, + &msg->msg_namelen); + } + if (size > skb->len - skip) size = skb->len - skip; else if (size < skb->len - skip) @@ -2744,6 +2772,11 @@ unlock: DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, state->msg->msg_name); unix_copy_addr(state->msg, skb->sk); + + BPF_CGROUP_RUN_PROG_UNIX_RECVMSG_LOCK(sk, + state->msg->msg_name, + &state->msg->msg_namelen); + sunaddr = NULL; } diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 013b65241b65..816725af281f 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -89,6 +89,7 @@ #include <linux/types.h> #include <linux/bitops.h> #include <linux/cred.h> +#include <linux/errqueue.h> #include <linux/init.h> #include <linux/io.h> #include <linux/kernel.h> @@ -110,6 +111,7 @@ #include <linux/workqueue.h> #include <net/sock.h> #include <net/af_vsock.h> +#include <uapi/linux/vm_sockets.h> static int __vsock_bind(struct sock *sk, struct sockaddr_vm *addr); static void vsock_sk_destruct(struct sock *sk); @@ -1030,7 +1032,7 @@ static __poll_t vsock_poll(struct file *file, struct socket *sock, poll_wait(file, sk_sleep(sk), wait); mask = 0; - if (sk->sk_err) + if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue)) /* Signify that there has been an error on this socket. */ mask |= EPOLLERR; @@ -1404,6 +1406,17 @@ static int vsock_connect(struct socket *sock, struct sockaddr *addr, goto out; } + if (vsock_msgzerocopy_allow(transport)) { + set_bit(SOCK_SUPPORT_ZC, &sk->sk_socket->flags); + } else if (sock_flag(sk, SOCK_ZEROCOPY)) { + /* If this option was set before 'connect()', + * when transport was unknown, check that this + * feature is supported here. + */ + err = -EOPNOTSUPP; + goto out; + } + err = vsock_auto_bind(vsk); if (err) goto out; @@ -1558,6 +1571,9 @@ static int vsock_accept(struct socket *sock, struct socket *newsock, int flags, } else { newsock->state = SS_CONNECTED; sock_graft(connected, newsock); + if (vsock_msgzerocopy_allow(vconnected->transport)) + set_bit(SOCK_SUPPORT_ZC, + &connected->sk_socket->flags); } release_sock(connected); @@ -1635,7 +1651,7 @@ static int vsock_connectible_setsockopt(struct socket *sock, const struct vsock_transport *transport; u64 val; - if (level != AF_VSOCK) + if (level != AF_VSOCK && level != SOL_SOCKET) return -ENOPROTOOPT; #define COPY_IN(_v) \ @@ -1658,6 +1674,33 @@ static int vsock_connectible_setsockopt(struct socket *sock, transport = vsk->transport; + if (level == SOL_SOCKET) { + int zerocopy; + + if (optname != SO_ZEROCOPY) { + release_sock(sk); + return sock_setsockopt(sock, level, optname, optval, optlen); + } + + /* Use 'int' type here, because variable to + * set this option usually has this type. + */ + COPY_IN(zerocopy); + + if (zerocopy < 0 || zerocopy > 1) { + err = -EINVAL; + goto exit; + } + + if (transport && !vsock_msgzerocopy_allow(transport)) { + err = -EOPNOTSUPP; + goto exit; + } + + sock_valbool_flag(sk, SOCK_ZEROCOPY, zerocopy); + goto exit; + } + switch (optname) { case SO_VM_SOCKETS_BUFFER_SIZE: COPY_IN(val); @@ -1822,6 +1865,12 @@ static int vsock_connectible_sendmsg(struct socket *sock, struct msghdr *msg, goto out; } + if (msg->msg_flags & MSG_ZEROCOPY && + !vsock_msgzerocopy_allow(transport)) { + err = -EOPNOTSUPP; + goto out; + } + /* Wait for room in the produce queue to enqueue our user's data. */ timeout = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT); @@ -2137,6 +2186,10 @@ vsock_connectible_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, int err; sk = sock->sk; + + if (unlikely(flags & MSG_ERRQUEUE)) + return sock_recv_errqueue(sk, msg, len, SOL_VSOCK, VSOCK_RECVERR); + vsk = vsock_sk(sk); err = 0; @@ -2304,6 +2357,12 @@ static int vsock_create(struct net *net, struct socket *sock, } } + /* SOCK_DGRAM doesn't have 'setsockopt' callback set in its + * proto_ops, so there is no handler for custom logic. + */ + if (sock_type_connectible(sock->type)) + set_bit(SOCK_CUSTOM_SOCKOPT, &sk->sk_socket->flags); + vsock_insert_unbound(vsk); return 0; diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c index 09ba3128e759..d324ae13e2f5 100644 --- a/net/vmw_vsock/virtio_transport.c +++ b/net/vmw_vsock/virtio_transport.c @@ -486,6 +486,11 @@ static bool virtio_transport_can_msgzerocopy(int bufs_num) return res; } +static bool virtio_transport_msgzerocopy_allow(void) +{ + return true; +} + static bool virtio_transport_seqpacket_allow(u32 remote_cid); static struct virtio_transport virtio_transport = { @@ -519,6 +524,8 @@ static struct virtio_transport virtio_transport = { .seqpacket_allow = virtio_transport_seqpacket_allow, .seqpacket_has_data = virtio_transport_seqpacket_has_data, + .msgzerocopy_allow = virtio_transport_msgzerocopy_allow, + .notify_poll_in = virtio_transport_notify_poll_in, .notify_poll_out = virtio_transport_notify_poll_out, .notify_recv_init = virtio_transport_notify_recv_init, diff --git a/net/vmw_vsock/vsock_loopback.c b/net/vmw_vsock/vsock_loopback.c index 5c6360df1f31..048640167411 100644 --- a/net/vmw_vsock/vsock_loopback.c +++ b/net/vmw_vsock/vsock_loopback.c @@ -47,6 +47,10 @@ static int vsock_loopback_cancel_pkt(struct vsock_sock *vsk) } static bool vsock_loopback_seqpacket_allow(u32 remote_cid); +static bool vsock_loopback_msgzerocopy_allow(void) +{ + return true; +} static struct virtio_transport loopback_transport = { .transport = { @@ -79,6 +83,8 @@ static struct virtio_transport loopback_transport = { .seqpacket_allow = vsock_loopback_seqpacket_allow, .seqpacket_has_data = virtio_transport_seqpacket_has_data, + .msgzerocopy_allow = vsock_loopback_msgzerocopy_allow, + .notify_poll_in = virtio_transport_notify_poll_in, .notify_poll_out = virtio_transport_notify_poll_out, .notify_recv_init = virtio_transport_notify_recv_init, diff --git a/net/xdp/xsk_queue.c b/net/xdp/xsk_queue.c index f8905400ee07..d2c264030017 100644 --- a/net/xdp/xsk_queue.c +++ b/net/xdp/xsk_queue.c @@ -34,6 +34,16 @@ struct xsk_queue *xskq_create(u32 nentries, bool umem_queue) q->ring_mask = nentries - 1; size = xskq_get_ring_size(q, umem_queue); + + /* size which is overflowing or close to SIZE_MAX will become 0 in + * PAGE_ALIGN(), checking SIZE_MAX is enough due to the previous + * is_power_of_2(), the rest will be handled by vmalloc_user() + */ + if (unlikely(size == SIZE_MAX)) { + kfree(q); + return NULL; + } + size = PAGE_ALIGN(size); q->ring = vmalloc_user(size); diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 6c707ebcebb9..90af76fa9dd8 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -169,6 +169,9 @@ endif TPROGS_CFLAGS += -Wall -O2 TPROGS_CFLAGS += -Wmissing-prototypes TPROGS_CFLAGS += -Wstrict-prototypes +TPROGS_CFLAGS += $(call try-run,\ + printf "int main() { return 0; }" |\ + $(CC) -Werror -fsanitize=bounds -x c - -o "$$TMP",-fsanitize=bounds,) TPROGS_CFLAGS += -I$(objtree)/usr/include TPROGS_CFLAGS += -I$(srctree)/tools/testing/selftests/bpf/ diff --git a/samples/bpf/syscall_tp_user.c b/samples/bpf/syscall_tp_user.c index 7a788bb837fc..7a09ac74fac0 100644 --- a/samples/bpf/syscall_tp_user.c +++ b/samples/bpf/syscall_tp_user.c @@ -17,9 +17,9 @@ static void usage(const char *cmd) { - printf("USAGE: %s [-i num_progs] [-h]\n", cmd); - printf(" -i num_progs # number of progs of the test\n"); - printf(" -h # help\n"); + printf("USAGE: %s [-i nr_tests] [-h]\n", cmd); + printf(" -i nr_tests # rounds of test to run\n"); + printf(" -h # help\n"); } static void verify_map(int map_id) @@ -45,14 +45,14 @@ static void verify_map(int map_id) } } -static int test(char *filename, int num_progs) +static int test(char *filename, int nr_tests) { - int map0_fds[num_progs], map1_fds[num_progs], fd, i, j = 0; - struct bpf_link *links[num_progs * 4]; - struct bpf_object *objs[num_progs]; + int map0_fds[nr_tests], map1_fds[nr_tests], fd, i, j = 0; + struct bpf_link **links = NULL; + struct bpf_object *objs[nr_tests]; struct bpf_program *prog; - for (i = 0; i < num_progs; i++) { + for (i = 0; i < nr_tests; i++) { objs[i] = bpf_object__open_file(filename, NULL); if (libbpf_get_error(objs[i])) { fprintf(stderr, "opening BPF object file failed\n"); @@ -60,6 +60,19 @@ static int test(char *filename, int num_progs) goto cleanup; } + /* One-time initialization */ + if (!links) { + int nr_progs = 0; + + bpf_object__for_each_program(prog, objs[i]) + nr_progs += 1; + + links = calloc(nr_progs * nr_tests, sizeof(struct bpf_link *)); + + if (!links) + goto cleanup; + } + /* load BPF program */ if (bpf_object__load(objs[i])) { fprintf(stderr, "loading BPF object file failed\n"); @@ -101,14 +114,18 @@ static int test(char *filename, int num_progs) close(fd); /* verify the map */ - for (i = 0; i < num_progs; i++) { + for (i = 0; i < nr_tests; i++) { verify_map(map0_fds[i]); verify_map(map1_fds[i]); } cleanup: - for (j--; j >= 0; j--) - bpf_link__destroy(links[j]); + if (links) { + for (j--; j >= 0; j--) + bpf_link__destroy(links[j]); + + free(links); + } for (i--; i >= 0; i--) bpf_object__close(objs[i]); @@ -117,13 +134,13 @@ cleanup: int main(int argc, char **argv) { - int opt, num_progs = 1; + int opt, nr_tests = 1; char filename[256]; while ((opt = getopt(argc, argv, "i:h")) != -1) { switch (opt) { case 'i': - num_progs = atoi(optarg); + nr_tests = atoi(optarg); break; case 'h': default: @@ -134,5 +151,5 @@ int main(int argc, char **argv) snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); - return test(filename, num_progs); + return test(filename, nr_tests); } diff --git a/security/keys/trusted-keys/trusted_core.c b/security/keys/trusted-keys/trusted_core.c index c6fc50d67214..85fb5c22529a 100644 --- a/security/keys/trusted-keys/trusted_core.c +++ b/security/keys/trusted-keys/trusted_core.c @@ -44,13 +44,12 @@ static const struct trusted_key_source trusted_key_sources[] = { #endif }; -DEFINE_STATIC_CALL_NULL(trusted_key_init, *trusted_key_sources[0].ops->init); DEFINE_STATIC_CALL_NULL(trusted_key_seal, *trusted_key_sources[0].ops->seal); DEFINE_STATIC_CALL_NULL(trusted_key_unseal, *trusted_key_sources[0].ops->unseal); DEFINE_STATIC_CALL_NULL(trusted_key_get_random, *trusted_key_sources[0].ops->get_random); -DEFINE_STATIC_CALL_NULL(trusted_key_exit, *trusted_key_sources[0].ops->exit); +static void (*trusted_key_exit)(void); static unsigned char migratable; enum { @@ -359,19 +358,16 @@ static int __init init_trusted(void) if (!get_random) get_random = kernel_get_random; - static_call_update(trusted_key_init, - trusted_key_sources[i].ops->init); static_call_update(trusted_key_seal, trusted_key_sources[i].ops->seal); static_call_update(trusted_key_unseal, trusted_key_sources[i].ops->unseal); static_call_update(trusted_key_get_random, get_random); - static_call_update(trusted_key_exit, - trusted_key_sources[i].ops->exit); + trusted_key_exit = trusted_key_sources[i].ops->exit; migratable = trusted_key_sources[i].ops->migratable; - ret = static_call(trusted_key_init)(); + ret = trusted_key_sources[i].ops->init(); if (!ret) break; } @@ -388,7 +384,8 @@ static int __init init_trusted(void) static void __exit cleanup_trusted(void) { - static_call_cond(trusted_key_exit)(); + if (trusted_key_exit) + (*trusted_key_exit)(); } late_initcall(init_trusted); diff --git a/sound/pci/hda/cs35l41_hda.c b/sound/pci/hda/cs35l41_hda.c index f9b77353c266..c6031f744099 100644 --- a/sound/pci/hda/cs35l41_hda.c +++ b/sound/pci/hda/cs35l41_hda.c @@ -185,10 +185,14 @@ static int cs35l41_request_firmware_files_spkid(struct cs35l41_hda *cs35l41, cs35l41->speaker_id, "wmfw"); if (!ret) { /* try cirrus/part-dspN-fwtype-sub<-spkidN><-ampname>.bin */ - return cs35l41_request_firmware_file(cs35l41, coeff_firmware, coeff_filename, - CS35L41_FIRMWARE_ROOT, - cs35l41->acpi_subsystem_id, cs35l41->amp_name, - cs35l41->speaker_id, "bin"); + ret = cs35l41_request_firmware_file(cs35l41, coeff_firmware, coeff_filename, + CS35L41_FIRMWARE_ROOT, + cs35l41->acpi_subsystem_id, cs35l41->amp_name, + cs35l41->speaker_id, "bin"); + if (ret) + goto coeff_err; + + return 0; } /* try cirrus/part-dspN-fwtype-sub<-ampname>.wmfw */ @@ -197,10 +201,14 @@ static int cs35l41_request_firmware_files_spkid(struct cs35l41_hda *cs35l41, cs35l41->amp_name, -1, "wmfw"); if (!ret) { /* try cirrus/part-dspN-fwtype-sub<-spkidN><-ampname>.bin */ - return cs35l41_request_firmware_file(cs35l41, coeff_firmware, coeff_filename, - CS35L41_FIRMWARE_ROOT, - cs35l41->acpi_subsystem_id, cs35l41->amp_name, - cs35l41->speaker_id, "bin"); + ret = cs35l41_request_firmware_file(cs35l41, coeff_firmware, coeff_filename, + CS35L41_FIRMWARE_ROOT, + cs35l41->acpi_subsystem_id, cs35l41->amp_name, + cs35l41->speaker_id, "bin"); + if (ret) + goto coeff_err; + + return 0; } /* try cirrus/part-dspN-fwtype-sub<-spkidN>.wmfw */ @@ -215,10 +223,14 @@ static int cs35l41_request_firmware_files_spkid(struct cs35l41_hda *cs35l41, cs35l41->amp_name, cs35l41->speaker_id, "bin"); if (ret) /* try cirrus/part-dspN-fwtype-sub<-spkidN>.bin */ - return cs35l41_request_firmware_file(cs35l41, coeff_firmware, - coeff_filename, CS35L41_FIRMWARE_ROOT, - cs35l41->acpi_subsystem_id, NULL, - cs35l41->speaker_id, "bin"); + ret = cs35l41_request_firmware_file(cs35l41, coeff_firmware, + coeff_filename, CS35L41_FIRMWARE_ROOT, + cs35l41->acpi_subsystem_id, NULL, + cs35l41->speaker_id, "bin"); + if (ret) + goto coeff_err; + + return 0; } /* try cirrus/part-dspN-fwtype-sub.wmfw */ @@ -233,12 +245,50 @@ static int cs35l41_request_firmware_files_spkid(struct cs35l41_hda *cs35l41, cs35l41->speaker_id, "bin"); if (ret) /* try cirrus/part-dspN-fwtype-sub<-spkidN>.bin */ - return cs35l41_request_firmware_file(cs35l41, coeff_firmware, - coeff_filename, CS35L41_FIRMWARE_ROOT, - cs35l41->acpi_subsystem_id, NULL, - cs35l41->speaker_id, "bin"); + ret = cs35l41_request_firmware_file(cs35l41, coeff_firmware, + coeff_filename, CS35L41_FIRMWARE_ROOT, + cs35l41->acpi_subsystem_id, NULL, + cs35l41->speaker_id, "bin"); + if (ret) + goto coeff_err; + } + + return ret; +coeff_err: + release_firmware(*wmfw_firmware); + kfree(*wmfw_filename); + return ret; +} + +static int cs35l41_fallback_firmware_file(struct cs35l41_hda *cs35l41, + const struct firmware **wmfw_firmware, + char **wmfw_filename, + const struct firmware **coeff_firmware, + char **coeff_filename) +{ + int ret; + + /* Handle fallback */ + dev_warn(cs35l41->dev, "Falling back to default firmware.\n"); + + /* fallback try cirrus/part-dspN-fwtype.wmfw */ + ret = cs35l41_request_firmware_file(cs35l41, wmfw_firmware, wmfw_filename, + CS35L41_FIRMWARE_ROOT, NULL, NULL, -1, "wmfw"); + if (ret) + goto err; + + /* fallback try cirrus/part-dspN-fwtype.bin */ + ret = cs35l41_request_firmware_file(cs35l41, coeff_firmware, coeff_filename, + CS35L41_FIRMWARE_ROOT, NULL, NULL, -1, "bin"); + if (ret) { + release_firmware(*wmfw_firmware); + kfree(*wmfw_filename); + goto err; } + return 0; +err: + dev_warn(cs35l41->dev, "Unable to find firmware and tuning\n"); return ret; } @@ -254,7 +304,6 @@ static int cs35l41_request_firmware_files(struct cs35l41_hda *cs35l41, ret = cs35l41_request_firmware_files_spkid(cs35l41, wmfw_firmware, wmfw_filename, coeff_firmware, coeff_filename); goto out; - } /* try cirrus/part-dspN-fwtype-sub<-ampname>.wmfw */ @@ -267,6 +316,9 @@ static int cs35l41_request_firmware_files(struct cs35l41_hda *cs35l41, CS35L41_FIRMWARE_ROOT, cs35l41->acpi_subsystem_id, cs35l41->amp_name, -1, "bin"); + if (ret) + goto coeff_err; + goto out; } @@ -286,32 +338,23 @@ static int cs35l41_request_firmware_files(struct cs35l41_hda *cs35l41, CS35L41_FIRMWARE_ROOT, cs35l41->acpi_subsystem_id, NULL, -1, "bin"); + if (ret) + goto coeff_err; } out: - if (!ret) - return 0; + if (ret) + /* if all attempts at finding firmware fail, try fallback */ + goto fallback; - /* Handle fallback */ - dev_warn(cs35l41->dev, "Falling back to default firmware.\n"); + return 0; +coeff_err: release_firmware(*wmfw_firmware); kfree(*wmfw_filename); - - /* fallback try cirrus/part-dspN-fwtype.wmfw */ - ret = cs35l41_request_firmware_file(cs35l41, wmfw_firmware, wmfw_filename, - CS35L41_FIRMWARE_ROOT, NULL, NULL, -1, "wmfw"); - if (!ret) - /* fallback try cirrus/part-dspN-fwtype.bin */ - ret = cs35l41_request_firmware_file(cs35l41, coeff_firmware, coeff_filename, - CS35L41_FIRMWARE_ROOT, NULL, NULL, -1, "bin"); - - if (ret) { - release_firmware(*wmfw_firmware); - kfree(*wmfw_filename); - dev_warn(cs35l41->dev, "Unable to find firmware and tuning\n"); - } - return ret; +fallback: + return cs35l41_fallback_firmware_file(cs35l41, wmfw_firmware, wmfw_filename, + coeff_firmware, coeff_filename); } #if IS_ENABLED(CONFIG_EFI) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 751783f3a15c..3eeecf67c17b 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -7343,6 +7343,7 @@ enum { ALC245_FIXUP_HP_MUTE_LED_COEFBIT, ALC245_FIXUP_HP_X360_MUTE_LEDS, ALC287_FIXUP_THINKPAD_I2S_SPK, + ALC287_FIXUP_MG_RTKC_CSAMP_CS35L41_I2C_THINKPAD, }; /* A special fixup for Lenovo C940 and Yoga Duet 7; @@ -9441,6 +9442,12 @@ static const struct hda_fixup alc269_fixups[] = { .type = HDA_FIXUP_FUNC, .v.func = alc287_fixup_bind_dacs, }, + [ALC287_FIXUP_MG_RTKC_CSAMP_CS35L41_I2C_THINKPAD] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc287_fixup_bind_dacs, + .chained = true, + .chain_id = ALC287_FIXUP_CS35L41_I2C_2_THINKPAD_ACPI, + }, }; static const struct snd_pci_quirk alc269_fixup_tbl[] = { @@ -9854,7 +9861,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x10ec, 0x124c, "Intel Reference board", ALC295_FIXUP_CHROME_BOOK), SND_PCI_QUIRK(0x10ec, 0x1252, "Intel Reference board", ALC295_FIXUP_CHROME_BOOK), SND_PCI_QUIRK(0x10ec, 0x1254, "Intel Reference board", ALC295_FIXUP_CHROME_BOOK), - SND_PCI_QUIRK(0x10ec, 0x12cc, "Intel Reference board", ALC225_FIXUP_HEADSET_JACK), + SND_PCI_QUIRK(0x10ec, 0x12cc, "Intel Reference board", ALC295_FIXUP_CHROME_BOOK), SND_PCI_QUIRK(0x10f7, 0x8338, "Panasonic CF-SZ6", ALC269_FIXUP_HEADSET_MODE), SND_PCI_QUIRK(0x144d, 0xc109, "Samsung Ativ book 9 (NP900X3G)", ALC269_FIXUP_INV_DMIC), SND_PCI_QUIRK(0x144d, 0xc169, "Samsung Notebook 9 Pen (NP930SBE-K01US)", ALC298_FIXUP_SAMSUNG_AMP), @@ -9988,14 +9995,14 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x17aa, 0x22be, "Thinkpad X1 Carbon 8th", ALC285_FIXUP_THINKPAD_HEADSET_JACK), SND_PCI_QUIRK(0x17aa, 0x22c1, "Thinkpad P1 Gen 3", ALC285_FIXUP_THINKPAD_NO_BASS_SPK_HEADSET_JACK), SND_PCI_QUIRK(0x17aa, 0x22c2, "Thinkpad X1 Extreme Gen 3", ALC285_FIXUP_THINKPAD_NO_BASS_SPK_HEADSET_JACK), - SND_PCI_QUIRK(0x17aa, 0x22f1, "Thinkpad", ALC287_FIXUP_CS35L41_I2C_2_THINKPAD_ACPI), - SND_PCI_QUIRK(0x17aa, 0x22f2, "Thinkpad", ALC287_FIXUP_CS35L41_I2C_2_THINKPAD_ACPI), - SND_PCI_QUIRK(0x17aa, 0x22f3, "Thinkpad", ALC287_FIXUP_CS35L41_I2C_2_THINKPAD_ACPI), - SND_PCI_QUIRK(0x17aa, 0x2316, "Thinkpad P1 Gen 6", ALC287_FIXUP_CS35L41_I2C_2_THINKPAD_ACPI), - SND_PCI_QUIRK(0x17aa, 0x2317, "Thinkpad P1 Gen 6", ALC287_FIXUP_CS35L41_I2C_2_THINKPAD_ACPI), - SND_PCI_QUIRK(0x17aa, 0x2318, "Thinkpad Z13 Gen2", ALC287_FIXUP_CS35L41_I2C_2_THINKPAD_ACPI), - SND_PCI_QUIRK(0x17aa, 0x2319, "Thinkpad Z16 Gen2", ALC287_FIXUP_CS35L41_I2C_2_THINKPAD_ACPI), - SND_PCI_QUIRK(0x17aa, 0x231a, "Thinkpad Z16 Gen2", ALC287_FIXUP_CS35L41_I2C_2_THINKPAD_ACPI), + SND_PCI_QUIRK(0x17aa, 0x22f1, "Thinkpad", ALC287_FIXUP_MG_RTKC_CSAMP_CS35L41_I2C_THINKPAD), + SND_PCI_QUIRK(0x17aa, 0x22f2, "Thinkpad", ALC287_FIXUP_MG_RTKC_CSAMP_CS35L41_I2C_THINKPAD), + SND_PCI_QUIRK(0x17aa, 0x22f3, "Thinkpad", ALC287_FIXUP_MG_RTKC_CSAMP_CS35L41_I2C_THINKPAD), + SND_PCI_QUIRK(0x17aa, 0x2316, "Thinkpad P1 Gen 6", ALC287_FIXUP_MG_RTKC_CSAMP_CS35L41_I2C_THINKPAD), + SND_PCI_QUIRK(0x17aa, 0x2317, "Thinkpad P1 Gen 6", ALC287_FIXUP_MG_RTKC_CSAMP_CS35L41_I2C_THINKPAD), + SND_PCI_QUIRK(0x17aa, 0x2318, "Thinkpad Z13 Gen2", ALC287_FIXUP_MG_RTKC_CSAMP_CS35L41_I2C_THINKPAD), + SND_PCI_QUIRK(0x17aa, 0x2319, "Thinkpad Z16 Gen2", ALC287_FIXUP_MG_RTKC_CSAMP_CS35L41_I2C_THINKPAD), + SND_PCI_QUIRK(0x17aa, 0x231a, "Thinkpad Z16 Gen2", ALC287_FIXUP_MG_RTKC_CSAMP_CS35L41_I2C_THINKPAD), SND_PCI_QUIRK(0x17aa, 0x30bb, "ThinkCentre AIO", ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY), SND_PCI_QUIRK(0x17aa, 0x30e2, "ThinkCentre AIO", ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY), SND_PCI_QUIRK(0x17aa, 0x310c, "ThinkCentre Station", ALC294_FIXUP_LENOVO_MIC_LOCATION), @@ -10091,7 +10098,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x8086, 0x2074, "Intel NUC 8", ALC233_FIXUP_INTEL_NUC8_DMIC), SND_PCI_QUIRK(0x8086, 0x2080, "Intel NUC 8 Rugged", ALC256_FIXUP_INTEL_NUC8_RUGGED), SND_PCI_QUIRK(0x8086, 0x2081, "Intel NUC 10", ALC256_FIXUP_INTEL_NUC10), - SND_PCI_QUIRK(0x8086, 0x3038, "Intel NUC 13", ALC225_FIXUP_HEADSET_JACK), + SND_PCI_QUIRK(0x8086, 0x3038, "Intel NUC 13", ALC295_FIXUP_CHROME_BOOK), SND_PCI_QUIRK(0xf111, 0x0001, "Framework Laptop", ALC295_FIXUP_FRAMEWORK_LAPTOP_MIC_NO_PRESENCE), #if 0 diff --git a/sound/soc/amd/yc/acp6x-mach.c b/sound/soc/amd/yc/acp6x-mach.c index 94e9eb8e73f2..15a864dcd7bd 100644 --- a/sound/soc/amd/yc/acp6x-mach.c +++ b/sound/soc/amd/yc/acp6x-mach.c @@ -244,6 +244,13 @@ static const struct dmi_system_id yc_acp_quirk_table[] = { { .driver_data = &acp6x_card, .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "82YM"), + } + }, + { + .driver_data = &acp6x_card, + .matches = { DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK COMPUTER INC."), DMI_MATCH(DMI_PRODUCT_NAME, "UM5302TA"), } diff --git a/sound/soc/codecs/hdmi-codec.c b/sound/soc/codecs/hdmi-codec.c index 13689e718d36..09eef6042aad 100644 --- a/sound/soc/codecs/hdmi-codec.c +++ b/sound/soc/codecs/hdmi-codec.c @@ -531,7 +531,10 @@ static int hdmi_codec_fill_codec_params(struct snd_soc_dai *dai, hp->sample_rate = sample_rate; hp->channels = channels; - hcp->chmap_idx = idx; + if (pcm_audio) + hcp->chmap_idx = ca_id; + else + hcp->chmap_idx = HDMI_CODEC_CHMAP_IDX_UNKNOWN; return 0; } diff --git a/sound/soc/codecs/rt5682-i2c.c b/sound/soc/codecs/rt5682-i2c.c index b05b4f73d8aa..fbad1ed06626 100644 --- a/sound/soc/codecs/rt5682-i2c.c +++ b/sound/soc/codecs/rt5682-i2c.c @@ -157,11 +157,6 @@ static int rt5682_i2c_probe(struct i2c_client *i2c) return ret; } - ret = devm_add_action_or_reset(&i2c->dev, rt5682_i2c_disable_regulators, - rt5682); - if (ret) - return ret; - ret = regulator_bulk_enable(ARRAY_SIZE(rt5682->supplies), rt5682->supplies); if (ret) { @@ -169,6 +164,11 @@ static int rt5682_i2c_probe(struct i2c_client *i2c) return ret; } + ret = devm_add_action_or_reset(&i2c->dev, rt5682_i2c_disable_regulators, + rt5682); + if (ret) + return ret; + ret = rt5682_get_ldo1(rt5682, &i2c->dev); if (ret) return ret; diff --git a/sound/soc/codecs/tlv320adc3xxx.c b/sound/soc/codecs/tlv320adc3xxx.c index b976c1946286..420bbf588efe 100644 --- a/sound/soc/codecs/tlv320adc3xxx.c +++ b/sound/soc/codecs/tlv320adc3xxx.c @@ -293,7 +293,7 @@ #define ADC3XXX_BYPASS_RPGA 0x80 /* MICBIAS control bits */ -#define ADC3XXX_MICBIAS_MASK 0x2 +#define ADC3XXX_MICBIAS_MASK 0x3 #define ADC3XXX_MICBIAS1_SHIFT 5 #define ADC3XXX_MICBIAS2_SHIFT 3 @@ -1099,7 +1099,7 @@ static int adc3xxx_parse_dt_micbias(struct adc3xxx *adc3xxx, unsigned int val; if (!of_property_read_u32(np, propname, &val)) { - if (val >= ADC3XXX_MICBIAS_AVDD) { + if (val > ADC3XXX_MICBIAS_AVDD) { dev_err(dev, "Invalid property value for '%s'\n", propname); return -EINVAL; } diff --git a/sound/soc/fsl/fsl-asoc-card.c b/sound/soc/fsl/fsl-asoc-card.c index 76b5bfc288fd..bab7d34cf585 100644 --- a/sound/soc/fsl/fsl-asoc-card.c +++ b/sound/soc/fsl/fsl-asoc-card.c @@ -52,8 +52,8 @@ struct codec_priv { unsigned long mclk_freq; unsigned long free_freq; u32 mclk_id; - u32 fll_id; - u32 pll_id; + int fll_id; + int pll_id; }; /** @@ -206,7 +206,7 @@ static int fsl_asoc_card_hw_params(struct snd_pcm_substream *substream, } /* Specific configuration for PLL */ - if (codec_priv->pll_id && codec_priv->fll_id) { + if (codec_priv->pll_id >= 0 && codec_priv->fll_id >= 0) { if (priv->sample_format == SNDRV_PCM_FORMAT_S24_LE) pll_out = priv->sample_rate * 384; else @@ -248,7 +248,7 @@ static int fsl_asoc_card_hw_free(struct snd_pcm_substream *substream) priv->streams &= ~BIT(substream->stream); - if (!priv->streams && codec_priv->pll_id && codec_priv->fll_id) { + if (!priv->streams && codec_priv->pll_id >= 0 && codec_priv->fll_id >= 0) { /* Force freq to be free_freq to avoid error message in codec */ ret = snd_soc_dai_set_sysclk(asoc_rtd_to_codec(rtd, 0), codec_priv->mclk_id, @@ -621,6 +621,10 @@ static int fsl_asoc_card_probe(struct platform_device *pdev) priv->card.dapm_routes = audio_map; priv->card.num_dapm_routes = ARRAY_SIZE(audio_map); priv->card.driver_name = DRIVER_NAME; + + priv->codec_priv.fll_id = -1; + priv->codec_priv.pll_id = -1; + /* Diversify the card configurations */ if (of_device_is_compatible(np, "fsl,imx-audio-cs42888")) { codec_dai_name = "cs42888"; diff --git a/sound/soc/fsl/fsl_sai.c b/sound/soc/fsl/fsl_sai.c index 1e4020fae05a..8a9a30dd31e2 100644 --- a/sound/soc/fsl/fsl_sai.c +++ b/sound/soc/fsl/fsl_sai.c @@ -710,10 +710,15 @@ static void fsl_sai_config_disable(struct fsl_sai *sai, int dir) { unsigned int ofs = sai->soc_data->reg_offset; bool tx = dir == TX; - u32 xcsr, count = 100; + u32 xcsr, count = 100, mask; + + if (sai->soc_data->mclk_with_tere && sai->mclk_direction_output) + mask = FSL_SAI_CSR_TERE; + else + mask = FSL_SAI_CSR_TERE | FSL_SAI_CSR_BCE; regmap_update_bits(sai->regmap, FSL_SAI_xCSR(tx, ofs), - FSL_SAI_CSR_TERE | FSL_SAI_CSR_BCE, 0); + mask, 0); /* TERE will remain set till the end of current frame */ do { diff --git a/sound/soc/generic/simple-card-utils.c b/sound/soc/generic/simple-card-utils.c index 5b18a4af022f..2588ec735dbd 100644 --- a/sound/soc/generic/simple-card-utils.c +++ b/sound/soc/generic/simple-card-utils.c @@ -310,7 +310,8 @@ int asoc_simple_startup(struct snd_pcm_substream *substream) if (fixed_sysclk % props->mclk_fs) { dev_err(rtd->dev, "fixed sysclk %u not divisible by mclk_fs %u\n", fixed_sysclk, props->mclk_fs); - return -EINVAL; + ret = -EINVAL; + goto codec_err; } ret = snd_pcm_hw_constraint_minmax(substream->runtime, SNDRV_PCM_HW_PARAM_RATE, fixed_rate, fixed_rate); diff --git a/sound/soc/generic/simple-card.c b/sound/soc/generic/simple-card.c index 190f11366e84..274417e39e7d 100644 --- a/sound/soc/generic/simple-card.c +++ b/sound/soc/generic/simple-card.c @@ -759,10 +759,12 @@ static int asoc_simple_probe(struct platform_device *pdev) struct snd_soc_dai_link *dai_link = priv->dai_link; struct simple_dai_props *dai_props = priv->dai_props; + ret = -EINVAL; + cinfo = dev->platform_data; if (!cinfo) { dev_err(dev, "no info for asoc-simple-card\n"); - return -EINVAL; + goto err; } if (!cinfo->name || @@ -771,7 +773,7 @@ static int asoc_simple_probe(struct platform_device *pdev) !cinfo->platform || !cinfo->cpu_dai.name) { dev_err(dev, "insufficient asoc_simple_card_info settings\n"); - return -EINVAL; + goto err; } cpus = dai_link->cpus; diff --git a/sound/soc/intel/boards/sof_es8336.c b/sound/soc/intel/boards/sof_es8336.c index f8a3e8a91761..9904a9e33ccc 100644 --- a/sound/soc/intel/boards/sof_es8336.c +++ b/sound/soc/intel/boards/sof_es8336.c @@ -808,6 +808,16 @@ static const struct platform_device_id board_ids[] = { SOF_ES8336_SPEAKERS_EN_GPIO1_QUIRK | SOF_ES8336_JD_INVERTED), }, + { + .name = "mtl_es83x6_c1_h02", + .driver_data = (kernel_ulong_t)(SOF_ES8336_SSP_CODEC(1) | + SOF_NO_OF_HDMI_CAPTURE_SSP(2) | + SOF_HDMI_CAPTURE_1_SSP(0) | + SOF_HDMI_CAPTURE_2_SSP(2) | + SOF_SSP_HDMI_CAPTURE_PRESENT | + SOF_ES8336_SPEAKERS_EN_GPIO1_QUIRK | + SOF_ES8336_JD_INVERTED), + }, { } }; MODULE_DEVICE_TABLE(platform, board_ids); diff --git a/sound/soc/intel/boards/sof_sdw.c b/sound/soc/intel/boards/sof_sdw.c index 5a1c750e6ae6..842649501e30 100644 --- a/sound/soc/intel/boards/sof_sdw.c +++ b/sound/soc/intel/boards/sof_sdw.c @@ -380,6 +380,16 @@ static const struct dmi_system_id sof_sdw_quirk_table[] = { .callback = sof_sdw_quirk_cb, .matches = { DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc"), + DMI_EXACT_MATCH(DMI_PRODUCT_SKU, "0B14"), + }, + /* No Jack */ + .driver_data = (void *)SOF_SDW_TGL_HDMI, + }, + + { + .callback = sof_sdw_quirk_cb, + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc"), DMI_EXACT_MATCH(DMI_PRODUCT_SKU, "0B29"), }, .driver_data = (void *)(SOF_SDW_TGL_HDMI | diff --git a/sound/soc/intel/common/soc-acpi-intel-adl-match.c b/sound/soc/intel/common/soc-acpi-intel-adl-match.c index 8e995edf4c10..5103e75ac830 100644 --- a/sound/soc/intel/common/soc-acpi-intel-adl-match.c +++ b/sound/soc/intel/common/soc-acpi-intel-adl-match.c @@ -656,18 +656,18 @@ struct snd_soc_acpi_mach snd_soc_acpi_intel_adl_sdw_machines[] = { .sof_tplg_filename = "sof-adl-rt1316-l2-mono-rt714-l3.tplg", }, { - .link_mask = 0x3, /* rt1316 on link1 & rt714 on link0 */ - .links = adl_sdw_rt1316_link1_rt714_link0, - .drv_name = "sof_sdw", - .sof_tplg_filename = "sof-adl-rt1316-l1-mono-rt714-l0.tplg", - }, - { .link_mask = 0x7, /* rt714 on link0 & two rt1316s on link1 and link2 */ .links = adl_sdw_rt1316_link12_rt714_link0, .drv_name = "sof_sdw", .sof_tplg_filename = "sof-adl-rt1316-l12-rt714-l0.tplg", }, { + .link_mask = 0x3, /* rt1316 on link1 & rt714 on link0 */ + .links = adl_sdw_rt1316_link1_rt714_link0, + .drv_name = "sof_sdw", + .sof_tplg_filename = "sof-adl-rt1316-l1-mono-rt714-l0.tplg", + }, + { .link_mask = 0x5, /* 2 active links required */ .links = adl_sdw_rt1316_link2_rt714_link0, .drv_name = "sof_sdw", diff --git a/sound/soc/intel/common/soc-acpi-intel-mtl-match.c b/sound/soc/intel/common/soc-acpi-intel-mtl-match.c index 0304246d2922..92498d1d6c8d 100644 --- a/sound/soc/intel/common/soc-acpi-intel-mtl-match.c +++ b/sound/soc/intel/common/soc-acpi-intel-mtl-match.c @@ -30,6 +30,16 @@ static const struct snd_soc_acpi_codecs mtl_rt5682_rt5682s_hp = { .codecs = {"10EC5682", "RTL5682"}, }; +static const struct snd_soc_acpi_codecs mtl_essx_83x6 = { + .num_codecs = 3, + .codecs = { "ESSX8316", "ESSX8326", "ESSX8336"}, +}; + +static const struct snd_soc_acpi_codecs mtl_lt6911_hdmi = { + .num_codecs = 1, + .codecs = {"INTC10B0"} +}; + struct snd_soc_acpi_mach snd_soc_acpi_intel_mtl_machines[] = { { .comp_ids = &mtl_rt5682_rt5682s_hp, @@ -52,6 +62,21 @@ struct snd_soc_acpi_mach snd_soc_acpi_intel_mtl_machines[] = { .quirk_data = &mtl_rt1019p_amp, .sof_tplg_filename = "sof-mtl-rt1019-rt5682.tplg", }, + { + .comp_ids = &mtl_essx_83x6, + .drv_name = "mtl_es83x6_c1_h02", + .machine_quirk = snd_soc_acpi_codec_list, + .quirk_data = &mtl_lt6911_hdmi, + .sof_tplg_filename = "sof-mtl-es83x6-ssp1-hdmi-ssp02.tplg", + }, + { + .comp_ids = &mtl_essx_83x6, + .drv_name = "sof-essx8336", + .sof_tplg_filename = "sof-mtl-es8336", /* the tplg suffix is added at run time */ + .tplg_quirk_mask = SND_SOC_ACPI_TPLG_INTEL_SSP_NUMBER | + SND_SOC_ACPI_TPLG_INTEL_SSP_MSB | + SND_SOC_ACPI_TPLG_INTEL_DMIC_NUMBER, + }, {}, }; EXPORT_SYMBOL_GPL(snd_soc_acpi_intel_mtl_machines); diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c index cc442c52cdea..9de98c01d815 100644 --- a/sound/soc/soc-core.c +++ b/sound/soc/soc-core.c @@ -1347,7 +1347,7 @@ static int soc_init_pcm_runtime(struct snd_soc_card *card, snd_soc_runtime_get_dai_fmt(rtd); ret = snd_soc_runtime_set_dai_fmt(rtd, dai_link->dai_fmt); if (ret) - return ret; + goto err; /* add DPCM sysfs entries */ soc_dpcm_debugfs_add(rtd); @@ -1372,17 +1372,26 @@ static int soc_init_pcm_runtime(struct snd_soc_card *card, /* create compress_device if possible */ ret = snd_soc_dai_compress_new(cpu_dai, rtd, num); if (ret != -ENOTSUPP) - return ret; + goto err; /* create the pcm */ ret = soc_new_pcm(rtd, num); if (ret < 0) { dev_err(card->dev, "ASoC: can't create pcm %s :%d\n", dai_link->stream_name, ret); - return ret; + goto err; } - return snd_soc_pcm_dai_new(rtd); + ret = snd_soc_pcm_dai_new(rtd); + if (ret < 0) + goto err; + + rtd->initialized = true; + + return 0; +err: + snd_soc_link_exit(rtd); + return ret; } static void soc_set_name_prefix(struct snd_soc_card *card, @@ -1445,8 +1454,8 @@ static int soc_probe_component(struct snd_soc_card *card, if (component->card) { if (component->card != card) { dev_err(component->dev, - "Trying to bind component to card \"%s\" but is already bound to card \"%s\"\n", - card->name, component->card->name); + "Trying to bind component \"%s\" to card \"%s\" but is already bound to card \"%s\"\n", + component->name, card->name, component->card->name); return -ENODEV; } return 0; @@ -1980,7 +1989,8 @@ static void soc_cleanup_card_resources(struct snd_soc_card *card) /* release machine specific resources */ for_each_card_rtds(card, rtd) - snd_soc_link_exit(rtd); + if (rtd->initialized) + snd_soc_link_exit(rtd); /* remove and free each DAI */ soc_remove_link_dais(card); soc_remove_link_components(card); diff --git a/sound/soc/soc-generic-dmaengine-pcm.c b/sound/soc/soc-generic-dmaengine-pcm.c index d0653d775c87..cad222eb9a29 100644 --- a/sound/soc/soc-generic-dmaengine-pcm.c +++ b/sound/soc/soc-generic-dmaengine-pcm.c @@ -44,8 +44,8 @@ static struct device *dmaengine_dma_dev(struct dmaengine_pcm *pcm, * platforms which make use of the snd_dmaengine_dai_dma_data struct for their * DAI DMA data. Internally the function will first call * snd_hwparams_to_dma_slave_config to fill in the slave config based on the - * hw_params, followed by snd_dmaengine_set_config_from_dai_data to fill in the - * remaining fields based on the DAI DMA data. + * hw_params, followed by snd_dmaengine_pcm_set_config_from_dai_data to fill in + * the remaining fields based on the DAI DMA data. */ int snd_dmaengine_pcm_prepare_slave_config(struct snd_pcm_substream *substream, struct snd_pcm_hw_params *params, struct dma_slave_config *slave_config) diff --git a/sound/soc/sof/amd/pci-rmb.c b/sound/soc/sof/amd/pci-rmb.c index 9935e457b467..a7ae76efc2dd 100644 --- a/sound/soc/sof/amd/pci-rmb.c +++ b/sound/soc/sof/amd/pci-rmb.c @@ -35,7 +35,6 @@ static const struct sof_amd_acp_desc rembrandt_chip_info = { .dsp_intr_base = ACP6X_DSP_SW_INTR_BASE, .sram_pte_offset = ACP6X_SRAM_PTE_OFFSET, .hw_semaphore_offset = ACP6X_AXI2DAGB_SEM_0, - .acp_clkmux_sel = ACP6X_CLKMUX_SEL, .fusion_dsp_offset = ACP6X_DSP_FUSION_RUNSTALL, .probe_reg_offset = ACP6X_FUTURE_REG_ACLK_0, }; diff --git a/sound/usb/mixer.c b/sound/usb/mixer.c index 985b1aea9cdc..409fc1164694 100644 --- a/sound/usb/mixer.c +++ b/sound/usb/mixer.c @@ -1204,6 +1204,13 @@ static void volume_control_quirks(struct usb_mixer_elem_info *cval, cval->res = 16; } break; + case USB_ID(0x1bcf, 0x2283): /* NexiGo N930AF FHD Webcam */ + if (!strcmp(kctl->id.name, "Mic Capture Volume")) { + usb_audio_info(chip, + "set resolution quirk: cval->res = 16\n"); + cval->res = 16; + } + break; } } diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index 598659d761cc..4e64842245e1 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -1994,7 +1994,11 @@ void snd_usb_audioformat_attributes_quirk(struct snd_usb_audio *chip, /* mic works only when ep packet size is set to wMaxPacketSize */ fp->attributes |= UAC_EP_CS_ATTR_FILL_MAX; break; - + case USB_ID(0x3511, 0x2b1e): /* Opencomm2 UC USB Bluetooth dongle */ + /* mic works only when ep pitch control is not set */ + if (stream == SNDRV_PCM_STREAM_CAPTURE) + fp->attributes &= ~UAC_EP_CS_ATTR_PITCH_CONTROL; + break; } } @@ -2173,6 +2177,8 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = { QUIRK_FLAG_FIXED_RATE), DEVICE_FLG(0x0ecb, 0x2069, /* JBL Quantum810 Wireless */ QUIRK_FLAG_FIXED_RATE), + DEVICE_FLG(0x1bcf, 0x2283, /* NexiGo N930AF FHD Webcam */ + QUIRK_FLAG_GET_SAMPLE_RATE), /* Vendor matches */ VENDOR_FLG(0x045e, /* MS Lifecam */ diff --git a/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst index bd015ec9847b..2ce900f66d6e 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst @@ -36,11 +36,14 @@ CGROUP COMMANDS | **cgroup_device** | **cgroup_inet4_bind** | **cgroup_inet6_bind** | | **cgroup_inet4_post_bind** | **cgroup_inet6_post_bind** | | **cgroup_inet4_connect** | **cgroup_inet6_connect** | -| **cgroup_inet4_getpeername** | **cgroup_inet6_getpeername** | +| **cgroup_unix_connect** | **cgroup_inet4_getpeername** | +| **cgroup_inet6_getpeername** | **cgroup_unix_getpeername** | | **cgroup_inet4_getsockname** | **cgroup_inet6_getsockname** | -| **cgroup_udp4_sendmsg** | **cgroup_udp6_sendmsg** | +| **cgroup_unix_getsockname** | **cgroup_udp4_sendmsg** | +| **cgroup_udp6_sendmsg** | **cgroup_unix_sendmsg** | | **cgroup_udp4_recvmsg** | **cgroup_udp6_recvmsg** | -| **cgroup_sysctl** | **cgroup_getsockopt** | **cgroup_setsockopt** | +| **cgroup_unix_recvmsg** | **cgroup_sysctl** | +| **cgroup_getsockopt** | **cgroup_setsockopt** | | **cgroup_inet_sock_release** } | *ATTACH_FLAGS* := { **multi** | **override** } @@ -102,21 +105,28 @@ DESCRIPTION **post_bind6** return from bind(2) for an inet6 socket (since 4.17); **connect4** call to connect(2) for an inet4 socket (since 4.17); **connect6** call to connect(2) for an inet6 socket (since 4.17); + **connect_unix** call to connect(2) for a unix socket (since 6.7); **sendmsg4** call to sendto(2), sendmsg(2), sendmmsg(2) for an unconnected udp4 socket (since 4.18); **sendmsg6** call to sendto(2), sendmsg(2), sendmmsg(2) for an unconnected udp6 socket (since 4.18); + **sendmsg_unix** call to sendto(2), sendmsg(2), sendmmsg(2) for + an unconnected unix socket (since 6.7); **recvmsg4** call to recvfrom(2), recvmsg(2), recvmmsg(2) for an unconnected udp4 socket (since 5.2); **recvmsg6** call to recvfrom(2), recvmsg(2), recvmmsg(2) for an unconnected udp6 socket (since 5.2); + **recvmsg_unix** call to recvfrom(2), recvmsg(2), recvmmsg(2) for + an unconnected unix socket (since 6.7); **sysctl** sysctl access (since 5.2); **getsockopt** call to getsockopt (since 5.3); **setsockopt** call to setsockopt (since 5.3); **getpeername4** call to getpeername(2) for an inet4 socket (since 5.8); **getpeername6** call to getpeername(2) for an inet6 socket (since 5.8); + **getpeername_unix** call to getpeername(2) for a unix socket (since 6.7); **getsockname4** call to getsockname(2) for an inet4 socket (since 5.8); **getsockname6** call to getsockname(2) for an inet6 socket (since 5.8). + **getsockname_unix** call to getsockname(2) for a unix socket (since 6.7); **sock_release** closing an userspace inet socket (since 5.9). **bpftool cgroup detach** *CGROUP* *ATTACH_TYPE* *PROG* diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst index dcae81bd27ed..58e6a5b10ef7 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst @@ -47,9 +47,11 @@ PROG COMMANDS | **cgroup/sock** | **cgroup/dev** | **lwt_in** | **lwt_out** | **lwt_xmit** | | **lwt_seg6local** | **sockops** | **sk_skb** | **sk_msg** | **lirc_mode2** | | **cgroup/bind4** | **cgroup/bind6** | **cgroup/post_bind4** | **cgroup/post_bind6** | -| **cgroup/connect4** | **cgroup/connect6** | **cgroup/getpeername4** | **cgroup/getpeername6** | -| **cgroup/getsockname4** | **cgroup/getsockname6** | **cgroup/sendmsg4** | **cgroup/sendmsg6** | -| **cgroup/recvmsg4** | **cgroup/recvmsg6** | **cgroup/sysctl** | +| **cgroup/connect4** | **cgroup/connect6** | **cgroup/connect_unix** | +| **cgroup/getpeername4** | **cgroup/getpeername6** | **cgroup/getpeername_unix** | +| **cgroup/getsockname4** | **cgroup/getsockname6** | **cgroup/getsockname_unix** | +| **cgroup/sendmsg4** | **cgroup/sendmsg6** | **cgroup/sendmsg_unix** | +| **cgroup/recvmsg4** | **cgroup/recvmsg6** | **cgroup/recvmsg_unix** | **cgroup/sysctl** | | **cgroup/getsockopt** | **cgroup/setsockopt** | **cgroup/sock_release** | | **struct_ops** | **fentry** | **fexit** | **freplace** | **sk_lookup** | } diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool index 085bf18f3659..6e4f7ce6bc01 100644 --- a/tools/bpf/bpftool/bash-completion/bpftool +++ b/tools/bpf/bpftool/bash-completion/bpftool @@ -480,13 +480,13 @@ _bpftool() action tracepoint raw_tracepoint \ xdp perf_event cgroup/skb cgroup/sock \ cgroup/dev lwt_in lwt_out lwt_xmit \ - lwt_seg6local sockops sk_skb sk_msg \ - lirc_mode2 cgroup/bind4 cgroup/bind6 \ - cgroup/connect4 cgroup/connect6 \ - cgroup/getpeername4 cgroup/getpeername6 \ - cgroup/getsockname4 cgroup/getsockname6 \ - cgroup/sendmsg4 cgroup/sendmsg6 \ - cgroup/recvmsg4 cgroup/recvmsg6 \ + lwt_seg6local sockops sk_skb sk_msg lirc_mode2 \ + cgroup/bind4 cgroup/bind6 \ + cgroup/connect4 cgroup/connect6 cgroup/connect_unix \ + cgroup/getpeername4 cgroup/getpeername6 cgroup/getpeername_unix \ + cgroup/getsockname4 cgroup/getsockname6 cgroup/getsockname_unix \ + cgroup/sendmsg4 cgroup/sendmsg6 cgroup/sendmsg_unix \ + cgroup/recvmsg4 cgroup/recvmsg6 cgroup/recvmsg_unix \ cgroup/post_bind4 cgroup/post_bind6 \ cgroup/sysctl cgroup/getsockopt \ cgroup/setsockopt cgroup/sock_release struct_ops \ diff --git a/tools/bpf/bpftool/cgroup.c b/tools/bpf/bpftool/cgroup.c index ac846b0805b4..af6898c0f388 100644 --- a/tools/bpf/bpftool/cgroup.c +++ b/tools/bpf/bpftool/cgroup.c @@ -28,13 +28,15 @@ " cgroup_device | cgroup_inet4_bind |\n" \ " cgroup_inet6_bind | cgroup_inet4_post_bind |\n" \ " cgroup_inet6_post_bind | cgroup_inet4_connect |\n" \ - " cgroup_inet6_connect | cgroup_inet4_getpeername |\n" \ - " cgroup_inet6_getpeername | cgroup_inet4_getsockname |\n" \ - " cgroup_inet6_getsockname | cgroup_udp4_sendmsg |\n" \ - " cgroup_udp6_sendmsg | cgroup_udp4_recvmsg |\n" \ - " cgroup_udp6_recvmsg | cgroup_sysctl |\n" \ - " cgroup_getsockopt | cgroup_setsockopt |\n" \ - " cgroup_inet_sock_release }" + " cgroup_inet6_connect | cgroup_unix_connect |\n" \ + " cgroup_inet4_getpeername | cgroup_inet6_getpeername |\n" \ + " cgroup_unix_getpeername | cgroup_inet4_getsockname |\n" \ + " cgroup_inet6_getsockname | cgroup_unix_getsockname |\n" \ + " cgroup_udp4_sendmsg | cgroup_udp6_sendmsg |\n" \ + " cgroup_unix_sendmsg | cgroup_udp4_recvmsg |\n" \ + " cgroup_udp6_recvmsg | cgroup_unix_recvmsg |\n" \ + " cgroup_sysctl | cgroup_getsockopt |\n" \ + " cgroup_setsockopt | cgroup_inet_sock_release }" static unsigned int query_flags; static struct btf *btf_vmlinux; diff --git a/tools/bpf/bpftool/gen.c b/tools/bpf/bpftool/gen.c index 04c47745b3ea..ee3ce2b8000d 100644 --- a/tools/bpf/bpftool/gen.c +++ b/tools/bpf/bpftool/gen.c @@ -708,17 +708,22 @@ static int gen_trace(struct bpf_object *obj, const char *obj_name, const char *h codegen("\ \n\ - skel->%1$s = skel_prep_map_data((void *)\"\\ \n\ - ", ident); + { \n\ + static const char data[] __attribute__((__aligned__(8))) = \"\\\n\ + "); mmap_data = bpf_map__initial_value(map, &mmap_size); print_hex(mmap_data, mmap_size); codegen("\ \n\ - \", %1$zd, %2$zd); \n\ - if (!skel->%3$s) \n\ - goto cleanup; \n\ - skel->maps.%3$s.initial_value = (__u64) (long) skel->%3$s;\n\ - ", bpf_map_mmap_sz(map), mmap_size, ident); + \"; \n\ + \n\ + skel->%1$s = skel_prep_map_data((void *)data, %2$zd,\n\ + sizeof(data) - 1);\n\ + if (!skel->%1$s) \n\ + goto cleanup; \n\ + skel->maps.%1$s.initial_value = (__u64) (long) skel->%1$s;\n\ + } \n\ + ", ident, bpf_map_mmap_sz(map)); } codegen("\ \n\ @@ -733,32 +738,30 @@ static int gen_trace(struct bpf_object *obj, const char *obj_name, const char *h { \n\ struct bpf_load_and_run_opts opts = {}; \n\ int err; \n\ - \n\ - opts.ctx = (struct bpf_loader_ctx *)skel; \n\ - opts.data_sz = %2$d; \n\ - opts.data = (void *)\"\\ \n\ + static const char opts_data[] __attribute__((__aligned__(8))) = \"\\\n\ ", - obj_name, opts.data_sz); + obj_name); print_hex(opts.data, opts.data_sz); codegen("\ \n\ \"; \n\ + static const char opts_insn[] __attribute__((__aligned__(8))) = \"\\\n\ "); - - codegen("\ - \n\ - opts.insns_sz = %d; \n\ - opts.insns = (void *)\"\\ \n\ - ", - opts.insns_sz); print_hex(opts.insns, opts.insns_sz); codegen("\ \n\ \"; \n\ + \n\ + opts.ctx = (struct bpf_loader_ctx *)skel; \n\ + opts.data_sz = sizeof(opts_data) - 1; \n\ + opts.data = (void *)opts_data; \n\ + opts.insns_sz = sizeof(opts_insn) - 1; \n\ + opts.insns = (void *)opts_insn; \n\ + \n\ err = bpf_load_and_run(&opts); \n\ if (err < 0) \n\ return err; \n\ - ", obj_name); + "); bpf_object__for_each_map(map, obj) { const char *mmap_flags; @@ -1209,7 +1212,7 @@ static int do_skeleton(int argc, char **argv) codegen("\ \n\ \n\ - s->data = %2$s__elf_bytes(&s->data_sz); \n\ + s->data = %1$s__elf_bytes(&s->data_sz); \n\ \n\ obj->skeleton = s; \n\ return 0; \n\ @@ -1218,12 +1221,12 @@ static int do_skeleton(int argc, char **argv) return err; \n\ } \n\ \n\ - static inline const void *%2$s__elf_bytes(size_t *sz) \n\ + static inline const void *%1$s__elf_bytes(size_t *sz) \n\ { \n\ - *sz = %1$d; \n\ - return (const void *)\"\\ \n\ - " - , file_sz, obj_name); + static const char data[] __attribute__((__aligned__(8))) = \"\\\n\ + ", + obj_name + ); /* embed contents of BPF object file */ print_hex(obj_data, file_sz); @@ -1231,6 +1234,9 @@ static int do_skeleton(int argc, char **argv) codegen("\ \n\ \"; \n\ + \n\ + *sz = sizeof(data) - 1; \n\ + return (const void *)data; \n\ } \n\ \n\ #ifdef __cplusplus \n\ diff --git a/tools/bpf/bpftool/link.c b/tools/bpf/bpftool/link.c index 2e5c231e08ac..4b1407b05056 100644 --- a/tools/bpf/bpftool/link.c +++ b/tools/bpf/bpftool/link.c @@ -265,6 +265,7 @@ show_kprobe_multi_json(struct bpf_link_info *info, json_writer_t *wtr) jsonw_bool_field(json_wtr, "retprobe", info->kprobe_multi.flags & BPF_F_KPROBE_MULTI_RETURN); jsonw_uint_field(json_wtr, "func_cnt", info->kprobe_multi.count); + jsonw_uint_field(json_wtr, "missed", info->kprobe_multi.missed); jsonw_name(json_wtr, "funcs"); jsonw_start_array(json_wtr); addrs = u64_to_ptr(info->kprobe_multi.addrs); @@ -301,6 +302,7 @@ show_perf_event_kprobe_json(struct bpf_link_info *info, json_writer_t *wtr) jsonw_string_field(wtr, "func", u64_to_ptr(info->perf_event.kprobe.func_name)); jsonw_uint_field(wtr, "offset", info->perf_event.kprobe.offset); + jsonw_uint_field(wtr, "missed", info->perf_event.kprobe.missed); } static void @@ -641,6 +643,8 @@ static void show_kprobe_multi_plain(struct bpf_link_info *info) else printf("\n\tkprobe.multi "); printf("func_cnt %u ", info->kprobe_multi.count); + if (info->kprobe_multi.missed) + printf("missed %llu ", info->kprobe_multi.missed); addrs = (__u64 *)u64_to_ptr(info->kprobe_multi.addrs); qsort(addrs, info->kprobe_multi.count, sizeof(__u64), cmp_u64); @@ -683,6 +687,8 @@ static void show_perf_event_kprobe_plain(struct bpf_link_info *info) printf("%s", buf); if (info->perf_event.kprobe.offset) printf("+%#x", info->perf_event.kprobe.offset); + if (info->perf_event.kprobe.missed) + printf(" missed %llu", info->perf_event.kprobe.missed); printf(" "); } diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index 8443a149dd17..7ec4f5671e7a 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -2475,9 +2475,10 @@ static int do_help(int argc, char **argv) " sk_reuseport | flow_dissector | cgroup/sysctl |\n" " cgroup/bind4 | cgroup/bind6 | cgroup/post_bind4 |\n" " cgroup/post_bind6 | cgroup/connect4 | cgroup/connect6 |\n" - " cgroup/getpeername4 | cgroup/getpeername6 |\n" - " cgroup/getsockname4 | cgroup/getsockname6 | cgroup/sendmsg4 |\n" - " cgroup/sendmsg6 | cgroup/recvmsg4 | cgroup/recvmsg6 |\n" + " cgroup/connect_unix | cgroup/getpeername4 | cgroup/getpeername6 |\n" + " cgroup/getpeername_unix | cgroup/getsockname4 | cgroup/getsockname6 |\n" + " cgroup/getsockname_unix | cgroup/sendmsg4 | cgroup/sendmsg6 |\n" + " cgroup/sendmsg°unix | cgroup/recvmsg4 | cgroup/recvmsg6 | cgroup/recvmsg_unix |\n" " cgroup/getsockopt | cgroup/setsockopt | cgroup/sock_release |\n" " struct_ops | fentry | fexit | freplace | sk_lookup }\n" " ATTACH_TYPE := { sk_msg_verdict | sk_skb_verdict | sk_skb_stream_verdict |\n" diff --git a/tools/hv/hv_kvp_daemon.c b/tools/hv/hv_kvp_daemon.c index 27f5e7dfc2f7..264eeb9c46a9 100644 --- a/tools/hv/hv_kvp_daemon.c +++ b/tools/hv/hv_kvp_daemon.c @@ -1171,12 +1171,79 @@ static int process_ip_string(FILE *f, char *ip_string, int type) return 0; } +/* + * Only IPv4 subnet strings needs to be converted to plen + * For IPv6 the subnet is already privided in plen format + */ +static int kvp_subnet_to_plen(char *subnet_addr_str) +{ + int plen = 0; + struct in_addr subnet_addr4; + + /* + * Convert subnet address to binary representation + */ + if (inet_pton(AF_INET, subnet_addr_str, &subnet_addr4) == 1) { + uint32_t subnet_mask = ntohl(subnet_addr4.s_addr); + + while (subnet_mask & 0x80000000) { + plen++; + subnet_mask <<= 1; + } + } else { + return -1; + } + + return plen; +} + +static int process_ip_string_nm(FILE *f, char *ip_string, char *subnet, + int is_ipv6) +{ + char addr[INET6_ADDRSTRLEN]; + char subnet_addr[INET6_ADDRSTRLEN]; + int error, i = 0; + int ip_offset = 0, subnet_offset = 0; + int plen; + + memset(addr, 0, sizeof(addr)); + memset(subnet_addr, 0, sizeof(subnet_addr)); + + while (parse_ip_val_buffer(ip_string, &ip_offset, addr, + (MAX_IP_ADDR_SIZE * 2)) && + parse_ip_val_buffer(subnet, + &subnet_offset, + subnet_addr, + (MAX_IP_ADDR_SIZE * + 2))) { + if (!is_ipv6) + plen = kvp_subnet_to_plen((char *)subnet_addr); + else + plen = atoi(subnet_addr); + + if (plen < 0) + return plen; + + error = fprintf(f, "address%d=%s/%d\n", ++i, (char *)addr, + plen); + if (error < 0) + return error; + + memset(addr, 0, sizeof(addr)); + memset(subnet_addr, 0, sizeof(subnet_addr)); + } + + return 0; +} + static int kvp_set_ip_info(char *if_name, struct hv_kvp_ipaddr_value *new_val) { int error = 0; - char if_file[PATH_MAX]; - FILE *file; + char if_filename[PATH_MAX]; + char nm_filename[PATH_MAX]; + FILE *ifcfg_file, *nmfile; char cmd[PATH_MAX]; + int is_ipv6 = 0; char *mac_addr; int str_len; @@ -1197,7 +1264,7 @@ static int kvp_set_ip_info(char *if_name, struct hv_kvp_ipaddr_value *new_val) * in a given distro to configure the interface and so are free * ignore information that may not be relevant. * - * Here is the format of the ip configuration file: + * Here is the ifcfg format of the ip configuration file: * * HWADDR=macaddr * DEVICE=interface name @@ -1220,6 +1287,32 @@ static int kvp_set_ip_info(char *if_name, struct hv_kvp_ipaddr_value *new_val) * tagged as IPV6_DEFAULTGW and IPV6 NETMASK will be tagged as * IPV6NETMASK. * + * Here is the keyfile format of the ip configuration file: + * + * [ethernet] + * mac-address=macaddr + * [connection] + * interface-name=interface name + * + * [ipv4] + * method=<protocol> (where <protocol> is "auto" if DHCP is configured + * or "manual" if no boot-time protocol should be used) + * + * address1=ipaddr1/plen + * address2=ipaddr2/plen + * + * gateway=gateway1;gateway2 + * + * dns=dns1;dns2 + * + * [ipv6] + * address1=ipaddr1/plen + * address2=ipaddr2/plen + * + * gateway=gateway1;gateway2 + * + * dns=dns1;dns2 + * * The host can specify multiple ipv4 and ipv6 addresses to be * configured for the interface. Furthermore, the configuration * needs to be persistent. A subsequent GET call on the interface @@ -1227,14 +1320,29 @@ static int kvp_set_ip_info(char *if_name, struct hv_kvp_ipaddr_value *new_val) * call. */ - snprintf(if_file, sizeof(if_file), "%s%s%s", KVP_CONFIG_LOC, - "/ifcfg-", if_name); + /* + * We are populating both ifcfg and nmconnection files + */ + snprintf(if_filename, sizeof(if_filename), "%s%s%s", KVP_CONFIG_LOC, + "/ifcfg-", if_name); - file = fopen(if_file, "w"); + ifcfg_file = fopen(if_filename, "w"); - if (file == NULL) { + if (!ifcfg_file) { syslog(LOG_ERR, "Failed to open config file; error: %d %s", - errno, strerror(errno)); + errno, strerror(errno)); + return HV_E_FAIL; + } + + snprintf(nm_filename, sizeof(nm_filename), "%s%s%s%s", KVP_CONFIG_LOC, + "/", if_name, ".nmconnection"); + + nmfile = fopen(nm_filename, "w"); + + if (!nmfile) { + syslog(LOG_ERR, "Failed to open config file; error: %d %s", + errno, strerror(errno)); + fclose(ifcfg_file); return HV_E_FAIL; } @@ -1248,14 +1356,31 @@ static int kvp_set_ip_info(char *if_name, struct hv_kvp_ipaddr_value *new_val) goto setval_error; } - error = kvp_write_file(file, "HWADDR", "", mac_addr); - free(mac_addr); + error = kvp_write_file(ifcfg_file, "HWADDR", "", mac_addr); + if (error < 0) + goto setmac_error; + + error = kvp_write_file(ifcfg_file, "DEVICE", "", if_name); + if (error < 0) + goto setmac_error; + + error = fprintf(nmfile, "\n[connection]\n"); + if (error < 0) + goto setmac_error; + + error = kvp_write_file(nmfile, "interface-name", "", if_name); if (error) - goto setval_error; + goto setmac_error; + + error = fprintf(nmfile, "\n[ethernet]\n"); + if (error < 0) + goto setmac_error; - error = kvp_write_file(file, "DEVICE", "", if_name); + error = kvp_write_file(nmfile, "mac-address", "", mac_addr); if (error) - goto setval_error; + goto setmac_error; + + free(mac_addr); /* * The dhcp_enabled flag is only for IPv4. In the case the host only @@ -1263,47 +1388,91 @@ static int kvp_set_ip_info(char *if_name, struct hv_kvp_ipaddr_value *new_val) * proceed to parse and pass the IPv6 information to the * disto-specific script hv_set_ifconfig. */ + + /* + * First populate the ifcfg file format + */ if (new_val->dhcp_enabled) { - error = kvp_write_file(file, "BOOTPROTO", "", "dhcp"); + error = kvp_write_file(ifcfg_file, "BOOTPROTO", "", "dhcp"); if (error) goto setval_error; - } else { - error = kvp_write_file(file, "BOOTPROTO", "", "none"); + error = kvp_write_file(ifcfg_file, "BOOTPROTO", "", "none"); if (error) goto setval_error; } - /* - * Write the configuration for ipaddress, netmask, gateway and - * name servers. - */ - - error = process_ip_string(file, (char *)new_val->ip_addr, IPADDR); + error = process_ip_string(ifcfg_file, (char *)new_val->ip_addr, + IPADDR); if (error) goto setval_error; - error = process_ip_string(file, (char *)new_val->sub_net, NETMASK); + error = process_ip_string(ifcfg_file, (char *)new_val->sub_net, + NETMASK); if (error) goto setval_error; - error = process_ip_string(file, (char *)new_val->gate_way, GATEWAY); + error = process_ip_string(ifcfg_file, (char *)new_val->gate_way, + GATEWAY); if (error) goto setval_error; - error = process_ip_string(file, (char *)new_val->dns_addr, DNS); + error = process_ip_string(ifcfg_file, (char *)new_val->dns_addr, DNS); if (error) goto setval_error; - fclose(file); + if (new_val->addr_family == ADDR_FAMILY_IPV6) { + error = fprintf(nmfile, "\n[ipv6]\n"); + if (error < 0) + goto setval_error; + is_ipv6 = 1; + } else { + error = fprintf(nmfile, "\n[ipv4]\n"); + if (error < 0) + goto setval_error; + } + + /* + * Now we populate the keyfile format + */ + + if (new_val->dhcp_enabled) { + error = kvp_write_file(nmfile, "method", "", "auto"); + if (error < 0) + goto setval_error; + } else { + error = kvp_write_file(nmfile, "method", "", "manual"); + if (error < 0) + goto setval_error; + } + + /* + * Write the configuration for ipaddress, netmask, gateway and + * name services + */ + error = process_ip_string_nm(nmfile, (char *)new_val->ip_addr, + (char *)new_val->sub_net, is_ipv6); + if (error < 0) + goto setval_error; + + error = fprintf(nmfile, "gateway=%s\n", (char *)new_val->gate_way); + if (error < 0) + goto setval_error; + + error = fprintf(nmfile, "dns=%s\n", (char *)new_val->dns_addr); + if (error < 0) + goto setval_error; + + fclose(nmfile); + fclose(ifcfg_file); /* * Now that we have populated the configuration file, * invoke the external script to do its magic. */ - str_len = snprintf(cmd, sizeof(cmd), KVP_SCRIPTS_PATH "%s %s", - "hv_set_ifconfig", if_file); + str_len = snprintf(cmd, sizeof(cmd), KVP_SCRIPTS_PATH "%s %s %s", + "hv_set_ifconfig", if_filename, nm_filename); /* * This is a little overcautious, but it's necessary to suppress some * false warnings from gcc 8.0.1. @@ -1316,14 +1485,16 @@ static int kvp_set_ip_info(char *if_name, struct hv_kvp_ipaddr_value *new_val) if (system(cmd)) { syslog(LOG_ERR, "Failed to execute cmd '%s'; error: %d %s", - cmd, errno, strerror(errno)); + cmd, errno, strerror(errno)); return HV_E_FAIL; } return 0; - +setmac_error: + free(mac_addr); setval_error: syslog(LOG_ERR, "Failed to write config file"); - fclose(file); + fclose(ifcfg_file); + fclose(nmfile); return error; } diff --git a/tools/hv/hv_set_ifconfig.sh b/tools/hv/hv_set_ifconfig.sh index d10fe35b7f25..ae5a7a8249a2 100755 --- a/tools/hv/hv_set_ifconfig.sh +++ b/tools/hv/hv_set_ifconfig.sh @@ -18,12 +18,12 @@ # # This example script is based on a RHEL environment. # -# Here is the format of the ip configuration file: +# Here is the ifcfg format of the ip configuration file: # # HWADDR=macaddr # DEVICE=interface name # BOOTPROTO=<protocol> (where <protocol> is "dhcp" if DHCP is configured -# or "none" if no boot-time protocol should be used) +# or "none" if no boot-time protocol should be used) # # IPADDR0=ipaddr1 # IPADDR1=ipaddr2 @@ -41,6 +41,32 @@ # tagged as IPV6_DEFAULTGW and IPV6 NETMASK will be tagged as # IPV6NETMASK. # +# Here is the keyfile format of the ip configuration file: +# +# [ethernet] +# mac-address=macaddr +# [connection] +# interface-name=interface name +# +# [ipv4] +# method=<protocol> (where <protocol> is "auto" if DHCP is configured +# or "manual" if no boot-time protocol should be used) +# +# address1=ipaddr1/plen +# address=ipaddr2/plen +# +# gateway=gateway1;gateway2 +# +# dns=dns1; +# +# [ipv6] +# address1=ipaddr1/plen +# address2=ipaddr1/plen +# +# gateway=gateway1;gateway2 +# +# dns=dns1;dns2 +# # The host can specify multiple ipv4 and ipv6 addresses to be # configured for the interface. Furthermore, the configuration # needs to be persistent. A subsequent GET call on the interface @@ -48,18 +74,19 @@ # call. # - - echo "IPV6INIT=yes" >> $1 echo "NM_CONTROLLED=no" >> $1 echo "PEERDNS=yes" >> $1 echo "ONBOOT=yes" >> $1 - cp $1 /etc/sysconfig/network-scripts/ +chmod 600 $2 +interface=$(echo $2 | awk -F - '{ print $2 }') +filename="${2##*/}" + +sed '/\[connection\]/a autoconnect=true' $2 > /etc/NetworkManager/system-connections/${filename} -interface=$(echo $1 | awk -F - '{ print $2 }') /sbin/ifdown $interface 2>/dev/null /sbin/ifup $interface 2>/dev/null diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 5f13db15a3c7..7ba61b75bc0e 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -1047,6 +1047,11 @@ enum bpf_attach_type { BPF_TCX_INGRESS, BPF_TCX_EGRESS, BPF_TRACE_UPROBE_MULTI, + BPF_CGROUP_UNIX_CONNECT, + BPF_CGROUP_UNIX_SENDMSG, + BPF_CGROUP_UNIX_RECVMSG, + BPF_CGROUP_UNIX_GETPEERNAME, + BPF_CGROUP_UNIX_GETSOCKNAME, __MAX_BPF_ATTACH_TYPE }; @@ -2704,8 +2709,8 @@ union bpf_attr { * *bpf_socket* should be one of the following: * * * **struct bpf_sock_ops** for **BPF_PROG_TYPE_SOCK_OPS**. - * * **struct bpf_sock_addr** for **BPF_CGROUP_INET4_CONNECT** - * and **BPF_CGROUP_INET6_CONNECT**. + * * **struct bpf_sock_addr** for **BPF_CGROUP_INET4_CONNECT**, + * **BPF_CGROUP_INET6_CONNECT** and **BPF_CGROUP_UNIX_CONNECT**. * * This helper actually implements a subset of **setsockopt()**. * It supports the following *level*\ s: @@ -2943,8 +2948,8 @@ union bpf_attr { * *bpf_socket* should be one of the following: * * * **struct bpf_sock_ops** for **BPF_PROG_TYPE_SOCK_OPS**. - * * **struct bpf_sock_addr** for **BPF_CGROUP_INET4_CONNECT** - * and **BPF_CGROUP_INET6_CONNECT**. + * * **struct bpf_sock_addr** for **BPF_CGROUP_INET4_CONNECT**, + * **BPF_CGROUP_INET6_CONNECT** and **BPF_CGROUP_UNIX_CONNECT**. * * This helper actually implements a subset of **getsockopt()**. * It supports the same set of *optname*\ s that is supported by @@ -3264,6 +3269,11 @@ union bpf_attr { * and *params*->smac will not be set as output. A common * use case is to call **bpf_redirect_neigh**\ () after * doing **bpf_fib_lookup**\ (). + * **BPF_FIB_LOOKUP_SRC** + * Derive and set source IP addr in *params*->ipv{4,6}_src + * for the nexthop. If the src addr cannot be derived, + * **BPF_FIB_LKUP_RET_NO_SRC_ADDR** is returned. In this + * case, *params*->dmac and *params*->smac are not set either. * * *ctx* is either **struct xdp_md** for XDP programs or * **struct sk_buff** tc cls_act programs. @@ -5096,6 +5106,8 @@ union bpf_attr { * **BPF_F_TIMER_ABS** * Start the timer in absolute expire value instead of the * default relative one. + * **BPF_F_TIMER_CPU_PIN** + * Timer will be pinned to the CPU of the caller. * * Return * 0 on success. @@ -6532,6 +6544,7 @@ struct bpf_link_info { __aligned_u64 addrs; __u32 count; /* in/out: kprobe_multi function count */ __u32 flags; + __u64 missed; } kprobe_multi; struct { __u32 type; /* enum bpf_perf_event_type */ @@ -6547,6 +6560,7 @@ struct bpf_link_info { __u32 name_len; __u32 offset; /* offset from func_name */ __u64 addr; + __u64 missed; } kprobe; /* BPF_PERF_EVENT_KPROBE, BPF_PERF_EVENT_KRETPROBE */ struct { __aligned_u64 tp_name; /* in/out */ @@ -6960,6 +6974,7 @@ enum { BPF_FIB_LOOKUP_OUTPUT = (1U << 1), BPF_FIB_LOOKUP_SKIP_NEIGH = (1U << 2), BPF_FIB_LOOKUP_TBID = (1U << 3), + BPF_FIB_LOOKUP_SRC = (1U << 4), }; enum { @@ -6972,6 +6987,7 @@ enum { BPF_FIB_LKUP_RET_UNSUPP_LWT, /* fwd requires encapsulation */ BPF_FIB_LKUP_RET_NO_NEIGH, /* no neighbor entry for nh */ BPF_FIB_LKUP_RET_FRAG_NEEDED, /* fragmentation required to fwd */ + BPF_FIB_LKUP_RET_NO_SRC_ADDR, /* failed to derive IP src addr */ }; struct bpf_fib_lookup { @@ -7006,6 +7022,9 @@ struct bpf_fib_lookup { __u32 rt_metric; }; + /* input: source address to consider for lookup + * output: source address result from lookup + */ union { __be32 ipv4_src; __u32 ipv6_src[4]; /* in6_addr; network order */ @@ -7307,9 +7326,11 @@ struct bpf_core_relo { * Flags to control bpf_timer_start() behaviour. * - BPF_F_TIMER_ABS: Timeout passed is absolute time, by default it is * relative to current time. + * - BPF_F_TIMER_CPU_PIN: Timer will be pinned to the CPU of the caller. */ enum { BPF_F_TIMER_ABS = (1ULL << 0), + BPF_F_TIMER_CPU_PIN = (1ULL << 1), }; /* BPF numbers iterator state */ diff --git a/tools/lib/bpf/bpf_tracing.h b/tools/lib/bpf/bpf_tracing.h index 3803479dbe10..1c13f8e88833 100644 --- a/tools/lib/bpf/bpf_tracing.h +++ b/tools/lib/bpf/bpf_tracing.h @@ -362,8 +362,6 @@ struct pt_regs___arm64 { #define __PT_PARM7_REG a6 #define __PT_PARM8_REG a7 -/* riscv does not select ARCH_HAS_SYSCALL_WRAPPER. */ -#define PT_REGS_SYSCALL_REGS(ctx) ctx #define __PT_PARM1_SYSCALL_REG __PT_PARM1_REG #define __PT_PARM2_SYSCALL_REG __PT_PARM2_REG #define __PT_PARM3_SYSCALL_REG __PT_PARM3_REG diff --git a/tools/lib/bpf/elf.c b/tools/lib/bpf/elf.c index 9d0296c1726a..2a158e8a8b7c 100644 --- a/tools/lib/bpf/elf.c +++ b/tools/lib/bpf/elf.c @@ -1,5 +1,8 @@ // SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif #include <libelf.h> #include <gelf.h> #include <fcntl.h> @@ -10,6 +13,17 @@ #define STRERR_BUFSIZE 128 +/* A SHT_GNU_versym section holds 16-bit words. This bit is set if + * the symbol is hidden and can only be seen when referenced using an + * explicit version number. This is a GNU extension. + */ +#define VERSYM_HIDDEN 0x8000 + +/* This is the mask for the rest of the data in a word read from a + * SHT_GNU_versym section. + */ +#define VERSYM_VERSION 0x7fff + int elf_open(const char *binary_path, struct elf_fd *elf_fd) { char errmsg[STRERR_BUFSIZE]; @@ -64,13 +78,18 @@ struct elf_sym { const char *name; GElf_Sym sym; GElf_Shdr sh; + int ver; + bool hidden; }; struct elf_sym_iter { Elf *elf; Elf_Data *syms; + Elf_Data *versyms; + Elf_Data *verdefs; size_t nr_syms; size_t strtabidx; + size_t verdef_strtabidx; size_t next_sym_idx; struct elf_sym sym; int st_type; @@ -111,6 +130,26 @@ static int elf_sym_iter_new(struct elf_sym_iter *iter, iter->nr_syms = iter->syms->d_size / sh.sh_entsize; iter->elf = elf; iter->st_type = st_type; + + /* Version symbol table is meaningful to dynsym only */ + if (sh_type != SHT_DYNSYM) + return 0; + + scn = elf_find_next_scn_by_type(elf, SHT_GNU_versym, NULL); + if (!scn) + return 0; + iter->versyms = elf_getdata(scn, 0); + + scn = elf_find_next_scn_by_type(elf, SHT_GNU_verdef, NULL); + if (!scn) { + pr_debug("elf: failed to find verdef ELF sections in '%s'\n", binary_path); + return -ENOENT; + } + if (!gelf_getshdr(scn, &sh)) + return -EINVAL; + iter->verdef_strtabidx = sh.sh_link; + iter->verdefs = elf_getdata(scn, 0); + return 0; } @@ -119,6 +158,7 @@ static struct elf_sym *elf_sym_iter_next(struct elf_sym_iter *iter) struct elf_sym *ret = &iter->sym; GElf_Sym *sym = &ret->sym; const char *name = NULL; + GElf_Versym versym; Elf_Scn *sym_scn; size_t idx; @@ -138,12 +178,80 @@ static struct elf_sym *elf_sym_iter_next(struct elf_sym_iter *iter) iter->next_sym_idx = idx + 1; ret->name = name; + ret->ver = 0; + ret->hidden = false; + + if (iter->versyms) { + if (!gelf_getversym(iter->versyms, idx, &versym)) + continue; + ret->ver = versym & VERSYM_VERSION; + ret->hidden = versym & VERSYM_HIDDEN; + } return ret; } return NULL; } +static const char *elf_get_vername(struct elf_sym_iter *iter, int ver) +{ + GElf_Verdaux verdaux; + GElf_Verdef verdef; + int offset; + + offset = 0; + while (gelf_getverdef(iter->verdefs, offset, &verdef)) { + if (verdef.vd_ndx != ver) { + if (!verdef.vd_next) + break; + + offset += verdef.vd_next; + continue; + } + + if (!gelf_getverdaux(iter->verdefs, offset + verdef.vd_aux, &verdaux)) + break; + + return elf_strptr(iter->elf, iter->verdef_strtabidx, verdaux.vda_name); + + } + return NULL; +} + +static bool symbol_match(struct elf_sym_iter *iter, int sh_type, struct elf_sym *sym, + const char *name, size_t name_len, const char *lib_ver) +{ + const char *ver_name; + + /* Symbols are in forms of func, func@LIB_VER or func@@LIB_VER + * make sure the func part matches the user specified name + */ + if (strncmp(sym->name, name, name_len) != 0) + return false; + + /* ...but we don't want a search for "foo" to match 'foo2" also, so any + * additional characters in sname should be of the form "@@LIB". + */ + if (sym->name[name_len] != '\0' && sym->name[name_len] != '@') + return false; + + /* If user does not specify symbol version, then we got a match */ + if (!lib_ver) + return true; + + /* If user specifies symbol version, for dynamic symbols, + * get version name from ELF verdef section for comparison. + */ + if (sh_type == SHT_DYNSYM) { + ver_name = elf_get_vername(iter, sym->ver); + if (!ver_name) + return false; + return strcmp(ver_name, lib_ver) == 0; + } + + /* For normal symbols, it is already in form of func@LIB_VER */ + return strcmp(sym->name, name) == 0; +} /* Transform symbol's virtual address (absolute for binaries and relative * for shared libs) into file offset, which is what kernel is expecting @@ -166,7 +274,8 @@ static unsigned long elf_sym_offset(struct elf_sym *sym) long elf_find_func_offset(Elf *elf, const char *binary_path, const char *name) { int i, sh_types[2] = { SHT_DYNSYM, SHT_SYMTAB }; - bool is_shared_lib, is_name_qualified; + const char *at_symbol, *lib_ver; + bool is_shared_lib; long ret = -ENOENT; size_t name_len; GElf_Ehdr ehdr; @@ -179,9 +288,18 @@ long elf_find_func_offset(Elf *elf, const char *binary_path, const char *name) /* for shared lib case, we do not need to calculate relative offset */ is_shared_lib = ehdr.e_type == ET_DYN; - name_len = strlen(name); - /* Does name specify "@@LIB"? */ - is_name_qualified = strstr(name, "@@") != NULL; + /* Does name specify "@@LIB_VER" or "@LIB_VER" ? */ + at_symbol = strchr(name, '@'); + if (at_symbol) { + name_len = at_symbol - name; + /* skip second @ if it's @@LIB_VER case */ + if (at_symbol[1] == '@') + at_symbol++; + lib_ver = at_symbol + 1; + } else { + name_len = strlen(name); + lib_ver = NULL; + } /* Search SHT_DYNSYM, SHT_SYMTAB for symbol. This search order is used because if * a binary is stripped, it may only have SHT_DYNSYM, and a fully-statically @@ -201,20 +319,17 @@ long elf_find_func_offset(Elf *elf, const char *binary_path, const char *name) goto out; while ((sym = elf_sym_iter_next(&iter))) { - /* User can specify func, func@@LIB or func@@LIB_VERSION. */ - if (strncmp(sym->name, name, name_len) != 0) - continue; - /* ...but we don't want a search for "foo" to match 'foo2" also, so any - * additional characters in sname should be of the form "@@LIB". - */ - if (!is_name_qualified && sym->name[name_len] != '\0' && sym->name[name_len] != '@') + if (!symbol_match(&iter, sh_types[i], sym, name, name_len, lib_ver)) continue; cur_bind = GELF_ST_BIND(sym->sym.st_info); if (ret > 0) { /* handle multiple matches */ - if (last_bind != STB_WEAK && cur_bind != STB_WEAK) { + if (elf_sym_offset(sym) == ret) { + /* same offset, no problem */ + continue; + } else if (last_bind != STB_WEAK && cur_bind != STB_WEAK) { /* Only accept one non-weak bind. */ pr_warn("elf: ambiguous match for '%s', '%s' in '%s'\n", sym->name, name, binary_path); diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 3a6108e3238b..a295f6acf98f 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -82,17 +82,22 @@ static const char * const attach_type_name[] = { [BPF_CGROUP_INET6_BIND] = "cgroup_inet6_bind", [BPF_CGROUP_INET4_CONNECT] = "cgroup_inet4_connect", [BPF_CGROUP_INET6_CONNECT] = "cgroup_inet6_connect", + [BPF_CGROUP_UNIX_CONNECT] = "cgroup_unix_connect", [BPF_CGROUP_INET4_POST_BIND] = "cgroup_inet4_post_bind", [BPF_CGROUP_INET6_POST_BIND] = "cgroup_inet6_post_bind", [BPF_CGROUP_INET4_GETPEERNAME] = "cgroup_inet4_getpeername", [BPF_CGROUP_INET6_GETPEERNAME] = "cgroup_inet6_getpeername", + [BPF_CGROUP_UNIX_GETPEERNAME] = "cgroup_unix_getpeername", [BPF_CGROUP_INET4_GETSOCKNAME] = "cgroup_inet4_getsockname", [BPF_CGROUP_INET6_GETSOCKNAME] = "cgroup_inet6_getsockname", + [BPF_CGROUP_UNIX_GETSOCKNAME] = "cgroup_unix_getsockname", [BPF_CGROUP_UDP4_SENDMSG] = "cgroup_udp4_sendmsg", [BPF_CGROUP_UDP6_SENDMSG] = "cgroup_udp6_sendmsg", + [BPF_CGROUP_UNIX_SENDMSG] = "cgroup_unix_sendmsg", [BPF_CGROUP_SYSCTL] = "cgroup_sysctl", [BPF_CGROUP_UDP4_RECVMSG] = "cgroup_udp4_recvmsg", [BPF_CGROUP_UDP6_RECVMSG] = "cgroup_udp6_recvmsg", + [BPF_CGROUP_UNIX_RECVMSG] = "cgroup_unix_recvmsg", [BPF_CGROUP_GETSOCKOPT] = "cgroup_getsockopt", [BPF_CGROUP_SETSOCKOPT] = "cgroup_setsockopt", [BPF_SK_SKB_STREAM_PARSER] = "sk_skb_stream_parser", @@ -8960,14 +8965,19 @@ static const struct bpf_sec_def section_defs[] = { SEC_DEF("cgroup/bind6", CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_BIND, SEC_ATTACHABLE), SEC_DEF("cgroup/connect4", CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_CONNECT, SEC_ATTACHABLE), SEC_DEF("cgroup/connect6", CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_CONNECT, SEC_ATTACHABLE), + SEC_DEF("cgroup/connect_unix", CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_CONNECT, SEC_ATTACHABLE), SEC_DEF("cgroup/sendmsg4", CGROUP_SOCK_ADDR, BPF_CGROUP_UDP4_SENDMSG, SEC_ATTACHABLE), SEC_DEF("cgroup/sendmsg6", CGROUP_SOCK_ADDR, BPF_CGROUP_UDP6_SENDMSG, SEC_ATTACHABLE), + SEC_DEF("cgroup/sendmsg_unix", CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_SENDMSG, SEC_ATTACHABLE), SEC_DEF("cgroup/recvmsg4", CGROUP_SOCK_ADDR, BPF_CGROUP_UDP4_RECVMSG, SEC_ATTACHABLE), SEC_DEF("cgroup/recvmsg6", CGROUP_SOCK_ADDR, BPF_CGROUP_UDP6_RECVMSG, SEC_ATTACHABLE), + SEC_DEF("cgroup/recvmsg_unix", CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_RECVMSG, SEC_ATTACHABLE), SEC_DEF("cgroup/getpeername4", CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_GETPEERNAME, SEC_ATTACHABLE), SEC_DEF("cgroup/getpeername6", CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_GETPEERNAME, SEC_ATTACHABLE), + SEC_DEF("cgroup/getpeername_unix", CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_GETPEERNAME, SEC_ATTACHABLE), SEC_DEF("cgroup/getsockname4", CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_GETSOCKNAME, SEC_ATTACHABLE), SEC_DEF("cgroup/getsockname6", CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_GETSOCKNAME, SEC_ATTACHABLE), + SEC_DEF("cgroup/getsockname_unix", CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_GETSOCKNAME, SEC_ATTACHABLE), SEC_DEF("cgroup/sysctl", CGROUP_SYSCTL, BPF_CGROUP_SYSCTL, SEC_ATTACHABLE), SEC_DEF("cgroup/getsockopt", CGROUP_SOCKOPT, BPF_CGROUP_GETSOCKOPT, SEC_ATTACHABLE), SEC_DEF("cgroup/setsockopt", CGROUP_SOCKOPT, BPF_CGROUP_SETSOCKOPT, SEC_ATTACHABLE), @@ -11114,7 +11124,7 @@ static int attach_uprobe_multi(const struct bpf_program *prog, long cookie, stru *link = NULL; - n = sscanf(prog->sec_name, "%m[^/]/%m[^:]:%ms", + n = sscanf(prog->sec_name, "%m[^/]/%m[^:]:%m[^\n]", &probe_type, &binary_path, &func_name); switch (n) { case 1: @@ -11624,14 +11634,14 @@ err_out: static int attach_uprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link) { DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, opts); - char *probe_type = NULL, *binary_path = NULL, *func_name = NULL; - int n, ret = -EINVAL; + char *probe_type = NULL, *binary_path = NULL, *func_name = NULL, *func_off; + int n, c, ret = -EINVAL; long offset = 0; *link = NULL; - n = sscanf(prog->sec_name, "%m[^/]/%m[^:]:%m[a-zA-Z0-9_.]+%li", - &probe_type, &binary_path, &func_name, &offset); + n = sscanf(prog->sec_name, "%m[^/]/%m[^:]:%m[^\n]", + &probe_type, &binary_path, &func_name); switch (n) { case 1: /* handle SEC("u[ret]probe") - format is valid, but auto-attach is impossible. */ @@ -11642,7 +11652,17 @@ static int attach_uprobe(const struct bpf_program *prog, long cookie, struct bpf prog->name, prog->sec_name); break; case 3: - case 4: + /* check if user specifies `+offset`, if yes, this should be + * the last part of the string, make sure sscanf read to EOL + */ + func_off = strrchr(func_name, '+'); + if (func_off) { + n = sscanf(func_off, "+%li%n", &offset, &c); + if (n == 1 && *(func_off + c) == '\0') + func_off[0] = '\0'; + else + offset = 0; + } opts.retprobe = strcmp(probe_type, "uretprobe") == 0 || strcmp(probe_type, "uretprobe.s") == 0; if (opts.retprobe && offset != 0) { diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index 0e52621cba43..475378438545 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -1229,6 +1229,7 @@ LIBBPF_API int bpf_tc_query(const struct bpf_tc_hook *hook, /* Ring buffer APIs */ struct ring_buffer; +struct ring; struct user_ring_buffer; typedef int (*ring_buffer_sample_fn)(void *ctx, void *data, size_t size); @@ -1249,6 +1250,78 @@ LIBBPF_API int ring_buffer__poll(struct ring_buffer *rb, int timeout_ms); LIBBPF_API int ring_buffer__consume(struct ring_buffer *rb); LIBBPF_API int ring_buffer__epoll_fd(const struct ring_buffer *rb); +/** + * @brief **ring_buffer__ring()** returns the ringbuffer object inside a given + * ringbuffer manager representing a single BPF_MAP_TYPE_RINGBUF map instance. + * + * @param rb A ringbuffer manager object. + * @param idx An index into the ringbuffers contained within the ringbuffer + * manager object. The index is 0-based and corresponds to the order in which + * ring_buffer__add was called. + * @return A ringbuffer object on success; NULL and errno set if the index is + * invalid. + */ +LIBBPF_API struct ring *ring_buffer__ring(struct ring_buffer *rb, + unsigned int idx); + +/** + * @brief **ring__consumer_pos()** returns the current consumer position in the + * given ringbuffer. + * + * @param r A ringbuffer object. + * @return The current consumer position. + */ +LIBBPF_API unsigned long ring__consumer_pos(const struct ring *r); + +/** + * @brief **ring__producer_pos()** returns the current producer position in the + * given ringbuffer. + * + * @param r A ringbuffer object. + * @return The current producer position. + */ +LIBBPF_API unsigned long ring__producer_pos(const struct ring *r); + +/** + * @brief **ring__avail_data_size()** returns the number of bytes in the + * ringbuffer not yet consumed. This has no locking associated with it, so it + * can be inaccurate if operations are ongoing while this is called. However, it + * should still show the correct trend over the long-term. + * + * @param r A ringbuffer object. + * @return The number of bytes not yet consumed. + */ +LIBBPF_API size_t ring__avail_data_size(const struct ring *r); + +/** + * @brief **ring__size()** returns the total size of the ringbuffer's map data + * area (excluding special producer/consumer pages). Effectively this gives the + * amount of usable bytes of data inside the ringbuffer. + * + * @param r A ringbuffer object. + * @return The total size of the ringbuffer map data area. + */ +LIBBPF_API size_t ring__size(const struct ring *r); + +/** + * @brief **ring__map_fd()** returns the file descriptor underlying the given + * ringbuffer. + * + * @param r A ringbuffer object. + * @return The underlying ringbuffer file descriptor + */ +LIBBPF_API int ring__map_fd(const struct ring *r); + +/** + * @brief **ring__consume()** consumes available ringbuffer data without event + * polling. + * + * @param r A ringbuffer object. + * @return The number of records consumed (or INT_MAX, whichever is less), or + * a negative number if any of the callbacks return an error. + */ +LIBBPF_API int ring__consume(struct ring *r); + struct user_ring_buffer_opts { size_t sz; /* size of this struct, for forward/backward compatibility */ }; diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index 57712321490f..cc973b678a39 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -400,4 +400,11 @@ LIBBPF_1.3.0 { bpf_program__attach_netfilter; bpf_program__attach_tcx; bpf_program__attach_uprobe_multi; + ring__avail_data_size; + ring__consume; + ring__consumer_pos; + ring__map_fd; + ring__producer_pos; + ring__size; + ring_buffer__ring; } LIBBPF_1.2.0; diff --git a/tools/lib/bpf/ringbuf.c b/tools/lib/bpf/ringbuf.c index 02199364db13..aacb64278a01 100644 --- a/tools/lib/bpf/ringbuf.c +++ b/tools/lib/bpf/ringbuf.c @@ -34,7 +34,7 @@ struct ring { struct ring_buffer { struct epoll_event *events; - struct ring *rings; + struct ring **rings; size_t page_size; int epoll_fd; int ring_cnt; @@ -57,7 +57,7 @@ struct ringbuf_hdr { __u32 pad; }; -static void ringbuf_unmap_ring(struct ring_buffer *rb, struct ring *r) +static void ringbuf_free_ring(struct ring_buffer *rb, struct ring *r) { if (r->consumer_pos) { munmap(r->consumer_pos, rb->page_size); @@ -67,6 +67,8 @@ static void ringbuf_unmap_ring(struct ring_buffer *rb, struct ring *r) munmap(r->producer_pos, rb->page_size + 2 * (r->mask + 1)); r->producer_pos = NULL; } + + free(r); } /* Add extra RINGBUF maps to this ring buffer manager */ @@ -107,8 +109,10 @@ int ring_buffer__add(struct ring_buffer *rb, int map_fd, return libbpf_err(-ENOMEM); rb->events = tmp; - r = &rb->rings[rb->ring_cnt]; - memset(r, 0, sizeof(*r)); + r = calloc(1, sizeof(*r)); + if (!r) + return libbpf_err(-ENOMEM); + rb->rings[rb->ring_cnt] = r; r->map_fd = map_fd; r->sample_cb = sample_cb; @@ -121,7 +125,7 @@ int ring_buffer__add(struct ring_buffer *rb, int map_fd, err = -errno; pr_warn("ringbuf: failed to mmap consumer page for map fd=%d: %d\n", map_fd, err); - return libbpf_err(err); + goto err_out; } r->consumer_pos = tmp; @@ -131,16 +135,16 @@ int ring_buffer__add(struct ring_buffer *rb, int map_fd, */ mmap_sz = rb->page_size + 2 * (__u64)info.max_entries; if (mmap_sz != (__u64)(size_t)mmap_sz) { + err = -E2BIG; pr_warn("ringbuf: ring buffer size (%u) is too big\n", info.max_entries); - return libbpf_err(-E2BIG); + goto err_out; } tmp = mmap(NULL, (size_t)mmap_sz, PROT_READ, MAP_SHARED, map_fd, rb->page_size); if (tmp == MAP_FAILED) { err = -errno; - ringbuf_unmap_ring(rb, r); pr_warn("ringbuf: failed to mmap data pages for map fd=%d: %d\n", map_fd, err); - return libbpf_err(err); + goto err_out; } r->producer_pos = tmp; r->data = tmp + rb->page_size; @@ -152,14 +156,17 @@ int ring_buffer__add(struct ring_buffer *rb, int map_fd, e->data.fd = rb->ring_cnt; if (epoll_ctl(rb->epoll_fd, EPOLL_CTL_ADD, map_fd, e) < 0) { err = -errno; - ringbuf_unmap_ring(rb, r); pr_warn("ringbuf: failed to epoll add map fd=%d: %d\n", map_fd, err); - return libbpf_err(err); + goto err_out; } rb->ring_cnt++; return 0; + +err_out: + ringbuf_free_ring(rb, r); + return libbpf_err(err); } void ring_buffer__free(struct ring_buffer *rb) @@ -170,7 +177,7 @@ void ring_buffer__free(struct ring_buffer *rb) return; for (i = 0; i < rb->ring_cnt; ++i) - ringbuf_unmap_ring(rb, &rb->rings[i]); + ringbuf_free_ring(rb, rb->rings[i]); if (rb->epoll_fd >= 0) close(rb->epoll_fd); @@ -278,7 +285,7 @@ int ring_buffer__consume(struct ring_buffer *rb) int i; for (i = 0; i < rb->ring_cnt; i++) { - struct ring *ring = &rb->rings[i]; + struct ring *ring = rb->rings[i]; err = ringbuf_process_ring(ring); if (err < 0) @@ -305,7 +312,7 @@ int ring_buffer__poll(struct ring_buffer *rb, int timeout_ms) for (i = 0; i < cnt; i++) { __u32 ring_id = rb->events[i].data.fd; - struct ring *ring = &rb->rings[ring_id]; + struct ring *ring = rb->rings[ring_id]; err = ringbuf_process_ring(ring); if (err < 0) @@ -323,6 +330,58 @@ int ring_buffer__epoll_fd(const struct ring_buffer *rb) return rb->epoll_fd; } +struct ring *ring_buffer__ring(struct ring_buffer *rb, unsigned int idx) +{ + if (idx >= rb->ring_cnt) + return errno = ERANGE, NULL; + + return rb->rings[idx]; +} + +unsigned long ring__consumer_pos(const struct ring *r) +{ + /* Synchronizes with smp_store_release() in ringbuf_process_ring(). */ + return smp_load_acquire(r->consumer_pos); +} + +unsigned long ring__producer_pos(const struct ring *r) +{ + /* Synchronizes with smp_store_release() in __bpf_ringbuf_reserve() in + * the kernel. + */ + return smp_load_acquire(r->producer_pos); +} + +size_t ring__avail_data_size(const struct ring *r) +{ + unsigned long cons_pos, prod_pos; + + cons_pos = ring__consumer_pos(r); + prod_pos = ring__producer_pos(r); + return prod_pos - cons_pos; +} + +size_t ring__size(const struct ring *r) +{ + return r->mask + 1; +} + +int ring__map_fd(const struct ring *r) +{ + return r->map_fd; +} + +int ring__consume(struct ring *r) +{ + int64_t res; + + res = ringbuf_process_ring(r); + if (res < 0) + return libbpf_err(res); + + return res > INT_MAX ? INT_MAX : res; +} + static void user_ringbuf_unmap_ring(struct user_ring_buffer *rb) { if (rb->consumer_pos) { diff --git a/tools/net/ynl/generated/Makefile b/tools/net/ynl/generated/Makefile index 0f359ee3c46a..2f47b9cac757 100644 --- a/tools/net/ynl/generated/Makefile +++ b/tools/net/ynl/generated/Makefile @@ -27,11 +27,11 @@ protos.a: $(OBJS) %-user.h: ../../../../Documentation/netlink/specs/%.yaml $(TOOL) @echo -e "\tGEN $@" - @$(TOOL) --mode user --header --spec $< $(YNL_GEN_ARG_$*) > $@ + @$(TOOL) --mode user --header --spec $< -o $@ $(YNL_GEN_ARG_$*) %-user.c: ../../../../Documentation/netlink/specs/%.yaml $(TOOL) @echo -e "\tGEN $@" - @$(TOOL) --mode user --source --spec $< $(YNL_GEN_ARG_$*) > $@ + @$(TOOL) --mode user --source --spec $< -o $@ $(YNL_GEN_ARG_$*) %-user.o: %-user.c %-user.h @echo -e "\tCC $@" diff --git a/tools/net/ynl/ynl-gen-c.py b/tools/net/ynl/ynl-gen-c.py index 168fe612b029..f125b5f704ba 100755 --- a/tools/net/ynl/ynl-gen-c.py +++ b/tools/net/ynl/ynl-gen-c.py @@ -1041,9 +1041,11 @@ class RenderInfo: if op_mode == 'notify': op_mode = 'do' for op_dir in ['request', 'reply']: - if op and op_dir in op[op_mode]: - self.struct[op_dir] = Struct(family, self.attr_set, - type_list=op[op_mode][op_dir]['attributes']) + if op: + type_list = [] + if op_dir in op[op_mode]: + type_list = op[op_mode][op_dir]['attributes'] + self.struct[op_dir] = Struct(family, self.attr_set, type_list=type_list) if op_mode == 'event': self.struct['reply'] = Struct(family, self.attr_set, type_list=op['event']['attributes']) @@ -1752,6 +1754,8 @@ def print_type_helpers(ri, direction, deref=False): def print_req_type_helpers(ri): + if len(ri.struct["request"].attr_list) == 0: + return print_alloc_wrapper(ri, "request") print_type_helpers(ri, "request") @@ -1773,6 +1777,8 @@ def print_parse_prototype(ri, direction, terminate=True): def print_req_type(ri): + if len(ri.struct["request"].attr_list) == 0: + return print_type(ri, "request") @@ -2515,9 +2521,8 @@ def main(): if 'dump' in op: cw.p(f"/* {op.enum_name} - dump */") ri = RenderInfo(cw, parsed, args.mode, op, 'dump') - if 'request' in op['dump']: - print_req_type(ri) - print_req_type_helpers(ri) + print_req_type(ri) + print_req_type_helpers(ri) if not ri.type_consistent: print_rsp_type(ri) print_wrapped_type(ri) diff --git a/tools/testing/selftests/bpf/DENYLIST.aarch64 b/tools/testing/selftests/bpf/DENYLIST.aarch64 index 1e827c24761c..5c2cc7e8c5d0 100644 --- a/tools/testing/selftests/bpf/DENYLIST.aarch64 +++ b/tools/testing/selftests/bpf/DENYLIST.aarch64 @@ -10,3 +10,4 @@ fexit_test/fexit_many_args # fexit_many_args:FAIL:fexit_ma fill_link_info/kprobe_multi_link_info # bpf_program__attach_kprobe_multi_opts unexpected error: -95 fill_link_info/kretprobe_multi_link_info # bpf_program__attach_kprobe_multi_opts unexpected error: -95 fill_link_info/kprobe_multi_invalid_ubuff # bpf_program__attach_kprobe_multi_opts unexpected error: -95 +missed/kprobe_recursion # missed_kprobe_recursion__attach unexpected error: -95 (errno 95) diff --git a/tools/testing/selftests/bpf/DENYLIST.s390x b/tools/testing/selftests/bpf/DENYLIST.s390x index ce6f291665cf..1a63996c0304 100644 --- a/tools/testing/selftests/bpf/DENYLIST.s390x +++ b/tools/testing/selftests/bpf/DENYLIST.s390x @@ -1,30 +1,5 @@ # TEMPORARY # Alphabetical order -bloom_filter_map # failed to find kernel BTF type ID of '__x64_sys_getpgid': -3 (?) -bpf_cookie # failed to open_and_load program: -524 (trampoline) -bpf_loop # attaches to __x64_sys_nanosleep -cgrp_local_storage # prog_attach unexpected error: -524 (trampoline) -dynptr/test_dynptr_skb_data -dynptr/test_skb_readonly exceptions # JIT does not support calling kfunc bpf_throw (exceptions) -fexit_sleep # fexit_skel_load fexit skeleton failed (trampoline) get_stack_raw_tp # user_stack corrupted user stack (no backchain userspace) -iters/testmod_seq* # s390x doesn't support kfuncs in modules yet -kprobe_multi_bench_attach # bpf_program__attach_kprobe_multi_opts unexpected error: -95 -kprobe_multi_test # relies on fentry -ksyms_btf/weak_ksyms* # test_ksyms_weak__open_and_load unexpected error: -22 (kfunc) -ksyms_module # test_ksyms_module__open_and_load unexpected error: -9 (?) -ksyms_module_libbpf # JIT does not support calling kernel function (kfunc) -ksyms_module_lskel # test_ksyms_module_lskel__open_and_load unexpected error: -9 (?) -module_attach # skel_attach skeleton attach failed: -524 (trampoline) -ringbuf # skel_load skeleton load failed (?) stacktrace_build_id # compare_map_keys stackid_hmap vs. stackmap err -2 errno 2 (?) -test_lsm # attach unexpected error: -524 (trampoline) -trace_printk # trace_printk__load unexpected error: -2 (errno 2) (?) -trace_vprintk # trace_vprintk__open_and_load unexpected error: -9 (?) -unpriv_bpf_disabled # fentry -user_ringbuf # failed to find kernel BTF type ID of '__s390x_sys_prctl': -3 (?) -verif_stats # trace_vprintk__open_and_load unexpected error: -9 (?) -xdp_bonding # failed to auto-attach program 'trace_on_entry': -524 (trampoline) -xdp_metadata # JIT does not support calling kernel function (kfunc) -test_task_under_cgroup # JIT does not support calling kernel function (kfunc) diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index caede9b574cb..4225f975fce3 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -27,7 +27,11 @@ endif BPF_GCC ?= $(shell command -v bpf-gcc;) SAN_CFLAGS ?= SAN_LDFLAGS ?= $(SAN_CFLAGS) -CFLAGS += -g -O0 -rdynamic -Wall -Werror $(GENFLAGS) $(SAN_CFLAGS) \ +RELEASE ?= +OPT_FLAGS ?= $(if $(RELEASE),-O2,-O0) +CFLAGS += -g $(OPT_FLAGS) -rdynamic \ + -Wall -Werror \ + $(GENFLAGS) $(SAN_CFLAGS) \ -I$(CURDIR) -I$(INCLUDE_DIR) -I$(GENDIR) -I$(LIBDIR) \ -I$(TOOLSINCDIR) -I$(APIDIR) -I$(OUTPUT) LDFLAGS += $(SAN_LDFLAGS) @@ -104,7 +108,7 @@ TEST_GEN_PROGS_EXTENDED = test_sock_addr test_skb_cgroup_id_user \ xskxceiver xdp_redirect_multi xdp_synproxy veristat xdp_hw_metadata \ xdp_features -TEST_GEN_FILES += liburandom_read.so urandom_read sign-file +TEST_GEN_FILES += liburandom_read.so urandom_read sign-file uprobe_multi # Emit succinct information message describing current building step # $1 - generic step name (e.g., CC, LINK, etc); @@ -188,7 +192,7 @@ $(OUTPUT)/%:%.c $(Q)$(LINK.c) $^ $(LDLIBS) -o $@ # LLVM's ld.lld doesn't support all the architectures, so use it only on x86 -ifeq ($(SRCARCH),x86) +ifeq ($(SRCARCH),$(filter $(SRCARCH),x86 riscv)) LLD := lld else LLD := ld @@ -196,17 +200,20 @@ endif # Filter out -static for liburandom_read.so and its dependent targets so that static builds # do not fail. Static builds leave urandom_read relying on system-wide shared libraries. -$(OUTPUT)/liburandom_read.so: urandom_read_lib1.c urandom_read_lib2.c +$(OUTPUT)/liburandom_read.so: urandom_read_lib1.c urandom_read_lib2.c liburandom_read.map $(call msg,LIB,,$@) - $(Q)$(CLANG) $(filter-out -static,$(CFLAGS) $(LDFLAGS)) \ - $^ $(filter-out -static,$(LDLIBS)) \ + $(Q)$(CLANG) $(CLANG_TARGET_ARCH) \ + $(filter-out -static,$(CFLAGS) $(LDFLAGS)) \ + $(filter %.c,$^) $(filter-out -static,$(LDLIBS)) \ -fuse-ld=$(LLD) -Wl,-znoseparate-code -Wl,--build-id=sha1 \ + -Wl,--version-script=liburandom_read.map \ -fPIC -shared -o $@ $(OUTPUT)/urandom_read: urandom_read.c urandom_read_aux.c $(OUTPUT)/liburandom_read.so $(call msg,BINARY,,$@) - $(Q)$(CLANG) $(filter-out -static,$(CFLAGS) $(LDFLAGS)) $(filter %.c,$^) \ - -lurandom_read $(filter-out -static,$(LDLIBS)) -L$(OUTPUT) \ + $(Q)$(CLANG) $(CLANG_TARGET_ARCH) \ + $(filter-out -static,$(CFLAGS) $(LDFLAGS)) $(filter %.c,$^) \ + -lurandom_read $(filter-out -static,$(LDLIBS)) -L$(OUTPUT) \ -fuse-ld=$(LLD) -Wl,-znoseparate-code -Wl,--build-id=sha1 \ -Wl,-rpath=. -o $@ @@ -238,7 +245,7 @@ $(OUTPUT)/runqslower: $(BPFOBJ) | $(DEFAULT_BPFTOOL) $(RUNQSLOWER_OUTPUT) BPFTOOL_OUTPUT=$(HOST_BUILD_DIR)/bpftool/ \ BPFOBJ_OUTPUT=$(BUILD_DIR)/libbpf \ BPFOBJ=$(BPFOBJ) BPF_INCLUDE=$(INCLUDE_DIR) \ - EXTRA_CFLAGS='-g -O0 $(SAN_CFLAGS)' \ + EXTRA_CFLAGS='-g $(OPT_FLAGS) $(SAN_CFLAGS)' \ EXTRA_LDFLAGS='$(SAN_LDFLAGS)' && \ cp $(RUNQSLOWER_OUTPUT)runqslower $@ @@ -276,7 +283,7 @@ $(DEFAULT_BPFTOOL): $(wildcard $(BPFTOOLDIR)/*.[ch] $(BPFTOOLDIR)/Makefile) \ $(HOST_BPFOBJ) | $(HOST_BUILD_DIR)/bpftool $(Q)$(MAKE) $(submake_extras) -C $(BPFTOOLDIR) \ ARCH= CROSS_COMPILE= CC="$(HOSTCC)" LD="$(HOSTLD)" \ - EXTRA_CFLAGS='-g -O0' \ + EXTRA_CFLAGS='-g $(OPT_FLAGS)' \ OUTPUT=$(HOST_BUILD_DIR)/bpftool/ \ LIBBPF_OUTPUT=$(HOST_BUILD_DIR)/libbpf/ \ LIBBPF_DESTDIR=$(HOST_SCRATCH_DIR)/ \ @@ -287,7 +294,7 @@ $(CROSS_BPFTOOL): $(wildcard $(BPFTOOLDIR)/*.[ch] $(BPFTOOLDIR)/Makefile) \ $(BPFOBJ) | $(BUILD_DIR)/bpftool $(Q)$(MAKE) $(submake_extras) -C $(BPFTOOLDIR) \ ARCH=$(ARCH) CROSS_COMPILE=$(CROSS_COMPILE) \ - EXTRA_CFLAGS='-g -O0' \ + EXTRA_CFLAGS='-g $(OPT_FLAGS)' \ OUTPUT=$(BUILD_DIR)/bpftool/ \ LIBBPF_OUTPUT=$(BUILD_DIR)/libbpf/ \ LIBBPF_DESTDIR=$(SCRATCH_DIR)/ \ @@ -310,7 +317,7 @@ $(BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile) \ $(APIDIR)/linux/bpf.h \ | $(BUILD_DIR)/libbpf $(Q)$(MAKE) $(submake_extras) -C $(BPFDIR) OUTPUT=$(BUILD_DIR)/libbpf/ \ - EXTRA_CFLAGS='-g -O0 $(SAN_CFLAGS)' \ + EXTRA_CFLAGS='-g $(OPT_FLAGS) $(SAN_CFLAGS)' \ EXTRA_LDFLAGS='$(SAN_LDFLAGS)' \ DESTDIR=$(SCRATCH_DIR) prefix= all install_headers @@ -319,7 +326,7 @@ $(HOST_BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile) \ $(APIDIR)/linux/bpf.h \ | $(HOST_BUILD_DIR)/libbpf $(Q)$(MAKE) $(submake_extras) -C $(BPFDIR) \ - EXTRA_CFLAGS='-g -O0' ARCH= CROSS_COMPILE= \ + EXTRA_CFLAGS='-g $(OPT_FLAGS)' ARCH= CROSS_COMPILE= \ OUTPUT=$(HOST_BUILD_DIR)/libbpf/ \ CC="$(HOSTCC)" LD="$(HOSTLD)" \ DESTDIR=$(HOST_SCRATCH_DIR)/ prefix= all install_headers @@ -640,7 +647,9 @@ $(OUTPUT)/test_verifier: test_verifier.c verifier/tests.h $(BPFOBJ) | $(OUTPUT) $(call msg,BINARY,,$@) $(Q)$(CC) $(CFLAGS) $(filter %.a %.o %.c,$^) $(LDLIBS) -o $@ -$(OUTPUT)/xskxceiver: xskxceiver.c xskxceiver.h $(OUTPUT)/xsk.o $(OUTPUT)/xsk_xdp_progs.skel.h $(BPFOBJ) | $(OUTPUT) +# Include find_bit.c to compile xskxceiver. +EXTRA_SRC := $(TOOLSDIR)/lib/find_bit.c +$(OUTPUT)/xskxceiver: $(EXTRA_SRC) xskxceiver.c xskxceiver.h $(OUTPUT)/xsk.o $(OUTPUT)/xsk_xdp_progs.skel.h $(BPFOBJ) | $(OUTPUT) $(call msg,BINARY,,$@) $(Q)$(CC) $(CFLAGS) $(filter %.a %.o %.c,$^) $(LDLIBS) -o $@ diff --git a/tools/testing/selftests/bpf/bpf_experimental.h b/tools/testing/selftests/bpf/bpf_experimental.h index 9aa29564bd74..2c8cb3f61529 100644 --- a/tools/testing/selftests/bpf/bpf_experimental.h +++ b/tools/testing/selftests/bpf/bpf_experimental.h @@ -159,6 +159,14 @@ extern void *bpf_percpu_obj_new_impl(__u64 local_type_id, void *meta) __ksym; */ extern void bpf_percpu_obj_drop_impl(void *kptr, void *meta) __ksym; +struct bpf_iter_task_vma; + +extern int bpf_iter_task_vma_new(struct bpf_iter_task_vma *it, + struct task_struct *task, + unsigned long addr) __ksym; +extern struct vm_area_struct *bpf_iter_task_vma_next(struct bpf_iter_task_vma *it) __ksym; +extern void bpf_iter_task_vma_destroy(struct bpf_iter_task_vma *it) __ksym; + /* Convenience macro to wrap over bpf_obj_drop_impl */ #define bpf_percpu_obj_drop(kptr) bpf_percpu_obj_drop_impl(kptr, NULL) diff --git a/tools/testing/selftests/bpf/bpf_kfuncs.h b/tools/testing/selftests/bpf/bpf_kfuncs.h index 642dda0e758a..5ca68ff0b59f 100644 --- a/tools/testing/selftests/bpf/bpf_kfuncs.h +++ b/tools/testing/selftests/bpf/bpf_kfuncs.h @@ -1,6 +1,8 @@ #ifndef __BPF_KFUNCS__ #define __BPF_KFUNCS__ +struct bpf_sock_addr_kern; + /* Description * Initializes an skb-type dynptr * Returns @@ -41,4 +43,16 @@ extern bool bpf_dynptr_is_rdonly(const struct bpf_dynptr *ptr) __ksym; extern __u32 bpf_dynptr_size(const struct bpf_dynptr *ptr) __ksym; extern int bpf_dynptr_clone(const struct bpf_dynptr *ptr, struct bpf_dynptr *clone__init) __ksym; +/* Description + * Modify the address of a AF_UNIX sockaddr. + * Returns__bpf_kfunc + * -EINVAL if the address size is too big or, 0 if the sockaddr was successfully modified. + */ +extern int bpf_sock_addr_set_sun_path(struct bpf_sock_addr_kern *sa_kern, + const __u8 *sun_path, __u32 sun_path__sz) __ksym; + +void *bpf_cast_to_kern_ctx(void *) __ksym; + +void *bpf_rdonly_cast(void *obj, __u32 btf_id) __ksym; + #endif diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c index cefc5dd72573..a5e246f7b202 100644 --- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c +++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c @@ -138,6 +138,10 @@ __bpf_kfunc void bpf_iter_testmod_seq_destroy(struct bpf_iter_testmod_seq *it) it->cnt = 0; } +__bpf_kfunc void bpf_kfunc_common_test(void) +{ +} + struct bpf_testmod_btf_type_tag_1 { int a; }; @@ -343,6 +347,7 @@ BTF_SET8_START(bpf_testmod_common_kfunc_ids) BTF_ID_FLAGS(func, bpf_iter_testmod_seq_new, KF_ITER_NEW) BTF_ID_FLAGS(func, bpf_iter_testmod_seq_next, KF_ITER_NEXT | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_iter_testmod_seq_destroy, KF_ITER_DESTROY) +BTF_ID_FLAGS(func, bpf_kfunc_common_test) BTF_SET8_END(bpf_testmod_common_kfunc_ids) static const struct btf_kfunc_id_set bpf_testmod_common_kfunc_set = { diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod_kfunc.h b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod_kfunc.h index f5c5b1375c24..7c664dd61059 100644 --- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod_kfunc.h +++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod_kfunc.h @@ -104,4 +104,6 @@ void bpf_kfunc_call_test_fail1(struct prog_test_fail1 *p); void bpf_kfunc_call_test_fail2(struct prog_test_fail2 *p); void bpf_kfunc_call_test_fail3(struct prog_test_fail3 *p); void bpf_kfunc_call_test_mem_len_fail1(void *mem, int len); + +void bpf_kfunc_common_test(void) __ksym; #endif /* _BPF_TESTMOD_KFUNC_H */ diff --git a/tools/testing/selftests/bpf/cgroup_helpers.c b/tools/testing/selftests/bpf/cgroup_helpers.c index 2caee8423ee0..5b1da2a32ea7 100644 --- a/tools/testing/selftests/bpf/cgroup_helpers.c +++ b/tools/testing/selftests/bpf/cgroup_helpers.c @@ -49,6 +49,10 @@ snprintf(buf, sizeof(buf), "%s%s", NETCLS_MOUNT_PATH, \ CGROUP_WORK_DIR) +static __thread bool cgroup_workdir_mounted; + +static void __cleanup_cgroup_environment(void); + static int __enable_controllers(const char *cgroup_path, const char *controllers) { char path[PATH_MAX + 1]; @@ -195,6 +199,11 @@ int setup_cgroup_environment(void) format_cgroup_path(cgroup_workdir, ""); + if (mkdir(CGROUP_MOUNT_PATH, 0777) && errno != EEXIST) { + log_err("mkdir mount"); + return 1; + } + if (unshare(CLONE_NEWNS)) { log_err("unshare"); return 1; @@ -209,9 +218,10 @@ int setup_cgroup_environment(void) log_err("mount cgroup2"); return 1; } + cgroup_workdir_mounted = true; /* Cleanup existing failed runs, now that the environment is setup */ - cleanup_cgroup_environment(); + __cleanup_cgroup_environment(); if (mkdir(cgroup_workdir, 0777) && errno != EEXIST) { log_err("mkdir cgroup work dir"); @@ -306,10 +316,25 @@ int join_parent_cgroup(const char *relative_path) } /** + * __cleanup_cgroup_environment() - Delete temporary cgroups + * + * This is a helper for cleanup_cgroup_environment() that is responsible for + * deletion of all temporary cgroups that have been created during the test. + */ +static void __cleanup_cgroup_environment(void) +{ + char cgroup_workdir[PATH_MAX + 1]; + + format_cgroup_path(cgroup_workdir, ""); + join_cgroup_from_top(CGROUP_MOUNT_PATH); + nftw(cgroup_workdir, nftwfunc, WALK_FD_LIMIT, FTW_DEPTH | FTW_MOUNT); +} + +/** * cleanup_cgroup_environment() - Cleanup Cgroup Testing Environment * * This is an idempotent function to delete all temporary cgroups that - * have been created during the test, including the cgroup testing work + * have been created during the test and unmount the cgroup testing work * directory. * * At call time, it moves the calling process to the root cgroup, and then @@ -320,11 +345,10 @@ int join_parent_cgroup(const char *relative_path) */ void cleanup_cgroup_environment(void) { - char cgroup_workdir[PATH_MAX + 1]; - - format_cgroup_path(cgroup_workdir, ""); - join_cgroup_from_top(CGROUP_MOUNT_PATH); - nftw(cgroup_workdir, nftwfunc, WALK_FD_LIMIT, FTW_DEPTH | FTW_MOUNT); + __cleanup_cgroup_environment(); + if (cgroup_workdir_mounted && umount(CGROUP_MOUNT_PATH)) + log_err("umount cgroup2"); + cgroup_workdir_mounted = false; } /** diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config index e41eb33b2704..02dd4409200e 100644 --- a/tools/testing/selftests/bpf/config +++ b/tools/testing/selftests/bpf/config @@ -84,3 +84,4 @@ CONFIG_USERFAULTFD=y CONFIG_VXLAN=y CONFIG_XDP_SOCKETS=y CONFIG_XFRM_INTERFACE=y +CONFIG_VSOCKETS=y diff --git a/tools/testing/selftests/bpf/liburandom_read.map b/tools/testing/selftests/bpf/liburandom_read.map new file mode 100644 index 000000000000..38a97a419a04 --- /dev/null +++ b/tools/testing/selftests/bpf/liburandom_read.map @@ -0,0 +1,15 @@ +LIBURANDOM_READ_1.0.0 { + global: + urandlib_api; + urandlib_api_sameoffset; + urandlib_read_without_sema; + urandlib_read_with_sema; + urandlib_read_with_sema_semaphore; + local: + *; +}; + +LIBURANDOM_READ_2.0.0 { + global: + urandlib_api; +} LIBURANDOM_READ_1.0.0; diff --git a/tools/testing/selftests/bpf/map_tests/map_in_map_batch_ops.c b/tools/testing/selftests/bpf/map_tests/map_in_map_batch_ops.c index 16f1671e4bde..66191ae9863c 100644 --- a/tools/testing/selftests/bpf/map_tests/map_in_map_batch_ops.c +++ b/tools/testing/selftests/bpf/map_tests/map_in_map_batch_ops.c @@ -33,11 +33,11 @@ static void create_inner_maps(enum bpf_map_type map_type, { int map_fd, map_index, ret; __u32 map_key = 0, map_id; - char map_name[15]; + char map_name[16]; for (map_index = 0; map_index < OUTER_MAP_ENTRIES; map_index++) { memset(map_name, 0, sizeof(map_name)); - sprintf(map_name, "inner_map_fd_%d", map_index); + snprintf(map_name, sizeof(map_name), "inner_map_fd_%d", map_index); map_fd = bpf_map_create(map_type, map_name, sizeof(__u32), sizeof(__u32), 1, NULL); CHECK(map_fd < 0, diff --git a/tools/testing/selftests/bpf/network_helpers.c b/tools/testing/selftests/bpf/network_helpers.c index da72a3a66230..6db27a9088e9 100644 --- a/tools/testing/selftests/bpf/network_helpers.c +++ b/tools/testing/selftests/bpf/network_helpers.c @@ -11,6 +11,7 @@ #include <arpa/inet.h> #include <sys/mount.h> #include <sys/stat.h> +#include <sys/un.h> #include <linux/err.h> #include <linux/in.h> @@ -257,6 +258,26 @@ static int connect_fd_to_addr(int fd, return 0; } +int connect_to_addr(const struct sockaddr_storage *addr, socklen_t addrlen, int type) +{ + int fd; + + fd = socket(addr->ss_family, type, 0); + if (fd < 0) { + log_err("Failed to create client socket"); + return -1; + } + + if (connect_fd_to_addr(fd, addr, addrlen, false)) + goto error_close; + + return fd; + +error_close: + save_errno_close(fd); + return -1; +} + static const struct network_helper_opts default_opts; int connect_to_fd_opts(int server_fd, const struct network_helper_opts *opts) @@ -380,6 +401,19 @@ int make_sockaddr(int family, const char *addr_str, __u16 port, if (len) *len = sizeof(*sin6); return 0; + } else if (family == AF_UNIX) { + /* Note that we always use abstract unix sockets to avoid having + * to clean up leftover files. + */ + struct sockaddr_un *sun = (void *)addr; + + memset(addr, 0, sizeof(*sun)); + sun->sun_family = family; + sun->sun_path[0] = 0; + strcpy(sun->sun_path + 1, addr_str); + if (len) + *len = offsetof(struct sockaddr_un, sun_path) + 1 + strlen(addr_str); + return 0; } return -1; } diff --git a/tools/testing/selftests/bpf/network_helpers.h b/tools/testing/selftests/bpf/network_helpers.h index 5eccc67d1a99..34f1200a781b 100644 --- a/tools/testing/selftests/bpf/network_helpers.h +++ b/tools/testing/selftests/bpf/network_helpers.h @@ -51,6 +51,7 @@ int *start_reuseport_server(int family, int type, const char *addr_str, __u16 port, int timeout_ms, unsigned int nr_listens); void free_fds(int *fds, unsigned int nr_close_fds); +int connect_to_addr(const struct sockaddr_storage *addr, socklen_t len, int type); int connect_to_fd(int server_fd, int timeout_ms); int connect_to_fd_opts(int server_fd, const struct network_helper_opts *opts); int connect_fd_to_fd(int client_fd, int server_fd, int timeout_ms); diff --git a/tools/testing/selftests/bpf/prog_tests/align.c b/tools/testing/selftests/bpf/prog_tests/align.c index b92770592563..465c1c3a3d3c 100644 --- a/tools/testing/selftests/bpf/prog_tests/align.c +++ b/tools/testing/selftests/bpf/prog_tests/align.c @@ -6,6 +6,7 @@ struct bpf_reg_match { unsigned int line; + const char *reg; const char *match; }; @@ -39,13 +40,13 @@ static struct bpf_align_test tests[] = { }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .matches = { - {0, "R1=ctx(off=0,imm=0)"}, - {0, "R10=fp0"}, - {0, "R3_w=2"}, - {1, "R3_w=4"}, - {2, "R3_w=8"}, - {3, "R3_w=16"}, - {4, "R3_w=32"}, + {0, "R1", "ctx(off=0,imm=0)"}, + {0, "R10", "fp0"}, + {0, "R3_w", "2"}, + {1, "R3_w", "4"}, + {2, "R3_w", "8"}, + {3, "R3_w", "16"}, + {4, "R3_w", "32"}, }, }, { @@ -67,19 +68,19 @@ static struct bpf_align_test tests[] = { }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .matches = { - {0, "R1=ctx(off=0,imm=0)"}, - {0, "R10=fp0"}, - {0, "R3_w=1"}, - {1, "R3_w=2"}, - {2, "R3_w=4"}, - {3, "R3_w=8"}, - {4, "R3_w=16"}, - {5, "R3_w=1"}, - {6, "R4_w=32"}, - {7, "R4_w=16"}, - {8, "R4_w=8"}, - {9, "R4_w=4"}, - {10, "R4_w=2"}, + {0, "R1", "ctx(off=0,imm=0)"}, + {0, "R10", "fp0"}, + {0, "R3_w", "1"}, + {1, "R3_w", "2"}, + {2, "R3_w", "4"}, + {3, "R3_w", "8"}, + {4, "R3_w", "16"}, + {5, "R3_w", "1"}, + {6, "R4_w", "32"}, + {7, "R4_w", "16"}, + {8, "R4_w", "8"}, + {9, "R4_w", "4"}, + {10, "R4_w", "2"}, }, }, { @@ -96,14 +97,14 @@ static struct bpf_align_test tests[] = { }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .matches = { - {0, "R1=ctx(off=0,imm=0)"}, - {0, "R10=fp0"}, - {0, "R3_w=4"}, - {1, "R3_w=8"}, - {2, "R3_w=10"}, - {3, "R4_w=8"}, - {4, "R4_w=12"}, - {5, "R4_w=14"}, + {0, "R1", "ctx(off=0,imm=0)"}, + {0, "R10", "fp0"}, + {0, "R3_w", "4"}, + {1, "R3_w", "8"}, + {2, "R3_w", "10"}, + {3, "R4_w", "8"}, + {4, "R4_w", "12"}, + {5, "R4_w", "14"}, }, }, { @@ -118,12 +119,12 @@ static struct bpf_align_test tests[] = { }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .matches = { - {0, "R1=ctx(off=0,imm=0)"}, - {0, "R10=fp0"}, - {0, "R3_w=7"}, - {1, "R3_w=7"}, - {2, "R3_w=14"}, - {3, "R3_w=56"}, + {0, "R1", "ctx(off=0,imm=0)"}, + {0, "R10", "fp0"}, + {0, "R3_w", "7"}, + {1, "R3_w", "7"}, + {2, "R3_w", "14"}, + {3, "R3_w", "56"}, }, }, @@ -161,19 +162,19 @@ static struct bpf_align_test tests[] = { }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .matches = { - {6, "R0_w=pkt(off=8,r=8,imm=0)"}, - {6, "R3_w=scalar(umax=255,var_off=(0x0; 0xff))"}, - {7, "R3_w=scalar(umax=510,var_off=(0x0; 0x1fe))"}, - {8, "R3_w=scalar(umax=1020,var_off=(0x0; 0x3fc))"}, - {9, "R3_w=scalar(umax=2040,var_off=(0x0; 0x7f8))"}, - {10, "R3_w=scalar(umax=4080,var_off=(0x0; 0xff0))"}, - {12, "R3_w=pkt_end(off=0,imm=0)"}, - {17, "R4_w=scalar(umax=255,var_off=(0x0; 0xff))"}, - {18, "R4_w=scalar(umax=8160,var_off=(0x0; 0x1fe0))"}, - {19, "R4_w=scalar(umax=4080,var_off=(0x0; 0xff0))"}, - {20, "R4_w=scalar(umax=2040,var_off=(0x0; 0x7f8))"}, - {21, "R4_w=scalar(umax=1020,var_off=(0x0; 0x3fc))"}, - {22, "R4_w=scalar(umax=510,var_off=(0x0; 0x1fe))"}, + {6, "R0_w", "pkt(off=8,r=8,imm=0)"}, + {6, "R3_w", "var_off=(0x0; 0xff)"}, + {7, "R3_w", "var_off=(0x0; 0x1fe)"}, + {8, "R3_w", "var_off=(0x0; 0x3fc)"}, + {9, "R3_w", "var_off=(0x0; 0x7f8)"}, + {10, "R3_w", "var_off=(0x0; 0xff0)"}, + {12, "R3_w", "pkt_end(off=0,imm=0)"}, + {17, "R4_w", "var_off=(0x0; 0xff)"}, + {18, "R4_w", "var_off=(0x0; 0x1fe0)"}, + {19, "R4_w", "var_off=(0x0; 0xff0)"}, + {20, "R4_w", "var_off=(0x0; 0x7f8)"}, + {21, "R4_w", "var_off=(0x0; 0x3fc)"}, + {22, "R4_w", "var_off=(0x0; 0x1fe)"}, }, }, { @@ -194,16 +195,16 @@ static struct bpf_align_test tests[] = { }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .matches = { - {6, "R3_w=scalar(umax=255,var_off=(0x0; 0xff))"}, - {7, "R4_w=scalar(id=1,umax=255,var_off=(0x0; 0xff))"}, - {8, "R4_w=scalar(umax=255,var_off=(0x0; 0xff))"}, - {9, "R4_w=scalar(id=1,umax=255,var_off=(0x0; 0xff))"}, - {10, "R4_w=scalar(umax=510,var_off=(0x0; 0x1fe))"}, - {11, "R4_w=scalar(id=1,umax=255,var_off=(0x0; 0xff))"}, - {12, "R4_w=scalar(umax=1020,var_off=(0x0; 0x3fc))"}, - {13, "R4_w=scalar(id=1,umax=255,var_off=(0x0; 0xff))"}, - {14, "R4_w=scalar(umax=2040,var_off=(0x0; 0x7f8))"}, - {15, "R4_w=scalar(umax=4080,var_off=(0x0; 0xff0))"}, + {6, "R3_w", "var_off=(0x0; 0xff)"}, + {7, "R4_w", "var_off=(0x0; 0xff)"}, + {8, "R4_w", "var_off=(0x0; 0xff)"}, + {9, "R4_w", "var_off=(0x0; 0xff)"}, + {10, "R4_w", "var_off=(0x0; 0x1fe)"}, + {11, "R4_w", "var_off=(0x0; 0xff)"}, + {12, "R4_w", "var_off=(0x0; 0x3fc)"}, + {13, "R4_w", "var_off=(0x0; 0xff)"}, + {14, "R4_w", "var_off=(0x0; 0x7f8)"}, + {15, "R4_w", "var_off=(0x0; 0xff0)"}, }, }, { @@ -234,14 +235,14 @@ static struct bpf_align_test tests[] = { }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .matches = { - {2, "R5_w=pkt(off=0,r=0,imm=0)"}, - {4, "R5_w=pkt(off=14,r=0,imm=0)"}, - {5, "R4_w=pkt(off=14,r=0,imm=0)"}, - {9, "R2=pkt(off=0,r=18,imm=0)"}, - {10, "R5=pkt(off=14,r=18,imm=0)"}, - {10, "R4_w=scalar(umax=255,var_off=(0x0; 0xff))"}, - {13, "R4_w=scalar(umax=65535,var_off=(0x0; 0xffff))"}, - {14, "R4_w=scalar(umax=65535,var_off=(0x0; 0xffff))"}, + {2, "R5_w", "pkt(off=0,r=0,imm=0)"}, + {4, "R5_w", "pkt(off=14,r=0,imm=0)"}, + {5, "R4_w", "pkt(off=14,r=0,imm=0)"}, + {9, "R2", "pkt(off=0,r=18,imm=0)"}, + {10, "R5", "pkt(off=14,r=18,imm=0)"}, + {10, "R4_w", "var_off=(0x0; 0xff)"}, + {13, "R4_w", "var_off=(0x0; 0xffff)"}, + {14, "R4_w", "var_off=(0x0; 0xffff)"}, }, }, { @@ -298,20 +299,20 @@ static struct bpf_align_test tests[] = { /* Calculated offset in R6 has unknown value, but known * alignment of 4. */ - {6, "R2_w=pkt(off=0,r=8,imm=0)"}, - {7, "R6_w=scalar(umax=1020,var_off=(0x0; 0x3fc))"}, + {6, "R2_w", "pkt(off=0,r=8,imm=0)"}, + {7, "R6_w", "var_off=(0x0; 0x3fc)"}, /* Offset is added to packet pointer R5, resulting in * known fixed offset, and variable offset from R6. */ - {11, "R5_w=pkt(id=1,off=14,r=0,umax=1020,var_off=(0x0; 0x3fc))"}, + {11, "R5_w", "pkt(id=1,off=14,"}, /* At the time the word size load is performed from R5, * it's total offset is NET_IP_ALIGN + reg->off (0) + * reg->aux_off (14) which is 16. Then the variable * offset is considered using reg->aux_off_align which * is 4 and meets the load's requirements. */ - {15, "R4=pkt(id=1,off=18,r=18,umax=1020,var_off=(0x0; 0x3fc))"}, - {15, "R5=pkt(id=1,off=14,r=18,umax=1020,var_off=(0x0; 0x3fc))"}, + {15, "R4", "var_off=(0x0; 0x3fc)"}, + {15, "R5", "var_off=(0x0; 0x3fc)"}, /* Variable offset is added to R5 packet pointer, * resulting in auxiliary alignment of 4. To avoid BPF * verifier's precision backtracking logging @@ -319,46 +320,46 @@ static struct bpf_align_test tests[] = { * instruction to validate R5 state. We also check * that R4 is what it should be in such case. */ - {18, "R4_w=pkt(id=2,off=0,r=0,umax=1020,var_off=(0x0; 0x3fc))"}, - {18, "R5_w=pkt(id=2,off=0,r=0,umax=1020,var_off=(0x0; 0x3fc))"}, + {18, "R4_w", "var_off=(0x0; 0x3fc)"}, + {18, "R5_w", "var_off=(0x0; 0x3fc)"}, /* Constant offset is added to R5, resulting in * reg->off of 14. */ - {19, "R5_w=pkt(id=2,off=14,r=0,umax=1020,var_off=(0x0; 0x3fc))"}, + {19, "R5_w", "pkt(id=2,off=14,"}, /* At the time the word size load is performed from R5, * its total fixed offset is NET_IP_ALIGN + reg->off * (14) which is 16. Then the variable offset is 4-byte * aligned, so the total offset is 4-byte aligned and * meets the load's requirements. */ - {24, "R4=pkt(id=2,off=18,r=18,umax=1020,var_off=(0x0; 0x3fc))"}, - {24, "R5=pkt(id=2,off=14,r=18,umax=1020,var_off=(0x0; 0x3fc))"}, + {24, "R4", "var_off=(0x0; 0x3fc)"}, + {24, "R5", "var_off=(0x0; 0x3fc)"}, /* Constant offset is added to R5 packet pointer, * resulting in reg->off value of 14. */ - {26, "R5_w=pkt(off=14,r=8"}, + {26, "R5_w", "pkt(off=14,r=8,"}, /* Variable offset is added to R5, resulting in a * variable offset of (4n). See comment for insn #18 * for R4 = R5 trick. */ - {28, "R4_w=pkt(id=3,off=14,r=0,umax=1020,var_off=(0x0; 0x3fc))"}, - {28, "R5_w=pkt(id=3,off=14,r=0,umax=1020,var_off=(0x0; 0x3fc))"}, + {28, "R4_w", "var_off=(0x0; 0x3fc)"}, + {28, "R5_w", "var_off=(0x0; 0x3fc)"}, /* Constant is added to R5 again, setting reg->off to 18. */ - {29, "R5_w=pkt(id=3,off=18,r=0,umax=1020,var_off=(0x0; 0x3fc))"}, + {29, "R5_w", "pkt(id=3,off=18,"}, /* And once more we add a variable; resulting var_off * is still (4n), fixed offset is not changed. * Also, we create a new reg->id. */ - {31, "R4_w=pkt(id=4,off=18,r=0,umax=2040,var_off=(0x0; 0x7fc)"}, - {31, "R5_w=pkt(id=4,off=18,r=0,umax=2040,var_off=(0x0; 0x7fc)"}, + {31, "R4_w", "var_off=(0x0; 0x7fc)"}, + {31, "R5_w", "var_off=(0x0; 0x7fc)"}, /* At the time the word size load is performed from R5, * its total fixed offset is NET_IP_ALIGN + reg->off (18) * which is 20. Then the variable offset is (4n), so * the total offset is 4-byte aligned and meets the * load's requirements. */ - {35, "R4=pkt(id=4,off=22,r=22,umax=2040,var_off=(0x0; 0x7fc)"}, - {35, "R5=pkt(id=4,off=18,r=22,umax=2040,var_off=(0x0; 0x7fc)"}, + {35, "R4", "var_off=(0x0; 0x7fc)"}, + {35, "R5", "var_off=(0x0; 0x7fc)"}, }, }, { @@ -396,36 +397,36 @@ static struct bpf_align_test tests[] = { /* Calculated offset in R6 has unknown value, but known * alignment of 4. */ - {6, "R2_w=pkt(off=0,r=8,imm=0)"}, - {7, "R6_w=scalar(umax=1020,var_off=(0x0; 0x3fc))"}, + {6, "R2_w", "pkt(off=0,r=8,imm=0)"}, + {7, "R6_w", "var_off=(0x0; 0x3fc)"}, /* Adding 14 makes R6 be (4n+2) */ - {8, "R6_w=scalar(umin=14,umax=1034,var_off=(0x2; 0x7fc))"}, + {8, "R6_w", "var_off=(0x2; 0x7fc)"}, /* Packet pointer has (4n+2) offset */ - {11, "R5_w=pkt(id=1,off=0,r=0,umin=14,umax=1034,var_off=(0x2; 0x7fc)"}, - {12, "R4=pkt(id=1,off=4,r=0,umin=14,umax=1034,var_off=(0x2; 0x7fc)"}, + {11, "R5_w", "var_off=(0x2; 0x7fc)"}, + {12, "R4", "var_off=(0x2; 0x7fc)"}, /* At the time the word size load is performed from R5, * its total fixed offset is NET_IP_ALIGN + reg->off (0) * which is 2. Then the variable offset is (4n+2), so * the total offset is 4-byte aligned and meets the * load's requirements. */ - {15, "R5=pkt(id=1,off=0,r=4,umin=14,umax=1034,var_off=(0x2; 0x7fc)"}, + {15, "R5", "var_off=(0x2; 0x7fc)"}, /* Newly read value in R6 was shifted left by 2, so has * known alignment of 4. */ - {17, "R6_w=scalar(umax=1020,var_off=(0x0; 0x3fc))"}, + {17, "R6_w", "var_off=(0x0; 0x3fc)"}, /* Added (4n) to packet pointer's (4n+2) var_off, giving * another (4n+2). */ - {19, "R5_w=pkt(id=2,off=0,r=0,umin=14,umax=2054,var_off=(0x2; 0xffc)"}, - {20, "R4=pkt(id=2,off=4,r=0,umin=14,umax=2054,var_off=(0x2; 0xffc)"}, + {19, "R5_w", "var_off=(0x2; 0xffc)"}, + {20, "R4", "var_off=(0x2; 0xffc)"}, /* At the time the word size load is performed from R5, * its total fixed offset is NET_IP_ALIGN + reg->off (0) * which is 2. Then the variable offset is (4n+2), so * the total offset is 4-byte aligned and meets the * load's requirements. */ - {23, "R5=pkt(id=2,off=0,r=4,umin=14,umax=2054,var_off=(0x2; 0xffc)"}, + {23, "R5", "var_off=(0x2; 0xffc)"}, }, }, { @@ -458,18 +459,18 @@ static struct bpf_align_test tests[] = { .prog_type = BPF_PROG_TYPE_SCHED_CLS, .result = REJECT, .matches = { - {3, "R5_w=pkt_end(off=0,imm=0)"}, + {3, "R5_w", "pkt_end(off=0,imm=0)"}, /* (ptr - ptr) << 2 == unknown, (4n) */ - {5, "R5_w=scalar(smax=9223372036854775804,umax=18446744073709551612,var_off=(0x0; 0xfffffffffffffffc)"}, + {5, "R5_w", "var_off=(0x0; 0xfffffffffffffffc)"}, /* (4n) + 14 == (4n+2). We blow our bounds, because * the add could overflow. */ - {6, "R5_w=scalar(smin=-9223372036854775806,smax=9223372036854775806,umin=2,umax=18446744073709551614,var_off=(0x2; 0xfffffffffffffffc)"}, + {6, "R5_w", "var_off=(0x2; 0xfffffffffffffffc)"}, /* Checked s>=0 */ - {9, "R5=scalar(umin=2,umax=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc)"}, + {9, "R5", "var_off=(0x2; 0x7ffffffffffffffc)"}, /* packet pointer + nonnegative (4n+2) */ - {11, "R6_w=pkt(id=1,off=0,r=0,umin=2,umax=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc)"}, - {12, "R4_w=pkt(id=1,off=4,r=0,umin=2,umax=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc)"}, + {11, "R6_w", "var_off=(0x2; 0x7ffffffffffffffc)"}, + {12, "R4_w", "var_off=(0x2; 0x7ffffffffffffffc)"}, /* NET_IP_ALIGN + (4n+2) == (4n), alignment is fine. * We checked the bounds, but it might have been able * to overflow if the packet pointer started in the @@ -477,7 +478,7 @@ static struct bpf_align_test tests[] = { * So we did not get a 'range' on R6, and the access * attempt will fail. */ - {15, "R6_w=pkt(id=1,off=0,r=0,umin=2,umax=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc)"}, + {15, "R6_w", "var_off=(0x2; 0x7ffffffffffffffc)"}, } }, { @@ -512,24 +513,23 @@ static struct bpf_align_test tests[] = { /* Calculated offset in R6 has unknown value, but known * alignment of 4. */ - {6, "R2_w=pkt(off=0,r=8,imm=0)"}, - {8, "R6_w=scalar(umax=1020,var_off=(0x0; 0x3fc))"}, + {6, "R2_w", "pkt(off=0,r=8,imm=0)"}, + {8, "R6_w", "var_off=(0x0; 0x3fc)"}, /* Adding 14 makes R6 be (4n+2) */ - {9, "R6_w=scalar(umin=14,umax=1034,var_off=(0x2; 0x7fc))"}, + {9, "R6_w", "var_off=(0x2; 0x7fc)"}, /* New unknown value in R7 is (4n) */ - {10, "R7_w=scalar(umax=1020,var_off=(0x0; 0x3fc))"}, + {10, "R7_w", "var_off=(0x0; 0x3fc)"}, /* Subtracting it from R6 blows our unsigned bounds */ - {11, "R6=scalar(smin=-1006,smax=1034,umin=2,umax=18446744073709551614,var_off=(0x2; 0xfffffffffffffffc)"}, + {11, "R6", "var_off=(0x2; 0xfffffffffffffffc)"}, /* Checked s>= 0 */ - {14, "R6=scalar(umin=2,umax=1034,var_off=(0x2; 0x7fc))"}, + {14, "R6", "var_off=(0x2; 0x7fc)"}, /* At the time the word size load is performed from R5, * its total fixed offset is NET_IP_ALIGN + reg->off (0) * which is 2. Then the variable offset is (4n+2), so * the total offset is 4-byte aligned and meets the * load's requirements. */ - {20, "R5=pkt(id=2,off=0,r=4,umin=2,umax=1034,var_off=(0x2; 0x7fc)"}, - + {20, "R5", "var_off=(0x2; 0x7fc)"}, }, }, { @@ -566,23 +566,23 @@ static struct bpf_align_test tests[] = { /* Calculated offset in R6 has unknown value, but known * alignment of 4. */ - {6, "R2_w=pkt(off=0,r=8,imm=0)"}, - {9, "R6_w=scalar(umax=60,var_off=(0x0; 0x3c))"}, + {6, "R2_w", "pkt(off=0,r=8,imm=0)"}, + {9, "R6_w", "var_off=(0x0; 0x3c)"}, /* Adding 14 makes R6 be (4n+2) */ - {10, "R6_w=scalar(umin=14,umax=74,var_off=(0x2; 0x7c))"}, + {10, "R6_w", "var_off=(0x2; 0x7c)"}, /* Subtracting from packet pointer overflows ubounds */ - {13, "R5_w=pkt(id=2,off=0,r=8,umin=18446744073709551542,umax=18446744073709551602,var_off=(0xffffffffffffff82; 0x7c)"}, + {13, "R5_w", "var_off=(0xffffffffffffff82; 0x7c)"}, /* New unknown value in R7 is (4n), >= 76 */ - {14, "R7_w=scalar(umin=76,umax=1096,var_off=(0x0; 0x7fc))"}, + {14, "R7_w", "var_off=(0x0; 0x7fc)"}, /* Adding it to packet pointer gives nice bounds again */ - {16, "R5_w=pkt(id=3,off=0,r=0,umin=2,umax=1082,var_off=(0x2; 0x7fc)"}, + {16, "R5_w", "var_off=(0x2; 0x7fc)"}, /* At the time the word size load is performed from R5, * its total fixed offset is NET_IP_ALIGN + reg->off (0) * which is 2. Then the variable offset is (4n+2), so * the total offset is 4-byte aligned and meets the * load's requirements. */ - {20, "R5=pkt(id=3,off=0,r=4,umin=2,umax=1082,var_off=(0x2; 0x7fc)"}, + {20, "R5", "var_off=(0x2; 0x7fc)"}, }, }, }; @@ -635,6 +635,7 @@ static int do_test_single(struct bpf_align_test *test) line_ptr = strtok(bpf_vlog_copy, "\n"); for (i = 0; i < MAX_MATCHES; i++) { struct bpf_reg_match m = test->matches[i]; + const char *p; int tmp; if (!m.match) @@ -649,8 +650,8 @@ static int do_test_single(struct bpf_align_test *test) line_ptr = strtok(NULL, "\n"); } if (!line_ptr) { - printf("Failed to find line %u for match: %s\n", - m.line, m.match); + printf("Failed to find line %u for match: %s=%s\n", + m.line, m.reg, m.match); ret = 1; printf("%s", bpf_vlog); break; @@ -667,15 +668,15 @@ static int do_test_single(struct bpf_align_test *test) * 6: R0_w=pkt(off=8,r=8,imm=0) R1=ctx(off=0,imm=0) R2_w=pkt(off=0,r=8,imm=0) R3_w=pkt_end(off=0,imm=0) R10=fp0 * 6: (71) r3 = *(u8 *)(r2 +0) ; R2_w=pkt(off=0,r=8,imm=0) R3_w=scalar(umax=255,var_off=(0x0; 0xff)) */ - while (!strstr(line_ptr, m.match)) { + while (!(p = strstr(line_ptr, m.reg)) || !strstr(p, m.match)) { cur_line = -1; line_ptr = strtok(NULL, "\n"); sscanf(line_ptr ?: "", "%u: ", &cur_line); if (!line_ptr || cur_line != m.line) break; } - if (cur_line != m.line || !line_ptr || !strstr(line_ptr, m.match)) { - printf("Failed to find match %u: %s\n", m.line, m.match); + if (cur_line != m.line || !line_ptr || !(p = strstr(line_ptr, m.reg)) || !strstr(p, m.match)) { + printf("Failed to find match %u: %s=%s\n", m.line, m.reg, m.match); ret = 1; printf("%s", bpf_vlog); break; diff --git a/tools/testing/selftests/bpf/prog_tests/bloom_filter_map.c b/tools/testing/selftests/bpf/prog_tests/bloom_filter_map.c index d2d9e965eba5..053f4d6da77a 100644 --- a/tools/testing/selftests/bpf/prog_tests/bloom_filter_map.c +++ b/tools/testing/selftests/bpf/prog_tests/bloom_filter_map.c @@ -193,8 +193,8 @@ error: void test_bloom_filter_map(void) { - __u32 *rand_vals, nr_rand_vals; - struct bloom_filter_map *skel; + __u32 *rand_vals = NULL, nr_rand_vals = 0; + struct bloom_filter_map *skel = NULL; int err; test_fail_cases(); diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c index 1f02168103dd..41aba139b20b 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c @@ -10,7 +10,7 @@ #include "bpf_iter_task.skel.h" #include "bpf_iter_task_stack.skel.h" #include "bpf_iter_task_file.skel.h" -#include "bpf_iter_task_vma.skel.h" +#include "bpf_iter_task_vmas.skel.h" #include "bpf_iter_task_btf.skel.h" #include "bpf_iter_tcp4.skel.h" #include "bpf_iter_tcp6.skel.h" @@ -1399,19 +1399,19 @@ static void str_strip_first_line(char *str) static void test_task_vma_common(struct bpf_iter_attach_opts *opts) { int err, iter_fd = -1, proc_maps_fd = -1; - struct bpf_iter_task_vma *skel; + struct bpf_iter_task_vmas *skel; int len, read_size = 4; char maps_path[64]; - skel = bpf_iter_task_vma__open(); - if (!ASSERT_OK_PTR(skel, "bpf_iter_task_vma__open")) + skel = bpf_iter_task_vmas__open(); + if (!ASSERT_OK_PTR(skel, "bpf_iter_task_vmas__open")) return; skel->bss->pid = getpid(); skel->bss->one_task = opts ? 1 : 0; - err = bpf_iter_task_vma__load(skel); - if (!ASSERT_OK(err, "bpf_iter_task_vma__load")) + err = bpf_iter_task_vmas__load(skel); + if (!ASSERT_OK(err, "bpf_iter_task_vmas__load")) goto out; skel->links.proc_maps = bpf_program__attach_iter( @@ -1462,25 +1462,25 @@ static void test_task_vma_common(struct bpf_iter_attach_opts *opts) out: close(proc_maps_fd); close(iter_fd); - bpf_iter_task_vma__destroy(skel); + bpf_iter_task_vmas__destroy(skel); } static void test_task_vma_dead_task(void) { - struct bpf_iter_task_vma *skel; + struct bpf_iter_task_vmas *skel; int wstatus, child_pid = -1; time_t start_tm, cur_tm; int err, iter_fd = -1; int wait_sec = 3; - skel = bpf_iter_task_vma__open(); - if (!ASSERT_OK_PTR(skel, "bpf_iter_task_vma__open")) + skel = bpf_iter_task_vmas__open(); + if (!ASSERT_OK_PTR(skel, "bpf_iter_task_vmas__open")) return; skel->bss->pid = getpid(); - err = bpf_iter_task_vma__load(skel); - if (!ASSERT_OK(err, "bpf_iter_task_vma__load")) + err = bpf_iter_task_vmas__load(skel); + if (!ASSERT_OK(err, "bpf_iter_task_vmas__load")) goto out; skel->links.proc_maps = bpf_program__attach_iter( @@ -1533,7 +1533,7 @@ static void test_task_vma_dead_task(void) out: waitpid(child_pid, &wstatus, 0); close(iter_fd); - bpf_iter_task_vma__destroy(skel); + bpf_iter_task_vmas__destroy(skel); } void test_bpf_sockmap_map_iter_fd(void) diff --git a/tools/testing/selftests/bpf/prog_tests/connect_ping.c b/tools/testing/selftests/bpf/prog_tests/connect_ping.c index 289218c2216c..40fe571f2fe7 100644 --- a/tools/testing/selftests/bpf/prog_tests/connect_ping.c +++ b/tools/testing/selftests/bpf/prog_tests/connect_ping.c @@ -28,9 +28,9 @@ static void subtest(int cgroup_fd, struct connect_ping *skel, .sin6_family = AF_INET6, .sin6_addr = IN6ADDR_LOOPBACK_INIT, }; - struct sockaddr *sa; + struct sockaddr *sa = NULL; socklen_t sa_len; - int protocol; + int protocol = -1; int sock_fd; switch (family) { diff --git a/tools/testing/selftests/bpf/prog_tests/fib_lookup.c b/tools/testing/selftests/bpf/prog_tests/fib_lookup.c index 2fd05649bad1..4ad4cd69152e 100644 --- a/tools/testing/selftests/bpf/prog_tests/fib_lookup.c +++ b/tools/testing/selftests/bpf/prog_tests/fib_lookup.c @@ -11,9 +11,13 @@ #define NS_TEST "fib_lookup_ns" #define IPV6_IFACE_ADDR "face::face" +#define IPV6_IFACE_ADDR_SEC "cafe::cafe" +#define IPV6_ADDR_DST "face::3" #define IPV6_NUD_FAILED_ADDR "face::1" #define IPV6_NUD_STALE_ADDR "face::2" #define IPV4_IFACE_ADDR "10.0.0.254" +#define IPV4_IFACE_ADDR_SEC "10.1.0.254" +#define IPV4_ADDR_DST "10.2.0.254" #define IPV4_NUD_FAILED_ADDR "10.0.0.1" #define IPV4_NUD_STALE_ADDR "10.0.0.2" #define IPV4_TBID_ADDR "172.0.0.254" @@ -31,6 +35,7 @@ struct fib_lookup_test { const char *desc; const char *daddr; int expected_ret; + const char *expected_src; int lookup_flags; __u32 tbid; __u8 dmac[6]; @@ -69,6 +74,22 @@ static const struct fib_lookup_test tests[] = { .daddr = IPV6_TBID_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS, .lookup_flags = BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_TBID, .tbid = 100, .dmac = DMAC_INIT2, }, + { .desc = "IPv4 set src addr from netdev", + .daddr = IPV4_NUD_FAILED_ADDR, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS, + .expected_src = IPV4_IFACE_ADDR, + .lookup_flags = BPF_FIB_LOOKUP_SRC | BPF_FIB_LOOKUP_SKIP_NEIGH, }, + { .desc = "IPv6 set src addr from netdev", + .daddr = IPV6_NUD_FAILED_ADDR, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS, + .expected_src = IPV6_IFACE_ADDR, + .lookup_flags = BPF_FIB_LOOKUP_SRC | BPF_FIB_LOOKUP_SKIP_NEIGH, }, + { .desc = "IPv4 set prefsrc addr from route", + .daddr = IPV4_ADDR_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS, + .expected_src = IPV4_IFACE_ADDR_SEC, + .lookup_flags = BPF_FIB_LOOKUP_SRC | BPF_FIB_LOOKUP_SKIP_NEIGH, }, + { .desc = "IPv6 set prefsrc addr route", + .daddr = IPV6_ADDR_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS, + .expected_src = IPV6_IFACE_ADDR_SEC, + .lookup_flags = BPF_FIB_LOOKUP_SRC | BPF_FIB_LOOKUP_SKIP_NEIGH, }, }; static int ifindex; @@ -97,6 +118,13 @@ static int setup_netns(void) SYS(fail, "ip neigh add %s dev veth1 nud failed", IPV4_NUD_FAILED_ADDR); SYS(fail, "ip neigh add %s dev veth1 lladdr %s nud stale", IPV4_NUD_STALE_ADDR, DMAC); + /* Setup for prefsrc IP addr selection */ + SYS(fail, "ip addr add %s/24 dev veth1", IPV4_IFACE_ADDR_SEC); + SYS(fail, "ip route add %s/32 dev veth1 src %s", IPV4_ADDR_DST, IPV4_IFACE_ADDR_SEC); + + SYS(fail, "ip addr add %s/64 dev veth1 nodad", IPV6_IFACE_ADDR_SEC); + SYS(fail, "ip route add %s/128 dev veth1 src %s", IPV6_ADDR_DST, IPV6_IFACE_ADDR_SEC); + /* Setup for tbid lookup tests */ SYS(fail, "ip addr add %s/24 dev veth2", IPV4_TBID_ADDR); SYS(fail, "ip route del %s/24 dev veth2", IPV4_TBID_NET); @@ -133,9 +161,12 @@ static int set_lookup_params(struct bpf_fib_lookup *params, const struct fib_loo if (inet_pton(AF_INET6, test->daddr, params->ipv6_dst) == 1) { params->family = AF_INET6; - ret = inet_pton(AF_INET6, IPV6_IFACE_ADDR, params->ipv6_src); - if (!ASSERT_EQ(ret, 1, "inet_pton(IPV6_IFACE_ADDR)")) - return -1; + if (!(test->lookup_flags & BPF_FIB_LOOKUP_SRC)) { + ret = inet_pton(AF_INET6, IPV6_IFACE_ADDR, params->ipv6_src); + if (!ASSERT_EQ(ret, 1, "inet_pton(IPV6_IFACE_ADDR)")) + return -1; + } + return 0; } @@ -143,9 +174,12 @@ static int set_lookup_params(struct bpf_fib_lookup *params, const struct fib_loo if (!ASSERT_EQ(ret, 1, "convert IP[46] address")) return -1; params->family = AF_INET; - ret = inet_pton(AF_INET, IPV4_IFACE_ADDR, ¶ms->ipv4_src); - if (!ASSERT_EQ(ret, 1, "inet_pton(IPV4_IFACE_ADDR)")) - return -1; + + if (!(test->lookup_flags & BPF_FIB_LOOKUP_SRC)) { + ret = inet_pton(AF_INET, IPV4_IFACE_ADDR, ¶ms->ipv4_src); + if (!ASSERT_EQ(ret, 1, "inet_pton(IPV4_IFACE_ADDR)")) + return -1; + } return 0; } @@ -156,6 +190,40 @@ static void mac_str(char *b, const __u8 *mac) mac[0], mac[1], mac[2], mac[3], mac[4], mac[5]); } +static void assert_src_ip(struct bpf_fib_lookup *fib_params, const char *expected_src) +{ + int ret; + __u32 src6[4]; + __be32 src4; + + switch (fib_params->family) { + case AF_INET6: + ret = inet_pton(AF_INET6, expected_src, src6); + ASSERT_EQ(ret, 1, "inet_pton(expected_src)"); + + ret = memcmp(src6, fib_params->ipv6_src, sizeof(fib_params->ipv6_src)); + if (!ASSERT_EQ(ret, 0, "fib_lookup ipv6 src")) { + char str_src6[64]; + + inet_ntop(AF_INET6, fib_params->ipv6_src, str_src6, + sizeof(str_src6)); + printf("ipv6 expected %s actual %s ", expected_src, + str_src6); + } + + break; + case AF_INET: + ret = inet_pton(AF_INET, expected_src, &src4); + ASSERT_EQ(ret, 1, "inet_pton(expected_src)"); + + ASSERT_EQ(fib_params->ipv4_src, src4, "fib_lookup ipv4 src"); + + break; + default: + PRINT_FAIL("invalid addr family: %d", fib_params->family); + } +} + void test_fib_lookup(void) { struct bpf_fib_lookup *fib_params; @@ -207,6 +275,9 @@ void test_fib_lookup(void) ASSERT_EQ(skel->bss->fib_lookup_ret, tests[i].expected_ret, "fib_lookup_ret"); + if (tests[i].expected_src) + assert_src_ip(fib_params, tests[i].expected_src); + ret = memcmp(tests[i].dmac, fib_params->dmac, sizeof(tests[i].dmac)); if (!ASSERT_EQ(ret, 0, "dmac not match")) { char expected[18], actual[18]; diff --git a/tools/testing/selftests/bpf/prog_tests/iters.c b/tools/testing/selftests/bpf/prog_tests/iters.c index 10804ae5ae97..b696873c5455 100644 --- a/tools/testing/selftests/bpf/prog_tests/iters.c +++ b/tools/testing/selftests/bpf/prog_tests/iters.c @@ -8,6 +8,7 @@ #include "iters_looping.skel.h" #include "iters_num.skel.h" #include "iters_testmod_seq.skel.h" +#include "iters_task_vma.skel.h" static void subtest_num_iters(void) { @@ -90,6 +91,61 @@ cleanup: iters_testmod_seq__destroy(skel); } +static void subtest_task_vma_iters(void) +{ + unsigned long start, end, bpf_iter_start, bpf_iter_end; + struct iters_task_vma *skel; + char rest_of_line[1000]; + unsigned int seen; + FILE *f = NULL; + int err; + + skel = iters_task_vma__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_open_and_load")) + return; + + skel->bss->target_pid = getpid(); + + err = iters_task_vma__attach(skel); + if (!ASSERT_OK(err, "skel_attach")) + goto cleanup; + + getpgid(skel->bss->target_pid); + iters_task_vma__detach(skel); + + if (!ASSERT_GT(skel->bss->vmas_seen, 0, "vmas_seen_gt_zero")) + goto cleanup; + + f = fopen("/proc/self/maps", "r"); + if (!ASSERT_OK_PTR(f, "proc_maps_fopen")) + goto cleanup; + + seen = 0; + while (fscanf(f, "%lx-%lx %[^\n]\n", &start, &end, rest_of_line) == 3) { + /* [vsyscall] vma isn't _really_ part of task->mm vmas. + * /proc/PID/maps returns it when out of vmas - see get_gate_vma + * calls in fs/proc/task_mmu.c + */ + if (strstr(rest_of_line, "[vsyscall]")) + continue; + + bpf_iter_start = skel->bss->vm_ranges[seen].vm_start; + bpf_iter_end = skel->bss->vm_ranges[seen].vm_end; + + ASSERT_EQ(bpf_iter_start, start, "vma->vm_start match"); + ASSERT_EQ(bpf_iter_end, end, "vma->vm_end match"); + seen++; + } + + if (!ASSERT_EQ(skel->bss->vmas_seen, seen, "vmas_seen_eq")) + goto cleanup; + +cleanup: + if (f) + fclose(f); + iters_task_vma__destroy(skel); +} + void test_iters(void) { RUN_TESTS(iters_state_safety); @@ -103,4 +159,6 @@ void test_iters(void) subtest_num_iters(); if (test__start_subtest("testmod_seq")) subtest_testmod_seq_iters(); + if (test__start_subtest("task_vma")) + subtest_task_vma_iters(); } diff --git a/tools/testing/selftests/bpf/prog_tests/linked_list.c b/tools/testing/selftests/bpf/prog_tests/linked_list.c index db3bf6bbe01a..69dc31383b78 100644 --- a/tools/testing/selftests/bpf/prog_tests/linked_list.c +++ b/tools/testing/selftests/bpf/prog_tests/linked_list.c @@ -268,7 +268,7 @@ end: static void list_and_rb_node_same_struct(bool refcount_field) { - int bpf_rb_node_btf_id, bpf_refcount_btf_id, foo_btf_id; + int bpf_rb_node_btf_id, bpf_refcount_btf_id = 0, foo_btf_id; struct btf *btf; int id, err; diff --git a/tools/testing/selftests/bpf/prog_tests/lwt_helpers.h b/tools/testing/selftests/bpf/prog_tests/lwt_helpers.h index 61333f2a03f9..e9190574e79f 100644 --- a/tools/testing/selftests/bpf/prog_tests/lwt_helpers.h +++ b/tools/testing/selftests/bpf/prog_tests/lwt_helpers.h @@ -49,7 +49,8 @@ static int open_tuntap(const char *dev_name, bool need_mac) return -1; ifr.ifr_flags = IFF_NO_PI | (need_mac ? IFF_TAP : IFF_TUN); - memcpy(ifr.ifr_name, dev_name, IFNAMSIZ); + strncpy(ifr.ifr_name, dev_name, IFNAMSIZ - 1); + ifr.ifr_name[IFNAMSIZ - 1] = '\0'; err = ioctl(fd, TUNSETIFF, &ifr); if (!ASSERT_OK(err, "ioctl(TUNSETIFF)")) { diff --git a/tools/testing/selftests/bpf/prog_tests/missed.c b/tools/testing/selftests/bpf/prog_tests/missed.c new file mode 100644 index 000000000000..70d90c43537c --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/missed.c @@ -0,0 +1,138 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <test_progs.h> +#include "missed_kprobe.skel.h" +#include "missed_kprobe_recursion.skel.h" +#include "missed_tp_recursion.skel.h" + +/* + * Putting kprobe on bpf_fentry_test1 that calls bpf_kfunc_common_test + * kfunc, which has also kprobe on. The latter won't get triggered due + * to kprobe recursion check and kprobe missed counter is incremented. + */ +static void test_missed_perf_kprobe(void) +{ + LIBBPF_OPTS(bpf_test_run_opts, topts); + struct bpf_link_info info = {}; + struct missed_kprobe *skel; + __u32 len = sizeof(info); + int err, prog_fd; + + skel = missed_kprobe__open_and_load(); + if (!ASSERT_OK_PTR(skel, "missed_kprobe__open_and_load")) + goto cleanup; + + err = missed_kprobe__attach(skel); + if (!ASSERT_OK(err, "missed_kprobe__attach")) + goto cleanup; + + prog_fd = bpf_program__fd(skel->progs.trigger); + err = bpf_prog_test_run_opts(prog_fd, &topts); + ASSERT_OK(err, "test_run"); + ASSERT_EQ(topts.retval, 0, "test_run"); + + err = bpf_link_get_info_by_fd(bpf_link__fd(skel->links.test2), &info, &len); + if (!ASSERT_OK(err, "bpf_link_get_info_by_fd")) + goto cleanup; + + ASSERT_EQ(info.type, BPF_LINK_TYPE_PERF_EVENT, "info.type"); + ASSERT_EQ(info.perf_event.type, BPF_PERF_EVENT_KPROBE, "info.perf_event.type"); + ASSERT_EQ(info.perf_event.kprobe.missed, 1, "info.perf_event.kprobe.missed"); + +cleanup: + missed_kprobe__destroy(skel); +} + +static __u64 get_missed_count(int fd) +{ + struct bpf_prog_info info = {}; + __u32 len = sizeof(info); + int err; + + err = bpf_prog_get_info_by_fd(fd, &info, &len); + if (!ASSERT_OK(err, "bpf_prog_get_info_by_fd")) + return (__u64) -1; + return info.recursion_misses; +} + +/* + * Putting kprobe.multi on bpf_fentry_test1 that calls bpf_kfunc_common_test + * kfunc which has 3 perf event kprobes and 1 kprobe.multi attached. + * + * Because fprobe (kprobe.multi attach layear) does not have strict recursion + * check the kprobe's bpf_prog_active check is hit for test2-5. + */ +static void test_missed_kprobe_recursion(void) +{ + LIBBPF_OPTS(bpf_test_run_opts, topts); + struct missed_kprobe_recursion *skel; + int err, prog_fd; + + skel = missed_kprobe_recursion__open_and_load(); + if (!ASSERT_OK_PTR(skel, "missed_kprobe_recursion__open_and_load")) + goto cleanup; + + err = missed_kprobe_recursion__attach(skel); + if (!ASSERT_OK(err, "missed_kprobe_recursion__attach")) + goto cleanup; + + prog_fd = bpf_program__fd(skel->progs.trigger); + err = bpf_prog_test_run_opts(prog_fd, &topts); + ASSERT_OK(err, "test_run"); + ASSERT_EQ(topts.retval, 0, "test_run"); + + ASSERT_EQ(get_missed_count(bpf_program__fd(skel->progs.test1)), 0, "test1_recursion_misses"); + ASSERT_GE(get_missed_count(bpf_program__fd(skel->progs.test2)), 1, "test2_recursion_misses"); + ASSERT_GE(get_missed_count(bpf_program__fd(skel->progs.test3)), 1, "test3_recursion_misses"); + ASSERT_GE(get_missed_count(bpf_program__fd(skel->progs.test4)), 1, "test4_recursion_misses"); + ASSERT_GE(get_missed_count(bpf_program__fd(skel->progs.test5)), 1, "test5_recursion_misses"); + +cleanup: + missed_kprobe_recursion__destroy(skel); +} + +/* + * Putting kprobe on bpf_fentry_test1 that calls bpf_printk and invokes + * bpf_trace_printk tracepoint. The bpf_trace_printk tracepoint has test[234] + * programs attached to it. + * + * Because kprobe execution goes through bpf_prog_active check, programs + * attached to the tracepoint will fail the recursion check and increment + * the recursion_misses stats. + */ +static void test_missed_tp_recursion(void) +{ + LIBBPF_OPTS(bpf_test_run_opts, topts); + struct missed_tp_recursion *skel; + int err, prog_fd; + + skel = missed_tp_recursion__open_and_load(); + if (!ASSERT_OK_PTR(skel, "missed_tp_recursion__open_and_load")) + goto cleanup; + + err = missed_tp_recursion__attach(skel); + if (!ASSERT_OK(err, "missed_tp_recursion__attach")) + goto cleanup; + + prog_fd = bpf_program__fd(skel->progs.trigger); + err = bpf_prog_test_run_opts(prog_fd, &topts); + ASSERT_OK(err, "test_run"); + ASSERT_EQ(topts.retval, 0, "test_run"); + + ASSERT_EQ(get_missed_count(bpf_program__fd(skel->progs.test1)), 0, "test1_recursion_misses"); + ASSERT_EQ(get_missed_count(bpf_program__fd(skel->progs.test2)), 1, "test2_recursion_misses"); + ASSERT_EQ(get_missed_count(bpf_program__fd(skel->progs.test3)), 1, "test3_recursion_misses"); + ASSERT_EQ(get_missed_count(bpf_program__fd(skel->progs.test4)), 1, "test4_recursion_misses"); + +cleanup: + missed_tp_recursion__destroy(skel); +} + +void test_missed(void) +{ + if (test__start_subtest("perf_kprobe")) + test_missed_perf_kprobe(); + if (test__start_subtest("kprobe_recursion")) + test_missed_kprobe_recursion(); + if (test__start_subtest("tp_recursion")) + test_missed_tp_recursion(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/percpu_alloc.c b/tools/testing/selftests/bpf/prog_tests/percpu_alloc.c index 9541e9b3a034..343da65864d6 100644 --- a/tools/testing/selftests/bpf/prog_tests/percpu_alloc.c +++ b/tools/testing/selftests/bpf/prog_tests/percpu_alloc.c @@ -19,6 +19,7 @@ static void test_array(void) bpf_program__set_autoload(skel->progs.test_array_map_3, true); bpf_program__set_autoload(skel->progs.test_array_map_4, true); + skel->bss->my_pid = getpid(); skel->rodata->nr_cpus = libbpf_num_possible_cpus(); err = percpu_alloc_array__load(skel); @@ -51,6 +52,7 @@ static void test_array_sleepable(void) bpf_program__set_autoload(skel->progs.test_array_map_10, true); + skel->bss->my_pid = getpid(); skel->rodata->nr_cpus = libbpf_num_possible_cpus(); err = percpu_alloc_array__load(skel); @@ -85,6 +87,7 @@ static void test_cgrp_local_storage(void) if (!ASSERT_OK_PTR(skel, "percpu_alloc_cgrp_local_storage__open")) goto close_fd; + skel->bss->my_pid = getpid(); skel->rodata->nr_cpus = libbpf_num_possible_cpus(); err = percpu_alloc_cgrp_local_storage__load(skel); diff --git a/tools/testing/selftests/bpf/prog_tests/queue_stack_map.c b/tools/testing/selftests/bpf/prog_tests/queue_stack_map.c index 722c5f2a7776..a043af9cd6d9 100644 --- a/tools/testing/selftests/bpf/prog_tests/queue_stack_map.c +++ b/tools/testing/selftests/bpf/prog_tests/queue_stack_map.c @@ -14,7 +14,7 @@ static void test_queue_stack_map_by_type(int type) int i, err, prog_fd, map_in_fd, map_out_fd; char file[32], buf[128]; struct bpf_object *obj; - struct iphdr iph; + struct iphdr iph = {}; LIBBPF_OPTS(bpf_test_run_opts, topts, .data_in = &pkt_v4, .data_size_in = sizeof(pkt_v4), diff --git a/tools/testing/selftests/bpf/prog_tests/ringbuf.c b/tools/testing/selftests/bpf/prog_tests/ringbuf.c index ac104dc652e3..48c5695b7abf 100644 --- a/tools/testing/selftests/bpf/prog_tests/ringbuf.c +++ b/tools/testing/selftests/bpf/prog_tests/ringbuf.c @@ -91,6 +91,9 @@ static void ringbuf_subtest(void) int err, cnt, rb_fd; int page_size = getpagesize(); void *mmap_ptr, *tmp_ptr; + struct ring *ring; + int map_fd; + unsigned long avail_data, ring_size, cons_pos, prod_pos; skel = test_ringbuf_lskel__open(); if (CHECK(!skel, "skel_open", "skeleton open failed\n")) @@ -162,6 +165,13 @@ static void ringbuf_subtest(void) trigger_samples(); + ring = ring_buffer__ring(ringbuf, 0); + if (!ASSERT_OK_PTR(ring, "ring_buffer__ring_idx_0")) + goto cleanup; + + map_fd = ring__map_fd(ring); + ASSERT_EQ(map_fd, skel->maps.ringbuf.map_fd, "ring_map_fd"); + /* 2 submitted + 1 discarded records */ CHECK(skel->bss->avail_data != 3 * rec_sz, "err_avail_size", "exp %ld, got %ld\n", @@ -176,6 +186,18 @@ static void ringbuf_subtest(void) "err_prod_pos", "exp %ld, got %ld\n", 3L * rec_sz, skel->bss->prod_pos); + /* verify getting this data directly via the ring object yields the same + * results + */ + avail_data = ring__avail_data_size(ring); + ASSERT_EQ(avail_data, 3 * rec_sz, "ring_avail_size"); + ring_size = ring__size(ring); + ASSERT_EQ(ring_size, page_size, "ring_ring_size"); + cons_pos = ring__consumer_pos(ring); + ASSERT_EQ(cons_pos, 0, "ring_cons_pos"); + prod_pos = ring__producer_pos(ring); + ASSERT_EQ(prod_pos, 3 * rec_sz, "ring_prod_pos"); + /* poll for samples */ err = ring_buffer__poll(ringbuf, -1); @@ -282,6 +304,10 @@ static void ringbuf_subtest(void) err = ring_buffer__consume(ringbuf); CHECK(err < 0, "rb_consume", "failed: %d\b", err); + /* also consume using ring__consume to make sure it works the same */ + err = ring__consume(ring); + ASSERT_GE(err, 0, "ring_consume"); + /* 3 rounds, 2 samples each */ cnt = atomic_xchg(&sample_cnt, 0); CHECK(cnt != 6, "cnt", "exp %d samples, got %d\n", 6, cnt); diff --git a/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c b/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c index 1455911d9fcb..58522195081b 100644 --- a/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c +++ b/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c @@ -42,6 +42,8 @@ void test_ringbuf_multi(void) { struct test_ringbuf_multi *skel; struct ring_buffer *ringbuf = NULL; + struct ring *ring_old; + struct ring *ring; int err; int page_size = getpagesize(); int proto_fd = -1; @@ -84,11 +86,24 @@ void test_ringbuf_multi(void) if (CHECK(!ringbuf, "ringbuf_create", "failed to create ringbuf\n")) goto cleanup; + /* verify ring_buffer__ring returns expected results */ + ring = ring_buffer__ring(ringbuf, 0); + if (!ASSERT_OK_PTR(ring, "ring_buffer__ring_idx_0")) + goto cleanup; + ring_old = ring; + ring = ring_buffer__ring(ringbuf, 1); + ASSERT_ERR_PTR(ring, "ring_buffer__ring_idx_1"); + err = ring_buffer__add(ringbuf, bpf_map__fd(skel->maps.ringbuf2), process_sample, (void *)(long)2); if (CHECK(err, "ringbuf_add", "failed to add another ring\n")) goto cleanup; + /* verify adding a new ring didn't invalidate our older pointer */ + ring = ring_buffer__ring(ringbuf, 0); + if (!ASSERT_EQ(ring, ring_old, "ring_buffer__ring_again")) + goto cleanup; + err = test_ringbuf_multi__attach(skel); if (CHECK(err, "skel_attach", "skeleton attachment failed: %d\n", err)) goto cleanup; diff --git a/tools/testing/selftests/bpf/prog_tests/section_names.c b/tools/testing/selftests/bpf/prog_tests/section_names.c index 8b571890c57e..c3d78846f31a 100644 --- a/tools/testing/selftests/bpf/prog_tests/section_names.c +++ b/tools/testing/selftests/bpf/prog_tests/section_names.c @@ -124,6 +124,11 @@ static struct sec_name_test tests[] = { {0, BPF_CGROUP_INET6_CONNECT}, }, { + "cgroup/connect_unix", + {0, BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_CONNECT}, + {0, BPF_CGROUP_UNIX_CONNECT}, + }, + { "cgroup/sendmsg4", {0, BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_UDP4_SENDMSG}, {0, BPF_CGROUP_UDP4_SENDMSG}, @@ -134,6 +139,11 @@ static struct sec_name_test tests[] = { {0, BPF_CGROUP_UDP6_SENDMSG}, }, { + "cgroup/sendmsg_unix", + {0, BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_SENDMSG}, + {0, BPF_CGROUP_UNIX_SENDMSG}, + }, + { "cgroup/recvmsg4", {0, BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_UDP4_RECVMSG}, {0, BPF_CGROUP_UDP4_RECVMSG}, @@ -144,6 +154,11 @@ static struct sec_name_test tests[] = { {0, BPF_CGROUP_UDP6_RECVMSG}, }, { + "cgroup/recvmsg_unix", + {0, BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_RECVMSG}, + {0, BPF_CGROUP_UNIX_RECVMSG}, + }, + { "cgroup/sysctl", {0, BPF_PROG_TYPE_CGROUP_SYSCTL, BPF_CGROUP_SYSCTL}, {0, BPF_CGROUP_SYSCTL}, @@ -158,6 +173,36 @@ static struct sec_name_test tests[] = { {0, BPF_PROG_TYPE_CGROUP_SOCKOPT, BPF_CGROUP_SETSOCKOPT}, {0, BPF_CGROUP_SETSOCKOPT}, }, + { + "cgroup/getpeername4", + {0, BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_GETPEERNAME}, + {0, BPF_CGROUP_INET4_GETPEERNAME}, + }, + { + "cgroup/getpeername6", + {0, BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_GETPEERNAME}, + {0, BPF_CGROUP_INET6_GETPEERNAME}, + }, + { + "cgroup/getpeername_unix", + {0, BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_GETPEERNAME}, + {0, BPF_CGROUP_UNIX_GETPEERNAME}, + }, + { + "cgroup/getsockname4", + {0, BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_GETSOCKNAME}, + {0, BPF_CGROUP_INET4_GETSOCKNAME}, + }, + { + "cgroup/getsockname6", + {0, BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_GETSOCKNAME}, + {0, BPF_CGROUP_INET6_GETSOCKNAME}, + }, + { + "cgroup/getsockname_unix", + {0, BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_GETSOCKNAME}, + {0, BPF_CGROUP_UNIX_GETSOCKNAME}, + }, }; static void test_prog_type_by_name(const struct sec_name_test *test) diff --git a/tools/testing/selftests/bpf/prog_tests/sock_addr.c b/tools/testing/selftests/bpf/prog_tests/sock_addr.c new file mode 100644 index 000000000000..5fd617718991 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/sock_addr.c @@ -0,0 +1,612 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <sys/un.h> + +#include "test_progs.h" + +#include "connect_unix_prog.skel.h" +#include "sendmsg_unix_prog.skel.h" +#include "recvmsg_unix_prog.skel.h" +#include "getsockname_unix_prog.skel.h" +#include "getpeername_unix_prog.skel.h" +#include "network_helpers.h" + +#define SERVUN_ADDRESS "bpf_cgroup_unix_test" +#define SERVUN_REWRITE_ADDRESS "bpf_cgroup_unix_test_rewrite" +#define SRCUN_ADDRESS "bpf_cgroup_unix_test_src" + +enum sock_addr_test_type { + SOCK_ADDR_TEST_BIND, + SOCK_ADDR_TEST_CONNECT, + SOCK_ADDR_TEST_SENDMSG, + SOCK_ADDR_TEST_RECVMSG, + SOCK_ADDR_TEST_GETSOCKNAME, + SOCK_ADDR_TEST_GETPEERNAME, +}; + +typedef void *(*load_fn)(int cgroup_fd); +typedef void (*destroy_fn)(void *skel); + +struct sock_addr_test { + enum sock_addr_test_type type; + const char *name; + /* BPF prog properties */ + load_fn loadfn; + destroy_fn destroyfn; + /* Socket properties */ + int socket_family; + int socket_type; + /* IP:port pairs for BPF prog to override */ + const char *requested_addr; + unsigned short requested_port; + const char *expected_addr; + unsigned short expected_port; + const char *expected_src_addr; +}; + +static void *connect_unix_prog_load(int cgroup_fd) +{ + struct connect_unix_prog *skel; + + skel = connect_unix_prog__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + goto cleanup; + + skel->links.connect_unix_prog = bpf_program__attach_cgroup( + skel->progs.connect_unix_prog, cgroup_fd); + if (!ASSERT_OK_PTR(skel->links.connect_unix_prog, "prog_attach")) + goto cleanup; + + return skel; +cleanup: + connect_unix_prog__destroy(skel); + return NULL; +} + +static void connect_unix_prog_destroy(void *skel) +{ + connect_unix_prog__destroy(skel); +} + +static void *sendmsg_unix_prog_load(int cgroup_fd) +{ + struct sendmsg_unix_prog *skel; + + skel = sendmsg_unix_prog__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + goto cleanup; + + skel->links.sendmsg_unix_prog = bpf_program__attach_cgroup( + skel->progs.sendmsg_unix_prog, cgroup_fd); + if (!ASSERT_OK_PTR(skel->links.sendmsg_unix_prog, "prog_attach")) + goto cleanup; + + return skel; +cleanup: + sendmsg_unix_prog__destroy(skel); + return NULL; +} + +static void sendmsg_unix_prog_destroy(void *skel) +{ + sendmsg_unix_prog__destroy(skel); +} + +static void *recvmsg_unix_prog_load(int cgroup_fd) +{ + struct recvmsg_unix_prog *skel; + + skel = recvmsg_unix_prog__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + goto cleanup; + + skel->links.recvmsg_unix_prog = bpf_program__attach_cgroup( + skel->progs.recvmsg_unix_prog, cgroup_fd); + if (!ASSERT_OK_PTR(skel->links.recvmsg_unix_prog, "prog_attach")) + goto cleanup; + + return skel; +cleanup: + recvmsg_unix_prog__destroy(skel); + return NULL; +} + +static void recvmsg_unix_prog_destroy(void *skel) +{ + recvmsg_unix_prog__destroy(skel); +} + +static void *getsockname_unix_prog_load(int cgroup_fd) +{ + struct getsockname_unix_prog *skel; + + skel = getsockname_unix_prog__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + goto cleanup; + + skel->links.getsockname_unix_prog = bpf_program__attach_cgroup( + skel->progs.getsockname_unix_prog, cgroup_fd); + if (!ASSERT_OK_PTR(skel->links.getsockname_unix_prog, "prog_attach")) + goto cleanup; + + return skel; +cleanup: + getsockname_unix_prog__destroy(skel); + return NULL; +} + +static void getsockname_unix_prog_destroy(void *skel) +{ + getsockname_unix_prog__destroy(skel); +} + +static void *getpeername_unix_prog_load(int cgroup_fd) +{ + struct getpeername_unix_prog *skel; + + skel = getpeername_unix_prog__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + goto cleanup; + + skel->links.getpeername_unix_prog = bpf_program__attach_cgroup( + skel->progs.getpeername_unix_prog, cgroup_fd); + if (!ASSERT_OK_PTR(skel->links.getpeername_unix_prog, "prog_attach")) + goto cleanup; + + return skel; +cleanup: + getpeername_unix_prog__destroy(skel); + return NULL; +} + +static void getpeername_unix_prog_destroy(void *skel) +{ + getpeername_unix_prog__destroy(skel); +} + +static struct sock_addr_test tests[] = { + { + SOCK_ADDR_TEST_CONNECT, + "connect_unix", + connect_unix_prog_load, + connect_unix_prog_destroy, + AF_UNIX, + SOCK_STREAM, + SERVUN_ADDRESS, + 0, + SERVUN_REWRITE_ADDRESS, + 0, + NULL, + }, + { + SOCK_ADDR_TEST_SENDMSG, + "sendmsg_unix", + sendmsg_unix_prog_load, + sendmsg_unix_prog_destroy, + AF_UNIX, + SOCK_DGRAM, + SERVUN_ADDRESS, + 0, + SERVUN_REWRITE_ADDRESS, + 0, + NULL, + }, + { + SOCK_ADDR_TEST_RECVMSG, + "recvmsg_unix-dgram", + recvmsg_unix_prog_load, + recvmsg_unix_prog_destroy, + AF_UNIX, + SOCK_DGRAM, + SERVUN_REWRITE_ADDRESS, + 0, + SERVUN_REWRITE_ADDRESS, + 0, + SERVUN_ADDRESS, + }, + { + SOCK_ADDR_TEST_RECVMSG, + "recvmsg_unix-stream", + recvmsg_unix_prog_load, + recvmsg_unix_prog_destroy, + AF_UNIX, + SOCK_STREAM, + SERVUN_REWRITE_ADDRESS, + 0, + SERVUN_REWRITE_ADDRESS, + 0, + SERVUN_ADDRESS, + }, + { + SOCK_ADDR_TEST_GETSOCKNAME, + "getsockname_unix", + getsockname_unix_prog_load, + getsockname_unix_prog_destroy, + AF_UNIX, + SOCK_STREAM, + SERVUN_ADDRESS, + 0, + SERVUN_REWRITE_ADDRESS, + 0, + NULL, + }, + { + SOCK_ADDR_TEST_GETPEERNAME, + "getpeername_unix", + getpeername_unix_prog_load, + getpeername_unix_prog_destroy, + AF_UNIX, + SOCK_STREAM, + SERVUN_ADDRESS, + 0, + SERVUN_REWRITE_ADDRESS, + 0, + NULL, + }, +}; + +typedef int (*info_fn)(int, struct sockaddr *, socklen_t *); + +static int cmp_addr(const struct sockaddr_storage *addr1, socklen_t addr1_len, + const struct sockaddr_storage *addr2, socklen_t addr2_len, + bool cmp_port) +{ + const struct sockaddr_in *four1, *four2; + const struct sockaddr_in6 *six1, *six2; + const struct sockaddr_un *un1, *un2; + + if (addr1->ss_family != addr2->ss_family) + return -1; + + if (addr1_len != addr2_len) + return -1; + + if (addr1->ss_family == AF_INET) { + four1 = (const struct sockaddr_in *)addr1; + four2 = (const struct sockaddr_in *)addr2; + return !((four1->sin_port == four2->sin_port || !cmp_port) && + four1->sin_addr.s_addr == four2->sin_addr.s_addr); + } else if (addr1->ss_family == AF_INET6) { + six1 = (const struct sockaddr_in6 *)addr1; + six2 = (const struct sockaddr_in6 *)addr2; + return !((six1->sin6_port == six2->sin6_port || !cmp_port) && + !memcmp(&six1->sin6_addr, &six2->sin6_addr, + sizeof(struct in6_addr))); + } else if (addr1->ss_family == AF_UNIX) { + un1 = (const struct sockaddr_un *)addr1; + un2 = (const struct sockaddr_un *)addr2; + return memcmp(un1, un2, addr1_len); + } + + return -1; +} + +static int cmp_sock_addr(info_fn fn, int sock1, + const struct sockaddr_storage *addr2, + socklen_t addr2_len, bool cmp_port) +{ + struct sockaddr_storage addr1; + socklen_t len1 = sizeof(addr1); + + memset(&addr1, 0, len1); + if (fn(sock1, (struct sockaddr *)&addr1, (socklen_t *)&len1) != 0) + return -1; + + return cmp_addr(&addr1, len1, addr2, addr2_len, cmp_port); +} + +static int cmp_local_addr(int sock1, const struct sockaddr_storage *addr2, + socklen_t addr2_len, bool cmp_port) +{ + return cmp_sock_addr(getsockname, sock1, addr2, addr2_len, cmp_port); +} + +static int cmp_peer_addr(int sock1, const struct sockaddr_storage *addr2, + socklen_t addr2_len, bool cmp_port) +{ + return cmp_sock_addr(getpeername, sock1, addr2, addr2_len, cmp_port); +} + +static void test_bind(struct sock_addr_test *test) +{ + struct sockaddr_storage expected_addr; + socklen_t expected_addr_len = sizeof(struct sockaddr_storage); + int serv = -1, client = -1, err; + + serv = start_server(test->socket_family, test->socket_type, + test->requested_addr, test->requested_port, 0); + if (!ASSERT_GE(serv, 0, "start_server")) + goto cleanup; + + err = make_sockaddr(test->socket_family, + test->expected_addr, test->expected_port, + &expected_addr, &expected_addr_len); + if (!ASSERT_EQ(err, 0, "make_sockaddr")) + goto cleanup; + + err = cmp_local_addr(serv, &expected_addr, expected_addr_len, true); + if (!ASSERT_EQ(err, 0, "cmp_local_addr")) + goto cleanup; + + /* Try to connect to server just in case */ + client = connect_to_addr(&expected_addr, expected_addr_len, test->socket_type); + if (!ASSERT_GE(client, 0, "connect_to_addr")) + goto cleanup; + +cleanup: + if (client != -1) + close(client); + if (serv != -1) + close(serv); +} + +static void test_connect(struct sock_addr_test *test) +{ + struct sockaddr_storage addr, expected_addr, expected_src_addr; + socklen_t addr_len = sizeof(struct sockaddr_storage), + expected_addr_len = sizeof(struct sockaddr_storage), + expected_src_addr_len = sizeof(struct sockaddr_storage); + int serv = -1, client = -1, err; + + serv = start_server(test->socket_family, test->socket_type, + test->expected_addr, test->expected_port, 0); + if (!ASSERT_GE(serv, 0, "start_server")) + goto cleanup; + + err = make_sockaddr(test->socket_family, test->requested_addr, test->requested_port, + &addr, &addr_len); + if (!ASSERT_EQ(err, 0, "make_sockaddr")) + goto cleanup; + + client = connect_to_addr(&addr, addr_len, test->socket_type); + if (!ASSERT_GE(client, 0, "connect_to_addr")) + goto cleanup; + + err = make_sockaddr(test->socket_family, test->expected_addr, test->expected_port, + &expected_addr, &expected_addr_len); + if (!ASSERT_EQ(err, 0, "make_sockaddr")) + goto cleanup; + + if (test->expected_src_addr) { + err = make_sockaddr(test->socket_family, test->expected_src_addr, 0, + &expected_src_addr, &expected_src_addr_len); + if (!ASSERT_EQ(err, 0, "make_sockaddr")) + goto cleanup; + } + + err = cmp_peer_addr(client, &expected_addr, expected_addr_len, true); + if (!ASSERT_EQ(err, 0, "cmp_peer_addr")) + goto cleanup; + + if (test->expected_src_addr) { + err = cmp_local_addr(client, &expected_src_addr, expected_src_addr_len, false); + if (!ASSERT_EQ(err, 0, "cmp_local_addr")) + goto cleanup; + } +cleanup: + if (client != -1) + close(client); + if (serv != -1) + close(serv); +} + +static void test_xmsg(struct sock_addr_test *test) +{ + struct sockaddr_storage addr, src_addr; + socklen_t addr_len = sizeof(struct sockaddr_storage), + src_addr_len = sizeof(struct sockaddr_storage); + struct msghdr hdr; + struct iovec iov; + char data = 'a'; + int serv = -1, client = -1, err; + + /* Unlike the other tests, here we test that we can rewrite the src addr + * with a recvmsg() hook. + */ + + serv = start_server(test->socket_family, test->socket_type, + test->expected_addr, test->expected_port, 0); + if (!ASSERT_GE(serv, 0, "start_server")) + goto cleanup; + + client = socket(test->socket_family, test->socket_type, 0); + if (!ASSERT_GE(client, 0, "socket")) + goto cleanup; + + /* AF_UNIX sockets have to be bound to something to trigger the recvmsg bpf program. */ + if (test->socket_family == AF_UNIX) { + err = make_sockaddr(AF_UNIX, SRCUN_ADDRESS, 0, &src_addr, &src_addr_len); + if (!ASSERT_EQ(err, 0, "make_sockaddr")) + goto cleanup; + + err = bind(client, (const struct sockaddr *) &src_addr, src_addr_len); + if (!ASSERT_OK(err, "bind")) + goto cleanup; + } + + err = make_sockaddr(test->socket_family, test->requested_addr, test->requested_port, + &addr, &addr_len); + if (!ASSERT_EQ(err, 0, "make_sockaddr")) + goto cleanup; + + if (test->socket_type == SOCK_DGRAM) { + memset(&iov, 0, sizeof(iov)); + iov.iov_base = &data; + iov.iov_len = sizeof(data); + + memset(&hdr, 0, sizeof(hdr)); + hdr.msg_name = (void *)&addr; + hdr.msg_namelen = addr_len; + hdr.msg_iov = &iov; + hdr.msg_iovlen = 1; + + err = sendmsg(client, &hdr, 0); + if (!ASSERT_EQ(err, sizeof(data), "sendmsg")) + goto cleanup; + } else { + /* Testing with connection-oriented sockets is only valid for + * recvmsg() tests. + */ + if (!ASSERT_EQ(test->type, SOCK_ADDR_TEST_RECVMSG, "recvmsg")) + goto cleanup; + + err = connect(client, (const struct sockaddr *)&addr, addr_len); + if (!ASSERT_OK(err, "connect")) + goto cleanup; + + err = send(client, &data, sizeof(data), 0); + if (!ASSERT_EQ(err, sizeof(data), "send")) + goto cleanup; + + err = listen(serv, 0); + if (!ASSERT_OK(err, "listen")) + goto cleanup; + + err = accept(serv, NULL, NULL); + if (!ASSERT_GE(err, 0, "accept")) + goto cleanup; + + close(serv); + serv = err; + } + + addr_len = src_addr_len = sizeof(struct sockaddr_storage); + + err = recvfrom(serv, &data, sizeof(data), 0, (struct sockaddr *) &src_addr, &src_addr_len); + if (!ASSERT_EQ(err, sizeof(data), "recvfrom")) + goto cleanup; + + ASSERT_EQ(data, 'a', "data mismatch"); + + if (test->expected_src_addr) { + err = make_sockaddr(test->socket_family, test->expected_src_addr, 0, + &addr, &addr_len); + if (!ASSERT_EQ(err, 0, "make_sockaddr")) + goto cleanup; + + err = cmp_addr(&src_addr, src_addr_len, &addr, addr_len, false); + if (!ASSERT_EQ(err, 0, "cmp_addr")) + goto cleanup; + } + +cleanup: + if (client != -1) + close(client); + if (serv != -1) + close(serv); +} + +static void test_getsockname(struct sock_addr_test *test) +{ + struct sockaddr_storage expected_addr; + socklen_t expected_addr_len = sizeof(struct sockaddr_storage); + int serv = -1, err; + + serv = start_server(test->socket_family, test->socket_type, + test->requested_addr, test->requested_port, 0); + if (!ASSERT_GE(serv, 0, "start_server")) + goto cleanup; + + err = make_sockaddr(test->socket_family, + test->expected_addr, test->expected_port, + &expected_addr, &expected_addr_len); + if (!ASSERT_EQ(err, 0, "make_sockaddr")) + goto cleanup; + + err = cmp_local_addr(serv, &expected_addr, expected_addr_len, true); + if (!ASSERT_EQ(err, 0, "cmp_local_addr")) + goto cleanup; + +cleanup: + if (serv != -1) + close(serv); +} + +static void test_getpeername(struct sock_addr_test *test) +{ + struct sockaddr_storage addr, expected_addr; + socklen_t addr_len = sizeof(struct sockaddr_storage), + expected_addr_len = sizeof(struct sockaddr_storage); + int serv = -1, client = -1, err; + + serv = start_server(test->socket_family, test->socket_type, + test->requested_addr, test->requested_port, 0); + if (!ASSERT_GE(serv, 0, "start_server")) + goto cleanup; + + err = make_sockaddr(test->socket_family, test->requested_addr, test->requested_port, + &addr, &addr_len); + if (!ASSERT_EQ(err, 0, "make_sockaddr")) + goto cleanup; + + client = connect_to_addr(&addr, addr_len, test->socket_type); + if (!ASSERT_GE(client, 0, "connect_to_addr")) + goto cleanup; + + err = make_sockaddr(test->socket_family, test->expected_addr, test->expected_port, + &expected_addr, &expected_addr_len); + if (!ASSERT_EQ(err, 0, "make_sockaddr")) + goto cleanup; + + err = cmp_peer_addr(client, &expected_addr, expected_addr_len, true); + if (!ASSERT_EQ(err, 0, "cmp_peer_addr")) + goto cleanup; + +cleanup: + if (client != -1) + close(client); + if (serv != -1) + close(serv); +} + +void test_sock_addr(void) +{ + int cgroup_fd = -1; + void *skel; + + cgroup_fd = test__join_cgroup("/sock_addr"); + if (!ASSERT_GE(cgroup_fd, 0, "join_cgroup")) + goto cleanup; + + for (size_t i = 0; i < ARRAY_SIZE(tests); ++i) { + struct sock_addr_test *test = &tests[i]; + + if (!test__start_subtest(test->name)) + continue; + + skel = test->loadfn(cgroup_fd); + if (!skel) + continue; + + switch (test->type) { + /* Not exercised yet but we leave this code here for when the + * INET and INET6 sockaddr tests are migrated to this file in + * the future. + */ + case SOCK_ADDR_TEST_BIND: + test_bind(test); + break; + case SOCK_ADDR_TEST_CONNECT: + test_connect(test); + break; + case SOCK_ADDR_TEST_SENDMSG: + case SOCK_ADDR_TEST_RECVMSG: + test_xmsg(test); + break; + case SOCK_ADDR_TEST_GETSOCKNAME: + test_getsockname(test); + break; + case SOCK_ADDR_TEST_GETPEERNAME: + test_getpeername(test); + break; + default: + ASSERT_TRUE(false, "Unknown sock addr test type"); + break; + } + + test->destroyfn(skel); + } + +cleanup: + if (cgroup_fd >= 0) + close(cgroup_fd); +} diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c index dda7060e86a0..f75f84d0b3d7 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c @@ -359,7 +359,7 @@ out: static void test_sockmap_skb_verdict_shutdown(void) { struct epoll_event ev, events[MAX_EVENTS]; - int n, err, map, verdict, s, c1, p1; + int n, err, map, verdict, s, c1 = -1, p1 = -1; struct test_sockmap_pass_prog *skel; int epollfd; int zero = 0; @@ -414,9 +414,9 @@ out: static void test_sockmap_skb_verdict_fionread(bool pass_prog) { int expected, zero = 0, sent, recvd, avail; - int err, map, verdict, s, c0, c1, p0, p1; - struct test_sockmap_pass_prog *pass; - struct test_sockmap_drop_prog *drop; + int err, map, verdict, s, c0 = -1, c1 = -1, p0 = -1, p1 = -1; + struct test_sockmap_pass_prog *pass = NULL; + struct test_sockmap_drop_prog *drop = NULL; char buf[256] = "0123456789"; if (pass_prog) { diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_helpers.h b/tools/testing/selftests/bpf/prog_tests/sockmap_helpers.h index 36d829a65aa4..e880f97bc44d 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_helpers.h +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_helpers.h @@ -378,7 +378,7 @@ static inline int enable_reuseport(int s, int progfd) static inline int socket_loopback_reuseport(int family, int sotype, int progfd) { struct sockaddr_storage addr; - socklen_t len; + socklen_t len = 0; int err, s; init_addr_loopback(family, &addr, &len); diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c index 8df8cbb447f1..a934d430c20c 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c @@ -73,7 +73,7 @@ static void test_insert_bound(struct test_sockmap_listen *skel __always_unused, int family, int sotype, int mapfd) { struct sockaddr_storage addr; - socklen_t len; + socklen_t len = 0; u32 key = 0; u64 value; int err, s; @@ -871,7 +871,7 @@ static void test_msg_redir_to_listening(struct test_sockmap_listen *skel, static void redir_partial(int family, int sotype, int sock_map, int parser_map) { - int s, c0, c1, p0, p1; + int s, c0 = -1, c1 = -1, p0 = -1, p1 = -1; int err, n, key, value; char buf[] = "abc"; @@ -1336,53 +1336,59 @@ static void test_redir(struct test_sockmap_listen *skel, struct bpf_map *map, } } -static void unix_redir_to_connected(int sotype, int sock_mapfd, - int verd_mapfd, enum redir_mode mode) +static void pairs_redir_to_connected(int cli0, int peer0, int cli1, int peer1, + int sock_mapfd, int verd_mapfd, enum redir_mode mode) { const char *log_prefix = redir_mode_str(mode); - int c0, c1, p0, p1; unsigned int pass; int err, n; - int sfd[2]; u32 key; char b; zero_verdict_count(verd_mapfd); - if (socketpair(AF_UNIX, sotype | SOCK_NONBLOCK, 0, sfd)) - return; - c0 = sfd[0], p0 = sfd[1]; - - if (socketpair(AF_UNIX, sotype | SOCK_NONBLOCK, 0, sfd)) - goto close0; - c1 = sfd[0], p1 = sfd[1]; - - err = add_to_sockmap(sock_mapfd, p0, p1); + err = add_to_sockmap(sock_mapfd, peer0, peer1); if (err) - goto close; + return; - n = write(c1, "a", 1); + n = write(cli1, "a", 1); if (n < 0) FAIL_ERRNO("%s: write", log_prefix); if (n == 0) FAIL("%s: incomplete write", log_prefix); if (n < 1) - goto close; + return; key = SK_PASS; err = xbpf_map_lookup_elem(verd_mapfd, &key, &pass); if (err) - goto close; + return; if (pass != 1) FAIL("%s: want pass count 1, have %d", log_prefix, pass); - n = recv_timeout(mode == REDIR_INGRESS ? p0 : c0, &b, 1, 0, IO_TIMEOUT_SEC); + n = recv_timeout(mode == REDIR_INGRESS ? peer0 : cli0, &b, 1, 0, IO_TIMEOUT_SEC); if (n < 0) FAIL_ERRNO("%s: recv_timeout", log_prefix); if (n == 0) FAIL("%s: incomplete recv", log_prefix); +} + +static void unix_redir_to_connected(int sotype, int sock_mapfd, + int verd_mapfd, enum redir_mode mode) +{ + int c0, c1, p0, p1; + int sfd[2]; + + if (socketpair(AF_UNIX, sotype | SOCK_NONBLOCK, 0, sfd)) + return; + c0 = sfd[0], p0 = sfd[1]; + + if (socketpair(AF_UNIX, sotype | SOCK_NONBLOCK, 0, sfd)) + goto close0; + c1 = sfd[0], p1 = sfd[1]; + + pairs_redir_to_connected(c0, p0, c1, p1, sock_mapfd, verd_mapfd, mode); -close: xclose(c1); xclose(p1); close0: @@ -1661,14 +1667,8 @@ close_peer0: static void udp_redir_to_connected(int family, int sock_mapfd, int verd_mapfd, enum redir_mode mode) { - const char *log_prefix = redir_mode_str(mode); int c0, c1, p0, p1; - unsigned int pass; - int err, n; - u32 key; - char b; - - zero_verdict_count(verd_mapfd); + int err; err = inet_socketpair(family, SOCK_DGRAM, &p0, &c0); if (err) @@ -1677,32 +1677,8 @@ static void udp_redir_to_connected(int family, int sock_mapfd, int verd_mapfd, if (err) goto close_cli0; - err = add_to_sockmap(sock_mapfd, p0, p1); - if (err) - goto close_cli1; + pairs_redir_to_connected(c0, p0, c1, p1, sock_mapfd, verd_mapfd, mode); - n = write(c1, "a", 1); - if (n < 0) - FAIL_ERRNO("%s: write", log_prefix); - if (n == 0) - FAIL("%s: incomplete write", log_prefix); - if (n < 1) - goto close_cli1; - - key = SK_PASS; - err = xbpf_map_lookup_elem(verd_mapfd, &key, &pass); - if (err) - goto close_cli1; - if (pass != 1) - FAIL("%s: want pass count 1, have %d", log_prefix, pass); - - n = recv_timeout(mode == REDIR_INGRESS ? p0 : c0, &b, 1, 0, IO_TIMEOUT_SEC); - if (n < 0) - FAIL_ERRNO("%s: recv_timeout", log_prefix); - if (n == 0) - FAIL("%s: incomplete recv", log_prefix); - -close_cli1: xclose(c1); xclose(p1); close_cli0: @@ -1747,15 +1723,9 @@ static void test_udp_redir(struct test_sockmap_listen *skel, struct bpf_map *map static void inet_unix_redir_to_connected(int family, int type, int sock_mapfd, int verd_mapfd, enum redir_mode mode) { - const char *log_prefix = redir_mode_str(mode); int c0, c1, p0, p1; - unsigned int pass; - int err, n; int sfd[2]; - u32 key; - char b; - - zero_verdict_count(verd_mapfd); + int err; if (socketpair(AF_UNIX, SOCK_DGRAM | SOCK_NONBLOCK, 0, sfd)) return; @@ -1765,32 +1735,8 @@ static void inet_unix_redir_to_connected(int family, int type, int sock_mapfd, if (err) goto close; - err = add_to_sockmap(sock_mapfd, p0, p1); - if (err) - goto close_cli1; - - n = write(c1, "a", 1); - if (n < 0) - FAIL_ERRNO("%s: write", log_prefix); - if (n == 0) - FAIL("%s: incomplete write", log_prefix); - if (n < 1) - goto close_cli1; - - key = SK_PASS; - err = xbpf_map_lookup_elem(verd_mapfd, &key, &pass); - if (err) - goto close_cli1; - if (pass != 1) - FAIL("%s: want pass count 1, have %d", log_prefix, pass); - - n = recv_timeout(mode == REDIR_INGRESS ? p0 : c0, &b, 1, 0, IO_TIMEOUT_SEC); - if (n < 0) - FAIL_ERRNO("%s: recv_timeout", log_prefix); - if (n == 0) - FAIL("%s: incomplete recv", log_prefix); + pairs_redir_to_connected(c0, p0, c1, p1, sock_mapfd, verd_mapfd, mode); -close_cli1: xclose(c1); xclose(p1); close: @@ -1827,15 +1773,9 @@ static void inet_unix_skb_redir_to_connected(struct test_sockmap_listen *skel, static void unix_inet_redir_to_connected(int family, int type, int sock_mapfd, int verd_mapfd, enum redir_mode mode) { - const char *log_prefix = redir_mode_str(mode); int c0, c1, p0, p1; - unsigned int pass; - int err, n; int sfd[2]; - u32 key; - char b; - - zero_verdict_count(verd_mapfd); + int err; err = inet_socketpair(family, SOCK_DGRAM, &p0, &c0); if (err) @@ -1845,32 +1785,8 @@ static void unix_inet_redir_to_connected(int family, int type, int sock_mapfd, goto close_cli0; c1 = sfd[0], p1 = sfd[1]; - err = add_to_sockmap(sock_mapfd, p0, p1); - if (err) - goto close; - - n = write(c1, "a", 1); - if (n < 0) - FAIL_ERRNO("%s: write", log_prefix); - if (n == 0) - FAIL("%s: incomplete write", log_prefix); - if (n < 1) - goto close; - - key = SK_PASS; - err = xbpf_map_lookup_elem(verd_mapfd, &key, &pass); - if (err) - goto close; - if (pass != 1) - FAIL("%s: want pass count 1, have %d", log_prefix, pass); - - n = recv_timeout(mode == REDIR_INGRESS ? p0 : c0, &b, 1, 0, IO_TIMEOUT_SEC); - if (n < 0) - FAIL_ERRNO("%s: recv_timeout", log_prefix); - if (n == 0) - FAIL("%s: incomplete recv", log_prefix); + pairs_redir_to_connected(c0, p0, c1, p1, sock_mapfd, verd_mapfd, mode); -close: xclose(c1); xclose(p1); close_cli0: diff --git a/tools/testing/selftests/bpf/prog_tests/tc_helpers.h b/tools/testing/selftests/bpf/prog_tests/tc_helpers.h index 6c93215be8a3..67f985f7d215 100644 --- a/tools/testing/selftests/bpf/prog_tests/tc_helpers.h +++ b/tools/testing/selftests/bpf/prog_tests/tc_helpers.h @@ -45,7 +45,7 @@ static inline __u32 ifindex_from_link_fd(int fd) return link_info.tcx.ifindex; } -static inline void __assert_mprog_count(int target, int expected, bool miniq, int ifindex) +static inline void __assert_mprog_count(int target, int expected, int ifindex) { __u32 count = 0, attach_flags = 0; int err; @@ -53,20 +53,22 @@ static inline void __assert_mprog_count(int target, int expected, bool miniq, in err = bpf_prog_query(ifindex, target, 0, &attach_flags, NULL, &count); ASSERT_EQ(count, expected, "count"); - if (!expected && !miniq) - ASSERT_EQ(err, -ENOENT, "prog_query"); - else - ASSERT_EQ(err, 0, "prog_query"); + ASSERT_EQ(err, 0, "prog_query"); } static inline void assert_mprog_count(int target, int expected) { - __assert_mprog_count(target, expected, false, loopback); + __assert_mprog_count(target, expected, loopback); } static inline void assert_mprog_count_ifindex(int ifindex, int target, int expected) { - __assert_mprog_count(target, expected, false, ifindex); + __assert_mprog_count(target, expected, ifindex); +} + +static inline void tc_skel_reset_all_seen(struct test_tc_link *skel) +{ + memset(skel->bss, 0, sizeof(*skel->bss)); } #endif /* TC_HELPERS */ diff --git a/tools/testing/selftests/bpf/prog_tests/tc_links.c b/tools/testing/selftests/bpf/prog_tests/tc_links.c index 74fc1fe9ee26..bc9841144685 100644 --- a/tools/testing/selftests/bpf/prog_tests/tc_links.c +++ b/tools/testing/selftests/bpf/prog_tests/tc_links.c @@ -65,6 +65,7 @@ void serial_test_tc_links_basic(void) ASSERT_EQ(optq.prog_ids[1], 0, "prog_ids[1]"); ASSERT_EQ(optq.link_ids[1], 0, "link_ids[1]"); + tc_skel_reset_all_seen(skel); ASSERT_OK(system(ping_cmd), ping_cmd); ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); @@ -97,6 +98,7 @@ void serial_test_tc_links_basic(void) ASSERT_EQ(optq.prog_ids[1], 0, "prog_ids[1]"); ASSERT_EQ(optq.link_ids[1], 0, "link_ids[1]"); + tc_skel_reset_all_seen(skel); ASSERT_OK(system(ping_cmd), ping_cmd); ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); @@ -187,6 +189,7 @@ static void test_tc_links_before_target(int target) ASSERT_EQ(optq.prog_ids[2], 0, "prog_ids[2]"); ASSERT_EQ(optq.link_ids[2], 0, "link_ids[2]"); + tc_skel_reset_all_seen(skel); ASSERT_OK(system(ping_cmd), ping_cmd); ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); @@ -194,9 +197,6 @@ static void test_tc_links_before_target(int target) ASSERT_EQ(skel->bss->seen_tc3, false, "seen_tc3"); ASSERT_EQ(skel->bss->seen_tc4, false, "seen_tc4"); - skel->bss->seen_tc1 = false; - skel->bss->seen_tc2 = false; - LIBBPF_OPTS_RESET(optl, .flags = BPF_F_BEFORE, .relative_fd = bpf_program__fd(skel->progs.tc2), @@ -246,6 +246,7 @@ static void test_tc_links_before_target(int target) ASSERT_EQ(optq.prog_ids[4], 0, "prog_ids[4]"); ASSERT_EQ(optq.link_ids[4], 0, "link_ids[4]"); + tc_skel_reset_all_seen(skel); ASSERT_OK(system(ping_cmd), ping_cmd); ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); @@ -342,6 +343,7 @@ static void test_tc_links_after_target(int target) ASSERT_EQ(optq.prog_ids[2], 0, "prog_ids[2]"); ASSERT_EQ(optq.link_ids[2], 0, "link_ids[2]"); + tc_skel_reset_all_seen(skel); ASSERT_OK(system(ping_cmd), ping_cmd); ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); @@ -349,9 +351,6 @@ static void test_tc_links_after_target(int target) ASSERT_EQ(skel->bss->seen_tc3, false, "seen_tc3"); ASSERT_EQ(skel->bss->seen_tc4, false, "seen_tc4"); - skel->bss->seen_tc1 = false; - skel->bss->seen_tc2 = false; - LIBBPF_OPTS_RESET(optl, .flags = BPF_F_AFTER, .relative_fd = bpf_program__fd(skel->progs.tc1), @@ -401,6 +400,7 @@ static void test_tc_links_after_target(int target) ASSERT_EQ(optq.prog_ids[4], 0, "prog_ids[4]"); ASSERT_EQ(optq.link_ids[4], 0, "link_ids[4]"); + tc_skel_reset_all_seen(skel); ASSERT_OK(system(ping_cmd), ping_cmd); ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); @@ -502,6 +502,7 @@ static void test_tc_links_revision_target(int target) ASSERT_EQ(optq.prog_ids[2], 0, "prog_ids[2]"); ASSERT_EQ(optq.link_ids[2], 0, "prog_ids[2]"); + tc_skel_reset_all_seen(skel); ASSERT_OK(system(ping_cmd), ping_cmd); ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); @@ -581,22 +582,20 @@ static void test_tc_chain_classic(int target, bool chain_tc_old) assert_mprog_count(target, 2); + tc_skel_reset_all_seen(skel); ASSERT_OK(system(ping_cmd), ping_cmd); ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); ASSERT_EQ(skel->bss->seen_tc2, true, "seen_tc2"); ASSERT_EQ(skel->bss->seen_tc3, chain_tc_old, "seen_tc3"); - skel->bss->seen_tc1 = false; - skel->bss->seen_tc2 = false; - skel->bss->seen_tc3 = false; - err = bpf_link__detach(skel->links.tc2); if (!ASSERT_OK(err, "prog_detach")) goto cleanup; assert_mprog_count(target, 1); + tc_skel_reset_all_seen(skel); ASSERT_OK(system(ping_cmd), ping_cmd); ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); @@ -707,16 +706,13 @@ static void test_tc_links_replace_target(int target) ASSERT_EQ(optq.prog_ids[2], 0, "prog_ids[2]"); ASSERT_EQ(optq.link_ids[2], 0, "link_ids[2]"); + tc_skel_reset_all_seen(skel); ASSERT_OK(system(ping_cmd), ping_cmd); ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); ASSERT_EQ(skel->bss->seen_tc2, true, "seen_tc2"); ASSERT_EQ(skel->bss->seen_tc3, false, "seen_tc3"); - skel->bss->seen_tc1 = false; - skel->bss->seen_tc2 = false; - skel->bss->seen_tc3 = false; - LIBBPF_OPTS_RESET(optl, .flags = BPF_F_REPLACE, .relative_fd = bpf_program__fd(skel->progs.tc2), @@ -781,16 +777,13 @@ static void test_tc_links_replace_target(int target) ASSERT_EQ(optq.prog_ids[2], 0, "prog_ids[2]"); ASSERT_EQ(optq.link_ids[2], 0, "link_ids[2]"); + tc_skel_reset_all_seen(skel); ASSERT_OK(system(ping_cmd), ping_cmd); ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); ASSERT_EQ(skel->bss->seen_tc2, false, "seen_tc2"); ASSERT_EQ(skel->bss->seen_tc3, true, "seen_tc3"); - skel->bss->seen_tc1 = false; - skel->bss->seen_tc2 = false; - skel->bss->seen_tc3 = false; - err = bpf_link__detach(skel->links.tc2); if (!ASSERT_OK(err, "link_detach")) goto cleanup; @@ -812,16 +805,13 @@ static void test_tc_links_replace_target(int target) ASSERT_EQ(optq.prog_ids[1], 0, "prog_ids[1]"); ASSERT_EQ(optq.link_ids[1], 0, "link_ids[1]"); + tc_skel_reset_all_seen(skel); ASSERT_OK(system(ping_cmd), ping_cmd); ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); ASSERT_EQ(skel->bss->seen_tc2, false, "seen_tc2"); ASSERT_EQ(skel->bss->seen_tc3, false, "seen_tc3"); - skel->bss->seen_tc1 = false; - skel->bss->seen_tc2 = false; - skel->bss->seen_tc3 = false; - err = bpf_link__update_program(skel->links.tc1, skel->progs.tc1); if (!ASSERT_OK(err, "link_update_self")) goto cleanup; @@ -843,6 +833,7 @@ static void test_tc_links_replace_target(int target) ASSERT_EQ(optq.prog_ids[1], 0, "prog_ids[1]"); ASSERT_EQ(optq.link_ids[1], 0, "link_ids[1]"); + tc_skel_reset_all_seen(skel); ASSERT_OK(system(ping_cmd), ping_cmd); ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); @@ -1254,6 +1245,7 @@ static void test_tc_links_prepend_target(int target) ASSERT_EQ(optq.prog_ids[2], 0, "prog_ids[2]"); ASSERT_EQ(optq.link_ids[2], 0, "link_ids[2]"); + tc_skel_reset_all_seen(skel); ASSERT_OK(system(ping_cmd), ping_cmd); ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); @@ -1261,9 +1253,6 @@ static void test_tc_links_prepend_target(int target) ASSERT_EQ(skel->bss->seen_tc3, false, "seen_tc3"); ASSERT_EQ(skel->bss->seen_tc4, false, "seen_tc4"); - skel->bss->seen_tc1 = false; - skel->bss->seen_tc2 = false; - LIBBPF_OPTS_RESET(optl, .flags = BPF_F_BEFORE, ); @@ -1311,6 +1300,7 @@ static void test_tc_links_prepend_target(int target) ASSERT_EQ(optq.prog_ids[4], 0, "prog_ids[4]"); ASSERT_EQ(optq.link_ids[4], 0, "link_ids[4]"); + tc_skel_reset_all_seen(skel); ASSERT_OK(system(ping_cmd), ping_cmd); ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); @@ -1411,6 +1401,7 @@ static void test_tc_links_append_target(int target) ASSERT_EQ(optq.prog_ids[2], 0, "prog_ids[2]"); ASSERT_EQ(optq.link_ids[2], 0, "link_ids[2]"); + tc_skel_reset_all_seen(skel); ASSERT_OK(system(ping_cmd), ping_cmd); ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); @@ -1418,9 +1409,6 @@ static void test_tc_links_append_target(int target) ASSERT_EQ(skel->bss->seen_tc3, false, "seen_tc3"); ASSERT_EQ(skel->bss->seen_tc4, false, "seen_tc4"); - skel->bss->seen_tc1 = false; - skel->bss->seen_tc2 = false; - LIBBPF_OPTS_RESET(optl, .flags = BPF_F_AFTER, ); @@ -1468,6 +1456,7 @@ static void test_tc_links_append_target(int target) ASSERT_EQ(optq.prog_ids[4], 0, "prog_ids[4]"); ASSERT_EQ(optq.link_ids[4], 0, "link_ids[4]"); + tc_skel_reset_all_seen(skel); ASSERT_OK(system(ping_cmd), ping_cmd); ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); @@ -1637,38 +1626,33 @@ static void test_tc_chain_mixed(int target) assert_mprog_count(target, 1); + tc_skel_reset_all_seen(skel); ASSERT_OK(system(ping_cmd), ping_cmd); ASSERT_EQ(skel->bss->seen_tc4, false, "seen_tc4"); ASSERT_EQ(skel->bss->seen_tc5, false, "seen_tc5"); ASSERT_EQ(skel->bss->seen_tc6, true, "seen_tc6"); - skel->bss->seen_tc4 = false; - skel->bss->seen_tc5 = false; - skel->bss->seen_tc6 = false; - err = bpf_link__update_program(skel->links.tc6, skel->progs.tc4); if (!ASSERT_OK(err, "link_update")) goto cleanup; assert_mprog_count(target, 1); + tc_skel_reset_all_seen(skel); ASSERT_OK(system(ping_cmd), ping_cmd); ASSERT_EQ(skel->bss->seen_tc4, true, "seen_tc4"); ASSERT_EQ(skel->bss->seen_tc5, true, "seen_tc5"); ASSERT_EQ(skel->bss->seen_tc6, false, "seen_tc6"); - skel->bss->seen_tc4 = false; - skel->bss->seen_tc5 = false; - skel->bss->seen_tc6 = false; - err = bpf_link__detach(skel->links.tc6); if (!ASSERT_OK(err, "prog_detach")) goto cleanup; - __assert_mprog_count(target, 0, true, loopback); + assert_mprog_count(target, 0); + tc_skel_reset_all_seen(skel); ASSERT_OK(system(ping_cmd), ping_cmd); ASSERT_EQ(skel->bss->seen_tc4, false, "seen_tc4"); @@ -1758,22 +1742,20 @@ static void test_tc_links_ingress(int target, bool chain_tc_old, assert_mprog_count(target, 2); + tc_skel_reset_all_seen(skel); ASSERT_OK(system(ping_cmd), ping_cmd); ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); ASSERT_EQ(skel->bss->seen_tc2, true, "seen_tc2"); ASSERT_EQ(skel->bss->seen_tc3, chain_tc_old, "seen_tc3"); - skel->bss->seen_tc1 = false; - skel->bss->seen_tc2 = false; - skel->bss->seen_tc3 = false; - err = bpf_link__detach(skel->links.tc2); if (!ASSERT_OK(err, "prog_detach")) goto cleanup; assert_mprog_count(target, 1); + tc_skel_reset_all_seen(skel); ASSERT_OK(system(ping_cmd), ping_cmd); ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); diff --git a/tools/testing/selftests/bpf/prog_tests/tc_opts.c b/tools/testing/selftests/bpf/prog_tests/tc_opts.c index 99af79ea21a9..ca506d2fcf58 100644 --- a/tools/testing/selftests/bpf/prog_tests/tc_opts.c +++ b/tools/testing/selftests/bpf/prog_tests/tc_opts.c @@ -59,6 +59,7 @@ void serial_test_tc_opts_basic(void) ASSERT_EQ(optq.prog_ids[0], id1, "prog_ids[0]"); ASSERT_EQ(optq.prog_ids[1], 0, "prog_ids[1]"); + tc_skel_reset_all_seen(skel); ASSERT_OK(system(ping_cmd), ping_cmd); ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); @@ -83,6 +84,7 @@ void serial_test_tc_opts_basic(void) ASSERT_EQ(optq.prog_ids[0], id2, "prog_ids[0]"); ASSERT_EQ(optq.prog_ids[1], 0, "prog_ids[1]"); + tc_skel_reset_all_seen(skel); ASSERT_OK(system(ping_cmd), ping_cmd); ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); @@ -163,6 +165,7 @@ static void test_tc_opts_before_target(int target) ASSERT_EQ(optq.prog_ids[1], id2, "prog_ids[1]"); ASSERT_EQ(optq.prog_ids[2], 0, "prog_ids[2]"); + tc_skel_reset_all_seen(skel); ASSERT_OK(system(ping_cmd), ping_cmd); ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); @@ -219,6 +222,7 @@ static void test_tc_opts_before_target(int target) ASSERT_EQ(optq.prog_ids[3], id2, "prog_ids[3]"); ASSERT_EQ(optq.prog_ids[4], 0, "prog_ids[4]"); + tc_skel_reset_all_seen(skel); ASSERT_OK(system(ping_cmd), ping_cmd); ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); @@ -313,6 +317,7 @@ static void test_tc_opts_after_target(int target) ASSERT_EQ(optq.prog_ids[1], id2, "prog_ids[1]"); ASSERT_EQ(optq.prog_ids[2], 0, "prog_ids[2]"); + tc_skel_reset_all_seen(skel); ASSERT_OK(system(ping_cmd), ping_cmd); ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); @@ -369,6 +374,7 @@ static void test_tc_opts_after_target(int target) ASSERT_EQ(optq.prog_ids[3], id4, "prog_ids[3]"); ASSERT_EQ(optq.prog_ids[4], 0, "prog_ids[4]"); + tc_skel_reset_all_seen(skel); ASSERT_OK(system(ping_cmd), ping_cmd); ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); @@ -514,6 +520,7 @@ static void test_tc_opts_revision_target(int target) ASSERT_EQ(optq.prog_ids[1], id2, "prog_ids[1]"); ASSERT_EQ(optq.prog_ids[2], 0, "prog_ids[2]"); + tc_skel_reset_all_seen(skel); ASSERT_OK(system(ping_cmd), ping_cmd); ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); @@ -608,22 +615,20 @@ static void test_tc_chain_classic(int target, bool chain_tc_old) assert_mprog_count(target, 2); + tc_skel_reset_all_seen(skel); ASSERT_OK(system(ping_cmd), ping_cmd); ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); ASSERT_EQ(skel->bss->seen_tc2, true, "seen_tc2"); ASSERT_EQ(skel->bss->seen_tc3, chain_tc_old, "seen_tc3"); - skel->bss->seen_tc1 = false; - skel->bss->seen_tc2 = false; - skel->bss->seen_tc3 = false; - err = bpf_prog_detach_opts(fd2, loopback, target, &optd); if (!ASSERT_OK(err, "prog_detach")) goto cleanup_detach; assert_mprog_count(target, 1); + tc_skel_reset_all_seen(skel); ASSERT_OK(system(ping_cmd), ping_cmd); ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); @@ -635,7 +640,7 @@ cleanup_detach: if (!ASSERT_OK(err, "prog_detach")) goto cleanup; - __assert_mprog_count(target, 0, chain_tc_old, loopback); + assert_mprog_count(target, 0); cleanup: if (tc_attached) { tc_opts.flags = tc_opts.prog_fd = tc_opts.prog_id = 0; @@ -730,16 +735,13 @@ static void test_tc_opts_replace_target(int target) ASSERT_EQ(optq.prog_attach_flags[1], 0, "prog_flags[1]"); ASSERT_EQ(optq.prog_attach_flags[2], 0, "prog_flags[2]"); + tc_skel_reset_all_seen(skel); ASSERT_OK(system(ping_cmd), ping_cmd); ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); ASSERT_EQ(skel->bss->seen_tc2, true, "seen_tc2"); ASSERT_EQ(skel->bss->seen_tc3, false, "seen_tc3"); - skel->bss->seen_tc1 = false; - skel->bss->seen_tc2 = false; - skel->bss->seen_tc3 = false; - LIBBPF_OPTS_RESET(opta, .flags = BPF_F_REPLACE, .replace_prog_fd = fd2, @@ -767,16 +769,13 @@ static void test_tc_opts_replace_target(int target) ASSERT_EQ(optq.prog_ids[1], id1, "prog_ids[1]"); ASSERT_EQ(optq.prog_ids[2], 0, "prog_ids[2]"); + tc_skel_reset_all_seen(skel); ASSERT_OK(system(ping_cmd), ping_cmd); ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); ASSERT_EQ(skel->bss->seen_tc2, false, "seen_tc2"); ASSERT_EQ(skel->bss->seen_tc3, true, "seen_tc3"); - skel->bss->seen_tc1 = false; - skel->bss->seen_tc2 = false; - skel->bss->seen_tc3 = false; - LIBBPF_OPTS_RESET(opta, .flags = BPF_F_REPLACE | BPF_F_BEFORE, .replace_prog_fd = fd3, @@ -805,6 +804,7 @@ static void test_tc_opts_replace_target(int target) ASSERT_EQ(optq.prog_ids[1], id1, "prog_ids[1]"); ASSERT_EQ(optq.prog_ids[2], 0, "prog_ids[2]"); + tc_skel_reset_all_seen(skel); ASSERT_OK(system(ping_cmd), ping_cmd); ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); @@ -1084,6 +1084,7 @@ static void test_tc_opts_prepend_target(int target) ASSERT_EQ(optq.prog_ids[1], id1, "prog_ids[1]"); ASSERT_EQ(optq.prog_ids[2], 0, "prog_ids[2]"); + tc_skel_reset_all_seen(skel); ASSERT_OK(system(ping_cmd), ping_cmd); ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); @@ -1124,6 +1125,7 @@ static void test_tc_opts_prepend_target(int target) ASSERT_EQ(optq.prog_ids[3], id1, "prog_ids[3]"); ASSERT_EQ(optq.prog_ids[4], 0, "prog_ids[4]"); + tc_skel_reset_all_seen(skel); ASSERT_OK(system(ping_cmd), ping_cmd); ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); @@ -1222,6 +1224,7 @@ static void test_tc_opts_append_target(int target) ASSERT_EQ(optq.prog_ids[1], id2, "prog_ids[1]"); ASSERT_EQ(optq.prog_ids[2], 0, "prog_ids[2]"); + tc_skel_reset_all_seen(skel); ASSERT_OK(system(ping_cmd), ping_cmd); ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); @@ -1262,6 +1265,7 @@ static void test_tc_opts_append_target(int target) ASSERT_EQ(optq.prog_ids[3], id4, "prog_ids[3]"); ASSERT_EQ(optq.prog_ids[4], 0, "prog_ids[4]"); + tc_skel_reset_all_seen(skel); ASSERT_OK(system(ping_cmd), ping_cmd); ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); @@ -2250,7 +2254,7 @@ static void test_tc_opts_delete_empty(int target, bool chain_tc_old) BPF_TC_INGRESS : BPF_TC_EGRESS; err = bpf_tc_hook_create(&tc_hook); ASSERT_OK(err, "bpf_tc_hook_create"); - __assert_mprog_count(target, 0, true, loopback); + assert_mprog_count(target, 0); } err = bpf_prog_detach_opts(0, loopback, target, &optd); ASSERT_EQ(err, -ENOENT, "prog_detach"); @@ -2316,16 +2320,13 @@ static void test_tc_chain_mixed(int target) assert_mprog_count(target, 1); + tc_skel_reset_all_seen(skel); ASSERT_OK(system(ping_cmd), ping_cmd); ASSERT_EQ(skel->bss->seen_tc4, false, "seen_tc4"); ASSERT_EQ(skel->bss->seen_tc5, false, "seen_tc5"); ASSERT_EQ(skel->bss->seen_tc6, true, "seen_tc6"); - skel->bss->seen_tc4 = false; - skel->bss->seen_tc5 = false; - skel->bss->seen_tc6 = false; - LIBBPF_OPTS_RESET(opta, .flags = BPF_F_REPLACE, .replace_prog_fd = fd3, @@ -2339,21 +2340,19 @@ static void test_tc_chain_mixed(int target) assert_mprog_count(target, 1); + tc_skel_reset_all_seen(skel); ASSERT_OK(system(ping_cmd), ping_cmd); ASSERT_EQ(skel->bss->seen_tc4, true, "seen_tc4"); ASSERT_EQ(skel->bss->seen_tc5, true, "seen_tc5"); ASSERT_EQ(skel->bss->seen_tc6, false, "seen_tc6"); - skel->bss->seen_tc4 = false; - skel->bss->seen_tc5 = false; - skel->bss->seen_tc6 = false; - cleanup_opts: err = bpf_prog_detach_opts(detach_fd, loopback, target, &optd); ASSERT_OK(err, "prog_detach"); - __assert_mprog_count(target, 0, true, loopback); + assert_mprog_count(target, 0); + tc_skel_reset_all_seen(skel); ASSERT_OK(system(ping_cmd), ping_cmd); ASSERT_EQ(skel->bss->seen_tc4, false, "seen_tc4"); @@ -2462,3 +2461,229 @@ void serial_test_tc_opts_max(void) test_tc_opts_max_target(BPF_TCX_INGRESS, BPF_F_AFTER, true); test_tc_opts_max_target(BPF_TCX_EGRESS, BPF_F_AFTER, false); } + +static void test_tc_opts_query_target(int target) +{ + const size_t attr_size = offsetofend(union bpf_attr, query); + LIBBPF_OPTS(bpf_prog_attach_opts, opta); + LIBBPF_OPTS(bpf_prog_detach_opts, optd); + LIBBPF_OPTS(bpf_prog_query_opts, optq); + __u32 fd1, fd2, fd3, fd4, id1, id2, id3, id4; + struct test_tc_link *skel; + union bpf_attr attr; + __u32 prog_ids[5]; + int err; + + skel = test_tc_link__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_load")) + goto cleanup; + + fd1 = bpf_program__fd(skel->progs.tc1); + fd2 = bpf_program__fd(skel->progs.tc2); + fd3 = bpf_program__fd(skel->progs.tc3); + fd4 = bpf_program__fd(skel->progs.tc4); + + id1 = id_from_prog_fd(fd1); + id2 = id_from_prog_fd(fd2); + id3 = id_from_prog_fd(fd3); + id4 = id_from_prog_fd(fd4); + + assert_mprog_count(target, 0); + + LIBBPF_OPTS_RESET(opta, + .expected_revision = 1, + ); + + err = bpf_prog_attach_opts(fd1, loopback, target, &opta); + if (!ASSERT_EQ(err, 0, "prog_attach")) + goto cleanup; + + assert_mprog_count(target, 1); + + LIBBPF_OPTS_RESET(opta, + .expected_revision = 2, + ); + + err = bpf_prog_attach_opts(fd2, loopback, target, &opta); + if (!ASSERT_EQ(err, 0, "prog_attach")) + goto cleanup1; + + assert_mprog_count(target, 2); + + LIBBPF_OPTS_RESET(opta, + .expected_revision = 3, + ); + + err = bpf_prog_attach_opts(fd3, loopback, target, &opta); + if (!ASSERT_EQ(err, 0, "prog_attach")) + goto cleanup2; + + assert_mprog_count(target, 3); + + LIBBPF_OPTS_RESET(opta, + .expected_revision = 4, + ); + + err = bpf_prog_attach_opts(fd4, loopback, target, &opta); + if (!ASSERT_EQ(err, 0, "prog_attach")) + goto cleanup3; + + assert_mprog_count(target, 4); + + /* Test 1: Double query via libbpf API */ + err = bpf_prog_query_opts(loopback, target, &optq); + if (!ASSERT_OK(err, "prog_query")) + goto cleanup4; + + ASSERT_EQ(optq.count, 4, "count"); + ASSERT_EQ(optq.revision, 5, "revision"); + ASSERT_EQ(optq.prog_ids, NULL, "prog_ids"); + ASSERT_EQ(optq.link_ids, NULL, "link_ids"); + + memset(prog_ids, 0, sizeof(prog_ids)); + optq.prog_ids = prog_ids; + + err = bpf_prog_query_opts(loopback, target, &optq); + if (!ASSERT_OK(err, "prog_query")) + goto cleanup4; + + ASSERT_EQ(optq.count, 4, "count"); + ASSERT_EQ(optq.revision, 5, "revision"); + ASSERT_EQ(optq.prog_ids[0], id1, "prog_ids[0]"); + ASSERT_EQ(optq.prog_ids[1], id2, "prog_ids[1]"); + ASSERT_EQ(optq.prog_ids[2], id3, "prog_ids[2]"); + ASSERT_EQ(optq.prog_ids[3], id4, "prog_ids[3]"); + ASSERT_EQ(optq.prog_ids[4], 0, "prog_ids[4]"); + ASSERT_EQ(optq.link_ids, NULL, "link_ids"); + + /* Test 2: Double query via bpf_attr & bpf(2) directly */ + memset(&attr, 0, attr_size); + attr.query.target_ifindex = loopback; + attr.query.attach_type = target; + + err = syscall(__NR_bpf, BPF_PROG_QUERY, &attr, attr_size); + if (!ASSERT_OK(err, "prog_query")) + goto cleanup4; + + ASSERT_EQ(attr.query.count, 4, "count"); + ASSERT_EQ(attr.query.revision, 5, "revision"); + ASSERT_EQ(attr.query.query_flags, 0, "query_flags"); + ASSERT_EQ(attr.query.attach_flags, 0, "attach_flags"); + ASSERT_EQ(attr.query.target_ifindex, loopback, "target_ifindex"); + ASSERT_EQ(attr.query.attach_type, target, "attach_type"); + ASSERT_EQ(attr.query.prog_ids, 0, "prog_ids"); + ASSERT_EQ(attr.query.prog_attach_flags, 0, "prog_attach_flags"); + ASSERT_EQ(attr.query.link_ids, 0, "link_ids"); + ASSERT_EQ(attr.query.link_attach_flags, 0, "link_attach_flags"); + + memset(prog_ids, 0, sizeof(prog_ids)); + attr.query.prog_ids = ptr_to_u64(prog_ids); + + err = syscall(__NR_bpf, BPF_PROG_QUERY, &attr, attr_size); + if (!ASSERT_OK(err, "prog_query")) + goto cleanup4; + + ASSERT_EQ(attr.query.count, 4, "count"); + ASSERT_EQ(attr.query.revision, 5, "revision"); + ASSERT_EQ(attr.query.query_flags, 0, "query_flags"); + ASSERT_EQ(attr.query.attach_flags, 0, "attach_flags"); + ASSERT_EQ(attr.query.target_ifindex, loopback, "target_ifindex"); + ASSERT_EQ(attr.query.attach_type, target, "attach_type"); + ASSERT_EQ(attr.query.prog_ids, ptr_to_u64(prog_ids), "prog_ids"); + ASSERT_EQ(prog_ids[0], id1, "prog_ids[0]"); + ASSERT_EQ(prog_ids[1], id2, "prog_ids[1]"); + ASSERT_EQ(prog_ids[2], id3, "prog_ids[2]"); + ASSERT_EQ(prog_ids[3], id4, "prog_ids[3]"); + ASSERT_EQ(prog_ids[4], 0, "prog_ids[4]"); + ASSERT_EQ(attr.query.prog_attach_flags, 0, "prog_attach_flags"); + ASSERT_EQ(attr.query.link_ids, 0, "link_ids"); + ASSERT_EQ(attr.query.link_attach_flags, 0, "link_attach_flags"); + +cleanup4: + err = bpf_prog_detach_opts(fd4, loopback, target, &optd); + ASSERT_OK(err, "prog_detach"); + assert_mprog_count(target, 3); + +cleanup3: + err = bpf_prog_detach_opts(fd3, loopback, target, &optd); + ASSERT_OK(err, "prog_detach"); + assert_mprog_count(target, 2); + +cleanup2: + err = bpf_prog_detach_opts(fd2, loopback, target, &optd); + ASSERT_OK(err, "prog_detach"); + assert_mprog_count(target, 1); + +cleanup1: + err = bpf_prog_detach_opts(fd1, loopback, target, &optd); + ASSERT_OK(err, "prog_detach"); + assert_mprog_count(target, 0); + +cleanup: + test_tc_link__destroy(skel); +} + +void serial_test_tc_opts_query(void) +{ + test_tc_opts_query_target(BPF_TCX_INGRESS); + test_tc_opts_query_target(BPF_TCX_EGRESS); +} + +static void test_tc_opts_query_attach_target(int target) +{ + LIBBPF_OPTS(bpf_prog_attach_opts, opta); + LIBBPF_OPTS(bpf_prog_detach_opts, optd); + LIBBPF_OPTS(bpf_prog_query_opts, optq); + struct test_tc_link *skel; + __u32 prog_ids[2]; + __u32 fd1, id1; + int err; + + skel = test_tc_link__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_load")) + goto cleanup; + + fd1 = bpf_program__fd(skel->progs.tc1); + id1 = id_from_prog_fd(fd1); + + err = bpf_prog_query_opts(loopback, target, &optq); + if (!ASSERT_OK(err, "prog_query")) + goto cleanup; + + ASSERT_EQ(optq.count, 0, "count"); + ASSERT_EQ(optq.revision, 1, "revision"); + + LIBBPF_OPTS_RESET(opta, + .expected_revision = optq.revision, + ); + + err = bpf_prog_attach_opts(fd1, loopback, target, &opta); + if (!ASSERT_EQ(err, 0, "prog_attach")) + goto cleanup; + + memset(prog_ids, 0, sizeof(prog_ids)); + optq.prog_ids = prog_ids; + optq.count = ARRAY_SIZE(prog_ids); + + err = bpf_prog_query_opts(loopback, target, &optq); + if (!ASSERT_OK(err, "prog_query")) + goto cleanup1; + + ASSERT_EQ(optq.count, 1, "count"); + ASSERT_EQ(optq.revision, 2, "revision"); + ASSERT_EQ(optq.prog_ids[0], id1, "prog_ids[0]"); + ASSERT_EQ(optq.prog_ids[1], 0, "prog_ids[1]"); + +cleanup1: + err = bpf_prog_detach_opts(fd1, loopback, target, &optd); + ASSERT_OK(err, "prog_detach"); + assert_mprog_count(target, 0); +cleanup: + test_tc_link__destroy(skel); +} + +void serial_test_tc_opts_query_attach(void) +{ + test_tc_opts_query_attach_target(BPF_TCX_INGRESS); + test_tc_opts_query_attach_target(BPF_TCX_EGRESS); +} diff --git a/tools/testing/selftests/bpf/prog_tests/timer.c b/tools/testing/selftests/bpf/prog_tests/timer.c index 290c21dbe65a..760ad96b4be0 100644 --- a/tools/testing/selftests/bpf/prog_tests/timer.c +++ b/tools/testing/selftests/bpf/prog_tests/timer.c @@ -2,6 +2,7 @@ /* Copyright (c) 2021 Facebook */ #include <test_progs.h> #include "timer.skel.h" +#include "timer_failure.skel.h" static int timer(struct timer *timer_skel) { @@ -14,6 +15,7 @@ static int timer(struct timer *timer_skel) ASSERT_EQ(timer_skel->data->callback_check, 52, "callback_check1"); ASSERT_EQ(timer_skel->data->callback2_check, 52, "callback2_check1"); + ASSERT_EQ(timer_skel->bss->pinned_callback_check, 0, "pinned_callback_check1"); prog_fd = bpf_program__fd(timer_skel->progs.test1); err = bpf_prog_test_run_opts(prog_fd, &topts); @@ -32,6 +34,9 @@ static int timer(struct timer *timer_skel) /* check that timer_cb3() was executed twice */ ASSERT_EQ(timer_skel->bss->abs_data, 12, "abs_data"); + /* check that timer_cb_pinned() was executed twice */ + ASSERT_EQ(timer_skel->bss->pinned_callback_check, 2, "pinned_callback_check"); + /* check that there were no errors in timer execution */ ASSERT_EQ(timer_skel->bss->err, 0, "err"); @@ -49,10 +54,11 @@ void serial_test_timer(void) timer_skel = timer__open_and_load(); if (!ASSERT_OK_PTR(timer_skel, "timer_skel_load")) - goto cleanup; + return; err = timer(timer_skel); ASSERT_OK(err, "timer"); -cleanup: timer__destroy(timer_skel); + + RUN_TESTS(timer_failure); } diff --git a/tools/testing/selftests/bpf/prog_tests/uprobe.c b/tools/testing/selftests/bpf/prog_tests/uprobe.c new file mode 100644 index 000000000000..cf3e0e7a64fa --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/uprobe.c @@ -0,0 +1,95 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Hengqi Chen */ + +#include <test_progs.h> +#include "test_uprobe.skel.h" + +static FILE *urand_spawn(int *pid) +{ + FILE *f; + + /* urandom_read's stdout is wired into f */ + f = popen("./urandom_read 1 report-pid", "r"); + if (!f) + return NULL; + + if (fscanf(f, "%d", pid) != 1) { + pclose(f); + errno = EINVAL; + return NULL; + } + + return f; +} + +static int urand_trigger(FILE **urand_pipe) +{ + int exit_code; + + /* pclose() waits for child process to exit and returns their exit code */ + exit_code = pclose(*urand_pipe); + *urand_pipe = NULL; + + return exit_code; +} + +void test_uprobe(void) +{ + LIBBPF_OPTS(bpf_uprobe_opts, uprobe_opts); + struct test_uprobe *skel; + FILE *urand_pipe = NULL; + int urand_pid = 0, err; + + skel = test_uprobe__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + return; + + urand_pipe = urand_spawn(&urand_pid); + if (!ASSERT_OK_PTR(urand_pipe, "urand_spawn")) + goto cleanup; + + skel->bss->my_pid = urand_pid; + + /* Manual attach uprobe to urandlib_api + * There are two `urandlib_api` symbols in .dynsym section: + * - urandlib_api@LIBURANDOM_READ_1.0.0 + * - urandlib_api@@LIBURANDOM_READ_2.0.0 + * Both are global bind and would cause a conflict if user + * specify the symbol name without a version suffix + */ + uprobe_opts.func_name = "urandlib_api"; + skel->links.test4 = bpf_program__attach_uprobe_opts(skel->progs.test4, + urand_pid, + "./liburandom_read.so", + 0 /* offset */, + &uprobe_opts); + if (!ASSERT_ERR_PTR(skel->links.test4, "urandlib_api_attach_conflict")) + goto cleanup; + + uprobe_opts.func_name = "urandlib_api@LIBURANDOM_READ_1.0.0"; + skel->links.test4 = bpf_program__attach_uprobe_opts(skel->progs.test4, + urand_pid, + "./liburandom_read.so", + 0 /* offset */, + &uprobe_opts); + if (!ASSERT_OK_PTR(skel->links.test4, "urandlib_api_attach_ok")) + goto cleanup; + + /* Auto attach 3 u[ret]probes to urandlib_api_sameoffset */ + err = test_uprobe__attach(skel); + if (!ASSERT_OK(err, "skel_attach")) + goto cleanup; + + /* trigger urandom_read */ + ASSERT_OK(urand_trigger(&urand_pipe), "urand_exit_code"); + + ASSERT_EQ(skel->bss->test1_result, 1, "urandlib_api_sameoffset"); + ASSERT_EQ(skel->bss->test2_result, 1, "urandlib_api_sameoffset@v1"); + ASSERT_EQ(skel->bss->test3_result, 3, "urandlib_api_sameoffset@@v2"); + ASSERT_EQ(skel->bss->test4_result, 1, "urandlib_api"); + +cleanup: + if (urand_pipe) + pclose(urand_pipe); + test_uprobe__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c b/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c index 626c461fa34d..4439ba9392f8 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c @@ -226,7 +226,7 @@ static int verify_xsk_metadata(struct xsk *xsk) __u64 comp_addr; void *data; __u64 addr; - __u32 idx; + __u32 idx = 0; int ret; ret = recvfrom(xsk_socket__fd(xsk->socket), NULL, 0, MSG_DONTWAIT, NULL, NULL); diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task_vma.c b/tools/testing/selftests/bpf/progs/bpf_iter_task_vmas.c index dd923dc637d5..dd923dc637d5 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_task_vma.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_task_vmas.c diff --git a/tools/testing/selftests/bpf/progs/bpf_misc.h b/tools/testing/selftests/bpf/progs/bpf_misc.h index 38a57a2e70db..799fff4995d8 100644 --- a/tools/testing/selftests/bpf/progs/bpf_misc.h +++ b/tools/testing/selftests/bpf/progs/bpf_misc.h @@ -99,6 +99,9 @@ #elif defined(__TARGET_ARCH_arm64) #define SYSCALL_WRAPPER 1 #define SYS_PREFIX "__arm64_" +#elif defined(__TARGET_ARCH_riscv) +#define SYSCALL_WRAPPER 1 +#define SYS_PREFIX "__riscv_" #else #define SYSCALL_WRAPPER 0 #define SYS_PREFIX "__se_" diff --git a/tools/testing/selftests/bpf/progs/connect_unix_prog.c b/tools/testing/selftests/bpf/progs/connect_unix_prog.c new file mode 100644 index 000000000000..ca8aa2f116b3 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/connect_unix_prog.c @@ -0,0 +1,40 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */ + +#include "vmlinux.h" + +#include <string.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_core_read.h> +#include "bpf_kfuncs.h" + +__u8 SERVUN_REWRITE_ADDRESS[] = "\0bpf_cgroup_unix_test_rewrite"; + +SEC("cgroup/connect_unix") +int connect_unix_prog(struct bpf_sock_addr *ctx) +{ + struct bpf_sock_addr_kern *sa_kern = bpf_cast_to_kern_ctx(ctx); + struct sockaddr_un *sa_kern_unaddr; + __u32 unaddrlen = offsetof(struct sockaddr_un, sun_path) + + sizeof(SERVUN_REWRITE_ADDRESS) - 1; + int ret; + + /* Rewrite destination. */ + ret = bpf_sock_addr_set_sun_path(sa_kern, SERVUN_REWRITE_ADDRESS, + sizeof(SERVUN_REWRITE_ADDRESS) - 1); + if (ret) + return 0; + + if (sa_kern->uaddrlen != unaddrlen) + return 0; + + sa_kern_unaddr = bpf_rdonly_cast(sa_kern->uaddr, + bpf_core_type_id_kernel(struct sockaddr_un)); + if (memcmp(sa_kern_unaddr->sun_path, SERVUN_REWRITE_ADDRESS, + sizeof(SERVUN_REWRITE_ADDRESS) - 1) != 0) + return 0; + + return 1; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/exceptions_assert.c b/tools/testing/selftests/bpf/progs/exceptions_assert.c index fa35832e6748..e1e5c54a6a11 100644 --- a/tools/testing/selftests/bpf/progs/exceptions_assert.c +++ b/tools/testing/selftests/bpf/progs/exceptions_assert.c @@ -31,35 +31,35 @@ check_assert(s64, eq, llong_max, LLONG_MAX); __msg(": R0_w=scalar(smax=2147483646) R10=fp0") check_assert(s64, lt, pos, INT_MAX); -__msg(": R0_w=scalar(umin=9223372036854775808,var_off=(0x8000000000000000; 0x7fffffffffffffff))") +__msg(": R0_w=scalar(smax=-1,umin=9223372036854775808,var_off=(0x8000000000000000; 0x7fffffffffffffff))") check_assert(s64, lt, zero, 0); -__msg(": R0_w=scalar(umin=9223372036854775808,umax=18446744071562067967,var_off=(0x8000000000000000; 0x7fffffffffffffff))") +__msg(": R0_w=scalar(smax=-2147483649,umin=9223372036854775808,umax=18446744071562067967,var_off=(0x8000000000000000; 0x7fffffffffffffff))") check_assert(s64, lt, neg, INT_MIN); __msg(": R0_w=scalar(smax=2147483647) R10=fp0") check_assert(s64, le, pos, INT_MAX); __msg(": R0_w=scalar(smax=0) R10=fp0") check_assert(s64, le, zero, 0); -__msg(": R0_w=scalar(umin=9223372036854775808,umax=18446744071562067968,var_off=(0x8000000000000000; 0x7fffffffffffffff))") +__msg(": R0_w=scalar(smax=-2147483648,umin=9223372036854775808,umax=18446744071562067968,var_off=(0x8000000000000000; 0x7fffffffffffffff))") check_assert(s64, le, neg, INT_MIN); -__msg(": R0_w=scalar(umin=2147483648,umax=9223372036854775807,var_off=(0x0; 0x7fffffffffffffff))") +__msg(": R0_w=scalar(smin=umin=2147483648,umax=9223372036854775807,var_off=(0x0; 0x7fffffffffffffff))") check_assert(s64, gt, pos, INT_MAX); -__msg(": R0_w=scalar(umin=1,umax=9223372036854775807,var_off=(0x0; 0x7fffffffffffffff))") +__msg(": R0_w=scalar(smin=umin=1,umax=9223372036854775807,var_off=(0x0; 0x7fffffffffffffff))") check_assert(s64, gt, zero, 0); __msg(": R0_w=scalar(smin=-2147483647) R10=fp0") check_assert(s64, gt, neg, INT_MIN); -__msg(": R0_w=scalar(umin=2147483647,umax=9223372036854775807,var_off=(0x0; 0x7fffffffffffffff))") +__msg(": R0_w=scalar(smin=umin=2147483647,umax=9223372036854775807,var_off=(0x0; 0x7fffffffffffffff))") check_assert(s64, ge, pos, INT_MAX); -__msg(": R0_w=scalar(umax=9223372036854775807,var_off=(0x0; 0x7fffffffffffffff)) R10=fp0") +__msg(": R0_w=scalar(smin=0,umax=9223372036854775807,var_off=(0x0; 0x7fffffffffffffff)) R10=fp0") check_assert(s64, ge, zero, 0); __msg(": R0_w=scalar(smin=-2147483648) R10=fp0") check_assert(s64, ge, neg, INT_MIN); SEC("?tc") __log_level(2) __failure -__msg(": R0=0 R1=ctx(off=0,imm=0) R2=scalar(smin=-2147483646,smax=2147483645) R10=fp0") +__msg(": R0=0 R1=ctx(off=0,imm=0) R2=scalar(smin=smin32=-2147483646,smax=smax32=2147483645) R10=fp0") int check_assert_range_s64(struct __sk_buff *ctx) { struct bpf_sock *sk = ctx->sk; @@ -75,7 +75,7 @@ int check_assert_range_s64(struct __sk_buff *ctx) SEC("?tc") __log_level(2) __failure -__msg(": R1=ctx(off=0,imm=0) R2=scalar(umin=4096,umax=8192,var_off=(0x0; 0x3fff))") +__msg(": R1=ctx(off=0,imm=0) R2=scalar(smin=umin=smin32=umin32=4096,smax=umax=smax32=umax32=8192,var_off=(0x0; 0x3fff))") int check_assert_range_u64(struct __sk_buff *ctx) { u64 num = ctx->len; diff --git a/tools/testing/selftests/bpf/progs/getpeername_unix_prog.c b/tools/testing/selftests/bpf/progs/getpeername_unix_prog.c new file mode 100644 index 000000000000..9c078f34bbb2 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/getpeername_unix_prog.c @@ -0,0 +1,39 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */ + +#include "vmlinux.h" + +#include <string.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_core_read.h> +#include "bpf_kfuncs.h" + +__u8 SERVUN_REWRITE_ADDRESS[] = "\0bpf_cgroup_unix_test_rewrite"; + +SEC("cgroup/getpeername_unix") +int getpeername_unix_prog(struct bpf_sock_addr *ctx) +{ + struct bpf_sock_addr_kern *sa_kern = bpf_cast_to_kern_ctx(ctx); + struct sockaddr_un *sa_kern_unaddr; + __u32 unaddrlen = offsetof(struct sockaddr_un, sun_path) + + sizeof(SERVUN_REWRITE_ADDRESS) - 1; + int ret; + + ret = bpf_sock_addr_set_sun_path(sa_kern, SERVUN_REWRITE_ADDRESS, + sizeof(SERVUN_REWRITE_ADDRESS) - 1); + if (ret) + return 1; + + if (sa_kern->uaddrlen != unaddrlen) + return 1; + + sa_kern_unaddr = bpf_rdonly_cast(sa_kern->uaddr, + bpf_core_type_id_kernel(struct sockaddr_un)); + if (memcmp(sa_kern_unaddr->sun_path, SERVUN_REWRITE_ADDRESS, + sizeof(SERVUN_REWRITE_ADDRESS) - 1) != 0) + return 1; + + return 1; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/getsockname_unix_prog.c b/tools/testing/selftests/bpf/progs/getsockname_unix_prog.c new file mode 100644 index 000000000000..ac7145111497 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/getsockname_unix_prog.c @@ -0,0 +1,39 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */ + +#include "vmlinux.h" + +#include <string.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_core_read.h> +#include "bpf_kfuncs.h" + +__u8 SERVUN_REWRITE_ADDRESS[] = "\0bpf_cgroup_unix_test_rewrite"; + +SEC("cgroup/getsockname_unix") +int getsockname_unix_prog(struct bpf_sock_addr *ctx) +{ + struct bpf_sock_addr_kern *sa_kern = bpf_cast_to_kern_ctx(ctx); + struct sockaddr_un *sa_kern_unaddr; + __u32 unaddrlen = offsetof(struct sockaddr_un, sun_path) + + sizeof(SERVUN_REWRITE_ADDRESS) - 1; + int ret; + + ret = bpf_sock_addr_set_sun_path(sa_kern, SERVUN_REWRITE_ADDRESS, + sizeof(SERVUN_REWRITE_ADDRESS) - 1); + if (ret) + return 1; + + if (sa_kern->uaddrlen != unaddrlen) + return 1; + + sa_kern_unaddr = bpf_rdonly_cast(sa_kern->uaddr, + bpf_core_type_id_kernel(struct sockaddr_un)); + if (memcmp(sa_kern_unaddr->sun_path, SERVUN_REWRITE_ADDRESS, + sizeof(SERVUN_REWRITE_ADDRESS) - 1) != 0) + return 1; + + return 1; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/iters_task_vma.c b/tools/testing/selftests/bpf/progs/iters_task_vma.c new file mode 100644 index 000000000000..44edecfdfaee --- /dev/null +++ b/tools/testing/selftests/bpf/progs/iters_task_vma.c @@ -0,0 +1,43 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */ + +#include "vmlinux.h" +#include "bpf_experimental.h" +#include <bpf/bpf_helpers.h> +#include "bpf_misc.h" + +pid_t target_pid = 0; +unsigned int vmas_seen = 0; + +struct { + __u64 vm_start; + __u64 vm_end; +} vm_ranges[1000]; + +SEC("raw_tp/sys_enter") +int iter_task_vma_for_each(const void *ctx) +{ + struct task_struct *task = bpf_get_current_task_btf(); + struct vm_area_struct *vma; + unsigned int seen = 0; + + if (task->pid != target_pid) + return 0; + + if (vmas_seen) + return 0; + + bpf_for_each(task_vma, vma, task, 0) { + if (seen >= 1000) + break; + + vm_ranges[seen].vm_start = vma->vm_start; + vm_ranges[seen].vm_end = vma->vm_end; + seen++; + } + + vmas_seen = seen; + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/missed_kprobe.c b/tools/testing/selftests/bpf/progs/missed_kprobe.c new file mode 100644 index 000000000000..7f9ef701f5de --- /dev/null +++ b/tools/testing/selftests/bpf/progs/missed_kprobe.c @@ -0,0 +1,30 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include "../bpf_testmod/bpf_testmod_kfunc.h" + +char _license[] SEC("license") = "GPL"; + +/* + * No tests in here, just to trigger 'bpf_fentry_test*' + * through tracing test_run + */ +SEC("fentry/bpf_modify_return_test") +int BPF_PROG(trigger) +{ + return 0; +} + +SEC("kprobe/bpf_fentry_test1") +int test1(struct pt_regs *ctx) +{ + bpf_kfunc_common_test(); + return 0; +} + +SEC("kprobe/bpf_kfunc_common_test") +int test2(struct pt_regs *ctx) +{ + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/missed_kprobe_recursion.c b/tools/testing/selftests/bpf/progs/missed_kprobe_recursion.c new file mode 100644 index 000000000000..8ea71cbd6c45 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/missed_kprobe_recursion.c @@ -0,0 +1,48 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include "../bpf_testmod/bpf_testmod_kfunc.h" + +char _license[] SEC("license") = "GPL"; + +/* + * No tests in here, just to trigger 'bpf_fentry_test*' + * through tracing test_run + */ +SEC("fentry/bpf_modify_return_test") +int BPF_PROG(trigger) +{ + return 0; +} + +SEC("kprobe.multi/bpf_fentry_test1") +int test1(struct pt_regs *ctx) +{ + bpf_kfunc_common_test(); + return 0; +} + +SEC("kprobe/bpf_kfunc_common_test") +int test2(struct pt_regs *ctx) +{ + return 0; +} + +SEC("kprobe/bpf_kfunc_common_test") +int test3(struct pt_regs *ctx) +{ + return 0; +} + +SEC("kprobe/bpf_kfunc_common_test") +int test4(struct pt_regs *ctx) +{ + return 0; +} + +SEC("kprobe.multi/bpf_kfunc_common_test") +int test5(struct pt_regs *ctx) +{ + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/missed_tp_recursion.c b/tools/testing/selftests/bpf/progs/missed_tp_recursion.c new file mode 100644 index 000000000000..762385f827c5 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/missed_tp_recursion.c @@ -0,0 +1,41 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +char _license[] SEC("license") = "GPL"; + +/* + * No tests in here, just to trigger 'bpf_fentry_test*' + * through tracing test_run + */ +SEC("fentry/bpf_modify_return_test") +int BPF_PROG(trigger) +{ + return 0; +} + +SEC("kprobe/bpf_fentry_test1") +int test1(struct pt_regs *ctx) +{ + bpf_printk("test"); + return 0; +} + +SEC("tp/bpf_trace/bpf_trace_printk") +int test2(struct pt_regs *ctx) +{ + return 0; +} + +SEC("tp/bpf_trace/bpf_trace_printk") +int test3(struct pt_regs *ctx) +{ + return 0; +} + +SEC("tp/bpf_trace/bpf_trace_printk") +int test4(struct pt_regs *ctx) +{ + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/percpu_alloc_array.c b/tools/testing/selftests/bpf/progs/percpu_alloc_array.c index bbc45346e006..37c2d2608ec0 100644 --- a/tools/testing/selftests/bpf/progs/percpu_alloc_array.c +++ b/tools/testing/selftests/bpf/progs/percpu_alloc_array.c @@ -71,6 +71,7 @@ int BPF_PROG(test_array_map_2) } int cpu0_field_d, sum_field_c; +int my_pid; /* Summarize percpu data */ SEC("?fentry/bpf_fentry_test3") @@ -81,6 +82,9 @@ int BPF_PROG(test_array_map_3) struct val_t *v; struct elem *e; + if ((bpf_get_current_pid_tgid() >> 32) != my_pid) + return 0; + e = bpf_map_lookup_elem(&array, &index); if (!e) return 0; @@ -130,6 +134,9 @@ int BPF_PROG(test_array_map_10) struct val_t *v; struct elem *e; + if ((bpf_get_current_pid_tgid() >> 32) != my_pid) + return 0; + e = bpf_map_lookup_elem(&array, &index); if (!e) return 0; diff --git a/tools/testing/selftests/bpf/progs/percpu_alloc_cgrp_local_storage.c b/tools/testing/selftests/bpf/progs/percpu_alloc_cgrp_local_storage.c index 1c36a241852c..a2acf9aa6c24 100644 --- a/tools/testing/selftests/bpf/progs/percpu_alloc_cgrp_local_storage.c +++ b/tools/testing/selftests/bpf/progs/percpu_alloc_cgrp_local_storage.c @@ -70,6 +70,7 @@ int BPF_PROG(test_cgrp_local_storage_2) } int cpu0_field_d, sum_field_c; +int my_pid; /* Summarize percpu data collection */ SEC("fentry/bpf_fentry_test3") @@ -81,6 +82,9 @@ int BPF_PROG(test_cgrp_local_storage_3) struct elem *e; int i; + if ((bpf_get_current_pid_tgid() >> 32) != my_pid) + return 0; + task = bpf_get_current_task_btf(); e = bpf_cgrp_storage_get(&cgrp, task->cgroups->dfl_cgrp, 0, 0); if (!e) diff --git a/tools/testing/selftests/bpf/progs/profiler.inc.h b/tools/testing/selftests/bpf/progs/profiler.inc.h index f799d87e8700..897061930cb7 100644 --- a/tools/testing/selftests/bpf/progs/profiler.inc.h +++ b/tools/testing/selftests/bpf/progs/profiler.inc.h @@ -609,7 +609,7 @@ out: } SEC("tracepoint/syscalls/sys_enter_kill") -int tracepoint__syscalls__sys_enter_kill(struct trace_event_raw_sys_enter* ctx) +int tracepoint__syscalls__sys_enter_kill(struct syscall_trace_enter* ctx) { struct bpf_func_stats_ctx stats_ctx; diff --git a/tools/testing/selftests/bpf/progs/recvmsg_unix_prog.c b/tools/testing/selftests/bpf/progs/recvmsg_unix_prog.c new file mode 100644 index 000000000000..4dfbc8552558 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/recvmsg_unix_prog.c @@ -0,0 +1,39 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */ + +#include "vmlinux.h" + +#include <string.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_core_read.h> +#include "bpf_kfuncs.h" + +__u8 SERVUN_ADDRESS[] = "\0bpf_cgroup_unix_test"; + +SEC("cgroup/recvmsg_unix") +int recvmsg_unix_prog(struct bpf_sock_addr *ctx) +{ + struct bpf_sock_addr_kern *sa_kern = bpf_cast_to_kern_ctx(ctx); + struct sockaddr_un *sa_kern_unaddr; + __u32 unaddrlen = offsetof(struct sockaddr_un, sun_path) + + sizeof(SERVUN_ADDRESS) - 1; + int ret; + + ret = bpf_sock_addr_set_sun_path(sa_kern, SERVUN_ADDRESS, + sizeof(SERVUN_ADDRESS) - 1); + if (ret) + return 1; + + if (sa_kern->uaddrlen != unaddrlen) + return 1; + + sa_kern_unaddr = bpf_rdonly_cast(sa_kern->uaddr, + bpf_core_type_id_kernel(struct sockaddr_un)); + if (memcmp(sa_kern_unaddr->sun_path, SERVUN_ADDRESS, + sizeof(SERVUN_ADDRESS) - 1) != 0) + return 1; + + return 1; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/sendmsg_unix_prog.c b/tools/testing/selftests/bpf/progs/sendmsg_unix_prog.c new file mode 100644 index 000000000000..1f67e832666e --- /dev/null +++ b/tools/testing/selftests/bpf/progs/sendmsg_unix_prog.c @@ -0,0 +1,40 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */ + +#include "vmlinux.h" + +#include <string.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_core_read.h> +#include "bpf_kfuncs.h" + +__u8 SERVUN_REWRITE_ADDRESS[] = "\0bpf_cgroup_unix_test_rewrite"; + +SEC("cgroup/sendmsg_unix") +int sendmsg_unix_prog(struct bpf_sock_addr *ctx) +{ + struct bpf_sock_addr_kern *sa_kern = bpf_cast_to_kern_ctx(ctx); + struct sockaddr_un *sa_kern_unaddr; + __u32 unaddrlen = offsetof(struct sockaddr_un, sun_path) + + sizeof(SERVUN_REWRITE_ADDRESS) - 1; + int ret; + + /* Rewrite destination. */ + ret = bpf_sock_addr_set_sun_path(sa_kern, SERVUN_REWRITE_ADDRESS, + sizeof(SERVUN_REWRITE_ADDRESS) - 1); + if (ret) + return 0; + + if (sa_kern->uaddrlen != unaddrlen) + return 0; + + sa_kern_unaddr = bpf_rdonly_cast(sa_kern->uaddr, + bpf_core_type_id_kernel(struct sockaddr_un)); + if (memcmp(sa_kern_unaddr->sun_path, SERVUN_REWRITE_ADDRESS, + sizeof(SERVUN_REWRITE_ADDRESS) - 1) != 0) + return 0; + + return 1; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_ldsx_insn.c b/tools/testing/selftests/bpf/progs/test_ldsx_insn.c index 67c14ba1e87b..3ddcb3777912 100644 --- a/tools/testing/selftests/bpf/progs/test_ldsx_insn.c +++ b/tools/testing/selftests/bpf/progs/test_ldsx_insn.c @@ -6,7 +6,8 @@ #include <bpf/bpf_tracing.h> #if (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \ - (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64)) && __clang_major__ >= 18 + (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64) || \ + defined(__TARGET_ARCH_s390)) && __clang_major__ >= 18 const volatile int skip = 0; #else const volatile int skip = 1; @@ -104,7 +105,11 @@ int _tc(volatile struct __sk_buff *skb) "%[tmp_mark] = r1" : [tmp_mark]"=r"(tmp_mark) : [ctx]"r"(skb), - [off_mark]"i"(offsetof(struct __sk_buff, mark)) + [off_mark]"i"(offsetof(struct __sk_buff, mark) +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + + sizeof(skb->mark) - 1 +#endif + ) : "r1"); #else tmp_mark = (char)skb->mark; diff --git a/tools/testing/selftests/bpf/progs/test_uprobe.c b/tools/testing/selftests/bpf/progs/test_uprobe.c new file mode 100644 index 000000000000..896c88a4960d --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_uprobe.c @@ -0,0 +1,61 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Hengqi Chen */ + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +pid_t my_pid = 0; + +int test1_result = 0; +int test2_result = 0; +int test3_result = 0; +int test4_result = 0; + +SEC("uprobe/./liburandom_read.so:urandlib_api_sameoffset") +int BPF_UPROBE(test1) +{ + pid_t pid = bpf_get_current_pid_tgid() >> 32; + + if (pid != my_pid) + return 0; + + test1_result = 1; + return 0; +} + +SEC("uprobe/./liburandom_read.so:urandlib_api_sameoffset@LIBURANDOM_READ_1.0.0") +int BPF_UPROBE(test2) +{ + pid_t pid = bpf_get_current_pid_tgid() >> 32; + + if (pid != my_pid) + return 0; + + test2_result = 1; + return 0; +} + +SEC("uretprobe/./liburandom_read.so:urandlib_api_sameoffset@@LIBURANDOM_READ_2.0.0") +int BPF_URETPROBE(test3, int ret) +{ + pid_t pid = bpf_get_current_pid_tgid() >> 32; + + if (pid != my_pid) + return 0; + + test3_result = ret; + return 0; +} + +SEC("uprobe") +int BPF_UPROBE(test4) +{ + pid_t pid = bpf_get_current_pid_tgid() >> 32; + + if (pid != my_pid) + return 0; + + test4_result = 1; + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/test_vmlinux.c b/tools/testing/selftests/bpf/progs/test_vmlinux.c index 4b8e37f7fd06..78b23934d9f8 100644 --- a/tools/testing/selftests/bpf/progs/test_vmlinux.c +++ b/tools/testing/selftests/bpf/progs/test_vmlinux.c @@ -16,12 +16,12 @@ bool kprobe_called = false; bool fentry_called = false; SEC("tp/syscalls/sys_enter_nanosleep") -int handle__tp(struct trace_event_raw_sys_enter *args) +int handle__tp(struct syscall_trace_enter *args) { struct __kernel_timespec *ts; long tv_nsec; - if (args->id != __NR_nanosleep) + if (args->nr != __NR_nanosleep) return 0; ts = (void *)args->args[0]; diff --git a/tools/testing/selftests/bpf/progs/timer.c b/tools/testing/selftests/bpf/progs/timer.c index 9a16d95213e1..8b946c8188c6 100644 --- a/tools/testing/selftests/bpf/progs/timer.c +++ b/tools/testing/selftests/bpf/progs/timer.c @@ -51,7 +51,7 @@ struct { __uint(max_entries, 1); __type(key, int); __type(value, struct elem); -} abs_timer SEC(".maps"); +} abs_timer SEC(".maps"), soft_timer_pinned SEC(".maps"), abs_timer_pinned SEC(".maps"); __u64 bss_data; __u64 abs_data; @@ -59,6 +59,8 @@ __u64 err; __u64 ok; __u64 callback_check = 52; __u64 callback2_check = 52; +__u64 pinned_callback_check; +__s32 pinned_cpu; #define ARRAY 1 #define HTAB 2 @@ -329,3 +331,62 @@ int BPF_PROG2(test3, int, a) return 0; } + +/* callback for pinned timer */ +static int timer_cb_pinned(void *map, int *key, struct bpf_timer *timer) +{ + __s32 cpu = bpf_get_smp_processor_id(); + + if (cpu != pinned_cpu) + err |= 16384; + + pinned_callback_check++; + return 0; +} + +static void test_pinned_timer(bool soft) +{ + int key = 0; + void *map; + struct bpf_timer *timer; + __u64 flags = BPF_F_TIMER_CPU_PIN; + __u64 start_time; + + if (soft) { + map = &soft_timer_pinned; + start_time = 0; + } else { + map = &abs_timer_pinned; + start_time = bpf_ktime_get_boot_ns(); + flags |= BPF_F_TIMER_ABS; + } + + timer = bpf_map_lookup_elem(map, &key); + if (timer) { + if (bpf_timer_init(timer, map, CLOCK_BOOTTIME) != 0) + err |= 4096; + bpf_timer_set_callback(timer, timer_cb_pinned); + pinned_cpu = bpf_get_smp_processor_id(); + bpf_timer_start(timer, start_time + 1000, flags); + } else { + err |= 8192; + } +} + +SEC("fentry/bpf_fentry_test4") +int BPF_PROG2(test4, int, a) +{ + bpf_printk("test4"); + test_pinned_timer(true); + + return 0; +} + +SEC("fentry/bpf_fentry_test5") +int BPF_PROG2(test5, int, a) +{ + bpf_printk("test5"); + test_pinned_timer(false); + + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/timer_failure.c b/tools/testing/selftests/bpf/progs/timer_failure.c new file mode 100644 index 000000000000..226d33b5a05c --- /dev/null +++ b/tools/testing/selftests/bpf/progs/timer_failure.c @@ -0,0 +1,47 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */ + +#include <linux/bpf.h> +#include <time.h> +#include <errno.h> +#include <bpf/bpf_helpers.h> +#include "bpf_misc.h" +#include "bpf_tcp_helpers.h" + +char _license[] SEC("license") = "GPL"; + +struct elem { + struct bpf_timer t; +}; + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, int); + __type(value, struct elem); +} timer_map SEC(".maps"); + +static int timer_cb_ret1(void *map, int *key, struct bpf_timer *timer) +{ + if (bpf_get_smp_processor_id() % 2) + return 1; + else + return 0; +} + +SEC("fentry/bpf_fentry_test1") +__failure __msg("should have been in (0x0; 0x0)") +int BPF_PROG2(test_ret_1, int, a) +{ + int key = 0; + struct bpf_timer *timer; + + timer = bpf_map_lookup_elem(&timer_map, &key); + if (timer) { + bpf_timer_init(timer, &timer_map, CLOCK_BOOTTIME); + bpf_timer_set_callback(timer, timer_cb_ret1); + bpf_timer_start(timer, 1000, 0); + } + + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/verifier_bswap.c b/tools/testing/selftests/bpf/progs/verifier_bswap.c index 5d54f8eae6a1..107525fb4a6a 100644 --- a/tools/testing/selftests/bpf/progs/verifier_bswap.c +++ b/tools/testing/selftests/bpf/progs/verifier_bswap.c @@ -5,7 +5,8 @@ #include "bpf_misc.h" #if (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \ - (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64) || defined(__TARGET_ARCH_arm)) && \ + (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64) || \ + defined(__TARGET_ARCH_arm) || defined(__TARGET_ARCH_s390)) && \ __clang_major__ >= 18 SEC("socket") diff --git a/tools/testing/selftests/bpf/progs/verifier_gotol.c b/tools/testing/selftests/bpf/progs/verifier_gotol.c index aa54ecd5829e..9f202eda952f 100644 --- a/tools/testing/selftests/bpf/progs/verifier_gotol.c +++ b/tools/testing/selftests/bpf/progs/verifier_gotol.c @@ -5,7 +5,8 @@ #include "bpf_misc.h" #if (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \ - (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64) || defined(__TARGET_ARCH_arm)) && \ + (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64) || \ + defined(__TARGET_ARCH_arm) || defined(__TARGET_ARCH_s390)) && \ __clang_major__ >= 18 SEC("socket") diff --git a/tools/testing/selftests/bpf/progs/verifier_ldsx.c b/tools/testing/selftests/bpf/progs/verifier_ldsx.c index 1e1bc379c44f..375525329637 100644 --- a/tools/testing/selftests/bpf/progs/verifier_ldsx.c +++ b/tools/testing/selftests/bpf/progs/verifier_ldsx.c @@ -5,7 +5,8 @@ #include "bpf_misc.h" #if (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \ - (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64) || defined(__TARGET_ARCH_arm)) && \ + (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64) || \ + defined(__TARGET_ARCH_arm) || defined(__TARGET_ARCH_s390)) && \ __clang_major__ >= 18 SEC("socket") @@ -13,12 +14,16 @@ __description("LDSX, S8") __success __success_unpriv __retval(-2) __naked void ldsx_s8(void) { - asm volatile (" \ - r1 = 0x3fe; \ - *(u64 *)(r10 - 8) = r1; \ - r0 = *(s8 *)(r10 - 8); \ - exit; \ -" ::: __clobber_all); + asm volatile ( + "r1 = 0x3fe;" + "*(u64 *)(r10 - 8) = r1;" +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + "r0 = *(s8 *)(r10 - 8);" +#else + "r0 = *(s8 *)(r10 - 1);" +#endif + "exit;" + ::: __clobber_all); } SEC("socket") @@ -26,12 +31,16 @@ __description("LDSX, S16") __success __success_unpriv __retval(-2) __naked void ldsx_s16(void) { - asm volatile (" \ - r1 = 0x3fffe; \ - *(u64 *)(r10 - 8) = r1; \ - r0 = *(s16 *)(r10 - 8); \ - exit; \ -" ::: __clobber_all); + asm volatile ( + "r1 = 0x3fffe;" + "*(u64 *)(r10 - 8) = r1;" +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + "r0 = *(s16 *)(r10 - 8);" +#else + "r0 = *(s16 *)(r10 - 2);" +#endif + "exit;" + ::: __clobber_all); } SEC("socket") @@ -39,35 +48,43 @@ __description("LDSX, S32") __success __success_unpriv __retval(-1) __naked void ldsx_s32(void) { - asm volatile (" \ - r1 = 0xfffffffe; \ - *(u64 *)(r10 - 8) = r1; \ - r0 = *(s32 *)(r10 - 8); \ - r0 >>= 1; \ - exit; \ -" ::: __clobber_all); + asm volatile ( + "r1 = 0xfffffffe;" + "*(u64 *)(r10 - 8) = r1;" +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + "r0 = *(s32 *)(r10 - 8);" +#else + "r0 = *(s32 *)(r10 - 4);" +#endif + "r0 >>= 1;" + "exit;" + ::: __clobber_all); } SEC("socket") __description("LDSX, S8 range checking, privileged") __log_level(2) __success __retval(1) -__msg("R1_w=scalar(smin=-128,smax=127)") +__msg("R1_w=scalar(smin=smin32=-128,smax=smax32=127)") __naked void ldsx_s8_range_priv(void) { - asm volatile (" \ - call %[bpf_get_prandom_u32]; \ - *(u64 *)(r10 - 8) = r0; \ - r1 = *(s8 *)(r10 - 8); \ - /* r1 with s8 range */ \ - if r1 s> 0x7f goto l0_%=; \ - if r1 s< -0x80 goto l0_%=; \ - r0 = 1; \ -l1_%=: \ - exit; \ -l0_%=: \ - r0 = 2; \ - goto l1_%=; \ -" : + asm volatile ( + "call %[bpf_get_prandom_u32];" + "*(u64 *)(r10 - 8) = r0;" +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + "r1 = *(s8 *)(r10 - 8);" +#else + "r1 = *(s8 *)(r10 - 1);" +#endif + /* r1 with s8 range */ + "if r1 s> 0x7f goto l0_%=;" + "if r1 s< -0x80 goto l0_%=;" + "r0 = 1;" +"l1_%=:" + "exit;" +"l0_%=:" + "r0 = 2;" + "goto l1_%=;" + : : __imm(bpf_get_prandom_u32) : __clobber_all); } @@ -77,20 +94,24 @@ __description("LDSX, S16 range checking") __success __success_unpriv __retval(1) __naked void ldsx_s16_range(void) { - asm volatile (" \ - call %[bpf_get_prandom_u32]; \ - *(u64 *)(r10 - 8) = r0; \ - r1 = *(s16 *)(r10 - 8); \ - /* r1 with s16 range */ \ - if r1 s> 0x7fff goto l0_%=; \ - if r1 s< -0x8000 goto l0_%=; \ - r0 = 1; \ -l1_%=: \ - exit; \ -l0_%=: \ - r0 = 2; \ - goto l1_%=; \ -" : + asm volatile ( + "call %[bpf_get_prandom_u32];" + "*(u64 *)(r10 - 8) = r0;" +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + "r1 = *(s16 *)(r10 - 8);" +#else + "r1 = *(s16 *)(r10 - 2);" +#endif + /* r1 with s16 range */ + "if r1 s> 0x7fff goto l0_%=;" + "if r1 s< -0x8000 goto l0_%=;" + "r0 = 1;" +"l1_%=:" + "exit;" +"l0_%=:" + "r0 = 2;" + "goto l1_%=;" + : : __imm(bpf_get_prandom_u32) : __clobber_all); } @@ -100,20 +121,24 @@ __description("LDSX, S32 range checking") __success __success_unpriv __retval(1) __naked void ldsx_s32_range(void) { - asm volatile (" \ - call %[bpf_get_prandom_u32]; \ - *(u64 *)(r10 - 8) = r0; \ - r1 = *(s32 *)(r10 - 8); \ - /* r1 with s16 range */ \ - if r1 s> 0x7fffFFFF goto l0_%=; \ - if r1 s< -0x80000000 goto l0_%=; \ - r0 = 1; \ -l1_%=: \ - exit; \ -l0_%=: \ - r0 = 2; \ - goto l1_%=; \ -" : + asm volatile ( + "call %[bpf_get_prandom_u32];" + "*(u64 *)(r10 - 8) = r0;" +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + "r1 = *(s32 *)(r10 - 8);" +#else + "r1 = *(s32 *)(r10 - 4);" +#endif + /* r1 with s16 range */ + "if r1 s> 0x7fffFFFF goto l0_%=;" + "if r1 s< -0x80000000 goto l0_%=;" + "r0 = 1;" +"l1_%=:" + "exit;" +"l0_%=:" + "r0 = 2;" + "goto l1_%=;" + : : __imm(bpf_get_prandom_u32) : __clobber_all); } diff --git a/tools/testing/selftests/bpf/progs/verifier_movsx.c b/tools/testing/selftests/bpf/progs/verifier_movsx.c index ca11fd5dafd1..b2a04d1179d0 100644 --- a/tools/testing/selftests/bpf/progs/verifier_movsx.c +++ b/tools/testing/selftests/bpf/progs/verifier_movsx.c @@ -5,7 +5,8 @@ #include "bpf_misc.h" #if (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \ - (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64) || defined(__TARGET_ARCH_arm)) && \ + (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64) || \ + defined(__TARGET_ARCH_arm) || defined(__TARGET_ARCH_s390)) && \ __clang_major__ >= 18 SEC("socket") diff --git a/tools/testing/selftests/bpf/progs/verifier_sdiv.c b/tools/testing/selftests/bpf/progs/verifier_sdiv.c index fb039722b639..8fc5174808b2 100644 --- a/tools/testing/selftests/bpf/progs/verifier_sdiv.c +++ b/tools/testing/selftests/bpf/progs/verifier_sdiv.c @@ -5,7 +5,8 @@ #include "bpf_misc.h" #if (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \ - (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64) || defined(__TARGET_ARCH_arm)) && \ + (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64) || \ + defined(__TARGET_ARCH_arm) || defined(__TARGET_ARCH_s390)) && \ __clang_major__ >= 18 SEC("socket") diff --git a/tools/testing/selftests/bpf/progs/xsk_xdp_progs.c b/tools/testing/selftests/bpf/progs/xsk_xdp_progs.c index 24369f242853..ccde6a4c6319 100644 --- a/tools/testing/selftests/bpf/progs/xsk_xdp_progs.c +++ b/tools/testing/selftests/bpf/progs/xsk_xdp_progs.c @@ -3,11 +3,12 @@ #include <linux/bpf.h> #include <bpf/bpf_helpers.h> -#include "xsk_xdp_metadata.h" +#include <linux/if_ether.h> +#include "xsk_xdp_common.h" struct { __uint(type, BPF_MAP_TYPE_XSKMAP); - __uint(max_entries, 1); + __uint(max_entries, 2); __uint(key_size, sizeof(int)); __uint(value_size, sizeof(int)); } xsk SEC(".maps"); @@ -52,4 +53,21 @@ SEC("xdp.frags") int xsk_xdp_populate_metadata(struct xdp_md *xdp) return bpf_redirect_map(&xsk, 0, XDP_DROP); } +SEC("xdp") int xsk_xdp_shared_umem(struct xdp_md *xdp) +{ + void *data = (void *)(long)xdp->data; + void *data_end = (void *)(long)xdp->data_end; + struct ethhdr *eth = data; + + if (eth + 1 > data_end) + return XDP_DROP; + + /* Redirecting packets based on the destination MAC address */ + idx = ((unsigned int)(eth->h_dest[5])) / 2; + if (idx > MAX_SOCKETS) + return XDP_DROP; + + return bpf_redirect_map(&xsk, idx, XDP_DROP); +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/test_loader.c b/tools/testing/selftests/bpf/test_loader.c index b4edd8454934..37ffa57f28a1 100644 --- a/tools/testing/selftests/bpf/test_loader.c +++ b/tools/testing/selftests/bpf/test_loader.c @@ -69,7 +69,7 @@ static int tester_init(struct test_loader *tester) { if (!tester->log_buf) { tester->log_buf_sz = TEST_LOADER_LOG_BUF_SZ; - tester->log_buf = malloc(tester->log_buf_sz); + tester->log_buf = calloc(tester->log_buf_sz, 1); if (!ASSERT_OK_PTR(tester->log_buf, "tester_log_buf")) return -ENOMEM; } @@ -538,7 +538,7 @@ void run_subtest(struct test_loader *tester, bool unpriv) { struct test_subspec *subspec = unpriv ? &spec->unpriv : &spec->priv; - struct bpf_program *tprog, *tprog_iter; + struct bpf_program *tprog = NULL, *tprog_iter; struct test_spec *spec_iter; struct cap_state caps = {}; struct bpf_object *tobj; diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c index 4d582cac2c09..1b9387890148 100644 --- a/tools/testing/selftests/bpf/test_progs.c +++ b/tools/testing/selftests/bpf/test_progs.c @@ -255,7 +255,7 @@ static void print_subtest_name(int test_num, int subtest_num, const char *test_name, char *subtest_name, char *result) { - char test_num_str[TEST_NUM_WIDTH + 1]; + char test_num_str[32]; snprintf(test_num_str, sizeof(test_num_str), "%d/%d", test_num, subtest_num); diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h index 77bd492c6024..2f9f6f250f17 100644 --- a/tools/testing/selftests/bpf/test_progs.h +++ b/tools/testing/selftests/bpf/test_progs.h @@ -417,6 +417,8 @@ int get_bpf_max_tramp_links(void); #define SYS_NANOSLEEP_KPROBE_NAME "__s390x_sys_nanosleep" #elif defined(__aarch64__) #define SYS_NANOSLEEP_KPROBE_NAME "__arm64_sys_nanosleep" +#elif defined(__riscv) +#define SYS_NANOSLEEP_KPROBE_NAME "__riscv_sys_nanosleep" #else #define SYS_NANOSLEEP_KPROBE_NAME "sys_nanosleep" #endif diff --git a/tools/testing/selftests/bpf/urandom_read.c b/tools/testing/selftests/bpf/urandom_read.c index e92644d0fa75..4ed795655b9f 100644 --- a/tools/testing/selftests/bpf/urandom_read.c +++ b/tools/testing/selftests/bpf/urandom_read.c @@ -11,6 +11,9 @@ #define _SDT_HAS_SEMAPHORES 1 #include "sdt.h" +#define SHARED 1 +#include "bpf/libbpf_internal.h" + #define SEC(name) __attribute__((section(name), used)) #define BUF_SIZE 256 @@ -21,10 +24,14 @@ void urand_read_without_sema(int iter_num, int iter_cnt, int read_sz); void urandlib_read_with_sema(int iter_num, int iter_cnt, int read_sz); void urandlib_read_without_sema(int iter_num, int iter_cnt, int read_sz); +int urandlib_api(void); +COMPAT_VERSION(urandlib_api_old, urandlib_api, LIBURANDOM_READ_1.0.0) +int urandlib_api_old(void); +int urandlib_api_sameoffset(void); + unsigned short urand_read_with_sema_semaphore SEC(".probes"); -static __attribute__((noinline)) -void urandom_read(int fd, int count) +static noinline void urandom_read(int fd, int count) { char buf[BUF_SIZE]; int i; @@ -83,6 +90,10 @@ int main(int argc, char *argv[]) urandom_read(fd, count); + urandlib_api(); + urandlib_api_old(); + urandlib_api_sameoffset(); + close(fd); return 0; } diff --git a/tools/testing/selftests/bpf/urandom_read_lib1.c b/tools/testing/selftests/bpf/urandom_read_lib1.c index 86186e24b740..8c1356d8b4ee 100644 --- a/tools/testing/selftests/bpf/urandom_read_lib1.c +++ b/tools/testing/selftests/bpf/urandom_read_lib1.c @@ -3,6 +3,9 @@ #define _SDT_HAS_SEMAPHORES 1 #include "sdt.h" +#define SHARED 1 +#include "bpf/libbpf_internal.h" + #define SEC(name) __attribute__((section(name), used)) unsigned short urandlib_read_with_sema_semaphore SEC(".probes"); @@ -11,3 +14,22 @@ void urandlib_read_with_sema(int iter_num, int iter_cnt, int read_sz) { STAP_PROBE3(urandlib, read_with_sema, iter_num, iter_cnt, read_sz); } + +COMPAT_VERSION(urandlib_api_v1, urandlib_api, LIBURANDOM_READ_1.0.0) +int urandlib_api_v1(void) +{ + return 1; +} + +DEFAULT_VERSION(urandlib_api_v2, urandlib_api, LIBURANDOM_READ_2.0.0) +int urandlib_api_v2(void) +{ + return 2; +} + +COMPAT_VERSION(urandlib_api_sameoffset, urandlib_api_sameoffset, LIBURANDOM_READ_1.0.0) +DEFAULT_VERSION(urandlib_api_sameoffset, urandlib_api_sameoffset, LIBURANDOM_READ_2.0.0) +int urandlib_api_sameoffset(void) +{ + return 3; +} diff --git a/tools/testing/selftests/bpf/xdp_features.c b/tools/testing/selftests/bpf/xdp_features.c index b449788fbd39..595c79141cf3 100644 --- a/tools/testing/selftests/bpf/xdp_features.c +++ b/tools/testing/selftests/bpf/xdp_features.c @@ -360,9 +360,9 @@ static int recv_msg(int sockfd, void *buf, size_t bufsize, void *val, static int dut_run(struct xdp_features *skel) { int flags = XDP_FLAGS_UPDATE_IF_NOEXIST | XDP_FLAGS_DRV_MODE; - int state, err, *sockfd, ctrl_sockfd, echo_sockfd; + int state, err = 0, *sockfd, ctrl_sockfd, echo_sockfd; struct sockaddr_storage ctrl_addr; - pthread_t dut_thread; + pthread_t dut_thread = 0; socklen_t addrlen; sockfd = start_reuseport_server(AF_INET6, SOCK_STREAM, NULL, diff --git a/tools/testing/selftests/bpf/xdp_hw_metadata.c b/tools/testing/selftests/bpf/xdp_hw_metadata.c index 613321eb84c1..17c980138796 100644 --- a/tools/testing/selftests/bpf/xdp_hw_metadata.c +++ b/tools/testing/selftests/bpf/xdp_hw_metadata.c @@ -234,7 +234,7 @@ static int verify_metadata(struct xsk *rx_xsk, int rxq, int server_fd, clockid_t struct pollfd fds[rxq + 1]; __u64 comp_addr; __u64 addr; - __u32 idx; + __u32 idx = 0; int ret; int i; diff --git a/tools/testing/selftests/bpf/xsk.c b/tools/testing/selftests/bpf/xsk.c index d9fb2b730a2c..e574711eeb84 100644 --- a/tools/testing/selftests/bpf/xsk.c +++ b/tools/testing/selftests/bpf/xsk.c @@ -442,10 +442,9 @@ void xsk_clear_xskmap(struct bpf_map *map) bpf_map_delete_elem(map_fd, &index); } -int xsk_update_xskmap(struct bpf_map *map, struct xsk_socket *xsk) +int xsk_update_xskmap(struct bpf_map *map, struct xsk_socket *xsk, u32 index) { int map_fd, sock_fd; - u32 index = 0; map_fd = bpf_map__fd(map); sock_fd = xsk_socket__fd(xsk); diff --git a/tools/testing/selftests/bpf/xsk.h b/tools/testing/selftests/bpf/xsk.h index d93200fdaa8d..771570bc3731 100644 --- a/tools/testing/selftests/bpf/xsk.h +++ b/tools/testing/selftests/bpf/xsk.h @@ -204,7 +204,7 @@ struct xsk_umem_config { int xsk_attach_xdp_program(struct bpf_program *prog, int ifindex, u32 xdp_flags); void xsk_detach_xdp_program(int ifindex, u32 xdp_flags); -int xsk_update_xskmap(struct bpf_map *map, struct xsk_socket *xsk); +int xsk_update_xskmap(struct bpf_map *map, struct xsk_socket *xsk, u32 index); void xsk_clear_xskmap(struct bpf_map *map); bool xsk_is_in_mode(u32 ifindex, int mode); diff --git a/tools/testing/selftests/bpf/xsk_xdp_common.h b/tools/testing/selftests/bpf/xsk_xdp_common.h new file mode 100644 index 000000000000..5a6f36f07383 --- /dev/null +++ b/tools/testing/selftests/bpf/xsk_xdp_common.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef XSK_XDP_COMMON_H_ +#define XSK_XDP_COMMON_H_ + +#define MAX_SOCKETS 2 + +struct xdp_info { + __u64 count; +} __attribute__((aligned(32))); + +#endif /* XSK_XDP_COMMON_H_ */ diff --git a/tools/testing/selftests/bpf/xsk_xdp_metadata.h b/tools/testing/selftests/bpf/xsk_xdp_metadata.h deleted file mode 100644 index 943133da378a..000000000000 --- a/tools/testing/selftests/bpf/xsk_xdp_metadata.h +++ /dev/null @@ -1,5 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ - -struct xdp_info { - __u64 count; -} __attribute__((aligned(32))); diff --git a/tools/testing/selftests/bpf/xskxceiver.c b/tools/testing/selftests/bpf/xskxceiver.c index 43e0a5796929..591ca9637b23 100644 --- a/tools/testing/selftests/bpf/xskxceiver.c +++ b/tools/testing/selftests/bpf/xskxceiver.c @@ -80,6 +80,7 @@ #include <linux/if_ether.h> #include <linux/mman.h> #include <linux/netdev.h> +#include <linux/bitmap.h> #include <arpa/inet.h> #include <net/if.h> #include <locale.h> @@ -102,10 +103,7 @@ #include <bpf/bpf.h> #include <linux/filter.h> #include "../kselftest.h" -#include "xsk_xdp_metadata.h" - -static const char *MAC1 = "\x00\x0A\x56\x9E\xEE\x62"; -static const char *MAC2 = "\x00\x0A\x56\x9E\xEE\x61"; +#include "xsk_xdp_common.h" static bool opt_verbose; static bool opt_print_tests; @@ -159,10 +157,10 @@ static void write_payload(void *dest, u32 pkt_nb, u32 start, u32 size) ptr[i] = htonl(pkt_nb << 16 | (i + start)); } -static void gen_eth_hdr(struct ifobject *ifobject, struct ethhdr *eth_hdr) +static void gen_eth_hdr(struct xsk_socket_info *xsk, struct ethhdr *eth_hdr) { - memcpy(eth_hdr->h_dest, ifobject->dst_mac, ETH_ALEN); - memcpy(eth_hdr->h_source, ifobject->src_mac, ETH_ALEN); + memcpy(eth_hdr->h_dest, xsk->dst_mac, ETH_ALEN); + memcpy(eth_hdr->h_source, xsk->src_mac, ETH_ALEN); eth_hdr->h_proto = htons(ETH_P_LOOPBACK); } @@ -260,7 +258,7 @@ static int __xsk_configure_socket(struct xsk_socket_info *xsk, struct xsk_umem_i cfg.bind_flags = ifobject->bind_flags; if (shared) cfg.bind_flags |= XDP_SHARED_UMEM; - if (ifobject->pkt_stream && ifobject->mtu > MAX_ETH_PKT_SIZE) + if (ifobject->mtu > MAX_ETH_PKT_SIZE) cfg.bind_flags |= XDP_USE_SG; txr = ifobject->tx_on ? &xsk->tx : NULL; @@ -429,11 +427,9 @@ static void __test_spec_init(struct test_spec *test, struct ifobject *ifobj_tx, if (i == 0) { ifobj->rx_on = false; ifobj->tx_on = true; - ifobj->pkt_stream = test->tx_pkt_stream_default; } else { ifobj->rx_on = true; ifobj->tx_on = false; - ifobj->pkt_stream = test->rx_pkt_stream_default; } memset(ifobj->umem, 0, sizeof(*ifobj->umem)); @@ -443,6 +439,15 @@ static void __test_spec_init(struct test_spec *test, struct ifobject *ifobj_tx, for (j = 0; j < MAX_SOCKETS; j++) { memset(&ifobj->xsk_arr[j], 0, sizeof(ifobj->xsk_arr[j])); ifobj->xsk_arr[j].rxqsize = XSK_RING_CONS__DEFAULT_NUM_DESCS; + if (i == 0) + ifobj->xsk_arr[j].pkt_stream = test->tx_pkt_stream_default; + else + ifobj->xsk_arr[j].pkt_stream = test->rx_pkt_stream_default; + + memcpy(ifobj->xsk_arr[j].src_mac, g_mac, ETH_ALEN); + memcpy(ifobj->xsk_arr[j].dst_mac, g_mac, ETH_ALEN); + ifobj->xsk_arr[j].src_mac[5] += ((j * 2) + 0); + ifobj->xsk_arr[j].dst_mac[5] += ((j * 2) + 1); } } @@ -526,8 +531,10 @@ static int test_spec_set_mtu(struct test_spec *test, int mtu) static void pkt_stream_reset(struct pkt_stream *pkt_stream) { - if (pkt_stream) + if (pkt_stream) { pkt_stream->current_pkt_nb = 0; + pkt_stream->nb_rx_pkts = 0; + } } static struct pkt *pkt_stream_get_next_tx_pkt(struct pkt_stream *pkt_stream) @@ -557,17 +564,17 @@ static void pkt_stream_delete(struct pkt_stream *pkt_stream) static void pkt_stream_restore_default(struct test_spec *test) { - struct pkt_stream *tx_pkt_stream = test->ifobj_tx->pkt_stream; - struct pkt_stream *rx_pkt_stream = test->ifobj_rx->pkt_stream; + struct pkt_stream *tx_pkt_stream = test->ifobj_tx->xsk->pkt_stream; + struct pkt_stream *rx_pkt_stream = test->ifobj_rx->xsk->pkt_stream; if (tx_pkt_stream != test->tx_pkt_stream_default) { - pkt_stream_delete(test->ifobj_tx->pkt_stream); - test->ifobj_tx->pkt_stream = test->tx_pkt_stream_default; + pkt_stream_delete(test->ifobj_tx->xsk->pkt_stream); + test->ifobj_tx->xsk->pkt_stream = test->tx_pkt_stream_default; } if (rx_pkt_stream != test->rx_pkt_stream_default) { - pkt_stream_delete(test->ifobj_rx->pkt_stream); - test->ifobj_rx->pkt_stream = test->rx_pkt_stream_default; + pkt_stream_delete(test->ifobj_rx->xsk->pkt_stream); + test->ifobj_rx->xsk->pkt_stream = test->rx_pkt_stream_default; } } @@ -627,14 +634,16 @@ static u32 pkt_nb_frags(u32 frame_size, struct pkt_stream *pkt_stream, struct pk return nb_frags; } -static void pkt_set(struct xsk_umem_info *umem, struct pkt *pkt, int offset, u32 len) +static void pkt_set(struct pkt_stream *pkt_stream, struct pkt *pkt, int offset, u32 len) { pkt->offset = offset; pkt->len = len; - if (len > MAX_ETH_JUMBO_SIZE) + if (len > MAX_ETH_JUMBO_SIZE) { pkt->valid = false; - else + } else { pkt->valid = true; + pkt_stream->nb_valid_entries++; + } } static u32 pkt_get_buffer_len(struct xsk_umem_info *umem, u32 len) @@ -642,7 +651,7 @@ static u32 pkt_get_buffer_len(struct xsk_umem_info *umem, u32 len) return ceil_u32(len, umem->frame_size) * umem->frame_size; } -static struct pkt_stream *pkt_stream_generate(struct xsk_umem_info *umem, u32 nb_pkts, u32 pkt_len) +static struct pkt_stream *__pkt_stream_generate(u32 nb_pkts, u32 pkt_len, u32 nb_start, u32 nb_off) { struct pkt_stream *pkt_stream; u32 i; @@ -656,41 +665,45 @@ static struct pkt_stream *pkt_stream_generate(struct xsk_umem_info *umem, u32 nb for (i = 0; i < nb_pkts; i++) { struct pkt *pkt = &pkt_stream->pkts[i]; - pkt_set(umem, pkt, 0, pkt_len); - pkt->pkt_nb = i; + pkt_set(pkt_stream, pkt, 0, pkt_len); + pkt->pkt_nb = nb_start + i * nb_off; } return pkt_stream; } -static struct pkt_stream *pkt_stream_clone(struct xsk_umem_info *umem, - struct pkt_stream *pkt_stream) +static struct pkt_stream *pkt_stream_generate(u32 nb_pkts, u32 pkt_len) { - return pkt_stream_generate(umem, pkt_stream->nb_pkts, pkt_stream->pkts[0].len); + return __pkt_stream_generate(nb_pkts, pkt_len, 0, 1); +} + +static struct pkt_stream *pkt_stream_clone(struct pkt_stream *pkt_stream) +{ + return pkt_stream_generate(pkt_stream->nb_pkts, pkt_stream->pkts[0].len); } static void pkt_stream_replace(struct test_spec *test, u32 nb_pkts, u32 pkt_len) { struct pkt_stream *pkt_stream; - pkt_stream = pkt_stream_generate(test->ifobj_tx->umem, nb_pkts, pkt_len); - test->ifobj_tx->pkt_stream = pkt_stream; - pkt_stream = pkt_stream_generate(test->ifobj_rx->umem, nb_pkts, pkt_len); - test->ifobj_rx->pkt_stream = pkt_stream; + pkt_stream = pkt_stream_generate(nb_pkts, pkt_len); + test->ifobj_tx->xsk->pkt_stream = pkt_stream; + pkt_stream = pkt_stream_generate(nb_pkts, pkt_len); + test->ifobj_rx->xsk->pkt_stream = pkt_stream; } static void __pkt_stream_replace_half(struct ifobject *ifobj, u32 pkt_len, int offset) { - struct xsk_umem_info *umem = ifobj->umem; struct pkt_stream *pkt_stream; u32 i; - pkt_stream = pkt_stream_clone(umem, ifobj->pkt_stream); - for (i = 1; i < ifobj->pkt_stream->nb_pkts; i += 2) - pkt_set(umem, &pkt_stream->pkts[i], offset, pkt_len); + pkt_stream = pkt_stream_clone(ifobj->xsk->pkt_stream); + for (i = 1; i < ifobj->xsk->pkt_stream->nb_pkts; i += 2) + pkt_set(pkt_stream, &pkt_stream->pkts[i], offset, pkt_len); - ifobj->pkt_stream = pkt_stream; + ifobj->xsk->pkt_stream = pkt_stream; + pkt_stream->nb_valid_entries /= 2; } static void pkt_stream_replace_half(struct test_spec *test, u32 pkt_len, int offset) @@ -701,15 +714,34 @@ static void pkt_stream_replace_half(struct test_spec *test, u32 pkt_len, int off static void pkt_stream_receive_half(struct test_spec *test) { - struct xsk_umem_info *umem = test->ifobj_rx->umem; - struct pkt_stream *pkt_stream = test->ifobj_tx->pkt_stream; + struct pkt_stream *pkt_stream = test->ifobj_tx->xsk->pkt_stream; u32 i; - test->ifobj_rx->pkt_stream = pkt_stream_generate(umem, pkt_stream->nb_pkts, - pkt_stream->pkts[0].len); - pkt_stream = test->ifobj_rx->pkt_stream; + test->ifobj_rx->xsk->pkt_stream = pkt_stream_generate(pkt_stream->nb_pkts, + pkt_stream->pkts[0].len); + pkt_stream = test->ifobj_rx->xsk->pkt_stream; for (i = 1; i < pkt_stream->nb_pkts; i += 2) pkt_stream->pkts[i].valid = false; + + pkt_stream->nb_valid_entries /= 2; +} + +static void pkt_stream_even_odd_sequence(struct test_spec *test) +{ + struct pkt_stream *pkt_stream; + u32 i; + + for (i = 0; i < test->nb_sockets; i++) { + pkt_stream = test->ifobj_tx->xsk_arr[i].pkt_stream; + pkt_stream = __pkt_stream_generate(pkt_stream->nb_pkts / 2, + pkt_stream->pkts[0].len, i, 2); + test->ifobj_tx->xsk_arr[i].pkt_stream = pkt_stream; + + pkt_stream = test->ifobj_rx->xsk_arr[i].pkt_stream; + pkt_stream = __pkt_stream_generate(pkt_stream->nb_pkts / 2, + pkt_stream->pkts[0].len, i, 2); + test->ifobj_rx->xsk_arr[i].pkt_stream = pkt_stream; + } } static u64 pkt_get_addr(struct pkt *pkt, struct xsk_umem_info *umem) @@ -724,16 +756,16 @@ static void pkt_stream_cancel(struct pkt_stream *pkt_stream) pkt_stream->current_pkt_nb--; } -static void pkt_generate(struct ifobject *ifobject, u64 addr, u32 len, u32 pkt_nb, - u32 bytes_written) +static void pkt_generate(struct xsk_socket_info *xsk, struct xsk_umem_info *umem, u64 addr, u32 len, + u32 pkt_nb, u32 bytes_written) { - void *data = xsk_umem__get_data(ifobject->umem->buffer, addr); + void *data = xsk_umem__get_data(umem->buffer, addr); if (len < MIN_PKT_SIZE) return; if (!bytes_written) { - gen_eth_hdr(ifobject, data); + gen_eth_hdr(xsk, data); len -= PKT_HDR_SIZE; data += PKT_HDR_SIZE; @@ -783,6 +815,10 @@ static struct pkt_stream *__pkt_stream_generate_custom(struct ifobject *ifobj, s if (pkt->valid && pkt->len > pkt_stream->max_pkt_len) pkt_stream->max_pkt_len = pkt->len; + + if (pkt->valid) + pkt_stream->nb_valid_entries++; + pkt_nb++; } @@ -796,10 +832,10 @@ static void pkt_stream_generate_custom(struct test_spec *test, struct pkt *pkts, struct pkt_stream *pkt_stream; pkt_stream = __pkt_stream_generate_custom(test->ifobj_tx, pkts, nb_pkts, true); - test->ifobj_tx->pkt_stream = pkt_stream; + test->ifobj_tx->xsk->pkt_stream = pkt_stream; pkt_stream = __pkt_stream_generate_custom(test->ifobj_rx, pkts, nb_pkts, false); - test->ifobj_rx->pkt_stream = pkt_stream; + test->ifobj_rx->xsk->pkt_stream = pkt_stream; } static void pkt_print_data(u32 *data, u32 cnt) @@ -1004,145 +1040,190 @@ static int complete_pkts(struct xsk_socket_info *xsk, int batch_size) return TEST_PASS; } -static int receive_pkts(struct test_spec *test, struct pollfd *fds) +static int __receive_pkts(struct test_spec *test, struct xsk_socket_info *xsk) { - struct timeval tv_end, tv_now, tv_timeout = {THREAD_TMOUT, 0}; - struct pkt_stream *pkt_stream = test->ifobj_rx->pkt_stream; - struct xsk_socket_info *xsk = test->ifobj_rx->xsk; + u32 frags_processed = 0, nb_frags = 0, pkt_len = 0; u32 idx_rx = 0, idx_fq = 0, rcvd, pkts_sent = 0; + struct pkt_stream *pkt_stream = xsk->pkt_stream; struct ifobject *ifobj = test->ifobj_rx; struct xsk_umem_info *umem = xsk->umem; + struct pollfd fds = { }; struct pkt *pkt; + u64 first_addr = 0; int ret; - ret = gettimeofday(&tv_now, NULL); - if (ret) - exit_with_error(errno); - timeradd(&tv_now, &tv_timeout, &tv_end); - - pkt = pkt_stream_get_next_rx_pkt(pkt_stream, &pkts_sent); - while (pkt) { - u32 frags_processed = 0, nb_frags = 0, pkt_len = 0; - u64 first_addr; + fds.fd = xsk_socket__fd(xsk->xsk); + fds.events = POLLIN; - ret = gettimeofday(&tv_now, NULL); - if (ret) - exit_with_error(errno); - if (timercmp(&tv_now, &tv_end, >)) { - ksft_print_msg("ERROR: [%s] Receive loop timed out\n", __func__); - return TEST_FAILURE; - } + ret = kick_rx(xsk); + if (ret) + return TEST_FAILURE; - ret = kick_rx(xsk); - if (ret) + if (ifobj->use_poll) { + ret = poll(&fds, 1, POLL_TMOUT); + if (ret < 0) return TEST_FAILURE; - if (ifobj->use_poll) { - ret = poll(fds, 1, POLL_TMOUT); - if (ret < 0) - return TEST_FAILURE; - - if (!ret) { - if (!is_umem_valid(test->ifobj_tx)) - return TEST_PASS; - - ksft_print_msg("ERROR: [%s] Poll timed out\n", __func__); - return TEST_FAILURE; - } + if (!ret) { + if (!is_umem_valid(test->ifobj_tx)) + return TEST_PASS; - if (!(fds->revents & POLLIN)) - continue; + ksft_print_msg("ERROR: [%s] Poll timed out\n", __func__); + return TEST_CONTINUE; } - rcvd = xsk_ring_cons__peek(&xsk->rx, BATCH_SIZE, &idx_rx); - if (!rcvd) - continue; + if (!(fds.revents & POLLIN)) + return TEST_CONTINUE; + } - if (ifobj->use_fill_ring) { - ret = xsk_ring_prod__reserve(&umem->fq, rcvd, &idx_fq); - while (ret != rcvd) { - if (xsk_ring_prod__needs_wakeup(&umem->fq)) { - ret = poll(fds, 1, POLL_TMOUT); - if (ret < 0) - return TEST_FAILURE; - } - ret = xsk_ring_prod__reserve(&umem->fq, rcvd, &idx_fq); + rcvd = xsk_ring_cons__peek(&xsk->rx, BATCH_SIZE, &idx_rx); + if (!rcvd) + return TEST_CONTINUE; + + if (ifobj->use_fill_ring) { + ret = xsk_ring_prod__reserve(&umem->fq, rcvd, &idx_fq); + while (ret != rcvd) { + if (xsk_ring_prod__needs_wakeup(&umem->fq)) { + ret = poll(&fds, 1, POLL_TMOUT); + if (ret < 0) + return TEST_FAILURE; } + ret = xsk_ring_prod__reserve(&umem->fq, rcvd, &idx_fq); } + } - while (frags_processed < rcvd) { - const struct xdp_desc *desc = xsk_ring_cons__rx_desc(&xsk->rx, idx_rx++); - u64 addr = desc->addr, orig; + while (frags_processed < rcvd) { + const struct xdp_desc *desc = xsk_ring_cons__rx_desc(&xsk->rx, idx_rx++); + u64 addr = desc->addr, orig; - orig = xsk_umem__extract_addr(addr); - addr = xsk_umem__add_offset_to_addr(addr); + orig = xsk_umem__extract_addr(addr); + addr = xsk_umem__add_offset_to_addr(addr); + if (!nb_frags) { + pkt = pkt_stream_get_next_rx_pkt(pkt_stream, &pkts_sent); if (!pkt) { ksft_print_msg("[%s] received too many packets addr: %lx len %u\n", __func__, addr, desc->len); return TEST_FAILURE; } + } - print_verbose("Rx: addr: %lx len: %u options: %u pkt_nb: %u valid: %u\n", - addr, desc->len, desc->options, pkt->pkt_nb, pkt->valid); + print_verbose("Rx: addr: %lx len: %u options: %u pkt_nb: %u valid: %u\n", + addr, desc->len, desc->options, pkt->pkt_nb, pkt->valid); - if (!is_frag_valid(umem, addr, desc->len, pkt->pkt_nb, pkt_len) || - !is_offset_correct(umem, pkt, addr) || - (ifobj->use_metadata && !is_metadata_correct(pkt, umem->buffer, addr))) - return TEST_FAILURE; + if (!is_frag_valid(umem, addr, desc->len, pkt->pkt_nb, pkt_len) || + !is_offset_correct(umem, pkt, addr) || (ifobj->use_metadata && + !is_metadata_correct(pkt, umem->buffer, addr))) + return TEST_FAILURE; - if (!nb_frags++) - first_addr = addr; - frags_processed++; - pkt_len += desc->len; - if (ifobj->use_fill_ring) - *xsk_ring_prod__fill_addr(&umem->fq, idx_fq++) = orig; + if (!nb_frags++) + first_addr = addr; + frags_processed++; + pkt_len += desc->len; + if (ifobj->use_fill_ring) + *xsk_ring_prod__fill_addr(&umem->fq, idx_fq++) = orig; - if (pkt_continues(desc->options)) - continue; + if (pkt_continues(desc->options)) + continue; - /* The complete packet has been received */ - if (!is_pkt_valid(pkt, umem->buffer, first_addr, pkt_len) || - !is_offset_correct(umem, pkt, addr)) - return TEST_FAILURE; + /* The complete packet has been received */ + if (!is_pkt_valid(pkt, umem->buffer, first_addr, pkt_len) || + !is_offset_correct(umem, pkt, addr)) + return TEST_FAILURE; - pkt = pkt_stream_get_next_rx_pkt(pkt_stream, &pkts_sent); - nb_frags = 0; - pkt_len = 0; - } + pkt_stream->nb_rx_pkts++; + nb_frags = 0; + pkt_len = 0; + } - if (nb_frags) { - /* In the middle of a packet. Start over from beginning of packet. */ - idx_rx -= nb_frags; - xsk_ring_cons__cancel(&xsk->rx, nb_frags); - if (ifobj->use_fill_ring) { - idx_fq -= nb_frags; - xsk_ring_prod__cancel(&umem->fq, nb_frags); - } - frags_processed -= nb_frags; + if (nb_frags) { + /* In the middle of a packet. Start over from beginning of packet. */ + idx_rx -= nb_frags; + xsk_ring_cons__cancel(&xsk->rx, nb_frags); + if (ifobj->use_fill_ring) { + idx_fq -= nb_frags; + xsk_ring_prod__cancel(&umem->fq, nb_frags); } + frags_processed -= nb_frags; + } - if (ifobj->use_fill_ring) - xsk_ring_prod__submit(&umem->fq, frags_processed); - if (ifobj->release_rx) - xsk_ring_cons__release(&xsk->rx, frags_processed); + if (ifobj->use_fill_ring) + xsk_ring_prod__submit(&umem->fq, frags_processed); + if (ifobj->release_rx) + xsk_ring_cons__release(&xsk->rx, frags_processed); + + pthread_mutex_lock(&pacing_mutex); + pkts_in_flight -= pkts_sent; + pthread_mutex_unlock(&pacing_mutex); + pkts_sent = 0; - pthread_mutex_lock(&pacing_mutex); - pkts_in_flight -= pkts_sent; - pthread_mutex_unlock(&pacing_mutex); - pkts_sent = 0; +return TEST_CONTINUE; +} + +bool all_packets_received(struct test_spec *test, struct xsk_socket_info *xsk, u32 sock_num, + unsigned long *bitmap) +{ + struct pkt_stream *pkt_stream = xsk->pkt_stream; + + if (!pkt_stream) { + __set_bit(sock_num, bitmap); + return false; + } + + if (pkt_stream->nb_rx_pkts == pkt_stream->nb_valid_entries) { + __set_bit(sock_num, bitmap); + if (bitmap_full(bitmap, test->nb_sockets)) + return true; + } + + return false; +} + +static int receive_pkts(struct test_spec *test) +{ + struct timeval tv_end, tv_now, tv_timeout = {THREAD_TMOUT, 0}; + DECLARE_BITMAP(bitmap, test->nb_sockets); + struct xsk_socket_info *xsk; + u32 sock_num = 0; + int res, ret; + + ret = gettimeofday(&tv_now, NULL); + if (ret) + exit_with_error(errno); + + timeradd(&tv_now, &tv_timeout, &tv_end); + + while (1) { + xsk = &test->ifobj_rx->xsk_arr[sock_num]; + + if ((all_packets_received(test, xsk, sock_num, bitmap))) + break; + + res = __receive_pkts(test, xsk); + if (!(res == TEST_PASS || res == TEST_CONTINUE)) + return res; + + ret = gettimeofday(&tv_now, NULL); + if (ret) + exit_with_error(errno); + + if (timercmp(&tv_now, &tv_end, >)) { + ksft_print_msg("ERROR: [%s] Receive loop timed out\n", __func__); + return TEST_FAILURE; + } + sock_num = (sock_num + 1) % test->nb_sockets; } return TEST_PASS; } -static int __send_pkts(struct ifobject *ifobject, struct pollfd *fds, bool timeout) +static int __send_pkts(struct ifobject *ifobject, struct xsk_socket_info *xsk, bool timeout) { u32 i, idx = 0, valid_pkts = 0, valid_frags = 0, buffer_len; - struct pkt_stream *pkt_stream = ifobject->pkt_stream; - struct xsk_socket_info *xsk = ifobject->xsk; + struct pkt_stream *pkt_stream = xsk->pkt_stream; struct xsk_umem_info *umem = ifobject->umem; bool use_poll = ifobject->use_poll; + struct pollfd fds = { }; int ret; buffer_len = pkt_get_buffer_len(umem, pkt_stream->max_pkt_len); @@ -1154,9 +1235,12 @@ static int __send_pkts(struct ifobject *ifobject, struct pollfd *fds, bool timeo return TEST_CONTINUE; } + fds.fd = xsk_socket__fd(xsk->xsk); + fds.events = POLLOUT; + while (xsk_ring_prod__reserve(&xsk->tx, BATCH_SIZE, &idx) < BATCH_SIZE) { if (use_poll) { - ret = poll(fds, 1, POLL_TMOUT); + ret = poll(&fds, 1, POLL_TMOUT); if (timeout) { if (ret < 0) { ksft_print_msg("ERROR: [%s] Poll error %d\n", @@ -1207,7 +1291,7 @@ static int __send_pkts(struct ifobject *ifobject, struct pollfd *fds, bool timeo tx_desc->options = 0; } if (pkt->valid) - pkt_generate(ifobject, tx_desc->addr, tx_desc->len, pkt->pkt_nb, + pkt_generate(xsk, umem, tx_desc->addr, tx_desc->len, pkt->pkt_nb, bytes_written); bytes_written += tx_desc->len; @@ -1235,7 +1319,7 @@ static int __send_pkts(struct ifobject *ifobject, struct pollfd *fds, bool timeo xsk->outstanding_tx += valid_frags; if (use_poll) { - ret = poll(fds, 1, POLL_TMOUT); + ret = poll(&fds, 1, POLL_TMOUT); if (ret <= 0) { if (ret == 0 && timeout) return TEST_PASS; @@ -1281,27 +1365,43 @@ static int wait_for_tx_completion(struct xsk_socket_info *xsk) return TEST_PASS; } +bool all_packets_sent(struct test_spec *test, unsigned long *bitmap) +{ + return bitmap_full(bitmap, test->nb_sockets); +} + static int send_pkts(struct test_spec *test, struct ifobject *ifobject) { - struct pkt_stream *pkt_stream = ifobject->pkt_stream; bool timeout = !is_umem_valid(test->ifobj_rx); - struct pollfd fds = { }; - u32 ret; + DECLARE_BITMAP(bitmap, test->nb_sockets); + u32 i, ret; - fds.fd = xsk_socket__fd(ifobject->xsk->xsk); - fds.events = POLLOUT; + while (!(all_packets_sent(test, bitmap))) { + for (i = 0; i < test->nb_sockets; i++) { + struct pkt_stream *pkt_stream; - while (pkt_stream->current_pkt_nb < pkt_stream->nb_pkts) { - ret = __send_pkts(ifobject, &fds, timeout); - if (ret == TEST_CONTINUE && !test->fail) - continue; - if ((ret || test->fail) && !timeout) - return TEST_FAILURE; - if (ret == TEST_PASS && timeout) - return ret; + pkt_stream = ifobject->xsk_arr[i].pkt_stream; + if (!pkt_stream || pkt_stream->current_pkt_nb >= pkt_stream->nb_pkts) { + __set_bit(i, bitmap); + continue; + } + ret = __send_pkts(ifobject, &ifobject->xsk_arr[i], timeout); + if (ret == TEST_CONTINUE && !test->fail) + continue; + + if ((ret || test->fail) && !timeout) + return TEST_FAILURE; + + if (ret == TEST_PASS && timeout) + return ret; + + ret = wait_for_tx_completion(&ifobject->xsk_arr[i]); + if (ret) + return TEST_FAILURE; + } } - return wait_for_tx_completion(ifobject->xsk); + return TEST_PASS; } static int get_xsk_stats(struct xsk_socket *xsk, struct xdp_statistics *stats) @@ -1347,8 +1447,8 @@ static int validate_rx_dropped(struct ifobject *ifobject) * packet being invalid). Since the last packet may or may not have * been dropped already, both outcomes must be allowed. */ - if (stats.rx_dropped == ifobject->pkt_stream->nb_pkts / 2 || - stats.rx_dropped == ifobject->pkt_stream->nb_pkts / 2 - 1) + if (stats.rx_dropped == ifobject->xsk->pkt_stream->nb_pkts / 2 || + stats.rx_dropped == ifobject->xsk->pkt_stream->nb_pkts / 2 - 1) return TEST_PASS; return TEST_FAILURE; @@ -1412,9 +1512,10 @@ static int validate_tx_invalid_descs(struct ifobject *ifobject) return TEST_FAILURE; } - if (stats.tx_invalid_descs != ifobject->pkt_stream->nb_pkts / 2) { + if (stats.tx_invalid_descs != ifobject->xsk->pkt_stream->nb_pkts / 2) { ksft_print_msg("[%s] tx_invalid_descs incorrect. Got [%u] expected [%u]\n", - __func__, stats.tx_invalid_descs, ifobject->pkt_stream->nb_pkts); + __func__, stats.tx_invalid_descs, + ifobject->xsk->pkt_stream->nb_pkts); return TEST_FAILURE; } @@ -1506,6 +1607,7 @@ static void thread_common_ops(struct test_spec *test, struct ifobject *ifobject) LIBBPF_OPTS(bpf_xdp_query_opts, opts); void *bufs; int ret; + u32 i; if (ifobject->umem->unaligned_mode) mmap_flags |= MAP_HUGETLB | MAP_HUGE_2MB; @@ -1528,11 +1630,14 @@ static void thread_common_ops(struct test_spec *test, struct ifobject *ifobject) if (!ifobject->rx_on) return; - xsk_populate_fill_ring(ifobject->umem, ifobject->pkt_stream, ifobject->use_fill_ring); + xsk_populate_fill_ring(ifobject->umem, ifobject->xsk->pkt_stream, ifobject->use_fill_ring); - ret = xsk_update_xskmap(ifobject->xskmap, ifobject->xsk->xsk); - if (ret) - exit_with_error(errno); + for (i = 0; i < test->nb_sockets; i++) { + ifobject->xsk = &ifobject->xsk_arr[i]; + ret = xsk_update_xskmap(ifobject->xskmap, ifobject->xsk->xsk, i); + if (ret) + exit_with_error(errno); + } } static void *worker_testapp_validate_tx(void *arg) @@ -1562,14 +1667,13 @@ static void *worker_testapp_validate_rx(void *arg) { struct test_spec *test = (struct test_spec *)arg; struct ifobject *ifobject = test->ifobj_rx; - struct pollfd fds = { }; int err; if (test->current_step == 1) { thread_common_ops(test, ifobject); } else { xsk_clear_xskmap(ifobject->xskmap); - err = xsk_update_xskmap(ifobject->xskmap, ifobject->xsk->xsk); + err = xsk_update_xskmap(ifobject->xskmap, ifobject->xsk->xsk, 0); if (err) { ksft_print_msg("Error: Failed to update xskmap, error %s\n", strerror(-err)); @@ -1577,12 +1681,9 @@ static void *worker_testapp_validate_rx(void *arg) } } - fds.fd = xsk_socket__fd(ifobject->xsk->xsk); - fds.events = POLLIN; - pthread_barrier_wait(&barr); - err = receive_pkts(test, &fds); + err = receive_pkts(test); if (!err && ifobject->validation_func) err = ifobject->validation_func(ifobject); @@ -1691,11 +1792,11 @@ static int __testapp_validate_traffic(struct test_spec *test, struct ifobject *i if (ifobj2) { if (pthread_barrier_init(&barr, NULL, 2)) exit_with_error(errno); - pkt_stream_reset(ifobj2->pkt_stream); + pkt_stream_reset(ifobj2->xsk->pkt_stream); } test->current_step++; - pkt_stream_reset(ifobj1->pkt_stream); + pkt_stream_reset(ifobj1->xsk->pkt_stream); pkts_in_flight = 0; signal(SIGUSR1, handler); @@ -1719,9 +1820,15 @@ static int __testapp_validate_traffic(struct test_spec *test, struct ifobject *i pthread_join(t0, NULL); if (test->total_steps == test->current_step || test->fail) { + u32 i; + if (ifobj2) - xsk_socket__delete(ifobj2->xsk->xsk); - xsk_socket__delete(ifobj1->xsk->xsk); + for (i = 0; i < test->nb_sockets; i++) + xsk_socket__delete(ifobj2->xsk_arr[i].xsk); + + for (i = 0; i < test->nb_sockets; i++) + xsk_socket__delete(ifobj1->xsk_arr[i].xsk); + testapp_clean_xsk_umem(ifobj1); if (ifobj2 && !ifobj2->shared_umem) testapp_clean_xsk_umem(ifobj2); @@ -1793,16 +1900,18 @@ static int testapp_bidirectional(struct test_spec *test) return res; } -static int swap_xsk_resources(struct ifobject *ifobj_tx, struct ifobject *ifobj_rx) +static int swap_xsk_resources(struct test_spec *test) { int ret; - xsk_socket__delete(ifobj_tx->xsk->xsk); - xsk_socket__delete(ifobj_rx->xsk->xsk); - ifobj_tx->xsk = &ifobj_tx->xsk_arr[1]; - ifobj_rx->xsk = &ifobj_rx->xsk_arr[1]; + test->ifobj_tx->xsk_arr[0].pkt_stream = NULL; + test->ifobj_rx->xsk_arr[0].pkt_stream = NULL; + test->ifobj_tx->xsk_arr[1].pkt_stream = test->tx_pkt_stream_default; + test->ifobj_rx->xsk_arr[1].pkt_stream = test->rx_pkt_stream_default; + test->ifobj_tx->xsk = &test->ifobj_tx->xsk_arr[1]; + test->ifobj_rx->xsk = &test->ifobj_rx->xsk_arr[1]; - ret = xsk_update_xskmap(ifobj_rx->xskmap, ifobj_rx->xsk->xsk); + ret = xsk_update_xskmap(test->ifobj_rx->xskmap, test->ifobj_rx->xsk->xsk, 0); if (ret) return TEST_FAILURE; @@ -1816,7 +1925,7 @@ static int testapp_xdp_prog_cleanup(struct test_spec *test) if (testapp_validate_traffic(test)) return TEST_FAILURE; - if (swap_xsk_resources(test->ifobj_tx, test->ifobj_rx)) + if (swap_xsk_resources(test)) return TEST_FAILURE; return testapp_validate_traffic(test); } @@ -1852,8 +1961,7 @@ static int testapp_stats_tx_invalid_descs(struct test_spec *test) static int testapp_stats_rx_full(struct test_spec *test) { pkt_stream_replace(test, DEFAULT_UMEM_BUFFERS + DEFAULT_UMEM_BUFFERS / 2, MIN_PKT_SIZE); - test->ifobj_rx->pkt_stream = pkt_stream_generate(test->ifobj_rx->umem, - DEFAULT_UMEM_BUFFERS, MIN_PKT_SIZE); + test->ifobj_rx->xsk->pkt_stream = pkt_stream_generate(DEFAULT_UMEM_BUFFERS, MIN_PKT_SIZE); test->ifobj_rx->xsk->rxqsize = DEFAULT_UMEM_BUFFERS; test->ifobj_rx->release_rx = false; @@ -1864,8 +1972,7 @@ static int testapp_stats_rx_full(struct test_spec *test) static int testapp_stats_fill_empty(struct test_spec *test) { pkt_stream_replace(test, DEFAULT_UMEM_BUFFERS + DEFAULT_UMEM_BUFFERS / 2, MIN_PKT_SIZE); - test->ifobj_rx->pkt_stream = pkt_stream_generate(test->ifobj_rx->umem, - DEFAULT_UMEM_BUFFERS, MIN_PKT_SIZE); + test->ifobj_rx->xsk->pkt_stream = pkt_stream_generate(DEFAULT_UMEM_BUFFERS, MIN_PKT_SIZE); test->ifobj_rx->use_fill_ring = false; test->ifobj_rx->validation_func = validate_fill_empty; @@ -2031,6 +2138,23 @@ static int testapp_xdp_metadata_copy(struct test_spec *test) return testapp_validate_traffic(test); } +static int testapp_xdp_shared_umem(struct test_spec *test) +{ + struct xsk_xdp_progs *skel_rx = test->ifobj_rx->xdp_progs; + struct xsk_xdp_progs *skel_tx = test->ifobj_tx->xdp_progs; + + test->total_steps = 1; + test->nb_sockets = 2; + + test_spec_set_xdp_prog(test, skel_rx->progs.xsk_xdp_shared_umem, + skel_tx->progs.xsk_xdp_shared_umem, + skel_rx->maps.xsk, skel_tx->maps.xsk); + + pkt_stream_even_odd_sequence(test); + + return testapp_validate_traffic(test); +} + static int testapp_poll_txq_tmout(struct test_spec *test) { test->ifobj_tx->use_poll = true; @@ -2117,15 +2241,11 @@ static bool hugepages_present(void) return true; } -static void init_iface(struct ifobject *ifobj, const char *dst_mac, const char *src_mac, - thread_func_t func_ptr) +static void init_iface(struct ifobject *ifobj, thread_func_t func_ptr) { LIBBPF_OPTS(bpf_xdp_query_opts, query_opts); int err; - memcpy(ifobj->dst_mac, dst_mac, ETH_ALEN); - memcpy(ifobj->src_mac, src_mac, ETH_ALEN); - ifobj->func_ptr = func_ptr; err = xsk_load_xdp_programs(ifobj); @@ -2336,6 +2456,7 @@ static const struct test_spec tests[] = { {.name = "STAT_FILL_EMPTY", .test_func = testapp_stats_fill_empty}, {.name = "XDP_PROG_CLEANUP", .test_func = testapp_xdp_prog_cleanup}, {.name = "XDP_DROP_HALF", .test_func = testapp_xdp_drop}, + {.name = "XDP_SHARED_UMEM", .test_func = testapp_xdp_shared_umem}, {.name = "XDP_METADATA_COPY", .test_func = testapp_xdp_metadata}, {.name = "XDP_METADATA_COPY_MULTI_BUFF", .test_func = testapp_xdp_metadata_mb}, {.name = "SEND_RECEIVE_9K_PACKETS", .test_func = testapp_send_receive_mb}, @@ -2401,12 +2522,12 @@ int main(int argc, char **argv) modes++; } - init_iface(ifobj_rx, MAC1, MAC2, worker_testapp_validate_rx); - init_iface(ifobj_tx, MAC2, MAC1, worker_testapp_validate_tx); + init_iface(ifobj_rx, worker_testapp_validate_rx); + init_iface(ifobj_tx, worker_testapp_validate_tx); test_spec_init(&test, ifobj_tx, ifobj_rx, 0, &tests[0]); - tx_pkt_stream_default = pkt_stream_generate(ifobj_tx->umem, DEFAULT_PKT_CNT, MIN_PKT_SIZE); - rx_pkt_stream_default = pkt_stream_generate(ifobj_rx->umem, DEFAULT_PKT_CNT, MIN_PKT_SIZE); + tx_pkt_stream_default = pkt_stream_generate(DEFAULT_PKT_CNT, MIN_PKT_SIZE); + rx_pkt_stream_default = pkt_stream_generate(DEFAULT_PKT_CNT, MIN_PKT_SIZE); if (!tx_pkt_stream_default || !rx_pkt_stream_default) exit_with_error(ENOMEM); test.tx_pkt_stream_default = tx_pkt_stream_default; diff --git a/tools/testing/selftests/bpf/xskxceiver.h b/tools/testing/selftests/bpf/xskxceiver.h index 8015aeea839d..f174df2d693f 100644 --- a/tools/testing/selftests/bpf/xskxceiver.h +++ b/tools/testing/selftests/bpf/xskxceiver.h @@ -8,6 +8,7 @@ #include <limits.h> #include "xsk_xdp_progs.skel.h" +#include "xsk_xdp_common.h" #ifndef SOL_XDP #define SOL_XDP 283 @@ -35,7 +36,6 @@ #define TEST_SKIP 2 #define MAX_INTERFACES 2 #define MAX_INTERFACE_NAME_CHARS 16 -#define MAX_SOCKETS 2 #define MAX_TEST_NAME_SIZE 48 #define MAX_TEARDOWN_ITER 10 #define PKT_HDR_SIZE (sizeof(struct ethhdr) + 2) /* Just to align the data in the packet */ @@ -59,6 +59,7 @@ #define HUGEPAGE_SIZE (2 * 1024 * 1024) #define PKT_DUMP_NB_TO_PRINT 16 #define RUN_ALL_TESTS UINT_MAX +#define NUM_MAC_ADDRESSES 4 #define print_verbose(x...) do { if (opt_verbose) ksft_print_msg(x); } while (0) @@ -87,8 +88,11 @@ struct xsk_socket_info { struct xsk_ring_prod tx; struct xsk_umem_info *umem; struct xsk_socket *xsk; + struct pkt_stream *pkt_stream; u32 outstanding_tx; u32 rxqsize; + u8 dst_mac[ETH_ALEN]; + u8 src_mac[ETH_ALEN]; }; struct pkt { @@ -104,6 +108,8 @@ struct pkt_stream { u32 current_pkt_nb; struct pkt *pkts; u32 max_pkt_len; + u32 nb_rx_pkts; + u32 nb_valid_entries; bool verbatim; }; @@ -120,7 +126,6 @@ struct ifobject { struct xsk_umem_info *umem; thread_func_t func_ptr; validation_func_t validation_func; - struct pkt_stream *pkt_stream; struct xsk_xdp_progs *xdp_progs; struct bpf_map *xskmap; struct bpf_program *xdp_prog; @@ -140,8 +145,6 @@ struct ifobject { bool unaligned_supp; bool multi_buff_supp; bool multi_buff_zc_supp; - u8 dst_mac[ETH_ALEN]; - u8 src_mac[ETH_ALEN]; }; struct test_spec { @@ -168,4 +171,6 @@ pthread_mutex_t pacing_mutex = PTHREAD_MUTEX_INITIALIZER; int pkts_in_flight; +static const u8 g_mac[ETH_ALEN] = {0x55, 0x44, 0x33, 0x22, 0x11, 0x00}; + #endif /* XSKXCEIVER_H_ */ diff --git a/tools/testing/selftests/drivers/net/netdevsim/devlink.sh b/tools/testing/selftests/drivers/net/netdevsim/devlink.sh index 7f7d20f22207..46e20b13473c 100755 --- a/tools/testing/selftests/drivers/net/netdevsim/devlink.sh +++ b/tools/testing/selftests/drivers/net/netdevsim/devlink.sh @@ -31,36 +31,43 @@ devlink_wait() fw_flash_test() { + DUMMYFILE=$(find /lib/firmware -maxdepth 1 -type f -printf '%f\n' |head -1) RET=0 - devlink dev flash $DL_HANDLE file dummy + if [ -z "$DUMMYFILE" ] + then + echo "SKIP: unable to find suitable dummy firmware file" + return + fi + + devlink dev flash $DL_HANDLE file $DUMMYFILE check_err $? "Failed to flash with status updates on" - devlink dev flash $DL_HANDLE file dummy component fw.mgmt + devlink dev flash $DL_HANDLE file $DUMMYFILE component fw.mgmt check_err $? "Failed to flash with component attribute" - devlink dev flash $DL_HANDLE file dummy overwrite settings + devlink dev flash $DL_HANDLE file $DUMMYFILE overwrite settings check_fail $? "Flash with overwrite settings should be rejected" echo "1"> $DEBUGFS_DIR/fw_update_overwrite_mask check_err $? "Failed to change allowed overwrite mask" - devlink dev flash $DL_HANDLE file dummy overwrite settings + devlink dev flash $DL_HANDLE file $DUMMYFILE overwrite settings check_err $? "Failed to flash with settings overwrite enabled" - devlink dev flash $DL_HANDLE file dummy overwrite identifiers + devlink dev flash $DL_HANDLE file $DUMMYFILE overwrite identifiers check_fail $? "Flash with overwrite settings should be identifiers" echo "3"> $DEBUGFS_DIR/fw_update_overwrite_mask check_err $? "Failed to change allowed overwrite mask" - devlink dev flash $DL_HANDLE file dummy overwrite identifiers overwrite settings + devlink dev flash $DL_HANDLE file $DUMMYFILE overwrite identifiers overwrite settings check_err $? "Failed to flash with settings and identifiers overwrite enabled" echo "n"> $DEBUGFS_DIR/fw_update_status check_err $? "Failed to disable status updates" - devlink dev flash $DL_HANDLE file dummy + devlink dev flash $DL_HANDLE file $DUMMYFILE check_err $? "Failed to flash with status updates off" log_test "fw flash test" diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 8b017070960d..61939a695f95 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -89,6 +89,7 @@ TEST_PROGS += test_vxlan_mdb.sh TEST_PROGS += test_bridge_neigh_suppress.sh TEST_PROGS += test_vxlan_nolocalbypass.sh TEST_PROGS += test_bridge_backup_port.sh +TEST_PROGS += fdb_flush.sh TEST_FILES := settings diff --git a/tools/testing/selftests/net/af_unix/scm_pidfd.c b/tools/testing/selftests/net/af_unix/scm_pidfd.c index a86222143d79..7e534594167e 100644 --- a/tools/testing/selftests/net/af_unix/scm_pidfd.c +++ b/tools/testing/selftests/net/af_unix/scm_pidfd.c @@ -294,7 +294,6 @@ static void fill_sockaddr(struct sock_addr *addr, bool abstract) static void client(FIXTURE_DATA(scm_pidfd) *self, const FIXTURE_VARIANT(scm_pidfd) *variant) { - int err; int cfd; socklen_t len; struct ucred peer_cred; diff --git a/tools/testing/selftests/net/af_unix/test_unix_oob.c b/tools/testing/selftests/net/af_unix/test_unix_oob.c index 532459a15067..a7c51889acd5 100644 --- a/tools/testing/selftests/net/af_unix/test_unix_oob.c +++ b/tools/testing/selftests/net/af_unix/test_unix_oob.c @@ -180,9 +180,7 @@ main(int argc, char **argv) char buf[1024]; int on = 0; char oob; - int flags; int atmark; - char *tmp_file; lfd = socket(AF_UNIX, SOCK_STREAM, 0); memset(&consumer_addr, 0, sizeof(consumer_addr)); diff --git a/tools/testing/selftests/net/fdb_flush.sh b/tools/testing/selftests/net/fdb_flush.sh new file mode 100755 index 000000000000..90e7a29e0476 --- /dev/null +++ b/tools/testing/selftests/net/fdb_flush.sh @@ -0,0 +1,812 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# This test is for checking functionality of flushing FDB entries. +# Check that flush works as expected with all the supported arguments and verify +# some combinations of arguments. + +FLUSH_BY_STATE_TESTS=" + vxlan_test_flush_by_permanent + vxlan_test_flush_by_nopermanent + vxlan_test_flush_by_static + vxlan_test_flush_by_nostatic + vxlan_test_flush_by_dynamic + vxlan_test_flush_by_nodynamic +" + +FLUSH_BY_FLAG_TESTS=" + vxlan_test_flush_by_extern_learn + vxlan_test_flush_by_noextern_learn + vxlan_test_flush_by_router + vxlan_test_flush_by_norouter +" + +TESTS=" + vxlan_test_flush_by_dev + vxlan_test_flush_by_vni + vxlan_test_flush_by_src_vni + vxlan_test_flush_by_port + vxlan_test_flush_by_dst_ip + vxlan_test_flush_by_nhid + $FLUSH_BY_STATE_TESTS + $FLUSH_BY_FLAG_TESTS + vxlan_test_flush_by_several_args + vxlan_test_flush_by_remote_attributes + bridge_test_flush_by_dev + bridge_test_flush_by_vlan + bridge_vxlan_test_flush +" + +: ${VERBOSE:=0} +: ${PAUSE_ON_FAIL:=no} +: ${PAUSE:=no} +: ${VXPORT:=4789} + +run_cmd() +{ + local cmd="$1" + local out + local rc + local stderr="2>/dev/null" + + if [ "$VERBOSE" = "1" ]; then + printf "COMMAND: $cmd\n" + stderr= + fi + + out=$(eval $cmd $stderr) + rc=$? + if [ "$VERBOSE" = "1" -a -n "$out" ]; then + echo " $out" + fi + + return $rc +} + +log_test() +{ + local rc=$1 + local expected=$2 + local msg="$3" + local nsuccess + local nfail + local ret + + if [ ${rc} -eq ${expected} ]; then + printf "TEST: %-60s [ OK ]\n" "${msg}" + nsuccess=$((nsuccess+1)) + else + ret=1 + nfail=$((nfail+1)) + printf "TEST: %-60s [FAIL]\n" "${msg}" + if [ "$VERBOSE" = "1" ]; then + echo " rc=$rc, expected $expected" + fi + + if [ "${PAUSE_ON_FAIL}" = "yes" ]; then + echo + echo "hit enter to continue, 'q' to quit" + read a + [ "$a" = "q" ] && exit 1 + fi + fi + + if [ "${PAUSE}" = "yes" ]; then + echo + echo "hit enter to continue, 'q' to quit" + read a + [ "$a" = "q" ] && exit 1 + fi + + [ "$VERBOSE" = "1" ] && echo +} + +MAC_POOL_1=" + de:ad:be:ef:13:10 + de:ad:be:ef:13:11 + de:ad:be:ef:13:12 + de:ad:be:ef:13:13 + de:ad:be:ef:13:14 +" +mac_pool_1_len=$(echo "$MAC_POOL_1" | grep -c .) + +MAC_POOL_2=" + ca:fe:be:ef:13:10 + ca:fe:be:ef:13:11 + ca:fe:be:ef:13:12 + ca:fe:be:ef:13:13 + ca:fe:be:ef:13:14 +" +mac_pool_2_len=$(echo "$MAC_POOL_2" | grep -c .) + +fdb_add_mac_pool_1() +{ + local dev=$1; shift + local args="$@" + + for mac in $MAC_POOL_1 + do + $BRIDGE fdb add $mac dev $dev $args + done +} + +fdb_add_mac_pool_2() +{ + local dev=$1; shift + local args="$@" + + for mac in $MAC_POOL_2 + do + $BRIDGE fdb add $mac dev $dev $args + done +} + +fdb_check_n_entries_by_dev_filter() +{ + local dev=$1; shift + local exp_entries=$1; shift + local filter="$@" + + local entries=$($BRIDGE fdb show dev $dev | grep "$filter" | wc -l) + + [[ $entries -eq $exp_entries ]] + rc=$? + + log_test $rc 0 "$dev: Expected $exp_entries FDB entries, got $entries" + return $rc +} + +vxlan_test_flush_by_dev() +{ + local vni=3000 + local dst_ip=192.0.2.1 + + fdb_add_mac_pool_1 vx10 vni $vni dst $dst_ip + fdb_add_mac_pool_2 vx20 vni $vni dst $dst_ip + + fdb_check_n_entries_by_dev_filter vx10 $mac_pool_1_len + fdb_check_n_entries_by_dev_filter vx20 $mac_pool_2_len + + run_cmd "$BRIDGE fdb flush dev vx10" + log_test $? 0 "Flush FDB by dev vx10" + + fdb_check_n_entries_by_dev_filter vx10 0 + log_test $? 0 "Flush FDB by dev vx10 - test vx10 entries" + + fdb_check_n_entries_by_dev_filter vx20 $mac_pool_2_len + log_test $? 0 "Flush FDB by dev vx10 - test vx20 entries" +} + +vxlan_test_flush_by_vni() +{ + local vni_1=3000 + local vni_2=4000 + local dst_ip=192.0.2.1 + + fdb_add_mac_pool_1 vx10 vni $vni_1 dst $dst_ip + fdb_add_mac_pool_2 vx10 vni $vni_2 dst $dst_ip + + fdb_check_n_entries_by_dev_filter vx10 $mac_pool_1_len vni $vni_1 + fdb_check_n_entries_by_dev_filter vx10 $mac_pool_2_len vni $vni_2 + + run_cmd "$BRIDGE fdb flush dev vx10 vni $vni_2" + log_test $? 0 "Flush FDB by dev vx10 and vni $vni_2" + + fdb_check_n_entries_by_dev_filter vx10 $mac_pool_1_len vni $vni_1 + log_test $? 0 "Test entries with vni $vni_1" + + fdb_check_n_entries_by_dev_filter vx10 0 vni $vni_2 + log_test $? 0 "Test entries with vni $vni_2" +} + +vxlan_test_flush_by_src_vni() +{ + # Set some entries with {vni=x,src_vni=y} and some with the opposite - + # {vni=y,src_vni=x}, to verify that when we flush by src_vni=x, entries + # with vni=x are not flused. + local vni_1=3000 + local vni_2=4000 + local src_vni_1=4000 + local src_vni_2=3000 + local dst_ip=192.0.2.1 + + # Reconfigure vx10 with 'external' to get 'src_vni' details in + # 'bridge fdb' output + $IP link del dev vx10 + $IP link add name vx10 type vxlan dstport "$VXPORT" external + + fdb_add_mac_pool_1 vx10 vni $vni_1 src_vni $src_vni_1 dst $dst_ip + fdb_add_mac_pool_2 vx10 vni $vni_2 src_vni $src_vni_2 dst $dst_ip + + fdb_check_n_entries_by_dev_filter vx10 $mac_pool_1_len \ + src_vni $src_vni_1 + fdb_check_n_entries_by_dev_filter vx10 $mac_pool_2_len \ + src_vni $src_vni_2 + + run_cmd "$BRIDGE fdb flush dev vx10 src_vni $src_vni_2" + log_test $? 0 "Flush FDB by dev vx10 and src_vni $src_vni_2" + + fdb_check_n_entries_by_dev_filter vx10 $mac_pool_1_len \ + src_vni $src_vni_1 + log_test $? 0 "Test entries with src_vni $src_vni_1" + + fdb_check_n_entries_by_dev_filter vx10 0 src_vni $src_vni_2 + log_test $? 0 "Test entries with src_vni $src_vni_2" +} + +vxlan_test_flush_by_port() +{ + local port_1=1234 + local port_2=4321 + local dst_ip=192.0.2.1 + + fdb_add_mac_pool_1 vx10 port $port_1 dst $dst_ip + fdb_add_mac_pool_2 vx10 port $port_2 dst $dst_ip + + fdb_check_n_entries_by_dev_filter vx10 $mac_pool_1_len port $port_1 + fdb_check_n_entries_by_dev_filter vx10 $mac_pool_2_len port $port_2 + + run_cmd "$BRIDGE fdb flush dev vx10 port $port_2" + log_test $? 0 "Flush FDB by dev vx10 and port $port_2" + + fdb_check_n_entries_by_dev_filter vx10 $mac_pool_1_len port $port_1 + log_test $? 0 "Test entries with port $port_1" + + fdb_check_n_entries_by_dev_filter vx10 0 port $port_2 + log_test $? 0 "Test entries with port $port_2" +} + +vxlan_test_flush_by_dst_ip() +{ + local dst_ip_1=192.0.2.1 + local dst_ip_2=192.0.2.2 + + fdb_add_mac_pool_1 vx10 dst $dst_ip_1 + fdb_add_mac_pool_2 vx10 dst $dst_ip_2 + + fdb_check_n_entries_by_dev_filter vx10 $mac_pool_1_len dst $dst_ip_1 + fdb_check_n_entries_by_dev_filter vx10 $mac_pool_2_len dst $dst_ip_2 + + run_cmd "$BRIDGE fdb flush dev vx10 dst $dst_ip_2" + log_test $? 0 "Flush FDB by dev vx10 and dst $dst_ip_2" + + fdb_check_n_entries_by_dev_filter vx10 $mac_pool_1_len dst $dst_ip_1 + log_test $? 0 "Test entries with dst $dst_ip_1" + + fdb_check_n_entries_by_dev_filter vx10 0 dst $dst_ip_2 + log_test $? 0 "Test entries with dst $dst_ip_2" +} + +nexthops_add() +{ + local nhid_1=$1; shift + local nhid_2=$1; shift + + $IP nexthop add id 10 via 192.0.2.1 fdb + $IP nexthop add id $nhid_1 group 10 fdb + + $IP nexthop add id 20 via 192.0.2.2 fdb + $IP nexthop add id $nhid_2 group 20 fdb +} + +vxlan_test_flush_by_nhid() +{ + local nhid_1=100 + local nhid_2=200 + + nexthops_add $nhid_1 $nhid_2 + + fdb_add_mac_pool_1 vx10 nhid $nhid_1 + fdb_add_mac_pool_2 vx10 nhid $nhid_2 + + fdb_check_n_entries_by_dev_filter vx10 $mac_pool_1_len nhid $nhid_1 + fdb_check_n_entries_by_dev_filter vx10 $mac_pool_2_len nhid $nhid_2 + + run_cmd "$BRIDGE fdb flush dev vx10 nhid $nhid_2" + log_test $? 0 "Flush FDB by dev vx10 and nhid $nhid_2" + + fdb_check_n_entries_by_dev_filter vx10 $mac_pool_1_len nhid $nhid_1 + log_test $? 0 "Test entries with nhid $nhid_1" + + fdb_check_n_entries_by_dev_filter vx10 0 nhid $nhid_2 + log_test $? 0 "Test entries with nhid $nhid_2" + + # Flush also entries with $nhid_1, and then verify that flushing by + # 'nhid' does not return an error when there are no entries with + # nexthops. + run_cmd "$BRIDGE fdb flush dev vx10 nhid $nhid_1" + log_test $? 0 "Flush FDB by dev vx10 and nhid $nhid_1" + + fdb_check_n_entries_by_dev_filter vx10 0 nhid + log_test $? 0 "Test entries with 'nhid' keyword" + + run_cmd "$BRIDGE fdb flush dev vx10 nhid $nhid_1" + log_test $? 0 "Flush FDB by nhid when there are no entries with nexthop" +} + +vxlan_test_flush_by_state() +{ + local flush_by_state=$1; shift + local state_1=$1; shift + local exp_state_1=$1; shift + local state_2=$1; shift + local exp_state_2=$1; shift + + local dst_ip_1=192.0.2.1 + local dst_ip_2=192.0.2.2 + + fdb_add_mac_pool_1 vx10 dst $dst_ip_1 $state_1 + fdb_add_mac_pool_2 vx10 dst $dst_ip_2 $state_2 + + # Check the entries by dst_ip as not all states appear in 'bridge fdb' + # output. + fdb_check_n_entries_by_dev_filter vx10 $mac_pool_1_len dst $dst_ip_1 + fdb_check_n_entries_by_dev_filter vx10 $mac_pool_2_len dst $dst_ip_2 + + run_cmd "$BRIDGE fdb flush dev vx10 $flush_by_state" + log_test $? 0 "Flush FDB by dev vx10 and state $flush_by_state" + + fdb_check_n_entries_by_dev_filter vx10 $exp_state_1 dst $dst_ip_1 + log_test $? 0 "Test entries with state $state_1" + + fdb_check_n_entries_by_dev_filter vx10 $exp_state_2 dst $dst_ip_2 + log_test $? 0 "Test entries with state $state_2" +} + +vxlan_test_flush_by_permanent() +{ + # Entries that are added without state get 'permanent' state by + # default, add some entries with flag 'extern_learn' instead of state, + # so they will be added with 'permanent' and should be flushed also. + local flush_by_state="permanent" + local state_1="permanent" + local exp_state_1=0 + local state_2="extern_learn" + local exp_state_2=0 + + vxlan_test_flush_by_state $flush_by_state $state_1 $exp_state_1 \ + $state_2 $exp_state_2 +} + +vxlan_test_flush_by_nopermanent() +{ + local flush_by_state="nopermanent" + local state_1="permanent" + local exp_state_1=$mac_pool_1_len + local state_2="static" + local exp_state_2=0 + + vxlan_test_flush_by_state $flush_by_state $state_1 $exp_state_1 \ + $state_2 $exp_state_2 +} + +vxlan_test_flush_by_static() +{ + local flush_by_state="static" + local state_1="static" + local exp_state_1=0 + local state_2="dynamic" + local exp_state_2=$mac_pool_2_len + + vxlan_test_flush_by_state $flush_by_state $state_1 $exp_state_1 \ + $state_2 $exp_state_2 +} + +vxlan_test_flush_by_nostatic() +{ + local flush_by_state="nostatic" + local state_1="permanent" + local exp_state_1=$mac_pool_1_len + local state_2="dynamic" + local exp_state_2=0 + + vxlan_test_flush_by_state $flush_by_state $state_1 $exp_state_1 \ + $state_2 $exp_state_2 +} + +vxlan_test_flush_by_dynamic() +{ + local flush_by_state="dynamic" + local state_1="dynamic" + local exp_state_1=0 + local state_2="static" + local exp_state_2=$mac_pool_2_len + + vxlan_test_flush_by_state $flush_by_state $state_1 $exp_state_1 \ + $state_2 $exp_state_2 +} + +vxlan_test_flush_by_nodynamic() +{ + local flush_by_state="nodynamic" + local state_1="permanent" + local exp_state_1=0 + local state_2="dynamic" + local exp_state_2=$mac_pool_2_len + + vxlan_test_flush_by_state $flush_by_state $state_1 $exp_state_1 \ + $state_2 $exp_state_2 +} + +vxlan_test_flush_by_flag() +{ + local flush_by_flag=$1; shift + local flag_1=$1; shift + local exp_flag_1=$1; shift + local flag_2=$1; shift + local exp_flag_2=$1; shift + + local dst_ip_1=192.0.2.1 + local dst_ip_2=192.0.2.2 + + fdb_add_mac_pool_1 vx10 dst $dst_ip_1 $flag_1 + fdb_add_mac_pool_2 vx10 dst $dst_ip_2 $flag_2 + + fdb_check_n_entries_by_dev_filter vx10 $mac_pool_1_len $flag_1 + fdb_check_n_entries_by_dev_filter vx10 $mac_pool_2_len $flag_2 + + run_cmd "$BRIDGE fdb flush dev vx10 $flush_by_flag" + log_test $? 0 "Flush FDB by dev vx10 and flag $flush_by_flag" + + fdb_check_n_entries_by_dev_filter vx10 $exp_flag_1 dst $dst_ip_1 + log_test $? 0 "Test entries with flag $flag_1" + + fdb_check_n_entries_by_dev_filter vx10 $exp_flag_2 dst $dst_ip_2 + log_test $? 0 "Test entries with flag $flag_2" +} + +vxlan_test_flush_by_extern_learn() +{ + local flush_by_flag="extern_learn" + local flag_1="extern_learn" + local exp_flag_1=0 + local flag_2="router" + local exp_flag_2=$mac_pool_2_len + + vxlan_test_flush_by_flag $flush_by_flag $flag_1 $exp_flag_1 \ + $flag_2 $exp_flag_2 +} + +vxlan_test_flush_by_noextern_learn() +{ + local flush_by_flag="noextern_learn" + local flag_1="extern_learn" + local exp_flag_1=$mac_pool_1_len + local flag_2="router" + local exp_flag_2=0 + + vxlan_test_flush_by_flag $flush_by_flag $flag_1 $exp_flag_1 \ + $flag_2 $exp_flag_2 +} + +vxlan_test_flush_by_router() +{ + local flush_by_flag="router" + local flag_1="router" + local exp_flag_1=0 + local flag_2="extern_learn" + local exp_flag_2=$mac_pool_2_len + + vxlan_test_flush_by_flag $flush_by_flag $flag_1 $exp_flag_1 \ + $flag_2 $exp_flag_2 +} + +vxlan_test_flush_by_norouter() +{ + + local flush_by_flag="norouter" + local flag_1="router" + local exp_flag_1=$mac_pool_1_len + local flag_2="extern_learn" + local exp_flag_2=0 + + vxlan_test_flush_by_flag $flush_by_flag $flag_1 $exp_flag_1 \ + $flag_2 $exp_flag_2 +} + +vxlan_test_flush_by_several_args() +{ + local dst_ip_1=192.0.2.1 + local dst_ip_2=192.0.2.2 + local state_1=permanent + local state_2=static + local vni=3000 + local port=1234 + local nhid=100 + local flag=router + local flush_args + + ################### Flush by 2 args - nhid and flag #################### + $IP nexthop add id 10 via 192.0.2.1 fdb + $IP nexthop add id $nhid group 10 fdb + + fdb_add_mac_pool_1 vx10 nhid $nhid $flag $state_1 + fdb_add_mac_pool_2 vx10 nhid $nhid $flag $state_2 + + fdb_check_n_entries_by_dev_filter vx10 $mac_pool_1_len $state_1 + fdb_check_n_entries_by_dev_filter vx10 $mac_pool_2_len $state_2 + + run_cmd "$BRIDGE fdb flush dev vx10 nhid $nhid $flag" + log_test $? 0 "Flush FDB by dev vx10 nhid $nhid $flag" + + # All entries should be flushed as 'state' is not an argument for flush + # filtering. + fdb_check_n_entries_by_dev_filter vx10 0 $state_1 + log_test $? 0 "Test entries with state $state_1" + + fdb_check_n_entries_by_dev_filter vx10 0 $state_2 + log_test $? 0 "Test entries with state $state_2" + + ################ Flush by 3 args - VNI, port and dst_ip ################ + fdb_add_mac_pool_1 vx10 vni $vni port $port dst $dst_ip_1 + fdb_add_mac_pool_2 vx10 vni $vni port $port dst $dst_ip_2 + + fdb_check_n_entries_by_dev_filter vx10 $mac_pool_1_len dst $dst_ip_1 + fdb_check_n_entries_by_dev_filter vx10 $mac_pool_2_len dst $dst_ip_2 + + flush_args="vni $vni port $port dst $dst_ip_2" + run_cmd "$BRIDGE fdb flush dev vx10 $flush_args" + log_test $? 0 "Flush FDB by dev vx10 $flush_args" + + # Only entries with $dst_ip_2 should be flushed, even the rest arguments + # match the filter, the flush should be AND of all the arguments. + fdb_check_n_entries_by_dev_filter vx10 $mac_pool_1_len dst $dst_ip_1 + log_test $? 0 "Test entries with dst $dst_ip_1" + + fdb_check_n_entries_by_dev_filter vx10 0 dst $dst_ip_2 + log_test $? 0 "Test entries with dst $dst_ip_2" +} + +multicast_fdb_entries_add() +{ + mac=00:00:00:00:00:00 + vnis=(2000 3000) + + for vni in "${vnis[@]}"; do + $BRIDGE fdb append $mac dev vx10 dst 192.0.2.1 vni $vni \ + src_vni 5000 + $BRIDGE fdb append $mac dev vx10 dst 192.0.2.1 vni $vni \ + port 1111 + $BRIDGE fdb append $mac dev vx10 dst 192.0.2.2 vni $vni \ + port 2222 + done +} + +vxlan_test_flush_by_remote_attributes() +{ + local flush_args + + # Reconfigure vx10 with 'external' to get 'src_vni' details in + # 'bridge fdb' output + $IP link del dev vx10 + $IP link add name vx10 type vxlan dstport "$VXPORT" external + + # For multicat FDB entries, the VXLAN driver stores a linked list of + # remotes for a given key. Verify that only the expected remotes are + # flushed. + multicast_fdb_entries_add + + ## Flush by 3 remote's attributes - destination IP, port and VNI ## + flush_args="dst 192.0.2.1 port 1111 vni 2000" + fdb_check_n_entries_by_dev_filter vx10 1 $flush_args + + t0_n_entries=$($BRIDGE fdb show dev vx10 | wc -l) + run_cmd "$BRIDGE fdb flush dev vx10 $flush_args" + log_test $? 0 "Flush FDB by dev vx10 $flush_args" + + fdb_check_n_entries_by_dev_filter vx10 0 $flush_args + + exp_n_entries=$((t0_n_entries - 1)) + t1_n_entries=$($BRIDGE fdb show dev vx10 | wc -l) + [[ $t1_n_entries -eq $exp_n_entries ]] + log_test $? 0 "Check how many entries were flushed" + + ## Flush by 2 remote's attributes - destination IP and port ## + flush_args="dst 192.0.2.2 port 2222" + + fdb_check_n_entries_by_dev_filter vx10 2 $flush_args + + t0_n_entries=$($BRIDGE fdb show dev vx10 | wc -l) + run_cmd "$BRIDGE fdb flush dev vx10 $flush_args" + log_test $? 0 "Flush FDB by dev vx10 $flush_args" + + fdb_check_n_entries_by_dev_filter vx10 0 $flush_args + + exp_n_entries=$((t0_n_entries - 2)) + t1_n_entries=$($BRIDGE fdb show dev vx10 | wc -l) + [[ $t1_n_entries -eq $exp_n_entries ]] + log_test $? 0 "Check how many entries were flushed" + + ## Flush by source VNI, which is not remote's attribute and VNI ## + flush_args="vni 3000 src_vni 5000" + + fdb_check_n_entries_by_dev_filter vx10 1 $flush_args + + t0_n_entries=$($BRIDGE fdb show dev vx10 | wc -l) + run_cmd "$BRIDGE fdb flush dev vx10 $flush_args" + log_test $? 0 "Flush FDB by dev vx10 $flush_args" + + fdb_check_n_entries_by_dev_filter vx10 0 $flush_args + + exp_n_entries=$((t0_n_entries -1)) + t1_n_entries=$($BRIDGE fdb show dev vx10 | wc -l) + [[ $t1_n_entries -eq $exp_n_entries ]] + log_test $? 0 "Check how many entries were flushed" + + # Flush by 1 remote's attribute - destination IP ## + flush_args="dst 192.0.2.1" + + fdb_check_n_entries_by_dev_filter vx10 2 $flush_args + + t0_n_entries=$($BRIDGE fdb show dev vx10 | wc -l) + run_cmd "$BRIDGE fdb flush dev vx10 $flush_args" + log_test $? 0 "Flush FDB by dev vx10 $flush_args" + + fdb_check_n_entries_by_dev_filter vx10 0 $flush_args + + exp_n_entries=$((t0_n_entries -2)) + t1_n_entries=$($BRIDGE fdb show dev vx10 | wc -l) + [[ $t1_n_entries -eq $exp_n_entries ]] + log_test $? 0 "Check how many entries were flushed" +} + +bridge_test_flush_by_dev() +{ + local dst_ip=192.0.2.1 + local br0_n_ent_t0=$($BRIDGE fdb show dev br0 | wc -l) + local br1_n_ent_t0=$($BRIDGE fdb show dev br1 | wc -l) + + fdb_add_mac_pool_1 br0 dst $dst_ip + fdb_add_mac_pool_2 br1 dst $dst_ip + + # Each 'fdb add' command adds one extra entry in the bridge with the + # default vlan. + local exp_br0_n_ent=$(($br0_n_ent_t0 + 2 * $mac_pool_1_len)) + local exp_br1_n_ent=$(($br1_n_ent_t0 + 2 * $mac_pool_2_len)) + + fdb_check_n_entries_by_dev_filter br0 $exp_br0_n_ent + fdb_check_n_entries_by_dev_filter br1 $exp_br1_n_ent + + run_cmd "$BRIDGE fdb flush dev br0" + log_test $? 0 "Flush FDB by dev br0" + + # The default entry should not be flushed + fdb_check_n_entries_by_dev_filter br0 1 + log_test $? 0 "Flush FDB by dev br0 - test br0 entries" + + fdb_check_n_entries_by_dev_filter br1 $exp_br1_n_ent + log_test $? 0 "Flush FDB by dev br0 - test br1 entries" +} + +bridge_test_flush_by_vlan() +{ + local vlan_1=10 + local vlan_2=20 + local vlan_1_ent_t0 + local vlan_2_ent_t0 + + $BRIDGE vlan add vid $vlan_1 dev br0 self + $BRIDGE vlan add vid $vlan_2 dev br0 self + + vlan_1_ent_t0=$($BRIDGE fdb show dev br0 | grep "vlan $vlan_1" | wc -l) + vlan_2_ent_t0=$($BRIDGE fdb show dev br0 | grep "vlan $vlan_2" | wc -l) + + fdb_add_mac_pool_1 br0 vlan $vlan_1 + fdb_add_mac_pool_2 br0 vlan $vlan_2 + + local exp_vlan_1_ent=$(($vlan_1_ent_t0 + $mac_pool_1_len)) + local exp_vlan_2_ent=$(($vlan_2_ent_t0 + $mac_pool_2_len)) + + fdb_check_n_entries_by_dev_filter br0 $exp_vlan_1_ent vlan $vlan_1 + fdb_check_n_entries_by_dev_filter br0 $exp_vlan_2_ent vlan $vlan_2 + + run_cmd "$BRIDGE fdb flush dev br0 vlan $vlan_1" + log_test $? 0 "Flush FDB by dev br0 and vlan $vlan_1" + + fdb_check_n_entries_by_dev_filter br0 0 vlan $vlan_1 + log_test $? 0 "Test entries with vlan $vlan_1" + + fdb_check_n_entries_by_dev_filter br0 $exp_vlan_2_ent vlan $vlan_2 + log_test $? 0 "Test entries with vlan $vlan_2" +} + +bridge_vxlan_test_flush() +{ + local vlan_1=10 + local dst_ip=192.0.2.1 + + $IP link set dev vx10 master br0 + $BRIDGE vlan add vid $vlan_1 dev br0 self + $BRIDGE vlan add vid $vlan_1 dev vx10 + + fdb_add_mac_pool_1 vx10 vni 3000 dst $dst_ip self master + + fdb_check_n_entries_by_dev_filter vx10 $mac_pool_1_len vlan $vlan_1 + fdb_check_n_entries_by_dev_filter vx10 $mac_pool_1_len vni 3000 + + # Such command should fail in VXLAN driver as vlan is not supported, + # but the command should flush the entries in the bridge + run_cmd "$BRIDGE fdb flush dev vx10 vlan $vlan_1 master self" + log_test $? 255 \ + "Flush FDB by dev vx10, vlan $vlan_1, master and self" + + fdb_check_n_entries_by_dev_filter vx10 0 vlan $vlan_1 + log_test $? 0 "Test entries with vlan $vlan_1" + + fdb_check_n_entries_by_dev_filter vx10 $mac_pool_1_len dst $dst_ip + log_test $? 0 "Test entries with dst $dst_ip" +} + +setup() +{ + IP="ip -netns ns1" + BRIDGE="bridge -netns ns1" + + ip netns add ns1 + + $IP link add name vx10 type vxlan id 1000 dstport "$VXPORT" + $IP link add name vx20 type vxlan id 2000 dstport "$VXPORT" + + $IP link add br0 type bridge vlan_filtering 1 + $IP link add br1 type bridge vlan_filtering 1 +} + +cleanup() +{ + $IP link del dev br1 + $IP link del dev br0 + + $IP link del dev vx20 + $IP link del dev vx10 + + ip netns del ns1 +} + +################################################################################ +# main + +while getopts :t:pPhvw: o +do + case $o in + t) TESTS=$OPTARG;; + p) PAUSE_ON_FAIL=yes;; + P) PAUSE=yes;; + v) VERBOSE=$(($VERBOSE + 1));; + w) PING_TIMEOUT=$OPTARG;; + h) usage; exit 0;; + *) usage; exit 1;; + esac +done + +# make sure we don't pause twice +[ "${PAUSE}" = "yes" ] && PAUSE_ON_FAIL=no + +if [ "$(id -u)" -ne 0 ];then + echo "SKIP: Need root privileges" + exit $ksft_skip; +fi + +if [ ! -x "$(command -v ip)" ]; then + echo "SKIP: Could not run test without ip tool" + exit $ksft_skip +fi + +# Check a flag that is added to flush command as part of VXLAN flush support +bridge fdb help 2>&1 | grep -q "\[no\]router" +if [ $? -ne 0 ]; then + echo "SKIP: iproute2 too old, missing flush command for VXLAN" + exit $ksft_skip +fi + +ip link add dev vx10 type vxlan id 1000 2> /dev/null +out=$(bridge fdb flush dev vx10 2>&1 | grep -q "Operation not supported") +if [ $? -eq 0 ]; then + echo "SKIP: kernel lacks vxlan flush support" + exit $ksft_skip +fi +ip link del dev vx10 + +for t in $TESTS +do + setup; $t; cleanup; +done diff --git a/tools/testing/selftests/net/nettest.c b/tools/testing/selftests/net/nettest.c index 39a0e01f8554..cd8a58097448 100644 --- a/tools/testing/selftests/net/nettest.c +++ b/tools/testing/selftests/net/nettest.c @@ -1864,8 +1864,9 @@ static char *random_msg(int len) n += i; len -= i; } - i = snprintf(m + n, olen - n, "%.*s", len, - "abcdefghijklmnopqrstuvwxyz"); + + snprintf(m + n, olen - n, "%.*s", len, + "abcdefghijklmnopqrstuvwxyz"); return m; } diff --git a/tools/testing/selftests/ptp/ptpchmaskfmt.sh b/tools/testing/selftests/ptp/ptpchmaskfmt.sh new file mode 100644 index 000000000000..0a06ba8af300 --- /dev/null +++ b/tools/testing/selftests/ptp/ptpchmaskfmt.sh @@ -0,0 +1,14 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# Simple helper script to transform ptp debugfs timestamp event queue filtering +# masks from decimal values to hexadecimal values + +# Only takes the debugfs mask file path as an argument +DEBUGFS_MASKFILE="${1}" + +#shellcheck disable=SC2013,SC2086 +for int in $(cat "$DEBUGFS_MASKFILE") ; do + printf '0x%08X ' "$int" +done +echo diff --git a/tools/testing/selftests/ptp/testptp.c b/tools/testing/selftests/ptp/testptp.c index c9f6cca4feb4..011252fe238c 100644 --- a/tools/testing/selftests/ptp/testptp.c +++ b/tools/testing/selftests/ptp/testptp.c @@ -121,6 +121,7 @@ static void usage(char *progname) " -d name device to open\n" " -e val read 'val' external time stamp events\n" " -f val adjust the ptp clock frequency by 'val' ppb\n" + " -F chan Enable single channel mask and keep device open for debugfs verification.\n" " -g get the ptp clock time\n" " -h prints this message\n" " -i val index for event/trigger\n" @@ -187,6 +188,7 @@ int main(int argc, char *argv[]) int pps = -1; int seconds = 0; int settime = 0; + int channel = -1; int64_t t1, t2, tp; int64_t interval, offset; @@ -196,7 +198,7 @@ int main(int argc, char *argv[]) progname = strrchr(argv[0], '/'); progname = progname ? 1+progname : argv[0]; - while (EOF != (c = getopt(argc, argv, "cd:e:f:ghH:i:k:lL:n:o:p:P:sSt:T:w:x:Xz"))) { + while (EOF != (c = getopt(argc, argv, "cd:e:f:F:ghH:i:k:lL:n:o:p:P:sSt:T:w:x:Xz"))) { switch (c) { case 'c': capabilities = 1; @@ -210,6 +212,9 @@ int main(int argc, char *argv[]) case 'f': adjfreq = atoi(optarg); break; + case 'F': + channel = atoi(optarg); + break; case 'g': gettime = 1; break; @@ -604,6 +609,18 @@ int main(int argc, char *argv[]) free(xts); } + if (channel >= 0) { + if (ioctl(fd, PTP_MASK_CLEAR_ALL)) { + perror("PTP_MASK_CLEAR_ALL"); + } else if (ioctl(fd, PTP_MASK_EN_SINGLE, (unsigned int *)&channel)) { + perror("PTP_MASK_EN_SINGLE"); + } else { + printf("Channel %d exclusively enabled. Check on debugfs.\n", channel); + printf("Press any key to continue\n."); + getchar(); + } + } + close(fd); return 0; } diff --git a/tools/testing/vsock/.gitignore b/tools/testing/vsock/.gitignore index a8adcfdc292b..d9f798713cd7 100644 --- a/tools/testing/vsock/.gitignore +++ b/tools/testing/vsock/.gitignore @@ -3,3 +3,4 @@ vsock_test vsock_diag_test vsock_perf +vsock_uring_test diff --git a/tools/testing/vsock/Makefile b/tools/testing/vsock/Makefile index 21a98ba565ab..a7f56a09ca9f 100644 --- a/tools/testing/vsock/Makefile +++ b/tools/testing/vsock/Makefile @@ -1,12 +1,15 @@ # SPDX-License-Identifier: GPL-2.0-only all: test vsock_perf -test: vsock_test vsock_diag_test -vsock_test: vsock_test.o timeout.o control.o util.o +test: vsock_test vsock_diag_test vsock_uring_test +vsock_test: vsock_test.o vsock_test_zerocopy.o timeout.o control.o util.o msg_zerocopy_common.o vsock_diag_test: vsock_diag_test.o timeout.o control.o util.o -vsock_perf: vsock_perf.o +vsock_perf: vsock_perf.o msg_zerocopy_common.o + +vsock_uring_test: LDLIBS = -luring +vsock_uring_test: control.o util.o vsock_uring_test.o timeout.o msg_zerocopy_common.o CFLAGS += -g -O2 -Werror -Wall -I. -I../../include -I../../../usr/include -Wno-pointer-sign -fno-strict-overflow -fno-strict-aliasing -fno-common -MMD -U_FORTIFY_SOURCE -D_GNU_SOURCE .PHONY: all test clean clean: - ${RM} *.o *.d vsock_test vsock_diag_test vsock_perf + ${RM} *.o *.d vsock_test vsock_diag_test vsock_perf vsock_uring_test -include *.d diff --git a/tools/testing/vsock/msg_zerocopy_common.c b/tools/testing/vsock/msg_zerocopy_common.c new file mode 100644 index 000000000000..5a4bdf7b5132 --- /dev/null +++ b/tools/testing/vsock/msg_zerocopy_common.c @@ -0,0 +1,87 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Some common code for MSG_ZEROCOPY logic + * + * Copyright (C) 2023 SberDevices. + * + * Author: Arseniy Krasnov <avkrasnov@salutedevices.com> + */ + +#include <stdio.h> +#include <stdlib.h> +#include <sys/types.h> +#include <sys/socket.h> +#include <linux/errqueue.h> + +#include "msg_zerocopy_common.h" + +void enable_so_zerocopy(int fd) +{ + int val = 1; + + if (setsockopt(fd, SOL_SOCKET, SO_ZEROCOPY, &val, sizeof(val))) { + perror("setsockopt"); + exit(EXIT_FAILURE); + } +} + +void vsock_recv_completion(int fd, const bool *zerocopied) +{ + struct sock_extended_err *serr; + struct msghdr msg = { 0 }; + char cmsg_data[128]; + struct cmsghdr *cm; + ssize_t res; + + msg.msg_control = cmsg_data; + msg.msg_controllen = sizeof(cmsg_data); + + res = recvmsg(fd, &msg, MSG_ERRQUEUE); + if (res) { + fprintf(stderr, "failed to read error queue: %zi\n", res); + exit(EXIT_FAILURE); + } + + cm = CMSG_FIRSTHDR(&msg); + if (!cm) { + fprintf(stderr, "cmsg: no cmsg\n"); + exit(EXIT_FAILURE); + } + + if (cm->cmsg_level != SOL_VSOCK) { + fprintf(stderr, "cmsg: unexpected 'cmsg_level'\n"); + exit(EXIT_FAILURE); + } + + if (cm->cmsg_type != VSOCK_RECVERR) { + fprintf(stderr, "cmsg: unexpected 'cmsg_type'\n"); + exit(EXIT_FAILURE); + } + + serr = (void *)CMSG_DATA(cm); + if (serr->ee_origin != SO_EE_ORIGIN_ZEROCOPY) { + fprintf(stderr, "serr: wrong origin: %u\n", serr->ee_origin); + exit(EXIT_FAILURE); + } + + if (serr->ee_errno) { + fprintf(stderr, "serr: wrong error code: %u\n", serr->ee_errno); + exit(EXIT_FAILURE); + } + + /* This flag is used for tests, to check that transmission was + * performed as expected: zerocopy or fallback to copy. If NULL + * - don't care. + */ + if (!zerocopied) + return; + + if (*zerocopied && (serr->ee_code & SO_EE_CODE_ZEROCOPY_COPIED)) { + fprintf(stderr, "serr: was copy instead of zerocopy\n"); + exit(EXIT_FAILURE); + } + + if (!*zerocopied && !(serr->ee_code & SO_EE_CODE_ZEROCOPY_COPIED)) { + fprintf(stderr, "serr: was zerocopy instead of copy\n"); + exit(EXIT_FAILURE); + } +} diff --git a/tools/testing/vsock/msg_zerocopy_common.h b/tools/testing/vsock/msg_zerocopy_common.h new file mode 100644 index 000000000000..3763c5ccedb9 --- /dev/null +++ b/tools/testing/vsock/msg_zerocopy_common.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef MSG_ZEROCOPY_COMMON_H +#define MSG_ZEROCOPY_COMMON_H + +#include <stdbool.h> + +#ifndef SOL_VSOCK +#define SOL_VSOCK 287 +#endif + +#ifndef VSOCK_RECVERR +#define VSOCK_RECVERR 1 +#endif + +void enable_so_zerocopy(int fd); +void vsock_recv_completion(int fd, const bool *zerocopied); + +#endif /* MSG_ZEROCOPY_COMMON_H */ diff --git a/tools/testing/vsock/util.c b/tools/testing/vsock/util.c index 6779d5008b27..92336721321a 100644 --- a/tools/testing/vsock/util.c +++ b/tools/testing/vsock/util.c @@ -11,10 +11,12 @@ #include <stdio.h> #include <stdint.h> #include <stdlib.h> +#include <string.h> #include <signal.h> #include <unistd.h> #include <assert.h> #include <sys/epoll.h> +#include <sys/mman.h> #include "timeout.h" #include "control.h" @@ -444,3 +446,134 @@ unsigned long hash_djb2(const void *data, size_t len) return hash; } + +size_t iovec_bytes(const struct iovec *iov, size_t iovnum) +{ + size_t bytes; + int i; + + for (bytes = 0, i = 0; i < iovnum; i++) + bytes += iov[i].iov_len; + + return bytes; +} + +unsigned long iovec_hash_djb2(const struct iovec *iov, size_t iovnum) +{ + unsigned long hash; + size_t iov_bytes; + size_t offs; + void *tmp; + int i; + + iov_bytes = iovec_bytes(iov, iovnum); + + tmp = malloc(iov_bytes); + if (!tmp) { + perror("malloc"); + exit(EXIT_FAILURE); + } + + for (offs = 0, i = 0; i < iovnum; i++) { + memcpy(tmp + offs, iov[i].iov_base, iov[i].iov_len); + offs += iov[i].iov_len; + } + + hash = hash_djb2(tmp, iov_bytes); + free(tmp); + + return hash; +} + +/* Allocates and returns new 'struct iovec *' according pattern + * in the 'test_iovec'. For each element in the 'test_iovec' it + * allocates new element in the resulting 'iovec'. 'iov_len' + * of the new element is copied from 'test_iovec'. 'iov_base' is + * allocated depending on the 'iov_base' of 'test_iovec': + * + * 'iov_base' == NULL -> valid buf: mmap('iov_len'). + * + * 'iov_base' == MAP_FAILED -> invalid buf: + * mmap('iov_len'), then munmap('iov_len'). + * 'iov_base' still contains result of + * mmap(). + * + * 'iov_base' == number -> unaligned valid buf: + * mmap('iov_len') + number. + * + * 'iovnum' is number of elements in 'test_iovec'. + * + * Returns new 'iovec' or calls 'exit()' on error. + */ +struct iovec *alloc_test_iovec(const struct iovec *test_iovec, int iovnum) +{ + struct iovec *iovec; + int i; + + iovec = malloc(sizeof(*iovec) * iovnum); + if (!iovec) { + perror("malloc"); + exit(EXIT_FAILURE); + } + + for (i = 0; i < iovnum; i++) { + iovec[i].iov_len = test_iovec[i].iov_len; + + iovec[i].iov_base = mmap(NULL, iovec[i].iov_len, + PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_POPULATE, + -1, 0); + if (iovec[i].iov_base == MAP_FAILED) { + perror("mmap"); + exit(EXIT_FAILURE); + } + + if (test_iovec[i].iov_base != MAP_FAILED) + iovec[i].iov_base += (uintptr_t)test_iovec[i].iov_base; + } + + /* Unmap "invalid" elements. */ + for (i = 0; i < iovnum; i++) { + if (test_iovec[i].iov_base == MAP_FAILED) { + if (munmap(iovec[i].iov_base, iovec[i].iov_len)) { + perror("munmap"); + exit(EXIT_FAILURE); + } + } + } + + for (i = 0; i < iovnum; i++) { + int j; + + if (test_iovec[i].iov_base == MAP_FAILED) + continue; + + for (j = 0; j < iovec[i].iov_len; j++) + ((uint8_t *)iovec[i].iov_base)[j] = rand() & 0xff; + } + + return iovec; +} + +/* Frees 'iovec *', previously allocated by 'alloc_test_iovec()'. + * On error calls 'exit()'. + */ +void free_test_iovec(const struct iovec *test_iovec, + struct iovec *iovec, int iovnum) +{ + int i; + + for (i = 0; i < iovnum; i++) { + if (test_iovec[i].iov_base != MAP_FAILED) { + if (test_iovec[i].iov_base) + iovec[i].iov_base -= (uintptr_t)test_iovec[i].iov_base; + + if (munmap(iovec[i].iov_base, iovec[i].iov_len)) { + perror("munmap"); + exit(EXIT_FAILURE); + } + } + } + + free(iovec); +} diff --git a/tools/testing/vsock/util.h b/tools/testing/vsock/util.h index e5407677ce05..a77175d25864 100644 --- a/tools/testing/vsock/util.h +++ b/tools/testing/vsock/util.h @@ -53,4 +53,9 @@ void list_tests(const struct test_case *test_cases); void skip_test(struct test_case *test_cases, size_t test_cases_len, const char *test_id_str); unsigned long hash_djb2(const void *data, size_t len); +size_t iovec_bytes(const struct iovec *iov, size_t iovnum); +unsigned long iovec_hash_djb2(const struct iovec *iov, size_t iovnum); +struct iovec *alloc_test_iovec(const struct iovec *test_iovec, int iovnum); +void free_test_iovec(const struct iovec *test_iovec, + struct iovec *iovec, int iovnum); #endif /* UTIL_H */ diff --git a/tools/testing/vsock/vsock_perf.c b/tools/testing/vsock/vsock_perf.c index a72520338f84..4e8578f815e0 100644 --- a/tools/testing/vsock/vsock_perf.c +++ b/tools/testing/vsock/vsock_perf.c @@ -18,6 +18,9 @@ #include <poll.h> #include <sys/socket.h> #include <linux/vm_sockets.h> +#include <sys/mman.h> + +#include "msg_zerocopy_common.h" #define DEFAULT_BUF_SIZE_BYTES (128 * 1024) #define DEFAULT_TO_SEND_BYTES (64 * 1024) @@ -31,6 +34,7 @@ static unsigned int port = DEFAULT_PORT; static unsigned long buf_size_bytes = DEFAULT_BUF_SIZE_BYTES; static unsigned long vsock_buf_bytes = DEFAULT_VSOCK_BUF_BYTES; +static bool zerocopy; static void error(const char *s) { @@ -252,10 +256,15 @@ static void run_sender(int peer_cid, unsigned long to_send_bytes) time_t tx_begin_ns; time_t tx_total_ns; size_t total_send; + time_t time_in_send; void *data; int fd; - printf("Run as sender\n"); + if (zerocopy) + printf("Run as sender MSG_ZEROCOPY\n"); + else + printf("Run as sender\n"); + printf("Connect to %i:%u\n", peer_cid, port); printf("Send %lu bytes\n", to_send_bytes); printf("TX buffer %lu bytes\n", buf_size_bytes); @@ -265,38 +274,82 @@ static void run_sender(int peer_cid, unsigned long to_send_bytes) if (fd < 0) exit(EXIT_FAILURE); - data = malloc(buf_size_bytes); + if (zerocopy) { + enable_so_zerocopy(fd); - if (!data) { - fprintf(stderr, "'malloc()' failed\n"); - exit(EXIT_FAILURE); + data = mmap(NULL, buf_size_bytes, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (data == MAP_FAILED) { + perror("mmap"); + exit(EXIT_FAILURE); + } + } else { + data = malloc(buf_size_bytes); + + if (!data) { + fprintf(stderr, "'malloc()' failed\n"); + exit(EXIT_FAILURE); + } } memset(data, 0, buf_size_bytes); total_send = 0; + time_in_send = 0; tx_begin_ns = current_nsec(); while (total_send < to_send_bytes) { ssize_t sent; + size_t rest_bytes; + time_t before; - sent = write(fd, data, buf_size_bytes); + rest_bytes = to_send_bytes - total_send; + + before = current_nsec(); + sent = send(fd, data, (rest_bytes > buf_size_bytes) ? + buf_size_bytes : rest_bytes, + zerocopy ? MSG_ZEROCOPY : 0); + time_in_send += (current_nsec() - before); if (sent <= 0) error("write"); total_send += sent; + + if (zerocopy) { + struct pollfd fds = { 0 }; + + fds.fd = fd; + + if (poll(&fds, 1, -1) < 0) { + perror("poll"); + exit(EXIT_FAILURE); + } + + if (!(fds.revents & POLLERR)) { + fprintf(stderr, "POLLERR expected\n"); + exit(EXIT_FAILURE); + } + + vsock_recv_completion(fd, NULL); + } } tx_total_ns = current_nsec() - tx_begin_ns; printf("total bytes sent: %zu\n", total_send); printf("tx performance: %f Gbits/s\n", - get_gbps(total_send * 8, tx_total_ns)); - printf("total time in 'write()': %f sec\n", + get_gbps(total_send * 8, time_in_send)); + printf("total time in tx loop: %f sec\n", (float)tx_total_ns / NSEC_PER_SEC); + printf("time in 'send()': %f sec\n", + (float)time_in_send / NSEC_PER_SEC); close(fd); - free(data); + + if (zerocopy) + munmap(data, buf_size_bytes); + else + free(data); } static const char optstring[] = ""; @@ -336,6 +389,11 @@ static const struct option longopts[] = { .has_arg = required_argument, .val = 'R', }, + { + .name = "zerocopy", + .has_arg = no_argument, + .val = 'Z', + }, {}, }; @@ -351,6 +409,7 @@ static void usage(void) " --help This message\n" " --sender <cid> Sender mode (receiver default)\n" " <cid> of the receiver to connect to\n" + " --zerocopy Enable zerocopy (for sender mode only)\n" " --port <port> Port (default %d)\n" " --bytes <bytes>KMG Bytes to send (default %d)\n" " --buf-size <bytes>KMG Data buffer size (default %d). In sender mode\n" @@ -413,6 +472,9 @@ int main(int argc, char **argv) case 'H': /* Help. */ usage(); break; + case 'Z': /* Zerocopy. */ + zerocopy = true; + break; default: usage(); } diff --git a/tools/testing/vsock/vsock_test.c b/tools/testing/vsock/vsock_test.c index da4cb819a183..c1f7bc9abd22 100644 --- a/tools/testing/vsock/vsock_test.c +++ b/tools/testing/vsock/vsock_test.c @@ -21,6 +21,7 @@ #include <poll.h> #include <signal.h> +#include "vsock_test_zerocopy.h" #include "timeout.h" #include "control.h" #include "util.h" @@ -1269,6 +1270,21 @@ static struct test_case test_cases[] = { .run_client = test_stream_shutrd_client, .run_server = test_stream_shutrd_server, }, + { + .name = "SOCK_STREAM MSG_ZEROCOPY", + .run_client = test_stream_msgzcopy_client, + .run_server = test_stream_msgzcopy_server, + }, + { + .name = "SOCK_SEQPACKET MSG_ZEROCOPY", + .run_client = test_seqpacket_msgzcopy_client, + .run_server = test_seqpacket_msgzcopy_server, + }, + { + .name = "SOCK_STREAM MSG_ZEROCOPY empty MSG_ERRQUEUE", + .run_client = test_stream_msgzcopy_empty_errq_client, + .run_server = test_stream_msgzcopy_empty_errq_server, + }, {}, }; diff --git a/tools/testing/vsock/vsock_test_zerocopy.c b/tools/testing/vsock/vsock_test_zerocopy.c new file mode 100644 index 000000000000..a16ff76484e6 --- /dev/null +++ b/tools/testing/vsock/vsock_test_zerocopy.c @@ -0,0 +1,358 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* MSG_ZEROCOPY feature tests for vsock + * + * Copyright (C) 2023 SberDevices. + * + * Author: Arseniy Krasnov <avkrasnov@salutedevices.com> + */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/mman.h> +#include <unistd.h> +#include <poll.h> +#include <linux/errqueue.h> +#include <linux/kernel.h> +#include <errno.h> + +#include "control.h" +#include "vsock_test_zerocopy.h" +#include "msg_zerocopy_common.h" + +#ifndef PAGE_SIZE +#define PAGE_SIZE 4096 +#endif + +#define VSOCK_TEST_DATA_MAX_IOV 3 + +struct vsock_test_data { + /* This test case if for SOCK_STREAM only. */ + bool stream_only; + /* Data must be zerocopied. This field is checked against + * field 'ee_code' of the 'struct sock_extended_err', which + * contains bit to detect that zerocopy transmission was + * fallbacked to copy mode. + */ + bool zerocopied; + /* Enable SO_ZEROCOPY option on the socket. Without enabled + * SO_ZEROCOPY, every MSG_ZEROCOPY transmission will behave + * like without MSG_ZEROCOPY flag. + */ + bool so_zerocopy; + /* 'errno' after 'sendmsg()' call. */ + int sendmsg_errno; + /* Number of valid elements in 'vecs'. */ + int vecs_cnt; + struct iovec vecs[VSOCK_TEST_DATA_MAX_IOV]; +}; + +static struct vsock_test_data test_data_array[] = { + /* Last element has non-page aligned size. */ + { + .zerocopied = true, + .so_zerocopy = true, + .sendmsg_errno = 0, + .vecs_cnt = 3, + { + { NULL, PAGE_SIZE }, + { NULL, PAGE_SIZE }, + { NULL, 200 } + } + }, + /* All elements have page aligned base and size. */ + { + .zerocopied = true, + .so_zerocopy = true, + .sendmsg_errno = 0, + .vecs_cnt = 3, + { + { NULL, PAGE_SIZE }, + { NULL, PAGE_SIZE * 2 }, + { NULL, PAGE_SIZE * 3 } + } + }, + /* All elements have page aligned base and size. But + * data length is bigger than 64Kb. + */ + { + .zerocopied = true, + .so_zerocopy = true, + .sendmsg_errno = 0, + .vecs_cnt = 3, + { + { NULL, PAGE_SIZE * 16 }, + { NULL, PAGE_SIZE * 16 }, + { NULL, PAGE_SIZE * 16 } + } + }, + /* Middle element has both non-page aligned base and size. */ + { + .zerocopied = true, + .so_zerocopy = true, + .sendmsg_errno = 0, + .vecs_cnt = 3, + { + { NULL, PAGE_SIZE }, + { (void *)1, 100 }, + { NULL, PAGE_SIZE } + } + }, + /* Middle element is unmapped. */ + { + .zerocopied = false, + .so_zerocopy = true, + .sendmsg_errno = ENOMEM, + .vecs_cnt = 3, + { + { NULL, PAGE_SIZE }, + { MAP_FAILED, PAGE_SIZE }, + { NULL, PAGE_SIZE } + } + }, + /* Valid data, but SO_ZEROCOPY is off. This + * will trigger fallback to copy. + */ + { + .zerocopied = false, + .so_zerocopy = false, + .sendmsg_errno = 0, + .vecs_cnt = 1, + { + { NULL, PAGE_SIZE } + } + }, + /* Valid data, but message is bigger than peer's + * buffer, so this will trigger fallback to copy. + * This test is for SOCK_STREAM only, because + * for SOCK_SEQPACKET, 'sendmsg()' returns EMSGSIZE. + */ + { + .stream_only = true, + .zerocopied = false, + .so_zerocopy = true, + .sendmsg_errno = 0, + .vecs_cnt = 1, + { + { NULL, 100 * PAGE_SIZE } + } + }, +}; + +#define POLL_TIMEOUT_MS 100 + +static void test_client(const struct test_opts *opts, + const struct vsock_test_data *test_data, + bool sock_seqpacket) +{ + struct pollfd fds = { 0 }; + struct msghdr msg = { 0 }; + ssize_t sendmsg_res; + struct iovec *iovec; + int fd; + + if (sock_seqpacket) + fd = vsock_seqpacket_connect(opts->peer_cid, 1234); + else + fd = vsock_stream_connect(opts->peer_cid, 1234); + + if (fd < 0) { + perror("connect"); + exit(EXIT_FAILURE); + } + + if (test_data->so_zerocopy) + enable_so_zerocopy(fd); + + iovec = alloc_test_iovec(test_data->vecs, test_data->vecs_cnt); + + msg.msg_iov = iovec; + msg.msg_iovlen = test_data->vecs_cnt; + + errno = 0; + + sendmsg_res = sendmsg(fd, &msg, MSG_ZEROCOPY); + if (errno != test_data->sendmsg_errno) { + fprintf(stderr, "expected 'errno' == %i, got %i\n", + test_data->sendmsg_errno, errno); + exit(EXIT_FAILURE); + } + + if (!errno) { + if (sendmsg_res != iovec_bytes(iovec, test_data->vecs_cnt)) { + fprintf(stderr, "expected 'sendmsg()' == %li, got %li\n", + iovec_bytes(iovec, test_data->vecs_cnt), + sendmsg_res); + exit(EXIT_FAILURE); + } + } + + fds.fd = fd; + fds.events = 0; + + if (poll(&fds, 1, POLL_TIMEOUT_MS) < 0) { + perror("poll"); + exit(EXIT_FAILURE); + } + + if (fds.revents & POLLERR) { + vsock_recv_completion(fd, &test_data->zerocopied); + } else if (test_data->so_zerocopy && !test_data->sendmsg_errno) { + /* If we don't have data in the error queue, but + * SO_ZEROCOPY was enabled and 'sendmsg()' was + * successful - this is an error. + */ + fprintf(stderr, "POLLERR expected\n"); + exit(EXIT_FAILURE); + } + + if (!test_data->sendmsg_errno) + control_writeulong(iovec_hash_djb2(iovec, test_data->vecs_cnt)); + else + control_writeulong(0); + + control_writeln("DONE"); + free_test_iovec(test_data->vecs, iovec, test_data->vecs_cnt); + close(fd); +} + +void test_stream_msgzcopy_client(const struct test_opts *opts) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(test_data_array); i++) + test_client(opts, &test_data_array[i], false); +} + +void test_seqpacket_msgzcopy_client(const struct test_opts *opts) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(test_data_array); i++) { + if (test_data_array[i].stream_only) + continue; + + test_client(opts, &test_data_array[i], true); + } +} + +static void test_server(const struct test_opts *opts, + const struct vsock_test_data *test_data, + bool sock_seqpacket) +{ + unsigned long remote_hash; + unsigned long local_hash; + ssize_t total_bytes_rec; + unsigned char *data; + size_t data_len; + int fd; + + if (sock_seqpacket) + fd = vsock_seqpacket_accept(VMADDR_CID_ANY, 1234, NULL); + else + fd = vsock_stream_accept(VMADDR_CID_ANY, 1234, NULL); + + if (fd < 0) { + perror("accept"); + exit(EXIT_FAILURE); + } + + data_len = iovec_bytes(test_data->vecs, test_data->vecs_cnt); + + data = malloc(data_len); + if (!data) { + perror("malloc"); + exit(EXIT_FAILURE); + } + + total_bytes_rec = 0; + + while (total_bytes_rec != data_len) { + ssize_t bytes_rec; + + bytes_rec = read(fd, data + total_bytes_rec, + data_len - total_bytes_rec); + if (bytes_rec <= 0) + break; + + total_bytes_rec += bytes_rec; + } + + if (test_data->sendmsg_errno == 0) + local_hash = hash_djb2(data, data_len); + else + local_hash = 0; + + free(data); + + /* Waiting for some result. */ + remote_hash = control_readulong(); + if (remote_hash != local_hash) { + fprintf(stderr, "hash mismatch\n"); + exit(EXIT_FAILURE); + } + + control_expectln("DONE"); + close(fd); +} + +void test_stream_msgzcopy_server(const struct test_opts *opts) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(test_data_array); i++) + test_server(opts, &test_data_array[i], false); +} + +void test_seqpacket_msgzcopy_server(const struct test_opts *opts) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(test_data_array); i++) { + if (test_data_array[i].stream_only) + continue; + + test_server(opts, &test_data_array[i], true); + } +} + +void test_stream_msgzcopy_empty_errq_client(const struct test_opts *opts) +{ + struct msghdr msg = { 0 }; + char cmsg_data[128]; + ssize_t res; + int fd; + + fd = vsock_stream_connect(opts->peer_cid, 1234); + if (fd < 0) { + perror("connect"); + exit(EXIT_FAILURE); + } + + msg.msg_control = cmsg_data; + msg.msg_controllen = sizeof(cmsg_data); + + res = recvmsg(fd, &msg, MSG_ERRQUEUE); + if (res != -1) { + fprintf(stderr, "expected 'recvmsg(2)' failure, got %zi\n", + res); + exit(EXIT_FAILURE); + } + + control_writeln("DONE"); + close(fd); +} + +void test_stream_msgzcopy_empty_errq_server(const struct test_opts *opts) +{ + int fd; + + fd = vsock_stream_accept(VMADDR_CID_ANY, 1234, NULL); + if (fd < 0) { + perror("accept"); + exit(EXIT_FAILURE); + } + + control_expectln("DONE"); + close(fd); +} diff --git a/tools/testing/vsock/vsock_test_zerocopy.h b/tools/testing/vsock/vsock_test_zerocopy.h new file mode 100644 index 000000000000..3ef2579e024d --- /dev/null +++ b/tools/testing/vsock/vsock_test_zerocopy.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef VSOCK_TEST_ZEROCOPY_H +#define VSOCK_TEST_ZEROCOPY_H +#include "util.h" + +void test_stream_msgzcopy_client(const struct test_opts *opts); +void test_stream_msgzcopy_server(const struct test_opts *opts); + +void test_seqpacket_msgzcopy_client(const struct test_opts *opts); +void test_seqpacket_msgzcopy_server(const struct test_opts *opts); + +void test_stream_msgzcopy_empty_errq_client(const struct test_opts *opts); +void test_stream_msgzcopy_empty_errq_server(const struct test_opts *opts); + +#endif /* VSOCK_TEST_ZEROCOPY_H */ diff --git a/tools/testing/vsock/vsock_uring_test.c b/tools/testing/vsock/vsock_uring_test.c new file mode 100644 index 000000000000..d976d35f0ba9 --- /dev/null +++ b/tools/testing/vsock/vsock_uring_test.c @@ -0,0 +1,342 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* io_uring tests for vsock + * + * Copyright (C) 2023 SberDevices. + * + * Author: Arseniy Krasnov <avkrasnov@salutedevices.com> + */ + +#include <getopt.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <liburing.h> +#include <unistd.h> +#include <sys/mman.h> +#include <linux/kernel.h> +#include <error.h> + +#include "util.h" +#include "control.h" +#include "msg_zerocopy_common.h" + +#ifndef PAGE_SIZE +#define PAGE_SIZE 4096 +#endif + +#define RING_ENTRIES_NUM 4 + +#define VSOCK_TEST_DATA_MAX_IOV 3 + +struct vsock_io_uring_test { + /* Number of valid elements in 'vecs'. */ + int vecs_cnt; + struct iovec vecs[VSOCK_TEST_DATA_MAX_IOV]; +}; + +static struct vsock_io_uring_test test_data_array[] = { + /* All elements have page aligned base and size. */ + { + .vecs_cnt = 3, + { + { NULL, PAGE_SIZE }, + { NULL, 2 * PAGE_SIZE }, + { NULL, 3 * PAGE_SIZE }, + } + }, + /* Middle element has both non-page aligned base and size. */ + { + .vecs_cnt = 3, + { + { NULL, PAGE_SIZE }, + { (void *)1, 200 }, + { NULL, 3 * PAGE_SIZE }, + } + } +}; + +static void vsock_io_uring_client(const struct test_opts *opts, + const struct vsock_io_uring_test *test_data, + bool msg_zerocopy) +{ + struct io_uring_sqe *sqe; + struct io_uring_cqe *cqe; + struct io_uring ring; + struct iovec *iovec; + struct msghdr msg; + int fd; + + fd = vsock_stream_connect(opts->peer_cid, 1234); + if (fd < 0) { + perror("connect"); + exit(EXIT_FAILURE); + } + + if (msg_zerocopy) + enable_so_zerocopy(fd); + + iovec = alloc_test_iovec(test_data->vecs, test_data->vecs_cnt); + + if (io_uring_queue_init(RING_ENTRIES_NUM, &ring, 0)) + error(1, errno, "io_uring_queue_init"); + + if (io_uring_register_buffers(&ring, iovec, test_data->vecs_cnt)) + error(1, errno, "io_uring_register_buffers"); + + memset(&msg, 0, sizeof(msg)); + msg.msg_iov = iovec; + msg.msg_iovlen = test_data->vecs_cnt; + sqe = io_uring_get_sqe(&ring); + + if (msg_zerocopy) + io_uring_prep_sendmsg_zc(sqe, fd, &msg, 0); + else + io_uring_prep_sendmsg(sqe, fd, &msg, 0); + + if (io_uring_submit(&ring) != 1) + error(1, errno, "io_uring_submit"); + + if (io_uring_wait_cqe(&ring, &cqe)) + error(1, errno, "io_uring_wait_cqe"); + + io_uring_cqe_seen(&ring, cqe); + + control_writeulong(iovec_hash_djb2(iovec, test_data->vecs_cnt)); + + control_writeln("DONE"); + io_uring_queue_exit(&ring); + free_test_iovec(test_data->vecs, iovec, test_data->vecs_cnt); + close(fd); +} + +static void vsock_io_uring_server(const struct test_opts *opts, + const struct vsock_io_uring_test *test_data) +{ + unsigned long remote_hash; + unsigned long local_hash; + struct io_uring ring; + size_t data_len; + size_t recv_len; + void *data; + int fd; + + fd = vsock_stream_accept(VMADDR_CID_ANY, 1234, NULL); + if (fd < 0) { + perror("accept"); + exit(EXIT_FAILURE); + } + + data_len = iovec_bytes(test_data->vecs, test_data->vecs_cnt); + + data = malloc(data_len); + if (!data) { + perror("malloc"); + exit(EXIT_FAILURE); + } + + if (io_uring_queue_init(RING_ENTRIES_NUM, &ring, 0)) + error(1, errno, "io_uring_queue_init"); + + recv_len = 0; + + while (recv_len < data_len) { + struct io_uring_sqe *sqe; + struct io_uring_cqe *cqe; + struct iovec iovec; + + sqe = io_uring_get_sqe(&ring); + iovec.iov_base = data + recv_len; + iovec.iov_len = data_len; + + io_uring_prep_readv(sqe, fd, &iovec, 1, 0); + + if (io_uring_submit(&ring) != 1) + error(1, errno, "io_uring_submit"); + + if (io_uring_wait_cqe(&ring, &cqe)) + error(1, errno, "io_uring_wait_cqe"); + + recv_len += cqe->res; + io_uring_cqe_seen(&ring, cqe); + } + + if (recv_len != data_len) { + fprintf(stderr, "expected %zu, got %zu\n", data_len, + recv_len); + exit(EXIT_FAILURE); + } + + local_hash = hash_djb2(data, data_len); + + remote_hash = control_readulong(); + if (remote_hash != local_hash) { + fprintf(stderr, "hash mismatch\n"); + exit(EXIT_FAILURE); + } + + control_expectln("DONE"); + io_uring_queue_exit(&ring); + free(data); +} + +void test_stream_uring_server(const struct test_opts *opts) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(test_data_array); i++) + vsock_io_uring_server(opts, &test_data_array[i]); +} + +void test_stream_uring_client(const struct test_opts *opts) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(test_data_array); i++) + vsock_io_uring_client(opts, &test_data_array[i], false); +} + +void test_stream_uring_msg_zc_server(const struct test_opts *opts) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(test_data_array); i++) + vsock_io_uring_server(opts, &test_data_array[i]); +} + +void test_stream_uring_msg_zc_client(const struct test_opts *opts) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(test_data_array); i++) + vsock_io_uring_client(opts, &test_data_array[i], true); +} + +static struct test_case test_cases[] = { + { + .name = "SOCK_STREAM io_uring test", + .run_server = test_stream_uring_server, + .run_client = test_stream_uring_client, + }, + { + .name = "SOCK_STREAM io_uring MSG_ZEROCOPY test", + .run_server = test_stream_uring_msg_zc_server, + .run_client = test_stream_uring_msg_zc_client, + }, + {}, +}; + +static const char optstring[] = ""; +static const struct option longopts[] = { + { + .name = "control-host", + .has_arg = required_argument, + .val = 'H', + }, + { + .name = "control-port", + .has_arg = required_argument, + .val = 'P', + }, + { + .name = "mode", + .has_arg = required_argument, + .val = 'm', + }, + { + .name = "peer-cid", + .has_arg = required_argument, + .val = 'p', + }, + { + .name = "help", + .has_arg = no_argument, + .val = '?', + }, + {}, +}; + +static void usage(void) +{ + fprintf(stderr, "Usage: vsock_uring_test [--help] [--control-host=<host>] --control-port=<port> --mode=client|server --peer-cid=<cid>\n" + "\n" + " Server: vsock_uring_test --control-port=1234 --mode=server --peer-cid=3\n" + " Client: vsock_uring_test --control-host=192.168.0.1 --control-port=1234 --mode=client --peer-cid=2\n" + "\n" + "Run transmission tests using io_uring. Usage is the same as\n" + "in ./vsock_test\n" + "\n" + "Options:\n" + " --help This help message\n" + " --control-host <host> Server IP address to connect to\n" + " --control-port <port> Server port to listen on/connect to\n" + " --mode client|server Server or client mode\n" + " --peer-cid <cid> CID of the other side\n" + ); + exit(EXIT_FAILURE); +} + +int main(int argc, char **argv) +{ + const char *control_host = NULL; + const char *control_port = NULL; + struct test_opts opts = { + .mode = TEST_MODE_UNSET, + .peer_cid = VMADDR_CID_ANY, + }; + + init_signals(); + + for (;;) { + int opt = getopt_long(argc, argv, optstring, longopts, NULL); + + if (opt == -1) + break; + + switch (opt) { + case 'H': + control_host = optarg; + break; + case 'm': + if (strcmp(optarg, "client") == 0) { + opts.mode = TEST_MODE_CLIENT; + } else if (strcmp(optarg, "server") == 0) { + opts.mode = TEST_MODE_SERVER; + } else { + fprintf(stderr, "--mode must be \"client\" or \"server\"\n"); + return EXIT_FAILURE; + } + break; + case 'p': + opts.peer_cid = parse_cid(optarg); + break; + case 'P': + control_port = optarg; + break; + case '?': + default: + usage(); + } + } + + if (!control_port) + usage(); + if (opts.mode == TEST_MODE_UNSET) + usage(); + if (opts.peer_cid == VMADDR_CID_ANY) + usage(); + + if (!control_host) { + if (opts.mode != TEST_MODE_SERVER) + usage(); + control_host = "0.0.0.0"; + } + + control_init(control_host, control_port, + opts.mode == TEST_MODE_SERVER); + + run_tests(test_cases, &opts); + + control_cleanup(); + + return 0; +} |